In [51]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import tensorflow as tf
from tensorflow.keras import layers, regularizers, callbacks
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

In [52]:
df = pd.read_csv('cardiovascular_risk_dataset.csv')

In [53]:
df

Unnamed: 0,Patient_ID,age,bmi,systolic_bp,diastolic_bp,cholesterol_mg_dl,resting_heart_rate,smoking_status,daily_steps,stress_level,physical_activity_hours_per_week,sleep_hours,family_history_heart_disease,diet_quality_score,alcohol_units_per_week,heart_disease_risk_score,risk_category
0,1,62,25.0,142,93,247,72,Never,11565,3,5.6,8.2,No,7,0.7,28.1,Medium
1,2,54,29.7,158,101,254,74,Current,4036,8,0.5,6.7,No,5,4.5,63.0,High
2,3,46,36.2,170,113,276,80,Current,3043,9,0.4,4.0,No,1,20.8,73.1,High
3,4,48,30.4,153,98,230,73,Former,5604,5,0.6,8.0,No,4,8.5,39.5,Medium
4,5,46,25.3,139,87,206,69,Current,7464,1,2.0,6.1,No,5,3.6,29.3,Medium
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5495,5496,19,26.0,121,75,185,84,Never,6724,3,2.9,7.2,No,7,0.0,0.0,Low
5496,5497,18,30.9,128,82,235,75,Never,3661,4,0.0,5.5,No,1,9.6,16.8,Low
5497,5498,63,29.5,142,92,239,69,Never,6643,5,4.1,6.9,No,6,2.4,31.8,Medium
5498,5499,46,27.5,138,91,237,65,Never,3279,3,2.4,5.8,Yes,5,2.3,29.4,Medium


In [54]:
if 'Patient_ID' in df.columns:
    df = df.drop('Patient_ID', axis=1)

In [71]:
risk_labels = {0: "Low Risk", 1: "Medium Risk", 2: "High Risk"}

In [72]:
le_target = LabelEncoder()
df['risk_category'] = le_target.fit_transform(df['risk_category'])
le_features = LabelEncoder()
for col in df.select_dtypes(include=['object']).columns:
    if col != 'risk_category':
        df[col] = le_features.fit_transform(df[col].astype(str))

In [73]:
X = df.drop('risk_category', axis=1).values
y = df['risk_category'].values

In [74]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train).astype(np.float32)
X_val = scaler.transform(X_val).astype(np.float32)

In [78]:
input_dim = X_train.shape[1] 
num_classes = len(le_target.classes_)
class_names = le.classes_
print(f"Кол-во колонок: {input_dim:.4f}")
print(f"Классы: {num_classes:.4f}")
for code, name in risk_labels.items():
    print(f"Класс {code} —  {name}")

Кол-во колонок: 15.0000
Классы: 3.0000
Класс 0 —  Low Risk
Класс 1 —  Medium Risk
Класс 2 —  High Risk


In [79]:
tf_model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(input_dim,)),
    tf.keras.layers.Dense(64, kernel_regularizer=tf.keras.regularizers.l1_l2(l1=1e-5, l2=1e-4)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Activation('relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(32, kernel_regularizer=tf.keras.regularizers.l2(1e-4)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Activation('relu'),
    tf.keras.layers.Dropout(0.2),
    
    tf.keras.layers.Dense(num_classes, activation='softmax')
])

tf_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
tf_es = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

In [80]:
tf_model.fit(X_train, y_train, validation_data=(X_val, y_val), 
             epochs=50, batch_size=32, callbacks=[tf_es], verbose=0)
tf_metrics = tf_model.evaluate(X_val, y_val, verbose=0)
print(f"Точность TensorFlow: {tf_metrics[1]:.4f}")

Точность TensorFlow: 0.9836


In [81]:
X_t, y_t = torch.tensor(X_train), torch.tensor(y_train).long()
X_v, y_v = torch.tensor(X_val), torch.tensor(y_val).long()

In [82]:
pt_model = nn.Sequential(
    nn.Linear(input_dim, 64),
    nn.BatchNorm1d(64),
    nn.ReLU(),
    nn.Dropout(0.3),
    nn.Linear(64, 32),
    nn.BatchNorm1d(32),
    nn.ReLU(),
    nn.Dropout(0.2),
    nn.Linear(32, num_classes)
)

optimizer = torch.optim.Adam(pt_model.parameters(), lr=0.001, weight_decay=1e-4)
criterion = nn.CrossEntropyLoss()
best_v_loss = float('inf')
trigger = 0
l1_lambda = 1e-5

for epoch in range(100):
    pt_model.train()
    optimizer.zero_grad()
    outputs = pt_model(X_t)
    loss = criterion(outputs, y_t)
    l1_reg = sum(p.abs().sum() for p in pt_model.parameters())
    (loss + l1_lambda * l1_reg).backward()
    optimizer.step()
    

    pt_model.eval()
    with torch.no_grad():
        v_loss = criterion(pt_model(X_v), y_v)
        if v_loss < best_v_loss:
            best_v_loss = v_loss
            trigger = 0
            best_weights = pt_model.state_dict()
        else:
            trigger += 1 
        if trigger >= 10:
            print(f"Early Stopping на эпохе {epoch}")
            break

In [83]:
pt_model.load_state_dict(best_weights)
pt_model.eval()
with torch.no_grad():
    preds = pt_model(X_v)
    pt_acc = (preds.argmax(1) == y_v).float().mean()
    print(f"Точность PyTorch: {pt_acc.item():.4f}")

Точность PyTorch: 0.9464
