In [1]:
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE

In [2]:
# 1. Φόρτωση
df = pd.read_csv('framingham.csv')

In [3]:
# 2. Imputation (Γέμισμα κενών)
df = df.fillna(df.median(numeric_only=True))

In [4]:
# 3. Handling Outliers (Capping / Winsorization)
# Περιορίζουμε τις ακραίες τιμές στο 1% και 99% για να μην επηρεάζουν το scaling
def handle_outliers(df, columns):
    for col in columns:
        lower_limit = df[col].quantile(0.01)
        upper_limit = df[col].quantile(0.99)
        df[col] = np.clip(df[col], lower_limit, upper_limit)
    return df

outlier_cols = ['totChol', 'sysBP', 'diaBP', 'BMI', 'heartRate', 'glucose']
df = handle_outliers(df, outlier_cols)

In [5]:
# 4. Feature Engineering
df['pulse_pressure'] = df['sysBP'] - df['diaBP']

print("Outliers handled and features created.")

Outliers handled and features created.


In [6]:
# 1. Features/Target
X = df.drop('TenYearCHD', axis=1)
y = df['TenYearCHD']

In [7]:
# 2. Split (ΠΡΙΝ το scaling)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [8]:
# 3. Scaling (Fit μόνο στο Train)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [9]:
# 4. SMOTE (Μόνο στο Scaled Train)
sm = SMOTE(random_state=42)
X_train_res, y_train_res = sm.fit_resample(X_train_scaled, y_train)

In [10]:
# Αποθήκευση Scaler
joblib.dump(scaler, 'scaler.pkl')

['scaler.pkl']

In [11]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Input
from tensorflow.keras.callbacks import EarlyStopping

2026-01-23 10:15:21.625152: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2026-01-23 10:15:21.793699: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX512F AVX512_VNNI, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [12]:
model = Sequential([
    Input(shape=(X_train_res.shape[1],)),
    Dense(32, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),
    Dense(16, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid')
])

In [13]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy', tf.keras.metrics.Recall()])

In [14]:
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

In [15]:
history = model.fit(
    X_train_res, y_train_res,
    epochs=60,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stop],
    verbose=1
)

Epoch 1/60
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.5894 - loss: 0.7298 - recall: 0.3598 - val_accuracy: 0.3666 - val_loss: 0.7954 - val_recall: 0.3666
Epoch 2/60
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6407 - loss: 0.6461 - recall: 0.3812 - val_accuracy: 0.3805 - val_loss: 0.8027 - val_recall: 0.3805
Epoch 3/60
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6567 - loss: 0.6159 - recall: 0.4032 - val_accuracy: 0.4474 - val_loss: 0.7905 - val_recall: 0.4474
Epoch 4/60
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6683 - loss: 0.5999 - recall: 0.3975 - val_accuracy: 0.4978 - val_loss: 0.7709 - val_recall: 0.4978
Epoch 5/60
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6746 - loss: 0.5953 - recall: 0.4374 - val_accuracy: 0.4700 - val_loss: 0.7721 - val_recall: 0.4700


In [16]:
model.save('best_heart_model.keras')