In [1]:
# Import libraries
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score, recall_score
from imblearn.over_sampling import SMOTE
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense, Dropout
from keras.optimizers import SGD
import pandas as pd

2024-11-25 16:09:34.982279: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-11-25 16:09:34.999166: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1732547375.017694 2159356 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1732547375.023333 2159356 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-25 16:09:35.042500: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

In [2]:
import os
import tensorflow as tf

# Forceer geen GPU gebruik
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
tf.config.set_visible_devices([], 'GPU')

2024-11-25 16:09:36.513450: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2024-11-25 16:09:36.513490: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:137] retrieving CUDA diagnostic information for host: janssenp2.mads-han.src.surf-hosted.nl
2024-11-25 16:09:36.513495: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:144] hostname: janssenp2.mads-han.src.surf-hosted.nl
2024-11-25 16:09:36.513650: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:168] libcuda reported version is: 535.183.1
2024-11-25 16:09:36.513668: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:172] kernel reported version is: 535.183.1
2024-11-25 16:09:36.513672: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:259] kernel version seems to match DSO: 535.183.1


In [3]:
# Stel de padlocatie in van het Parquet-bestand
path_train =  '../../Mads_hackathon_dreamteam/data/raw/heart_big_train.parq'
path_valid = '../../Mads_hackathon_dreamteam/data/raw/heart_big_valid.parq'

#import dataframe
df_valid = pd.read_parquet(path_valid)
df_train = pd.read_parquet(path_train)

In [4]:
# Features (X) en target (y) splitsen
X_train = df_train.iloc[:, :-1].values  # Alle kolommen behalve 'target'
y_train = df_train.iloc[:, -1].values  # Laatste kolom is 'target'

X_valid = df_valid.iloc[:, :-1].values
y_valid = df_valid.iloc[:, -1].values

# Schalen van features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_valid = scaler.transform(X_valid)

# Klassenbalans corrigeren met SMOTE
smote = SMOTE(random_state=42)
X_train_balanced, y_train_balanced = smote.fit_resample(X_train, y_train)

# Functie om TP-rate per klasse te berekenen
def calculate_tp_rate(y_true, y_pred):
    class_recall = recall_score(y_true, y_pred, average=None)  # Recall per klasse
    return {f"Class {i}": recall for i, recall in enumerate(class_recall)}

# Evaluatie functie
def evaluate_model(model, X_valid, y_valid):
    y_pred = model.predict(X_valid)
    accuracy = accuracy_score(y_valid, y_pred)
    tp_rates = calculate_tp_rate(y_valid, y_pred)
    print(f"Accuracy: {accuracy}")
    print("TP Rates per class:", tp_rates)
    print("Classification Report:\n", classification_report(y_valid, y_pred))


In [5]:

# Zorg ervoor dat de data gebalanceerd is
smote = SMOTE(random_state=42)
X_train_balanced, y_train_balanced = smote.fit_resample(X_train, y_train)

# Neural Network Model met Dropout en EarlyStopping
print("\n--- Neural Network ---")
nn_model = Sequential()

# Eerste verborgen laag met Dropout
nn_model.add(Dense(128, activation='relu', input_shape=(X_train_balanced.shape[1],)))
nn_model.add(Dropout(0.3))  # Dropout laag om overfitting te voorkomen
# Tweede verborgen laag
nn_model.add(Dense(64, activation='relu'))
nn_model.add(Dropout(0.3))  # Dropout laag
# derde verborgen laag
nn_model.add(Dense(32, activation='selu'))
nn_model.add(Dropout(0.3))  # Dropout laag

# Output laag voor 5 klassen
nn_model.add(Dense(5, activation='softmax'))

# Compileer het model
nn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# nn_model.compile(optimizer=SGD(learning_rate=0.01, momentum=0.9), loss='sparse_categorical_crossentropy', metrics=['accuracy'])


# Early stopping om overfitting te voorkomen
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Train het model
nn_model.fit(X_train_balanced, y_train_balanced, epochs=50, batch_size=32, verbose=1, 
             validation_data=(X_valid, y_valid), callbacks=[early_stopping])

# Evaluatie van het Neural Network
y_pred_nn = nn_model.predict(X_valid).argmax(axis=1)

# Bereken accuracy en TP rate per klasse
accuracy_nn = accuracy_score(y_valid, y_pred_nn)
tp_rates_nn = calculate_tp_rate(y_valid, y_pred_nn)

# Print de resultaten
print(f"Neural Network Accuracy: {accuracy_nn}")
print("TP Rates per class:", tp_rates_nn)
print("Classification Report:\n", classification_report(y_valid, y_pred_nn))




--- Neural Network ---


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m11324/11324[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 1ms/step - accuracy: 0.8246 - loss: 0.4806 - val_accuracy: 0.9046 - val_loss: 0.2840
Epoch 2/50
[1m11324/11324[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 2ms/step - accuracy: 0.9198 - loss: 0.2261 - val_accuracy: 0.9356 - val_loss: 0.2033
Epoch 3/50
[1m11324/11324[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 1ms/step - accuracy: 0.9355 - loss: 0.1853 - val_accuracy: 0.9393 - val_loss: 0.1935
Epoch 4/50
[1m11324/11324[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 1ms/step - accuracy: 0.9427 - loss: 0.1654 - val_accuracy: 0.9535 - val_loss: 0.1459
Epoch 5/50
[1m11324/11324[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 1ms/step - accuracy: 0.9469 - loss: 0.1535 - val_accuracy: 0.9362 - val_loss: 0.1812
Epoch 6/50
[1m11324/11324[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 2ms/step - accuracy: 0.9507 - loss: 0.1442 - val_accuracy: 0.9466 - val_loss: 0.153

In [6]:

# Zorg ervoor dat de data gebalanceerd is
smote = SMOTE(random_state=42)
X_train_balanced, y_train_balanced = smote.fit_resample(X_train, y_train)

# Neural Network Model met Dropout en EarlyStopping
print("\n--- Neural Network ---")
nn_model = Sequential()

# Eerste verborgen laag met Dropout
nn_model.add(Dense(128, activation='relu', input_shape=(X_train_balanced.shape[1],)))
nn_model.add(Dropout(0.3))  # Dropout laag om overfitting te voorkomen
# Tweede verborgen laag
nn_model.add(Dense(128, activation='relu'))
nn_model.add(Dropout(0.3))  # Dropout laag
# Derde verborgen laag
nn_model.add(Dense(64, activation='relu'))
nn_model.add(Dropout(0.3))  # Dropout laag
# Vierde verborgen laag
nn_model.add(Dense(32, activation='relu'))
nn_model.add(Dropout(0.3))  # Dropout laag

# Output laag voor 5 klassen
nn_model.add(Dense(5, activation='softmax'))

# Compileer het model
nn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# nn_model.compile(optimizer=SGD(learning_rate=0.01, momentum=0.9), loss='sparse_categorical_crossentropy', metrics=['accuracy'])


# Early stopping om overfitting te voorkomen
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Train het model
nn_model.fit(X_train_balanced, y_train_balanced, epochs=50, batch_size=32, verbose=1, 
             validation_data=(X_valid, y_valid), callbacks=[early_stopping])

# Evaluatie van het Neural Network
y_pred_nn = nn_model.predict(X_valid).argmax(axis=1)

# Bereken accuracy en TP rate per klasse
accuracy_nn = accuracy_score(y_valid, y_pred_nn)
tp_rates_nn = calculate_tp_rate(y_valid, y_pred_nn)

# Print de resultaten
print(f"Neural Network Accuracy: {accuracy_nn}")
print("TP Rates per class:", tp_rates_nn)
print("Classification Report:\n", classification_report(y_valid, y_pred_nn))




--- Neural Network ---


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m11324/11324[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 2ms/step - accuracy: 0.8076 - loss: 0.5259 - val_accuracy: 0.9152 - val_loss: 0.2545
Epoch 2/50
[1m11324/11324[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 2ms/step - accuracy: 0.9200 - loss: 0.2351 - val_accuracy: 0.9348 - val_loss: 0.1926
Epoch 3/50
[1m11324/11324[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 2ms/step - accuracy: 0.9374 - loss: 0.1878 - val_accuracy: 0.9414 - val_loss: 0.1729
Epoch 4/50
[1m11324/11324[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 1ms/step - accuracy: 0.9457 - loss: 0.1657 - val_accuracy: 0.9331 - val_loss: 0.1980
Epoch 5/50
[1m11324/11324[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 2ms/step - accuracy: 0.9492 - loss: 0.1571 - val_accuracy: 0.9402 - val_loss: 0.1603
Epoch 6/50
[1m11324/11324[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 2ms/step - accuracy: 0.9541 - loss: 0.1453 - val_accuracy: 0.9525 - val_loss: 0.147