In [1]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

In [2]:
train_data = pd.read_csv('data/train_cleaned.csv')

train_data.head()

Unnamed: 0,surgery,age,hospital_number,rectal_temp,pulse,respiratory_rate,temp_of_extremities,peripheral_pulse,mucous_membrane,capillary_refill_time,...,packed_cell_volume,total_protein,abdomo_appearance,abdomo_protein,surgical_lesion,lesion_1,lesion_2,lesion_3,cp_data,outcome
0,1.0,0.0,530001,38.1,132.0,24.0,1.0,3.0,2.0,2.0,...,57.0,8.5,2.0,3.4,1.0,2209,0,0,0.0,0.0
1,1.0,0.0,533836,37.5,88.0,12.0,1.0,2.0,4.0,2.0,...,33.0,64.0,2.0,2.0,1.0,2208,0,0,0.0,1.0
2,1.0,0.0,5262541,37.1,72.0,30.0,0.0,3.0,5.0,2.0,...,53.0,7.0,1.0,3.9,1.0,2208,0,0,1.0,2.0
3,0.0,0.0,5299629,38.0,52.0,48.0,2.0,2.0,3.0,1.0,...,47.0,7.3,1.0,2.6,0.0,0,0,0,1.0,2.0
4,0.0,0.0,529642,38.1,56.0,32.0,2.0,2.0,0.0,1.0,...,49.0,8.0,1.0,2.8,0.0,0,0,0,1.0,2.0


opdel data i features og labels

In [3]:
X = train_data.drop(columns=['outcome'])
y = train_data['outcome']

X_train, X_test,\
    y_train, y_test = train_test_split(X, y,
                                       test_size=0.2,
                                       random_state=42)

In [4]:
def create_model(optimizer='adam', neurons=64, epochs=50, batch_size=32, validation_split=0.2):
    model = keras.Sequential([
        keras.layers.Dense(neurons, activation='relu', input_shape=(X_train.shape[1],)),
        keras.layers.Dense(32, activation='relu'),
        keras.layers.Dense(3, activation='softmax')
    ])
    model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_split=validation_split, verbose=0)
    return model

In [5]:
model = KerasClassifier(build_fn=create_model, verbose=2)

pipeline = Pipeline([
    ('pca', PCA()),
    ('clf', model)
])

param_grid = {
    'clf__optimizer': ['adam', 'rmsprop'],
    'clf__neurons': [32, 64, 128],
    'clf__epochs': [50, 100],
    'clf__batch_size': [16, 32],
    'clf__validation_split': [0.2, 0.3],
    'pca__n_components': [2, 3, 4, 5]
}



  model = KerasClassifier(build_fn=create_model, verbose=2)
  ('pca', pca)
  ('pca', pca)
  ('pca', pca)
  ('pca', pca)


TypeError: 'tuple' object is not callable

In [None]:
grid = GridSearchCV(estimator=pipeline, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=-1)

grid_result = grid.fit(X_train, y_train)

Epoch 1/50
33/33 - 1s - loss: 1153.4252 - accuracy: 0.3821 - val_loss: 901.5860 - val_accuracy: 0.4773 - 526ms/epoch - 16ms/step
Epoch 2/50
33/33 - 0s - loss: 1502.7850 - accuracy: 0.4125 - val_loss: 1480.9338 - val_accuracy: 0.4773 - 73ms/epoch - 2ms/step
Epoch 3/50
33/33 - 0s - loss: 3087.2642 - accuracy: 0.4259 - val_loss: 3189.3613 - val_accuracy: 0.3106 - 72ms/epoch - 2ms/step
Epoch 4/50
33/33 - 0s - loss: 1625.2533 - accuracy: 0.3650 - val_loss: 1655.9772 - val_accuracy: 0.4773 - 70ms/epoch - 2ms/step
Epoch 5/50
33/33 - 0s - loss: 1808.3986 - accuracy: 0.3802 - val_loss: 1343.3945 - val_accuracy: 0.4773 - 67ms/epoch - 2ms/step
Epoch 6/50
33/33 - 0s - loss: 1284.4008 - accuracy: 0.3650 - val_loss: 1692.4102 - val_accuracy: 0.2045 - 67ms/epoch - 2ms/step
Epoch 7/50
33/33 - 0s - loss: 1519.6797 - accuracy: 0.3992 - val_loss: 1556.2324 - val_accuracy: 0.4773 - 59ms/epoch - 2ms/step
Epoch 8/50
33/33 - 0s - loss: 3142.5146 - accuracy: 0.3802 - val_loss: 746.2851 - val_accuracy: 0.4773 

KeyboardInterrupt: 

In [None]:
# Get the best model and evaluate on the test data
best_model = grid_result.best_estimator_

y_pred = best_model.predict(X_test)

# Evaluate multiple metrics on the test data
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='macro')
recall = recall_score(y_test, y_pred, average='macro')
f1 = f1_score(y_test, y_pred, average='macro')
roc_auc = roc_auc_score(y_test, best_model.predict_proba(X_test), average='macro', multi_class='ovr')

# Print the best hyperparameters and test metrics
print("Best Hyperparameters:", grid_result.best_params_)
print("Test Metrics:")
print("  Accuracy:", accuracy)
print("  Precision:", precision)
print("  Recall:", recall)
print("  F1 Score:", f1)
print("  ROC AUC Score:", roc_auc)

Best Hyperparameters: {'batch_size': 16, 'epochs': 50, 'neurons': 128, 'optimizer': 'adam', 'validation_split': 0.2}
Test Metrics:
  Accuracy: 0.4939271255060729
  Precision: 0.49728997289972904
  Recall: 0.3395061728395062
  F1 Score: 0.2319213937742548
  ROC AUC Score: 0.5412229646803364


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
