In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV



In [13]:
df = pd.read_csv('../data/clinical_training_data.csv')

# For converting categorical variables to ints
label_encoder = LabelEncoder()
scaler = MinMaxScaler()

# Split features / target
X = df.drop(columns=['AD_dx_in_5_yrs'])
y = df['AD_dx_in_5_yrs']

# Encode features
X["DX.bl"] = label_encoder.fit_transform(X["DX.bl"])
X["PTGENDER"] = label_encoder.fit_transform(X["PTGENDER"])
X["PTETHCAT"] = label_encoder.fit_transform(X["PTETHCAT"])
X["PTRACCAT"] = label_encoder.fit_transform(X["PTRACCAT"])

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale data
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Instantiate Model
svc = SVC()

## Grid Search

In [14]:
# Grid search
param_grid = {
    "C": [0.1, 1, 10],                
    "kernel": ["linear", "poly", "rbf", "sigmoid"],
    "gamma": ["scale", "auto", 0.1],
    "degree": [1, 2, 3, 4, 5]}

In [15]:
grid_search = GridSearchCV(estimator=svc, param_grid=param_grid, cv=5, scoring="accuracy")

In [16]:
grid_search.fit(X_train, y_train)

GridSearchCV(cv=5, estimator=SVC(),
             param_grid={'C': [0.1, 1, 10], 'degree': [1, 2, 3, 4, 5],
                         'gamma': ['scale', 'auto', 0.1],
                         'kernel': ['linear', 'poly', 'rbf', 'sigmoid']},
             scoring='accuracy')

In [17]:
# Get the best parameters and best accuracy score
best_params = grid_search.best_params_
best_score = grid_search.best_score_

# Print the best parameters and best score
print("Best Parameters:", best_params)
print("Best Accuracy:", best_score)

Best Parameters: {'C': 0.1, 'degree': 4, 'gamma': 'scale', 'kernel': 'poly'}
Best Accuracy: 0.777753164556962


## Train with Best Parameters

In [18]:
# Instantiate and fit model
svc = SVC(C=0.1, degree=4, gamma="scale", kernel="poly")

svc.fit(X_train, y_train)

SVC(C=0.1, degree=4, kernel='poly')

In [20]:
# Assess
y_pred = svc.predict(X_test)

# evaluate predictions
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.91      0.77      0.84        79
           1       0.44      0.70      0.54        20

    accuracy                           0.76        99
   macro avg       0.67      0.74      0.69        99
weighted avg       0.81      0.76      0.78        99

