In [None]:
### Train SVM

#Import libraries
import argparse
import logging
import os
import pickle
import traceback

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import sklearn
from sklearn.compose import ColumnTransformer
from sklearn.datasets import fetch_openml
from sklearn.ensemble import RandomForestClassifier
from sklearn.inspection import permutation_importance
from sklearn.metrics import make_scorer
from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import multilabel_confusion_matrix
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder, label_binarize

# Load the feature table.
# Every input and output file lives under <cwd>/Processing/CT/; backslashes in
# the working directory are turned into forward slashes so that the plain
# string concatenations used throughout the notebook produce valid paths.
root_path = "/".join(os.getcwd().split("\\")) + "/Processing/CT/"
df = pd.read_excel(root_path + 'In vitro_DRR_features_no_id.xlsx')
df.head()
param = df.columns

# Load the ground-truth table (the 'Target' column is used as the label).
df_truth = pd.read_excel(root_path + "In vitro_DRR_eq_ground truth.xlsx")

# Build the design matrix X (a copy, so df itself stays untouched) and the
# single-column label frame y.
X = df.copy()
y = df_truth.loc[:, ['Target']]

# Columns listed here are NOT standardised; they are passed through unchanged.
exceptions = [
    "Vertebrae_L1",
    "Vertebrae_L2",
    "Vertebrae_L3",
    "Vertebrae_L4",
    "Sexe",
]
cols = X.columns.difference(exceptions).values

# Every remaining column is z-scored by the ColumnTransformer below.
numeric_features = cols.copy()
from sklearn.preprocessing import StandardScaler
ct = ColumnTransformer(
    transformers=[('num', StandardScaler(), numeric_features)],
    remainder="passthrough",
)

# Define the classifier: an SVM with an RBF kernel and a fixed seed.
# The kernel can be swapped for another type (e.g. 'linear').
from sklearn.svm import SVC
classifier = SVC(random_state=0, kernel='rbf')

# Scorers evaluated by cross_validate; each key becomes a "test_<key>" entry
# in the result dictionary. A confusion-matrix scorer was tried and is left
# disabled.
# NOTE(review): the "roc_auc" scorer presumably requires a binary Target -
# confirm against the ground-truth file.
from sklearn.metrics import make_scorer

scoring = {
    'precision_score': "precision_weighted",
    "accuracy_score": make_scorer(accuracy_score),
    "balanced_accuracy_score": "balanced_accuracy",
    "recall_score": "recall_weighted",
    "f1_score": "f1_weighted",
    "AUC_score": "roc_auc",
}

# Helpers for cross-validated predictions and confusion matrices.
from sklearn.model_selection import KFold, cross_val_predict, cross_validate
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix

# 10 shuffled folds with a fixed seed so the split is reproducible.
k_fold = KFold(n_splits=10, shuffle=True, random_state=0)
# Train, save and evaluate one SVM per cross-validation fold.
# For every fold this writes the pickled classifier, an Excel sheet with the
# test-set predictions and an Excel sheet with per-feature importances, and
# plots the fold's confusion matrix. Fold numbers in file names start at 1.
for i, (train_index, test_index) in enumerate(k_fold.split(X), start=1):
    # Get data for fold
    X_train, X_test = X.iloc[train_index, :], X.iloc[test_index, :]
    y_train, y_test = y.iloc[train_index, :], y.iloc[test_index, :]

    # Fit the scaler on the training part only and re-use it on the test part,
    # so no statistics from the test rows enter the scaling.
    X_train = ct.fit_transform(X_train)
    X_test = ct.transform(X_test)

    # Fit SVM classifier
    classifier.fit(X_train, y_train.values.ravel())

    # Make predictions on test set
    pred = classifier.predict(X_test)

    # Save the fitted classifier; the context manager closes the file handle.
    # NOTE(review): only the SVM is pickled, not the fitted scaler `ct`, so a
    # reloaded model must be fed data scaled the same way.
    filename = root_path + 'DRR_SVM_rbf_fold_' + str(i) + '.sav'
    with open(filename, 'wb') as model_file:
        pickle.dump(classifier, model_file)

    # Save the test-set predictions of this fold
    result = pd.DataFrame({'predictions': pred})
    result.to_excel(root_path + 'DRR_SVM_rbf_predictions_fold_' + str(i) + ".xlsx")

    # Obtain confusion matrix (2 formats: flattened print-out and plot).
    # Passing the fitted class labels keeps the matrix shape identical across
    # folds even if a class is missing from one test split.
    cm = confusion_matrix(y_test.values.ravel(), pred, labels=classifier.classes_)
    print("Fold :" + str(i))
    print(cm.ravel())
    cm_display = ConfusionMatrixDisplay(cm, display_labels=classifier.classes_).plot()

    # Obtain feature importances.
    # The ColumnTransformer re-orders columns (scaled ones first, passthrough
    # ones last), so names are taken from the transformer itself and the
    # "<transformer>__" prefix it adds is stripped.
    features = [name.split("__", 1)[-1] for name in ct.get_feature_names_out()]
    if classifier.kernel == 'linear' and classifier.coef_.shape[0] == 1:
        # coef_ only exists for a linear kernel; one row means a binary problem.
        importances = classifier.coef_.ravel()
    else:
        # Non-linear kernels expose no coefficients: fall back to the mean
        # drop in test-set score when each feature is shuffled.
        importances = permutation_importance(
            classifier,
            X_test,
            y_test.values.ravel(),
            n_repeats=10,
            random_state=0,
        ).importances_mean
    f_imp = pd.DataFrame({'feature': features, 'importance': importances})
    excel_path = root_path + "DRR_SVM_rbf_FI_fold_" + str(i) + ".xlsx"
    f_imp.to_excel(excel_path)

In [None]:
# Obtain the performance scores for the 10 folds.
# The scaler and the SVM are wrapped in one Pipeline so that, inside every
# fold, the scaler is fitted on the training rows only and the SVM is scored
# on scaled data. cross_validate clones the pipeline per fold, so the `ct`
# and `classifier` objects themselves are not refitted here. Re-using
# `k_fold` evaluates on the same shuffled splits as the per-fold training
# loop. X is passed as a DataFrame because `ct` selects columns by name.
svm_pipeline = Pipeline(steps=[('preprocess', ct), ('svm', classifier)])
results = cross_validate(
    svm_pipeline,
    X,
    y.values.ravel(),
    cv=k_fold,
    scoring=scoring,
    verbose=0,
)
#print(results)

# Save the per-fold scores to Excel (one row per fold; take the column means
# of this table to get the average performance).
pd.DataFrame.from_dict(results).to_excel(root_path + 'DRR_SVM_rbf_results.xlsx')