In [None]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC

In [None]:
# Load the feature table and the label table; in both files the first CSV
# column is used as the row index.
features_csv = 'data/dataset/dfu_features_dataset.csv'
labels_csv = 'data/dataset/dfu_labels_dataset.csv'

X_df = pd.read_csv(features_csv, index_col=0)
y_df = pd.read_csv(labels_csv, index_col=0)

In [None]:
def test(X, y):
    from sklearn.svm import SVC
    from sklearn.model_selection import KFold

    k_folds = 5
    kfold = KFold(n_splits=k_folds, shuffle=True, random_state=42)

    y_true = []
    y_predict = [] 

    for fold, (train_ids, test_ids) in enumerate(kfold.split(X)):
        X_train, X_test = X[train_ids], X[test_ids]
        y_train, y_test = y[train_ids], y[test_ids]
        # svc = SVC(kernel='linear', C=1.0, random_state=0)
        svc = SVC(kernel='rbf', C=1.0, gamma=.1, random_state=0)
        svc.fit(X_train, y_train)

        y_true.extend(y_test)
        y_predict.extend(svc.predict(X_test))

        print(f'Acc. Fold {fold}: {svc.score(X_test, y_test)}')

    return y_true, y_predict

In [None]:
def plot_confusion_matrix(y_true, y_predict):
    """Draw a normalised confusion matrix with enlarged fonts.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_predict : array-like
        Predicted labels, aligned element-wise with ``y_true``.

    Returns
    -------
    matplotlib.figure.Figure
        The 10x8 inch figure holding the matrix and its colorbar.
    """
    from sklearn.metrics import ConfusionMatrixDisplay
    from matplotlib import pyplot as plt

    # Tick labels for the two classes.
    # NOTE(review): assumes the sorted class labels correspond to 'CG' then
    # 'DM' — confirm against the label encoding.
    labels = ['CG', 'DM']

    # Create the target axes first and hand them to from_predictions so the
    # matrix is drawn exactly once. Called without ax=, from_predictions opens
    # a figure of its own, which used to be left behind as a stray duplicate
    # next to the figure returned here.
    fig, ax = plt.subplots(figsize=(10, 8))
    ConfusionMatrixDisplay.from_predictions(
        y_true,
        y_predict,
        display_labels=labels,
        cmap='Blues',
        normalize='true',  # normalise over the true (row) classes
        ax=ax,
    )

    # Increase tick label size
    ax.tick_params(axis='both', which='major', labelsize=20)
    # Increase axis label sizes
    ax.set_xlabel(ax.get_xlabel(), fontsize=18)
    ax.set_ylabel(ax.get_ylabel(), fontsize=20)
    # Increase the size of the values written inside the cells
    for text in ax.texts:
        text.set_size(18)
    # Increase colorbar tick size; the colorbar is the last axes in the figure
    ax.figure.axes[-1].tick_params(labelsize=18)

    return fig


In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Empty results table: one column per score; rows are added per feature set.
metric_columns = ['Accuracy', 'Precision', 'Recall', 'F1-Score']
metrics_df = pd.DataFrame(columns=metric_columns)

# Random Forests Features

In [None]:
# Read the Random Forest feature ranking and keep the names found in its
# first ten rows (presumably the ten most important — confirm the CSV is sorted).
df = pd.read_csv('data/features_importance/original/random_forest.csv')
ranked_names = df['Features']
features = ranked_names.iloc[:10].values
print(features)

In [None]:
# Feature matrix: the selected columns as a float32 array.
selected_values = X_df[features].to_numpy()
X = selected_values.astype(np.float32)

# Label vector: the label table flattened to one dimension.
label_values = y_df.to_numpy()
y = label_values.ravel()

In [None]:
# Cross-validate the SVM on the Random Forest feature subset.
y_true, y_predict = test(X, y)

# Score the predictions pooled over all folds. The scorers are already
# imported in the cell that creates metrics_df, so they are not re-imported.
acc = accuracy_score(y_true, y_predict)
prec = precision_score(y_true, y_predict)
rec = recall_score(y_true, y_predict)
f1 = f1_score(y_true, y_predict)

# Store the scores as the 'Random Forest' row of the results table.
metrics_df.loc['Random Forest'] = [acc, prec, rec, f1]

In [None]:
# Plot the confusion matrix of the pooled predictions and export it as a PDF.
figure = plot_confusion_matrix(y_true, y_predict)
pdf_path = 'data/classifiers/imgs/RF.pdf'
figure.savefig(pdf_path, bbox_inches='tight')

# LASSO

In [None]:
# Read the LASSO feature ranking and keep the names found in its first ten
# rows (presumably the ten most important — confirm the CSV is sorted).
df = pd.read_csv('data/features_importance/original/lasso.csv')
ranked_names = df['Features']
features = ranked_names.iloc[:10].values
print(features)

In [None]:
# Feature matrix: the selected columns as a float32 array.
selected_values = X_df[features].to_numpy()
X = selected_values.astype(np.float32)

# Label vector: the label table flattened to one dimension.
label_values = y_df.to_numpy()
y = label_values.ravel()

In [None]:
# Cross-validate the SVM on the LASSO feature subset.
y_true, y_predict = test(X, y)

# Score the predictions pooled over all folds. The scorers are already
# imported in the cell that creates metrics_df, so they are not re-imported.
acc = accuracy_score(y_true, y_predict)
prec = precision_score(y_true, y_predict)
rec = recall_score(y_true, y_predict)
f1 = f1_score(y_true, y_predict)

# Store the scores as the 'LASSO' row of the results table.
metrics_df.loc['LASSO'] = [acc, prec, rec, f1]

In [None]:
# Plot the confusion matrix of the pooled predictions and export it as a PDF.
figure = plot_confusion_matrix(y_true, y_predict)
pdf_path = 'data/classifiers/imgs/LASSO.pdf'
figure.savefig(pdf_path, bbox_inches='tight')

# Concrete Dropout

In [None]:
# Read the Concrete Dropout feature ranking and keep the names found in its
# first ten rows (presumably the ten most important — confirm the CSV is sorted).
df = pd.read_csv('data/features_importance/original/concrete_dropout.csv')
ranked_names = df['Features']
features = ranked_names.iloc[:10].values
print(features)

In [None]:
# Feature matrix: the selected columns as a float32 array.
selected_values = X_df[features].to_numpy()
X = selected_values.astype(np.float32)

# Label vector: the label table flattened to one dimension.
label_values = y_df.to_numpy()
y = label_values.ravel()

In [None]:
# Cross-validate the SVM on the Concrete Dropout feature subset.
y_true, y_predict = test(X, y)

# Score the predictions pooled over all folds. The scorers are already
# imported in the cell that creates metrics_df, so they are not re-imported.
acc = accuracy_score(y_true, y_predict)
prec = precision_score(y_true, y_predict)
rec = recall_score(y_true, y_predict)
f1 = f1_score(y_true, y_predict)

# Store the scores as the 'Concrete Dropout' row of the results table.
metrics_df.loc['Concrete Dropout'] = [acc, prec, rec, f1]

In [None]:
# Plot the confusion matrix of the pooled predictions and export it as a PDF.
figure = plot_confusion_matrix(y_true, y_predict)
pdf_path = 'data/classifiers/imgs/Concrete.pdf'
figure.savefig(pdf_path, bbox_inches='tight')

# Variational Dropout

In [None]:
# Read the Variational Dropout feature ranking and keep the names found in its
# first ten rows (presumably the ten most important — confirm the CSV is sorted).
df = pd.read_csv('data/features_importance/original/variational_dropout.csv')
ranked_names = df['Features']
features = ranked_names.iloc[:10].values
print(features)

In [None]:
# Feature matrix: the selected columns as a float32 array.
selected_values = X_df[features].to_numpy()
X = selected_values.astype(np.float32)

# Label vector: the label table flattened to one dimension.
label_values = y_df.to_numpy()
y = label_values.ravel()

In [None]:
# Cross-validate the SVM on the Variational Dropout feature subset.
y_true, y_predict = test(X, y)

# Score the predictions pooled over all folds. The scorers are already
# imported in the cell that creates metrics_df, so they are not re-imported.
acc = accuracy_score(y_true, y_predict)
prec = precision_score(y_true, y_predict)
rec = recall_score(y_true, y_predict)
f1 = f1_score(y_true, y_predict)

# Store the scores as the 'Variational Dropout' row of the results table.
metrics_df.loc['Variational Dropout'] = [acc, prec, rec, f1]

In [None]:
# Plot the confusion matrix of the pooled predictions and export it as a PDF.
figure = plot_confusion_matrix(y_true, y_predict)
pdf_path = 'data/classifiers/imgs/Variational.pdf'
figure.savefig(pdf_path, bbox_inches='tight')

# Common Features:
As agreed, this is the urgently requested order of the features in coincidence (common to the selection methods), ranked: L_MPA_min (5), L_LPA_std (9), R_LPA_min (10), R_MCA_std (12), Foot_ETD (21), L_MCA_std (27), L_MPA_std (31), MCA_ETD (38), R_LCA_kurtosis (38), L_MPA_NRT_C3 (39)

In [None]:
# The ten features in coincidence, listed in the ranked order given above.
common_features = [
    'L_MPA_min',
    'L_LPA_std',
    'R_LPA_min',
    'R_MCA_std',
    'Foot_ETD',
    'L_MCA_std',
    'L_MPA_std',
    'MCA_ETD',
    'R_LCA_kurtosis',
    'L_MPA_NRT_C3',
]

In [None]:
# Feature matrix: the common-feature columns as a float32 array.
selected_values = X_df[common_features].to_numpy()
X = selected_values.astype(np.float32)

# Label vector: the label table flattened to one dimension.
label_values = y_df.to_numpy()
y = label_values.ravel()

In [None]:
# Cross-validate the SVM on the common-feature subset.
y_true, y_predict = test(X, y)

# Score the predictions pooled over all folds. The scorers are already
# imported in the cell that creates metrics_df, so they are not re-imported.
acc = accuracy_score(y_true, y_predict)
prec = precision_score(y_true, y_predict)
rec = recall_score(y_true, y_predict)
f1 = f1_score(y_true, y_predict)

# Store the scores as the 'Common Features' row of the results table.
metrics_df.loc['Common Features'] = [acc, prec, rec, f1]

In [None]:
# Plot the confusion matrix of the pooled predictions and export it as a PDF.
figure = plot_confusion_matrix(y_true, y_predict)
pdf_path = 'data/classifiers/imgs/CommonFeatures.pdf'
figure.savefig(pdf_path, bbox_inches='tight')

In [None]:
# Display the metrics table collected so far (one row per feature set evaluated above).
metrics_df

# Common Features (MixFeatures)

In [None]:
# Column pairs sharing a suffix under the 'L_' and 'R_' prefixes
# (presumably left/right foot — confirm); each pair is later averaged
# into a single feature.
common_features = [
    ['L_MPA_min', 'R_MPA_min'],
    ['L_LPA_std', 'R_LPA_std'],
    ['L_LPA_min', 'R_LPA_min'],
    ['L_MCA_std', 'R_MCA_std'],
    ['L_MPA_std', 'R_MPA_std'],
    ['L_LCA_kurtosis', 'R_LCA_kurtosis'],
    ['L_MPA_NRT_C3', 'R_MPA_NRT_C3'],
    ['L_MPA_skew', 'R_MPA_skew'],
]

# Columns without an L_/R_ counterpart; used unchanged.
other_features = [
    'Foot_ETD',
    'MCA_ETD',
]

In [None]:
# For every column pair, average the two columns row-wise and cast the result
# to float32; each pair then becomes one column of the stacked matrix.
paired_means = [
    X_df[[left_col, right_col]].mean(axis=1).to_numpy().astype(np.float32)
    for left_col, right_col in common_features
]

features = np.stack(paired_means, axis=1)

In [None]:
# Pull the unpaired columns into their own array. The column-name list
# `other_features` is deliberately left untouched: overwriting it with the
# array made this cell fail with a KeyError when run a second time.
other_values = X_df[other_features].to_numpy().astype(np.float32)

# Final design matrix: averaged pair features followed by the unpaired columns.
X = np.concatenate([features, other_values], axis=1)
y = y_df.to_numpy().ravel()

In [None]:
# Label vector flattened to one dimension (repeats the assignment made in
# the preceding cell).
label_values = y_df.to_numpy()
y = label_values.ravel()

In [None]:
# Cross-validate the SVM on the averaged-pair feature set.
y_true, y_predict = test(X, y)

# Score the predictions pooled over all folds. The scorers are already
# imported in the cell that creates metrics_df, so they are not re-imported.
acc = accuracy_score(y_true, y_predict)
prec = precision_score(y_true, y_predict)
rec = recall_score(y_true, y_predict)
f1 = f1_score(y_true, y_predict)

# Store the scores as the 'Common Features (Both Feet)' row of the results table.
metrics_df.loc['Common Features (Both Feet)'] = [acc, prec, rec, f1]

In [None]:
# Display the metrics table collected so far (one row per feature set evaluated above).
metrics_df

In [None]:
# Plot the confusion matrix of the pooled predictions and export it as a PDF.
figure = plot_confusion_matrix(y_true, y_predict)
pdf_path = 'data/classifiers/imgs/CommonFeatures_BothFeet.pdf'
figure.savefig(pdf_path, bbox_inches='tight')

# Save results

In [None]:
# Write the collected metrics table (index = feature-set name) to CSV.
metrics_csv_path = 'data/classifiers/metrics.csv'
metrics_df.to_csv(metrics_csv_path)