In [210]:
import os
import pandas as pd
import numpy as np

# Default packages for the minimum example
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GroupKFold, train_test_split
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, confusion_matrix, multilabel_confusion_matrix
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier

import matplotlib.pyplot as plt


import pickle #for saving/loading trained classifiers


In [211]:
# Finding our files - make sure to follow the same file/folder structure.
file_data = '.' + os.sep + 'data' + os.sep +'metadata.csv'
path_image = '.' + os.sep + 'data' + os.sep + 'images' + os.sep + 'imgs_part_1'

# read the metadata csv and find the diagnostic labels.
df = pd.read_csv(file_data)

# Find the features from the feature extraction.
file_features = 'features/features.csv'
feature_names = ['file_name','asymmetry','color','blue-white_veil']

# Load up the features in a separate dataframe to filter our metadata.
df_features = pd.read_csv(file_features)

# our_list for all images in the metadata that we also have in our features.csv
our_list = list(np.array(df_features["file_name"]))
filtered_data = df[df["img_id"].isin(our_list)]
label = np.array(filtered_data['diagnostic'])
image_id = list(filtered_data['img_id'])

In [212]:
# Make the dataset
x = np.array(df_features[feature_names[1:]])

y = np.zeros(len(label))  # Initialize the labels array with zeros
y[label == 'BCC'] = 1     # Set BCC samples to 1
y[label == 'SCC'] = 2     # Set SCC samples to 2
y[label == 'MEL'] = 3     # Set MEL samples to 3
patient_id = filtered_data['patient_id']



In [213]:
# Split the data in the first 80% train data and 20% true test data for the final validation.

x_train_data, x_test_data, y_train_data, y_test_data, patient_id_train_data, patient_id_test_data = train_test_split(
    x, y, patient_id, test_size=0.2, train_size=0.8, random_state=42, shuffle=True, stratify=y)

In [214]:
#Prepare cross-validation -
# GroupKFold makes sure patients with the same ID will not be split between the training and validation sets.

num_folds = 5
group_kfold = GroupKFold(n_splits=num_folds)
group_kfold.get_n_splits(x_train_data, y_train_data, patient_id_train_data)

5

In [291]:
# Our classifiers are defined here. We use K-NN

classifiers = [
    KNeighborsClassifier(1),
    KNeighborsClassifier(3),
    KNeighborsClassifier(5),
    KNeighborsClassifier(7),
    KNeighborsClassifier(9),
    LogisticRegression(multi_class='ovr', solver='lbfgs', max_iter=1000, random_state=42),
    RandomForestClassifier(100,max_depth=1,random_state=1907),
    DecisionTreeClassifier(random_state=1907)
]
num_classifiers = len(classifiers)  


In [292]:
# set up np arrays for the eventual accuracy- and F1-scores.
acc_val = np.empty([num_folds,num_classifiers])
f1_val = np.empty([num_folds, num_classifiers])
roc_auc_val = np.empty([num_folds, num_classifiers])
specificity_val = np.empty([num_folds, num_classifiers])

acc_list = []
f1_list = []
roc_auc_list = []
specificity_list = []
sensitivity_list = []

# Splits up our data into training and validation sets at a 80/20 ratio. The group_kfold does training across folds,
# with a default of 5 folds it will give us 5 outputs.
for i, (train_index, val_index) in enumerate(group_kfold.split(x_train_data, y_train_data, patient_id_train_data)):
    
    # x_train = 80%
    # y_train = truth for 80%
    # x_val = 20%
    # y_val = truth for 20%
    x_train = x[train_index,:]
    y_train = y[train_index]
    x_val = x[val_index,:]
    y_val = y[val_index]
    
    # Initialize StandardScaler
    scaler = StandardScaler()
 
    # Standardize features
    x_train_scaled = scaler.fit_transform(x_train)
    x_val_scaled = scaler.transform(x_val)
    
    
    acc_l = []
    f1_l = []
    roc_auc_l = []
    specificity = []
    sensitivity = []
    
    for j, clf in enumerate(classifiers): 
        # Train the classifier with the 80%.
        clf.fit(x_train_scaled, y_train)
        
        # Predict labels for validation data
        y_pred = clf.predict(x_val_scaled)
    
        # Evaluate accuracy score (mostly useless)
        accuracy = accuracy_score(y_val, y_pred)
        acc_val[i, j] = accuracy
        acc_l.append(accuracy)
        
        # Evaluate F1 score (Shows us the ratio of false positives and true negatives)
        multi_cm = multilabel_confusion_matrix(y_val, y_pred)
        
        f1 = f1_score(y_val, y_pred, average="weighted")  # Use macro-average for multi-class classification (We have 4 classes -> the 3 skin cancers and a category for none of them.)
        f1_val[i, j] = f1
        f1_l.append(f1)

        
        # Calculate ROC AUC score
        roc_auc = roc_auc_score(y_val, clf.predict_proba(x_val_scaled), multi_class='ovr')
        roc_auc_val[i, j] = roc_auc
        roc_auc_l.append(roc_auc)
        
        # Calculate specificity
        c1, c2, c3, c4 = confusion_matrix(y_val, y_pred)
        
        tn = c1[0] # true negative
        tp = c2[1] + c3[2] + c4[3] # true positive
        fp = np.sum(c1[1:]) + np.sum(c2[2:]) + c3[3] + c3[1] + np.sum(c4[1:2]) # false positive
        fn = c2[0] + c3[0] + c4[0] # false negative
        
        specificity.append(tn/(tn+fp))
        
        sensitivity.append(tp/(tp+fn))
        
        
        # Print our scores - Classifier 1 is K-NN(1) and Classifier 2 is K-NN(5):
        #print(f"F1 score (Fold {i + 1}, Classifier {j + 1}): {f1_l}")
        #print(f"ROC AUC score (Fold {i + 1}, Classifier {j + 1}): {roc_auc_l}")
        #print(f"Specificity (Fold {i + 1}, Classifier {j + 1}): {specificity}")
        #print(f"Sensitivity (Fold {i + 1}, Classifier {j + 1}): {sensitivity}")
        #print("")
        if len(acc_l) == len(classifiers):
            acc_list.append(acc_l)
        if len(f1_l) == len(classifiers):
            f1_list.append(f1_l)
        if len(roc_auc_l) == len(classifiers):
            roc_auc_list.append(roc_auc_l)
        if len(specificity) == len(classifiers):
            specificity_list.append(specificity)
        if len(sensitivity) == len(classifiers):
            sensitivity_list.append(sensitivity)
        



In [293]:
# Calculate means and standard deviations for each classifier across all folds
means = {}
std_devs = {}

for classifier_idx in range(len(classifiers)):
    classifier_name = f"Classifier {classifier_idx+1}"
    
    # Extract scores for the current classifier across all folds
    classifier_f1_scores = [fold[classifier_idx] for fold in f1_list]
    classifier_accuracy_scores = [fold[classifier_idx] for fold in acc_list]
    classifier_sensitivity_scores = [fold[classifier_idx] for fold in sensitivity_list]
    classifier_specificity_scores = [fold[classifier_idx] for fold in specificity_list]
    classifier_roc_auc_scores = [fold[classifier_idx] for fold in roc_auc_list]
    
    # Convert scores to NumPy arrays for easier calculations
    classifier_f1_scores_array = np.array(classifier_f1_scores)
    classifier_accuracy_scores_array = np.array(classifier_accuracy_scores)
    classifier_sensitivity_scores_array = np.array(classifier_sensitivity_scores)
    classifier_specificity_scores_array = np.array(classifier_specificity_scores)
    classifier_roc_auc_scores_array = np.array(classifier_roc_auc_scores)
    
    # Calculate means and standard deviations
    means[classifier_name] = {
        'F1 Score': np.mean(classifier_f1_scores_array),
        'Accuracy Score': np.mean(classifier_accuracy_scores_array),
        'Sensitivity Score': np.mean(classifier_sensitivity_scores_array),
        'Specificity Score': np.mean(classifier_specificity_scores_array),
        'ROC AUC Score': np.mean(classifier_roc_auc_scores_array)
    }

    std_devs[classifier_name] = {
        'F1 Score': np.std(classifier_f1_scores_array),
        'Accuracy Score': np.std(classifier_accuracy_scores_array),
        'Sensitivity Score': np.std(classifier_sensitivity_scores_array),
        'Specificity Score': np.std(classifier_specificity_scores_array),
        'ROC AUC Score': np.std(classifier_roc_auc_scores_array)
    }

# Print or use the means and standard deviations as needed
for classifier_name, scores in means.items():
    print(f"Mean scores for {classifier_name}: {scores}")

for classifier_name, scores in std_devs.items():
    print(f"Standard deviations for {classifier_name}: {scores}")

Mean scores for Classifier 1: {'F1 Score': 0.44989905628770666, 'Accuracy Score': 0.4513845947035892, 'Sensitivity Score': 0.39930469289164944, 'Specificity Score': 0.4879598662207358, 'ROC AUC Score': 0.5228965546107268}
Mean scores for Classifier 2: {'F1 Score': 0.4279156375908385, 'Accuracy Score': 0.46758972980239283, 'Sensitivity Score': 0.25853023909985934, 'Specificity Score': 0.6376446035832415, 'ROC AUC Score': 0.545869551242795}
Mean scores for Classifier 3: {'F1 Score': 0.4550785879785943, 'Accuracy Score': 0.5011896760317247, 'Sensitivity Score': 0.27837719074480777, 'Specificity Score': 0.6905503926729718, 'ROC AUC Score': 0.5628148902324653}
Mean scores for Classifier 4: {'F1 Score': 0.4291037607823343, 'Accuracy Score': 0.47801451808038714, 'Sensitivity Score': 0.2409718482252142, 'Specificity Score': 0.6753651699270282, 'ROC AUC Score': 0.5466740485676881}
Mean scores for Classifier 5: {'F1 Score': 0.43737805479650804, 'Accuracy Score': 0.488446027691894, 'Sensitivity S

In [298]:
# Convert the means dictionary to a DataFrame
means_df = pd.DataFrame.from_dict(means, orient='index')

# Print the DataFrame
print("Mean scores for each classifier:")
print(means_df)


Mean scores for each classifier:
              F1 Score  Accuracy Score  Sensitivity Score  Specificity Score  \
Classifier 1  0.449899        0.451385           0.399305           0.487960   
Classifier 2  0.427916        0.467590           0.258530           0.637645   
Classifier 3  0.455079        0.501190           0.278377           0.690550   
Classifier 4  0.429104        0.478015           0.240972           0.675365   
Classifier 5  0.437378        0.488446           0.239127           0.711086   
Classifier 6  0.404596        0.472207           0.156883           0.762489   
Classifier 7  0.398366        0.514982           0.108109           0.911650   
Classifier 8  0.383640        0.385428           0.354055           0.405009   

              ROC AUC Score  
Classifier 1       0.522897  
Classifier 2       0.545870  
Classifier 3       0.562815  
Classifier 4       0.546674  
Classifier 5       0.559348  
Classifier 6       0.569223  
Classifier 7       0.601285  
Classi

In [222]:
#Define classifers:

#train_score = {}
#test_score = {}
classifier = KNeighborsClassifier(5)


# Fit the scaler on the training data
scaler = StandardScaler()
scaler.fit(x_train_data)

# Transform both the training and testing data using the fitted scaler
x_train_data_scaled = scaler.transform(x_train_data)
x_test_data_scaled = scaler.transform(x_test_data)

# Fit the classifier on the scaled training data
classifier.fit(x_train_data_scaled, y_train_data)


y_train_pred = classifier.predict(x_train_data_scaled)
y_test_pred = classifier.predict(x_test_data_scaled)

# Calculate training and testing scores
train_score = classifier.score(x_train_data_scaled, y_train_data)
test_score = classifier.score(x_test_data_scaled, y_test_data)


# Evaluation scores, from earlier!
train_f1_score = f1_score(y_train_data, y_train_pred, average='weighted')
test_f1_score = f1_score(y_test_data, y_test_pred, average="weighted")  # Use macro-average for multi-class classification (We have 4 classes -> the 3 skin cancers and a category for none of them.)
        
# Calculate ROC AUC score
train_roc_auc = roc_auc_score(y_train_data, clf.predict_proba(x_train_data_scaled), multi_class='ovr')
test_roc_auc = roc_auc_score(y_test_data, clf.predict_proba(x_test_data_scaled), multi_class='ovr')
        
# Calculate specificity (first for train)
c1, c2, c3, c4 = confusion_matrix(y_train_data, y_train_pred)
        
tn = c1[0] # true negative
tp = c2[1] + c3[2] + c4[3] # true positive
fp = np.sum(c1[1:]) + np.sum(c2[2:]) + c3[3] + c3[1] + np.sum(c4[1:2]) # false positive
fn = c2[0] + c3[0] + c4[0] # false negative
        
train_specificity = (tn/(tn+fp))
        
train_sensitivity = (tp/(tp+fn))

# Calculate specificity (now for test)
c1, c2, c3, c4 = confusion_matrix(y_test_data, y_test_pred)
        
tn = c1[0] # true negative
tp = c2[1] + c3[2] + c4[3] # true positive
fp = np.sum(c1[1:]) + np.sum(c2[2:]) + c3[3] + c3[1] + np.sum(c4[1:2]) # false positive
fn = c2[0] + c3[0] + c4[0] # false negative
        
test_specificity = (tn/(tn+fp))
        
test_sensitivity = (tp/(tp+fn))

In [223]:
print(f"Accuracy score for train data: {train_score}")
print(f"Accuracy score for validation data: {test_score}")
print(f"Train F1 score: {train_f1_score}")
print(f"Test F1 score: {test_f1_score}")
print(f"Train ROC-AUC score: {train_roc_auc}")
print(f"Test ROC-AUC score: {test_roc_auc}")
print(f"Train specificity: {train_specificity}")
print(f"Test specificity: {test_specificity}")
print(f"Train sensitivity: {train_sensitivity}")
print(f"Test sensitivity: {test_sensitivity}")


Accuracy score for train data: 0.6435185185185185
Accuracy score for validation data: 0.5299539170506913
Train F1 score: 0.5998769825708896
Test F1 score: 0.4873802616313825
Train ROC-AUC score: 0.5791117261878536
Test ROC-AUC score: 0.5780310476181543
Train specificity: 0.8123667377398721
Test specificity: 0.7155172413793104
Train sensitivity: 0.4430379746835443
Test sensitivity: 0.31683168316831684


In [224]:
# Train our model on all of x and y now that we have validated our results.
classifier = classifier.fit(x,y)

#This is the classifier you need to save using pickle, add this to your zip file submission
filename = 'groupXY_classifier.sav'
pickle.dump(classifier, open(filename, 'wb'))