# Results

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import precision_score, recall_score, accuracy_score, matthews_corrcoef 

### Load data

In [None]:
evaluation = "../evaluation"

# train
# One frame per model, unpacked in the same order as the file names below.
# Every CSV has its "ID" column dropped and is cut to its first 10 rows.
# NOTE(review): the 10-row cap presumably matches the number of runs per model — confirm.
(
    df1_train,
    df2_train,
    df3_train,
    df4_train,
    df5_train,
    df6_train,
) = (
    pd.read_csv(f"{evaluation}/{csv_name}").drop(columns="ID").head(10)
    for csv_name in (
        "GCN_Train.csv",
        "GAT_Train.csv",
        "GT_Train.csv",
        "NN_Train.csv",
        "Random_Forest_Train.csv",
        "XGB_Train.csv",
    )
)

In [None]:
# validation
# One frame per model, unpacked in the same order as the file names below.
# Every CSV has its "ID" column dropped and is cut to its first 10 rows.
(
    df1_val,
    df2_val,
    df3_val,
    df4_val,
    df5_val,
    df6_val,
) = (
    pd.read_csv(f"{evaluation}/{csv_name}").drop(columns="ID").head(10)
    for csv_name in (
        "GCN_Validation.csv",
        "GAT_Validation.csv",
        "GT_Validation.csv",
        "NN_Validation.csv",
        "Random_Forest_Validation.csv",
        "XGB_Validation.csv",
    )
)

In [None]:
# test
# One frame per model, unpacked in the same order as the file names below.
# Every CSV has its "ID" column dropped and is cut to its first 10 rows.
(
    df1_test,
    df2_test,
    df3_test,
    df4_test,
    df5_test,
    df6_test,
) = (
    pd.read_csv(f"{evaluation}/{csv_name}").drop(columns="ID").head(10)
    for csv_name in (
        "GCN_Test.csv",
        "GAT_Test.csv",
        "GT_Test.csv",
        "NN_Test.csv",
        "Random_Forest_Test.csv",
        "XGB_Test.csv",
    )
)

In [None]:
# Display the frame read from XGB_Test.csv ("ID" column dropped, at most 10 rows) as a quick sanity check.
df6_test

### Calculate mean and standard deviation for the table

In [None]:
criteria = ["MCC", "Precision", "Recall", "Accuracy"]


def _pair_with_criteria(criteria, stats):
    """Return ``[[criterion, value], ...]`` with values taken from ``stats``."""
    if isinstance(stats, pd.Series):
        if all(label in stats.index for label in criteria):
            # Look up by column label so the pairing stays correct even when the
            # columns are stored in a different order than ``criteria``.
            return [[label, stats[label]] for label in criteria]
        # Labels are not present: fall back to explicit positional access.
        # Plain ``stats[j]`` on a labelled Series is deprecated in pandas 2.x.
        return [[label, stats.iloc[pos]] for pos, label in enumerate(criteria)]
    # Anything that is not a Series (e.g. an array) is indexed by position as before.
    return [[label, stats[pos]] for pos, label in enumerate(criteria)]


def calculate_avg_std(criteria, name, *models):
    """Compute and print the per-criterion mean and standard deviation of each model.

    Parameters
    ----------
    criteria : sequence of str
        Metric names. When every name is a column of a model's frame, values are
        looked up by that column label; otherwise the k-th criterion is paired
        with the k-th column.
    name : sequence of str
        Model names, positionally matched to ``models``. Names without a
        corresponding model keep the value ``None`` in both result dictionaries.
    *models : pandas.DataFrame
        One frame of scores per model; statistics are taken down the rows
        (``axis=0``). ``std`` uses pandas' default, i.e. the sample standard
        deviation (``ddof=1``).

    Returns
    -------
    tuple of (dict, dict)
        ``(average, standard_deviation)``; each maps a model name to a list of
        ``[criterion, value]`` pairs.

    Raises
    ------
    IndexError
        If more models than names are supplied.
    """
    # Start every name at None so models that were not passed are still listed.
    average = dict.fromkeys(name)
    standard_deviation = dict.fromkeys(name)

    for position, model in enumerate(models):
        model_name = name[position]
        average[model_name] = _pair_with_criteria(criteria, model.mean(axis=0))
        standard_deviation[model_name] = _pair_with_criteria(criteria, model.std(axis=0))

    print("Average:", average)
    print("*"*20)
    print("Standard deviation:", standard_deviation)
    print("*"*20)
    return average, standard_deviation

# Summary statistics for each split, reported in the order train, test, validation.
model_names = ["GCN", "GAT", "Transformer", "NN", "RF", "XGB"]
train_runs = (df1_train, df2_train, df3_train, df4_train, df5_train, df6_train)
test_runs = (df1_test, df2_test, df3_test, df4_test, df5_test, df6_test)
val_runs = (df1_val, df2_val, df3_val, df4_val, df5_val, df6_val)

calculate_avg_std(criteria, model_names, *train_runs)
calculate_avg_std(criteria, model_names, *test_runs)
# Kept as the final bare expression so the cell still echoes this call's return value.
calculate_avg_std(criteria, model_names, *val_runs)

### Data visualization

In [None]:
# Takes any number of baseline/non-baseline models and one criterion from: MCC, Accuracy, Precision, Recall.
# Only one metric is plotted at a time for readability.
criteria = ["MCC", "Precision", "Recall", "Accuracy"]


def plot_results(name, criteria, *models):
    """Draw a box plot of one evaluation criterion across models and save it as SVG.

    Parameters
    ----------
    name : sequence of str
        Tick labels, one per entry of ``models`` and in the same order.
    criteria : str or sequence of str
        The column to plot: either the column name itself, or a sequence whose
        first element is the column name (any further elements are ignored).
    *models : pandas.DataFrame
        Result frames; each must contain the selected column.

    Side effects
    ------------
    Writes ``"<evaluation>/Evaluation result: <metric>.svg"`` (``evaluation`` is
    the notebook-level directory variable), shows the figure, then closes it.
    """
    # Accept a bare string too; indexing a string with [0] would only yield its first character.
    metric = criteria if isinstance(criteria, str) else str(criteria[0])
    labels = list(name)

    # One Series of scores per model for the selected criterion.
    results = [model[metric] for model in models]

    fig, ax = plt.subplots()
    ax.set_ylabel('Score')
    ax.boxplot(
        results,
        patch_artist=True,  # fill with color
        tick_labels=labels,  # x-tick labels; this keyword needs Matplotlib >= 3.9 (older releases call it `labels`)
        showmeans=True,
    )
    ax.set_title(metric)
    ax.tick_params(axis="x", labelrotation=90)
    fig.subplots_adjust(bottom=0.3)  # so that the labels aren't cut off
    # NOTE(review): ':' is not a legal character in Windows file names; the name is
    # left unchanged here so existing references to the saved figures keep working.
    fig.savefig(f"{evaluation}/Evaluation result: {metric}.svg")
    plt.show()
    # Release the figure: this function is called once per criterion in a loop.
    plt.close(fig)

# One figure per criterion, comparing all six models on the test split.
model_labels = ["GCN", "GAT", "Graph Transformer", "Neural Network", "Random Forest", "XGBoost"]
test_frames = (df1_test, df2_test, df3_test, df4_test, df5_test, df6_test)
for metric in criteria:
    plot_results(model_labels, [metric], *test_frames)