In [44]:
import os
import pandas as pd
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from IPython.display import display

In [45]:
# Name of the commodity being analysed; it is the leading component of both
# result paths below.
COMMODITY = 'copper'

# Directory handed to process_files(); it is scanned for files named
# 'Confident*_predictions.csv'.
EDCR_RESULTS_PATH = f'{COMMODITY}/test/predictions/test'
# CSV loaded with pd.read_csv: calculate_metrics() takes the 'Prior' value from
# its first row, and the whole table is displayed later in the notebook.
OG_RESULTS_PATH = f'{COMMODITY}/test/results_test.csv'

In [46]:
def extract_info_from_filename(filename):
    """Pull the base name, the models label and epsilon out of a file name.

    The name is cut at the substring ``"Rule"``:

    * the last whitespace-separated token before it is returned as epsilon;
    * the text after it is cut at ``"for"``; the first piece (stripped) is the
      models label, and the second piece, up to ``"_predictions"`` and
      stripped, is the base name.

    Splitting is done on the raw substrings, so a name that itself contains
    ``"Rule"`` or ``"for"`` would be cut at that point as well.

    Returns
    -------
    tuple of str
        ``(base, models, epsilon)`` -- all three are returned as strings.
    """
    rule_pieces = filename.split("Rule")
    before_rule = rule_pieces[0]
    eps_token = before_rule.split()[-1]

    after_rule = rule_pieces[1]
    for_pieces = after_rule.split("for")
    models_label = for_pieces[0].strip()

    base_with_suffix = for_pieces[1]
    base_label = base_with_suffix.split("_predictions")[0].strip()

    return base_label, models_label, eps_token

In [47]:
def calculate_metrics(df):
    """Score one predictions table and attach the prior from the results CSV.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide a ``'True'`` column and a ``'Predicted'`` column.

    Returns
    -------
    tuple
        ``(accuracy, precision, recall, f1, prior)``. ``precision``,
        ``recall`` and ``f1`` are the per-class results requested for labels
        ``[0, 1]`` (``average=None``); the support values are discarded.
        ``prior`` is the ``'Prior'`` entry of the first row of the CSV at
        ``OG_RESULTS_PATH``, which is re-read on every call.
    """
    actual, predicted = df['True'], df['Predicted']

    accuracy = accuracy_score(actual, predicted)

    per_class_scores = precision_recall_fscore_support(
        actual,
        predicted,
        average=None,
        labels=[0, 1],
    )
    precision, recall, f1 = per_class_scores[0], per_class_scores[1], per_class_scores[2]

    # Only the first data row is loaded; its 'Prior' cell is the value reported.
    first_og_row = pd.read_csv(OG_RESULTS_PATH, nrows=1)
    prior = first_og_row['Prior'].iloc[0]

    return accuracy, precision, recall, f1, prior

In [48]:
def process_files(path):
    """Build one metrics row per prediction file found directly in ``path``.

    Only entries whose name starts with ``'Confident'`` and ends with
    ``'_predictions.csv'`` are processed. For each of them the file name is
    parsed with ``extract_info_from_filename`` and the CSV is scored with
    ``calculate_metrics``.

    Parameters
    ----------
    path : str
        Directory to scan (not recursive).

    Returns
    -------
    pandas.DataFrame
        One row per matching file, with the columns ``Base``, ``Models``,
        ``Epsilon``, ``Accuracy``, ``Precision (0)``, ``Recall (0)``,
        ``F1 (0)``, ``Precision (1)``, ``Recall (1)``, ``F1 (1)`` and
        ``Prior``. When nothing matches, the frame is empty but still has
        these columns, so filtering on e.g. ``'Models'`` does not raise
        ``KeyError``.
    """
    columns = [
        'Base', 'Models', 'Epsilon', 'Accuracy',
        'Precision (0)', 'Recall (0)', 'F1 (0)',
        'Precision (1)', 'Recall (1)', 'F1 (1)',
        'Prior',
    ]

    all_results = []
    # os.listdir gives names in arbitrary order; sorting keeps the row order
    # (and therefore the index) reproducible between runs and machines.
    for filename in sorted(os.listdir(path)):
        if not (filename.startswith('Confident') and filename.endswith("_predictions.csv")):
            continue

        base, models, epsilon = extract_info_from_filename(filename)
        df = pd.read_csv(os.path.join(path, filename))
        accuracy, precision, recall, f1, prior = calculate_metrics(df)
        all_results.append({
            'Base': base,
            'Models': models,
            'Epsilon': epsilon,
            'Accuracy': accuracy,
            'Precision (0)': precision[0],
            'Recall (0)': recall[0],
            'F1 (0)': f1[0],
            'Precision (1)': precision[1],
            'Recall (1)': recall[1],
            'F1 (1)': f1[1],
            'Prior': prior,
        })

    # Passing the columns explicitly fixes the schema even for an empty list.
    return pd.DataFrame(all_results, columns=columns)

In [49]:
# Collect the metrics of every matching prediction file under EDCR_RESULTS_PATH
# into a single DataFrame.
results_df = process_files(EDCR_RESULTS_PATH)

In [50]:
# Keep the rows whose 'Models' value is anything other than 'all'.
not_all_mask = results_df['Models'] != 'all'
results_df_one = results_df.loc[not_all_mask]
results_df_one

Unnamed: 0,Base,Models,Epsilon,Accuracy,Precision (0),Recall (0),F1 (0),Precision (1),Recall (1),F1 (1),Prior
3,CNNA_64_3,CNNA_32_3,0.5,0.738739,0.860335,0.823529,0.841530,0.232558,0.285714,0.256410,0.16
4,CNN_256_3,CNNA_32_7,0.5,0.765766,0.857143,0.866310,0.861702,0.242424,0.228571,0.235294,0.16
5,CNN_128_3,CNN_32_3,0.5,0.765766,0.857143,0.866310,0.861702,0.242424,0.228571,0.235294,0.16
6,CNN_256_3,CNN_32_3,0.5,0.783784,0.856410,0.893048,0.874346,0.259259,0.200000,0.225806,0.16
7,CNN_128_3,LSTM_128,0.5,0.225225,0.758621,0.117647,0.203704,0.145078,0.800000,0.245614,0.16
...,...,...,...,...,...,...,...,...,...,...,...
203,CNN_256_3,RNN_128,0.9,0.815315,0.861386,0.930481,0.894602,0.350000,0.200000,0.254545,0.16
204,CNNA_256_5,RNN_32,0.9,0.810811,0.864322,0.919786,0.891192,0.347826,0.228571,0.275862,0.16
205,CNNA_64_3,RNN_32,0.9,0.797297,0.838095,0.941176,0.886650,0.083333,0.028571,0.042553,0.16
206,LSTM_64,RNN_32,0.9,0.828829,0.870647,0.935829,0.902062,0.428571,0.257143,0.321429,0.16


In [51]:
# Keep only the rows whose 'Models' value is exactly 'all'.
is_all_mask = results_df['Models'] == 'all'
results_df_all = results_df.loc[is_all_mask]
results_df_all

Unnamed: 0,Base,Models,Epsilon,Accuracy,Precision (0),Recall (0),F1 (0),Precision (1),Recall (1),F1 (1),Prior
0,CNNA_64_3,all,0.5,0.252252,0.888889,0.128342,0.224299,0.164103,0.914286,0.278261,0.16
1,CNN_128_3,all,0.5,0.225225,0.758621,0.117647,0.203704,0.145078,0.8,0.245614,0.16
2,CNN_256_3,all,0.5,0.225225,0.826087,0.101604,0.180952,0.155779,0.885714,0.264957,0.16
18,CNNA_64_3,all,0.6,0.554054,0.828358,0.593583,0.691589,0.136364,0.342857,0.195122,0.16
19,CNN_128_3,all,0.6,0.432432,0.790476,0.44385,0.568493,0.111111,0.371429,0.171053,0.16
20,CNN_256_3,all,0.6,0.635135,0.858108,0.679144,0.758209,0.189189,0.4,0.256881,0.16
56,CNNA_64_3,all,0.7,0.797297,0.858586,0.909091,0.883117,0.291667,0.2,0.237288,0.16
57,CNN_128_3,all,0.7,0.792793,0.854271,0.909091,0.880829,0.26087,0.171429,0.206897,0.16
58,CNN_256_3,all,0.7,0.788288,0.857143,0.898396,0.877285,0.269231,0.2,0.229508,0.16
94,CNNA_64_3,all,0.8,0.815315,0.861386,0.930481,0.894602,0.35,0.2,0.254545,0.16


In [54]:
# Load the results table from OG_RESULTS_PATH and discard the 'Unnamed: 0'
# column in one chained expression.
og_df = pd.read_csv(OG_RESULTS_PATH).drop(columns='Unnamed: 0')
# Allow up to 200 rows to be rendered before pandas truncates the display.
# This is a global option, so it also applies to cells executed afterwards.
pd.set_option('display.max_rows', 200)
og_df

Unnamed: 0,Name,Params,Accuracy,Precision (0),Recall (0),F1 (0),Precision (1),Recall (1),F1 (1),Prior
0,LSTM,256 layers,0.238739,0.78125,0.13369,0.228311,0.147368,0.8,0.248889,0.16
1,LSTM,128 layers,0.783784,0.85641,0.893048,0.874346,0.259259,0.2,0.225806,0.16
2,LSTM,64 layers,0.653153,0.857143,0.705882,0.774194,0.191176,0.371429,0.252427,0.16
3,LSTM,32 layers,0.675676,0.840237,0.759358,0.797753,0.150943,0.228571,0.181818,0.16
4,CNN with Attention,"32 filters, kernel size 7",0.77027,0.857895,0.871658,0.864721,0.25,0.228571,0.238806,0.16
5,CNN with Attention,"32 filters, kernel size 5",0.490991,0.803279,0.524064,0.634304,0.11,0.314286,0.162963,0.16
6,CNN with Attention,"32 filters, kernel size 3",0.765766,0.846154,0.882353,0.863874,0.185185,0.142857,0.16129,0.16
7,CNN with Attention,"64 filters, kernel size 7",0.693694,0.843931,0.780749,0.811111,0.163265,0.228571,0.190476,0.16
8,CNN with Attention,"64 filters, kernel size 5",0.684685,0.842105,0.770053,0.804469,0.156863,0.228571,0.186047,0.16
9,CNN with Attention,"64 filters, kernel size 3",0.509009,0.814516,0.540107,0.649518,0.122449,0.342857,0.180451,0.16
