In [88]:
import os
import pandas as pd
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from IPython.display import display

In [89]:
COMMODITY = 'copper_under'
METRIC = 'Recall'
ALGO = 'detection_correction'
# ALGO = 'correction'


EDCR_RESULTS_PATH = f'{COMMODITY}/test/predictions/test_{METRIC}_{ALGO}'
OG_RESULTS_PATH = f'{COMMODITY}/test/results_test.csv'

In [90]:
def extract_info_from_filename(filename):
    parts = filename.split("Rule")
    epsilon = parts[0].split()[-1]
    models_base = parts[1].split("for")
    models = models_base[0].strip()
    base = models_base[1].split("_predictions")[0].strip()
    return base, models, epsilon

In [91]:
def calculate_metrics(df):
    y_true = df['True']
    y_pred = df['Predicted']
    accuracy = accuracy_score(y_true, y_pred)
    precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred, average=None, labels=[0, 1])
    prior = pd.read_csv(OG_RESULTS_PATH, nrows=1)['Prior'].iloc[0]
    return accuracy, precision, recall, f1, prior

In [92]:
def process_files(path):
    all_results = []  
    for filename in os.listdir(path):
        if filename.startswith('Confident') and filename.endswith("_predictions.csv"):
            base, models, confidence = extract_info_from_filename(filename)
            df = pd.read_csv(os.path.join(path, filename))
            accuracy, precision, recall, f1, prior = calculate_metrics(df)
            row = {
                'Base': base,
                'Models': models,
                'Confidence': confidence,
                'Accuracy': accuracy,
                'Precision (0)': precision[0],
                'Recall (0)': recall[0],
                'F1 (0)': f1[0],
                'Precision (1)': precision[1],
                'Recall (1)': recall[1],
                'F1 (1)': f1[1],
                'Prior': prior  
            }
            all_results.append(row)
    
    # Convert the list of results to a DataFrame
    return pd.DataFrame(all_results)

In [93]:
results_df = process_files(EDCR_RESULTS_PATH)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

In [94]:
# results_df_one = results_df[results_df['Models'] != 'all']
# # pd.set_option('display.max_rows', 200)
# results_df_one

In [95]:
og_df = pd.read_csv(OG_RESULTS_PATH)
og_df = og_df.drop('Unnamed: 0', axis=1)
og_df = og_df[(og_df['Name'] != 'EDCR')]
pd.set_option('display.max_rows', 400)
og_df

Unnamed: 0,Name,Params,Accuracy,Precision (0),Recall (0),F1 (0),Precision (1),Recall (1),F1 (1),Prior
0,LSTM,256 layers,0.77027,0.84,0.898396,0.868217,0.136364,0.085714,0.105263,0.16
1,LSTM,128 layers,0.842342,0.848624,0.989305,0.91358,0.5,0.057143,0.102564,0.16
2,LSTM,64 layers,0.459459,0.801802,0.475936,0.597315,0.117117,0.371429,0.178082,0.16
3,LSTM,32 layers,0.666667,0.846626,0.737968,0.788571,0.169492,0.285714,0.212766,0.16
4,CNN with Attention,"32 filters, kernel size 7",0.716216,0.848315,0.807487,0.827397,0.181818,0.228571,0.202532,0.16
5,CNN with Attention,"32 filters, kernel size 5",0.635135,0.839744,0.700535,0.763848,0.151515,0.285714,0.19802,0.16
6,CNN with Attention,"32 filters, kernel size 3",0.657658,0.84472,0.727273,0.781609,0.163934,0.285714,0.208333,0.16
7,CNN with Attention,"64 filters, kernel size 7",0.707207,0.850575,0.791444,0.819945,0.1875,0.257143,0.216867,0.16
8,CNN with Attention,"64 filters, kernel size 5",0.626126,0.837662,0.68984,0.756598,0.147059,0.285714,0.194175,0.16
9,CNN with Attention,"64 filters, kernel size 3",0.653153,0.839506,0.727273,0.77937,0.15,0.257143,0.189474,0.16


### Base Model

Generally for lower confidence (0.1-0.3), the rule is basically all '1'. It's always predicting spikes, so that's why lower confidence results look similar to spikes-dumb-model

Conversely, for higher confidence (0.9-0.95), the rule is becoming more like all '0'. So the higher we go, the less spikes we're predicting, hence making it look more like non-spikes-dumb-model

In [98]:
MODEL_NAME_OF_INTEREST = 'CNN'
MODEL_PARAM_OF_INTEREST = '32_5'

results_df_all = results_df[(results_df['Base'] == f'{MODEL_NAME_OF_INTEREST}_{MODEL_PARAM_OF_INTEREST}') & (results_df['Models'] == 'all') & (results_df['Base'] != "dumb_spikes") & (results_df['Base'] != "dumb_non_spikes")]
results_df_all

Unnamed: 0,Base,Models,Confidence,Accuracy,Precision (0),Recall (0),F1 (0),Precision (1),Recall (1),F1 (1),Prior
19,CNN_32_5,all,0.1,0.157658,0.0,0.0,0.0,0.157658,1.0,0.272374,0.16
213,CNN_32_5,all,0.2,0.157658,0.0,0.0,0.0,0.157658,1.0,0.272374,0.16
407,CNN_32_5,all,0.3,0.162162,0.666667,0.010695,0.021053,0.155251,0.971429,0.267717,0.16
601,CNN_32_5,all,0.45,0.198198,0.736842,0.074866,0.135922,0.147783,0.857143,0.252101,0.16
795,CNN_32_5,all,0.4,0.166667,0.583333,0.037433,0.070352,0.142857,0.857143,0.244898,0.16
989,CNN_32_5,all,0.55,0.441441,0.788991,0.459893,0.581081,0.106195,0.342857,0.162162,0.16
1183,CNN_32_5,all,0.5,0.22973,0.785714,0.117647,0.204651,0.149485,0.828571,0.253275,0.16
1377,CNN_32_5,all,0.6,0.563063,0.8,0.641711,0.712166,0.069444,0.142857,0.093458,0.16
1571,CNN_32_5,all,0.7,0.599099,0.79878,0.700535,0.746439,0.034483,0.057143,0.043011,0.16
1765,CNN_32_5,all,0.8,0.725225,0.828125,0.850267,0.83905,0.066667,0.057143,0.061538,0.16


### Dumb Model (Only Predicts Non-Spikes)

In [97]:
results_df_all = results_df[(results_df['Models'] == 'all') & (results_df['Base'] == "dumb_non_spikes")]
results_df_all

Unnamed: 0,Base,Models,Confidence,Accuracy,Precision (0),Recall (0),F1 (0),Precision (1),Recall (1),F1 (1),Prior
24,dumb_non_spikes,all,0.1,0.157658,0.0,0.0,0.0,0.157658,1.0,0.272374,0.16
218,dumb_non_spikes,all,0.2,0.157658,0.0,0.0,0.0,0.157658,1.0,0.272374,0.16
412,dumb_non_spikes,all,0.3,0.157658,0.0,0.0,0.0,0.157658,1.0,0.272374,0.16
606,dumb_non_spikes,all,0.45,0.202703,0.75,0.080214,0.144928,0.148515,0.857143,0.253165,0.16
800,dumb_non_spikes,all,0.4,0.162162,0.555556,0.026738,0.05102,0.14554,0.885714,0.25,0.16
994,dumb_non_spikes,all,0.55,0.427928,0.783019,0.44385,0.566553,0.103448,0.342857,0.15894,0.16
1188,dumb_non_spikes,all,0.5,0.225225,0.777778,0.112299,0.196262,0.148718,0.828571,0.252174,0.16
1382,dumb_non_spikes,all,0.6,0.563063,0.8,0.641711,0.712166,0.069444,0.142857,0.093458,0.16
1576,dumb_non_spikes,all,0.7,0.599099,0.79878,0.700535,0.746439,0.034483,0.057143,0.043011,0.16
1770,dumb_non_spikes,all,0.8,0.725225,0.828125,0.850267,0.83905,0.066667,0.057143,0.061538,0.16
