In [99]:
import os
import pandas as pd
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from IPython.display import display

In [100]:
COMMODITY = 'copper'
METRIC = 'Recall'
ALGO = 'detection_correction'
# ALGO = 'correction'


EDCR_RESULTS_PATH = f'{COMMODITY}/test/predictions/test_{METRIC}_{ALGO}'
OG_RESULTS_PATH = f'{COMMODITY}/test/results_test.csv'

In [101]:
def extract_info_from_filename(filename):
    parts = filename.split("Rule")
    epsilon = parts[0].split()[-1]
    models_base = parts[1].split("for")
    models = models_base[0].strip()
    base = models_base[1].split("_predictions")[0].strip()
    return base, models, epsilon

In [102]:
def calculate_metrics(df):
    y_true = df['True']
    y_pred = df['Predicted']
    accuracy = accuracy_score(y_true, y_pred)
    precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred, average=None, labels=[0, 1])
    prior = pd.read_csv(OG_RESULTS_PATH, nrows=1)['Prior'].iloc[0]
    return accuracy, precision, recall, f1, prior

In [103]:
def process_files(path):
    all_results = []  
    for filename in os.listdir(path):
        if filename.startswith('Confident') and filename.endswith("_predictions.csv"):
            base, models, confidence = extract_info_from_filename(filename)
            df = pd.read_csv(os.path.join(path, filename))
            accuracy, precision, recall, f1, prior = calculate_metrics(df)
            row = {
                'Base': base,
                'Models': models,
                'Confidence': confidence,
                'Accuracy': accuracy,
                'Precision (0)': precision[0],
                'Recall (0)': recall[0],
                'F1 (0)': f1[0],
                'Precision (1)': precision[1],
                'Recall (1)': recall[1],
                'F1 (1)': f1[1],
                'Prior': prior  
            }
            all_results.append(row)
    
    # Convert the list of results to a DataFrame
    return pd.DataFrame(all_results)

In [104]:
results_df = process_files(EDCR_RESULTS_PATH)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

In [105]:
# results_df_one = results_df[results_df['Models'] != 'all']
# # pd.set_option('display.max_rows', 200)
# results_df_one

In [106]:
og_df = pd.read_csv(OG_RESULTS_PATH)
og_df = og_df.drop('Unnamed: 0', axis=1)
og_df = og_df[(og_df['Name'] != 'EDCR')]
pd.set_option('display.max_rows', 400)
og_df

Unnamed: 0,Name,Params,Accuracy,Precision (0),Recall (0),F1 (0),Precision (1),Recall (1),F1 (1),Prior
0,LSTM,256 layers,0.220721,0.769231,0.106952,0.187793,0.147959,0.828571,0.251082,0.16
1,LSTM,128 layers,0.283784,0.75,0.224599,0.345679,0.126506,0.6,0.208955,0.16
2,LSTM,64 layers,0.801802,0.862944,0.909091,0.885417,0.32,0.228571,0.266667,0.16
3,LSTM,32 layers,0.468468,0.785124,0.508021,0.616883,0.089109,0.257143,0.132353,0.16
4,CNN with Attention,"32 filters, kernel size 7",0.545045,0.84127,0.566845,0.677316,0.15625,0.428571,0.229008,0.16
5,CNN with Attention,"32 filters, kernel size 5",0.599099,0.835616,0.652406,0.732733,0.144737,0.314286,0.198198,0.16
6,CNN with Attention,"32 filters, kernel size 3",0.472973,0.8125,0.486631,0.608696,0.127273,0.4,0.193103,0.16
7,CNN with Attention,"64 filters, kernel size 7",0.756757,0.84456,0.871658,0.857895,0.172414,0.142857,0.15625,0.16
8,CNN with Attention,"64 filters, kernel size 5",0.472973,0.8125,0.486631,0.608696,0.127273,0.4,0.193103,0.16
9,CNN with Attention,"64 filters, kernel size 3",0.572072,0.833333,0.614973,0.707692,0.142857,0.342857,0.201681,0.16


### Base Model

Generally for lower confidence (0.1-0.3), the rule is basically all '1'. It's always predicting spikes, so that's why lower confidence results look similar to spikes-dumb-model

Conversely, for higher confidence (0.9-0.95), the rule is becoming more like all '0'. So the higher we go, the less spikes we're predicting, hence making it look more like non-spikes-dumb-model

In [107]:
MODEL_NAME_OF_INTEREST = 'CNN'
MODEL_PARAM_OF_INTEREST = '32_5'

results_df_all = results_df[(results_df['Base'] == f'{MODEL_NAME_OF_INTEREST}_{MODEL_PARAM_OF_INTEREST}') & (results_df['Models'] == 'all') & (results_df['Base'] != "dumb_spikes") & (results_df['Base'] != "dumb_non_spikes")]
results_df_all

Unnamed: 0,Base,Models,Confidence,Accuracy,Precision (0),Recall (0),F1 (0),Precision (1),Recall (1),F1 (1),Prior
19,CNN_32_5,all,0.1,0.157658,0.0,0.0,0.0,0.157658,1.0,0.272374,0.16
213,CNN_32_5,all,0.2,0.157658,0.0,0.0,0.0,0.157658,1.0,0.272374,0.16
407,CNN_32_5,all,0.3,0.162162,1.0,0.005348,0.010638,0.158371,1.0,0.273438,0.16
601,CNN_32_5,all,0.45,0.184685,0.6875,0.058824,0.108374,0.145631,0.857143,0.248963,0.16
795,CNN_32_5,all,0.4,0.171171,0.636364,0.037433,0.070707,0.146919,0.885714,0.252033,0.16
989,CNN_32_5,all,0.55,0.351351,0.765432,0.331551,0.462687,0.113475,0.457143,0.181818,0.16
1183,CNN_32_5,all,0.5,0.202703,0.727273,0.085561,0.15311,0.145,0.828571,0.246809,0.16
1377,CNN_32_5,all,0.6,0.522523,0.818898,0.55615,0.66242,0.126316,0.342857,0.184615,0.16
1571,CNN_32_5,all,0.7,0.806306,0.86,0.919786,0.888889,0.318182,0.2,0.245614,0.16
1765,CNN_32_5,all,0.8,0.842342,0.851852,0.983957,0.913151,0.5,0.085714,0.146341,0.16


### Dumb Model (Only Predicts Non-Spikes)

In [108]:
results_df_all = results_df[(results_df['Models'] == 'all') & (results_df['Base'] == "dumb_non_spikes")]
results_df_all

Unnamed: 0,Base,Models,Confidence,Accuracy,Precision (0),Recall (0),F1 (0),Precision (1),Recall (1),F1 (1),Prior
24,dumb_non_spikes,all,0.1,0.157658,0.0,0.0,0.0,0.157658,1.0,0.272374,0.16
218,dumb_non_spikes,all,0.2,0.157658,0.0,0.0,0.0,0.157658,1.0,0.272374,0.16
412,dumb_non_spikes,all,0.3,0.166667,1.0,0.010695,0.021164,0.159091,1.0,0.27451,0.16
606,dumb_non_spikes,all,0.45,0.184685,0.6875,0.058824,0.108374,0.145631,0.857143,0.248963,0.16
800,dumb_non_spikes,all,0.4,0.175676,0.666667,0.042781,0.080402,0.147619,0.885714,0.253061,0.16
994,dumb_non_spikes,all,0.55,0.351351,0.765432,0.331551,0.462687,0.113475,0.457143,0.181818,0.16
1188,dumb_non_spikes,all,0.5,0.202703,0.727273,0.085561,0.15311,0.145,0.828571,0.246809,0.16
1382,dumb_non_spikes,all,0.6,0.522523,0.818898,0.55615,0.66242,0.126316,0.342857,0.184615,0.16
1576,dumb_non_spikes,all,0.7,0.806306,0.86,0.919786,0.888889,0.318182,0.2,0.245614,0.16
1770,dumb_non_spikes,all,0.8,0.842342,0.851852,0.983957,0.913151,0.5,0.085714,0.146341,0.16
