In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Experiment 1 inputs. Both tables are read with their first CSV column as the
# index; df_exp1 is later filtered on its "Timestamp" column and loss_df_exp1
# holds one loss column per model/aggregator.
df_exp1 = pd.read_csv("../../results/experiment_1/trained_models.csv", index_col=0)
loss_df_exp1 = pd.read_csv("../../results/experiment_1/losses.csv", index_col=0)

In [3]:
# Experiment 2 inputs (test split directory). All three tables are read with
# their first CSV column as the index; info_df_exp2 supplies the `fraud_type`
# column that is used further down as the per-week campaign list.
df_exp2 = pd.read_csv("../../results/experiment_2/test/exp2.csv", index_col=0)
loss_df_exp2 = pd.read_csv("../../results/experiment_2/test/losses.csv", index_col=0)
info_df_exp2 = pd.read_csv("../../results/experiment_2/test/info.csv", index_col=0)

In [4]:
# Latest timestamp considered: 2015-03-01 23:59.
MAX_TS_2014_15 = pd.Timestamp(year=2015, month=3, day=1, hour=23, minute=59)

# 19 weekly boundaries in chronological order, the last one being MAX_TS_2014_15
# itself (offsets run from 18 weeks back down to 0).
ts_weeks = [MAX_TS_2014_15 - pd.Timedelta(weeks=back) for back in range(18, -1, -1)]
# Consecutive boundaries paired up as (start, end) tuples -> 18 one-week windows.
ts_week_start_end = list(zip(ts_weeks[:-1], ts_weeks[1:]))
# Drop the first six windows; the remaining twelve are the ones reported below.
from_week_6 = ts_week_start_end[6:]

In [8]:
# Strategy key -> LaTeX macro used in the generated table rows.
strat_labels = {
    "sf_is": "\\StratSTIS{}",
    "sf_th": "\\StratSTTH{}",
    "st_is": "\\StratMTIS{}",
    "st_th": "\\StratMTTH{}",
    "lt_is": "\\StratLTIS{}",
    "lt_th": "\\StratLTTH{}",
}

# Same macros, reachable both through the "all_"-prefixed campaign names and
# through the bare strategy keys (prefixed entries first, bare entries after).
strat_labels_df = {"all_" + key: macro for key, macro in strat_labels.items()}
strat_labels_df.update(strat_labels)

# Model family key -> LaTeX acronym macro.
model_labels = dict(
    random_forest="\\acs{rf}",
    neural_network="\\acs{ann}",
    support_vector_machine="\\acs{svm}",
    xgboost="\\acs{xgb}",
    logistic_regression="\\acs{lr}",
)


def latex_float(f):
    r"""Render a number as LaTeX scientific notation with a two-decimal mantissa.

    The value is first formatted with ``.2e``. When that text carries an
    exponent marker, the result is ``<mantissa>\times10^{<exponent>}`` with the
    exponent normalised through ``int`` (so ``+04`` becomes ``4``). Formatted
    text without an ``e`` (for example ``inf`` or ``nan``) is returned as is.

    Args:
        f: Value accepted by the ``.2e`` format specification.

    Returns:
        str: LaTeX math-mode fragment (without surrounding ``$``).
    """
    mantissa, marker, exponent = "{0:.2e}".format(f).partition("e")
    if not marker:
        # No exponent part was produced; hand back the formatted text untouched.
        return mantissa
    return r"{0}\times10^{{{1}}}".format(mantissa, int(exponent))
    

def model_strat_to_label(model_name):
    """Turn a ``<model>_<strategy>`` column name into its LaTeX label.

    The name is split on underscores: the last two tokens form the strategy
    key (looked up in ``strat_labels``) and everything before them forms the
    model key (looked up in ``model_labels``).

    Args:
        model_name: Name such as ``"random_forest_sf_is"``.

    Returns:
        str: The model label immediately followed by the strategy label in
        parentheses.

    Raises:
        KeyError: If the model part or the strategy part is not a known key.
    """
    tokens = model_name.split("_")
    model_key = "_".join(tokens[:-2])
    strat_key = "_".join(tokens[-2:])
    return "{}({})".format(model_labels[model_key], strat_labels[strat_key])
    

# Experiment 2: one summary row per weekly window, printed as space-separated
# LaTeX cells (row index, campaign, best model, best / MWU / MEAN / MAJORITY loss).
campaigns_exp2 = info_df_exp2.fraud_type.tolist()
# zip() stops at the shorter input, so surplus windows or campaigns are dropped silently.
campaigns_per_weeks = list(zip(from_week_6, campaigns_exp2))
df_exp2["Timestamp"] = pd.to_datetime(df_exp2.Timestamp)

xs = []
for week_span, campaign in campaigns_per_weeks:
    week_end = week_span[1]
    # Index label of the last row stamped strictly before the end of the window
    # (raises IndexError when no row qualifies).
    x = df_exp2.loc[df_exp2.Timestamp < week_end].index[-1]
    # Column-wise maximum of the sliced loss table, computed once per window
    # instead of once per reported column.
    # NOTE(review): the slice [:x-1] stops two rows short of row x - confirm
    # that this offset is intended.
    max_losses = loss_df_exp2[:x-1].max()
    xs.append((
        x,
        campaign,
        max_losses.idxmin(),   # column with the smallest maximum
        max_losses.min(),
        max_losses["MWU"],
        max_losses["MEAN"],
        max_losses["MAJORITY"],
    ))


for n, campaign, best, best_loss, mwu_loss, mean_loss, maj_loss in xs:
    campaign_label = strat_labels_df[campaign]
    best_label = model_strat_to_label(best)
    loss_cells = [latex_float(loss) for loss in (best_loss, mwu_loss, mean_loss, maj_loss)]
    print("%d %s %s $%s$ $%s$ $%s$ $%s$" % (n, campaign_label, best_label, *loss_cells))

33590 \StratSTIS{} \acs{rf}(\StratSTIS{}) $2.18\times10^{4}$ $8.82\times10^{4}$ $4.09\times10^{6}$ $5.20\times10^{6}$
64156 \StratSTTH{} \acs{ann}(\StratSTTH{}) $1.76\times10^{5}$ $2.83\times10^{5}$ $1.06\times10^{7}$ $1.58\times10^{7}$
80883 \StratMTIS{} \acs{lr}(\StratMTTH{}) $5.15\times10^{5}$ $6.32\times10^{5}$ $1.11\times10^{7}$ $1.63\times10^{7}$
95984 \StratMTIS{} \acs{svm}(\StratMTTH{}) $7.49\times10^{5}$ $8.66\times10^{5}$ $1.16\times10^{7}$ $1.69\times10^{7}$
115730 \StratMTIS{} \acs{lr}(\StratMTTH{}) $9.65\times10^{5}$ $1.09\times10^{6}$ $1.21\times10^{7}$ $1.75\times10^{7}$
138446 \StratMTIS{} \acs{svm}(\StratMTTH{}) $1.24\times10^{6}$ $1.37\times10^{6}$ $1.28\times10^{7}$ $1.83\times10^{7}$
155687 \StratMTIS{} \acs{svm}(\StratMTTH{}) $1.50\times10^{6}$ $1.63\times10^{6}$ $1.33\times10^{7}$ $1.90\times10^{7}$
185625 \StratMTIS{} \acs{svm}(\StratMTTH{}) $1.90\times10^{6}$ $2.03\times10^{6}$ $1.42\times10^{7}$ $2.00\times10^{7}$
218930 \StratMTIS{} \acs{svm}(\StratMTTH{}) $2.

In [9]:
# Experiment 1: the campaign schedule is fixed - each campaign is listed for two
# consecutive weekly windows, in this order.
campaigns_exp1 = [
    campaign
    for campaign in ("st_is", "st_th", "lt_is", "lt_th", "sf_th", "sf_is")
    for _ in range(2)
]

# zip() stops at the shorter input, so surplus windows or campaigns are dropped silently.
campaigns_per_weeks = list(zip(from_week_6, campaigns_exp1))
df_exp1["Timestamp"] = pd.to_datetime(df_exp1.Timestamp)

xs = []
for week_span, campaign in campaigns_per_weeks:
    week_end = week_span[1]
    # Index label of the last row stamped strictly before the end of the window
    # (raises IndexError when no row qualifies).
    x = df_exp1.loc[df_exp1.Timestamp < week_end].index[-1]
    # Column-wise maximum of the sliced loss table, computed once per window
    # instead of once per reported column.
    # NOTE(review): the slice [:x-1] stops two rows short of row x - confirm
    # that this offset is intended.
    max_losses = loss_df_exp1[:x-1].max()
    xs.append((
        x,
        campaign,
        max_losses.idxmin(),   # column with the smallest maximum
        max_losses.min(),
        max_losses["MWU"],
        max_losses["MEAN"],
        max_losses["MAJORITY"],
    ))

# Print one row per window: row index, campaign, best model, then the four losses.
for n, campaign, best, best_loss, mwu_loss, mean_loss, maj_loss in xs:
    campaign_label = strat_labels_df[campaign]
    best_label = model_strat_to_label(best)
    loss_cells = [latex_float(loss) for loss in (best_loss, mwu_loss, mean_loss, maj_loss)]
    print("%d %s %s $%s$ $%s$ $%s$ $%s$" % (n, campaign_label, best_label, *loss_cells))

33670 \StratMTIS{} \acs{ann}(\StratMTIS{}) $4.73\times10^{4}$ $9.24\times10^{4}$ $9.13\times10^{5}$ $1.06\times10^{6}$
64475 \StratMTIS{} \acs{ann}(\StratMTIS{}) $8.67\times10^{4}$ $1.40\times10^{5}$ $1.74\times10^{6}$ $2.08\times10^{6}$
81070 \StratMTTH{} \acs{rf}(\StratMTIS{}) $5.05\times10^{5}$ $5.69\times10^{5}$ $2.36\times10^{6}$ $2.86\times10^{6}$
96037 \StratMTTH{} \acs{xgb}(\StratMTTH{}) $5.75\times10^{5}$ $6.47\times10^{5}$ $2.77\times10^{6}$ $3.41\times10^{6}$
115783 \StratLTIS{} \acs{xgb}(\StratMTTH{}) $7.26\times10^{5}$ $8.00\times10^{5}$ $2.90\times10^{6}$ $3.54\times10^{6}$
138499 \StratLTIS{} \acs{xgb}(\StratMTTH{}) $8.96\times10^{5}$ $9.70\times10^{5}$ $3.05\times10^{6}$ $3.69\times10^{6}$
155585 \StratLTTH{} \acs{xgb}(\StratMTTH{}) $9.77\times10^{5}$ $1.05\times10^{6}$ $3.14\times10^{6}$ $3.76\times10^{6}$
185245 \StratLTTH{} \acs{xgb}(\StratMTTH{}) $1.07\times10^{6}$ $1.15\times10^{6}$ $3.26\times10^{6}$ $3.84\times10^{6}$
218239 \StratSTTH{} \acs{svm}(\StratMTTH{}) $

In [10]:
# Final performance evaluation: one row per (model, campaign) pair with its score.
# The first CSV column is used as the index.
# NOTE(review): the variable name suggests these are F1 scores - confirm against
# whatever writes scores.csv.
f1_df = pd.read_csv("../../results/final_perf_eval/scores.csv", index_col=0)
# Bare last expression so the notebook renders the frame.
f1_df

Unnamed: 0,model,campaign,score
0,logistic_regression_lt_is,all_lt_is,0.382769
1,neural_network_lt_is,all_lt_is,0.797163
2,random_forest_lt_is,all_lt_is,0.665643
3,support_vector_machine_lt_is,all_lt_is,0.449104
4,xgboost_lt_is,all_lt_is,0.625202
...,...,...,...
175,logistic_regression_sf_th,all_st_th,0.144837
176,neural_network_sf_th,all_st_th,0.209973
177,random_forest_sf_th,all_st_th,0.000000
178,support_vector_machine_sf_th,all_st_th,0.234671


In [13]:
# Order rows by model name. Rebinding (rather than inplace=True) keeps the cell
# re-runnable and leaves the frame object displayed by the earlier cell unmodified.
f1_df = f1_df.sort_values(by=["model"])

In [33]:
# Every model family combined with every strategy, grouped by family and with
# the strategies in the fixed sf -> st -> lt / is -> th order; this is the row
# order of the score table printed below.
models = [
    "{}_{}".format(family, strategy)
    for family in (
        "logistic_regression",
        "neural_network",
        "random_forest",
        "support_vector_machine",
        "xgboost",
    )
    for strategy in ("sf_is", "sf_th", "st_is", "st_th", "lt_is", "lt_th")
]

In [34]:
# Column order of the printed table: one score per campaign.
cmpg = ["all_sf_is", "all_sf_th", "all_st_is", "all_st_th", "all_lt_is", "all_lt_th"]
for m in models:
    rows_of_model = f1_df.model == m
    # First matching score for each campaign; a missing (model, campaign) pair
    # raises IndexError.
    per_campaign = [
        f1_df.loc[rows_of_model & (f1_df.campaign == c), "score"].tolist()[0]
        for c in cmpg
    ]
    # One line per model, scores rounded to two decimals and space-separated.
    print(" ".join("{:.2f}".format(score) for score in per_campaign))

0.70 0.46 0.13 0.03 0.00 0.00
0.49 0.40 0.00 0.14 0.00 0.00
0.43 0.21 0.44 0.16 0.28 0.09
0.26 0.19 0.25 0.14 0.19 0.09
0.00 0.02 0.33 0.08 0.38 0.17
0.01 0.03 0.18 0.08 0.20 0.12
0.77 0.32 0.33 0.06 0.00 0.00
0.43 0.33 0.15 0.21 0.00 0.00
0.05 0.10 0.64 0.24 0.00 0.00
0.00 0.10 0.40 0.26 0.00 0.03
0.00 0.02 0.07 0.01 0.80 0.28
0.00 0.00 0.00 0.00 0.28 0.35
0.73 0.39 0.00 0.00 0.00 0.00
0.43 0.33 0.00 0.00 0.00 0.00
0.06 0.06 0.61 0.28 0.00 0.00
0.00 0.00 0.49 0.37 0.00 0.04
0.00 0.00 0.01 0.00 0.67 0.27
0.00 0.00 0.00 0.00 0.14 0.24
0.78 0.48 0.26 0.05 0.00 0.00
0.53 0.44 0.06 0.23 0.00 0.00
0.46 0.22 0.47 0.14 0.33 0.09
0.30 0.22 0.28 0.16 0.17 0.07
0.15 0.01 0.34 0.03 0.45 0.08
0.05 0.04 0.18 0.07 0.20 0.11
0.63 0.33 0.00 0.00 0.00 0.00
0.42 0.32 0.00 0.00 0.00 0.00
0.23 0.10 0.55 0.25 0.00 0.00
0.00 0.00 0.62 0.51 0.00 0.00
0.01 0.00 0.07 0.01 0.63 0.22
0.00 0.00 0.00 0.00 0.17 0.21
