In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Locations of the two CSV inputs for this notebook.
file_name = "../../results/experiment_2/test/exp2.csv"
loss_file = "../../results/experiment_2/test/losses.csv"

# Both files are read with their first column restored as the row index.
df, loss_df = (pd.read_csv(csv_path, index_col=0) for csv_path in (file_name, loss_file))

In [None]:
# Column names of the loss table, in file order.
models = loss_df.columns.tolist()
models

In [None]:
# Loss table without the row(s) whose label equals the first index entry.
# NOTE(review): in top-to-bottom execution `losses` is rebound to a dict by a
# later cell before anything reads this value — confirm this cell is still needed.
losses = loss_df.drop(index=loss_df.index[0])

In [None]:
# Base-model columns: every column of the loss table except the last three.
# NOTE(review): presumably the trailing three columns are the aggregate
# strategies that are plotted separately (MWU, MEAN, MAJORITY) — confirm the
# column order of losses.csv.
N_TRAILING_NON_BASE = 3

# Slicing builds a new list, so `models` itself is left untouched.
base_models = models[:-N_TRAILING_NON_BASE]
base_models

In [None]:
# Upper timestamp bound of the analysed window (2015-03-01 23:59).
MAX_TS_2014_15 = pd.Timestamp(year=2015, month=3, day=1, hour=23, minute=59)

# 19 weekly boundaries ending at MAX_TS_2014_15, oldest first. Consecutive
# boundaries form 18 (start, end) intervals; the first six are skipped.
ts_weeks = [MAX_TS_2014_15 - pd.Timedelta(weeks=weeks_back) for weeks_back in range(18, -1, -1)]
ts_week_start_end = list(zip(ts_weeks[:-1], ts_weeks[1:]))
from_week_6 = ts_week_start_end[6:]

# One campaign tag per remaining interval; every tag spans two consecutive intervals.
campaigns = [
    tag
    for tag in ("st_is", "st_th", "lt_is", "lt_th", "sf_th", "sf_is")
    for _repeat in range(2)
]

campaigns_per_weeks = list(zip(from_week_6, campaigns))

# Timestamps are parsed once so they can be compared against the interval ends.
df["Timestamp"] = pd.to_datetime(df.Timestamp)

# For every interval collect: the index label of the last row stamped before the
# interval end, the campaign tag, and the loss column whose maximum over
# loss_df[:label] is the smallest.
xs = []
for (_week_start, week_end), campaign in campaigns_per_weeks:
    last_idx = df.loc[df.Timestamp < week_end].index[-1]
    best_model = loss_df[:last_idx].max().idxmin()
    print(best_model)
    xs.append((last_idx, campaign, best_model))
xs


In [None]:
# Largest loss of every column, listed from the smallest maximum to the largest.
loss_df.max(axis=0).sort_values(ascending=True)

In [None]:
# Largest value found anywhere in the loss table; the plotting cells use it to
# set the upper y-limit.
max_loss = loss_df.max(axis=0).max()
max_loss

In [None]:
# Column name -> list of values, so the plotting cells can slice each loss
# curve with plain list indexing.
losses = loss_df.to_dict(orient="list")

# Show only the number of points per column: echoing the mapping itself would
# dump every single loss value into the notebook output.
{name: len(values) for name, values in losses.items()}

In [None]:
# Five colormaps, each sampled at six evenly spaced positions between 0.3 and
# 1.0, flattened into a single list of 30 RGBA colours grouped by colormap.
color_names = ['Purples', 'Blues', "Reds", 'Greens', 'Oranges']
cmaps = [plt.get_cmap(name) for name in color_names]

shade_positions = np.linspace(0.3, 1.0, 6)
colors = [rgba for palette in cmaps for rgba in palette(shade_positions)]

In [None]:
# Envelope plot: the band between the point-wise minimum and maximum loss of the
# base models, with the three aggregate curves drawn on top. Star markers sit at
# the indices stored in `xs`.
fig, ax = plt.subplots(figsize=(10, 4))

# One row per base model, one column per sample.
loss_matrix = np.array([losses[m] for m in base_models])
min_loss_per_point = np.min(loss_matrix, axis=0)
max_loss_per_point = np.max(loss_matrix, axis=0)

# Shared x positions for the band and all curves.
x = np.arange(0, loss_df.shape[0], 1, dtype=int)
ax.fill_between(x, min_loss_per_point, max_loss_per_point, color="red", alpha=0.3)

markpoints = [i for i, c, b in xs]
# (loss column, legend text, line colour) for each aggregate curve.
for column, legend_label, line_color in (
    ("MWU", "MWU", "blue"),
    ("MEAN", "Mean", "red"),
    ("MAJORITY", "Majority voting", "green"),
):
    ax.plot(x, losses[column], marker="*", markersize=10, markevery=markpoints,
            label=legend_label, color=line_color)

ax.legend(loc="upper left")

# Ticks are placed at 0 and at every marker index and numbered 0..len(xs).
ax.set_xticks([0] + markpoints, labels=np.arange(len(xs) + 1))
ax.set_xlim(0, df.shape[0])
ax.set_ylim(0, max_loss * 1.1)
ax.set_ylabel("$l_T$")
ax.set_xlabel("Week")
ax.grid()
# NOTE(review): file_name still ends in ".csv", so this writes
# ".../exp2.csvloss_area.pdf" — confirm that this is the intended output name.
fig.savefig(file_name+"loss_area.pdf", bbox_inches='tight')


In [None]:
# Short display names for the 30 base-model columns. Keys have the form
# "<family>_<middle>_<suffix>"; values read "<FAMILY>(<MIDDLE>,<SUFFIX>)".
# Note that the key token "sf" is displayed as "ST" and "st" as "MT".
_FAMILY_TAGS = {
    "logistic_regression": "LR",
    "neural_network": "NN",
    "random_forest": "RF",
    "support_vector_machine": "SVM",
    "xgboost": "XGB",
}
_MIDDLE_TAGS = {"lt": "LT", "sf": "ST", "st": "MT"}
_SUFFIX_TAGS = {"is": "IS", "th": "TH"}

# Insertion order follows family, then middle token, then suffix.
labels = {
    f"{family}_{middle}_{suffix}": f"{family_tag}({middle_tag},{suffix_tag})"
    for family, family_tag in _FAMILY_TAGS.items()
    for middle, middle_tag in _MIDDLE_TAGS.items()
    for suffix, suffix_tag in _SUFFIX_TAGS.items()
}

In [None]:
# Segmented view of the loss curves: each stretch between two consecutive
# marker indices from `xs` is drawn on its own, for every base model and for the
# three aggregate curves.
fig, ax = plt.subplots(figsize=(10, 4))

# Base models are coloured by their position in alphabetical order.
ordered_base_models = sorted(base_models)
# (loss column, legend text, line colour) for each aggregate curve.
aggregate_styles = (
    ("MWU", "MWU", "blue"),
    ("MEAN", "Mean", "red"),
    ("MAJORITY", "Majority Voting", "green"),
)

seg_start = 0
for seg_end, _campaign, _best in xs:
    seg_x = np.arange(seg_start, seg_end, 1, dtype=int)
    window = slice(seg_start, seg_end)

    for color_idx, model_name in enumerate(ordered_base_models):
        ax.plot(seg_x, losses[model_name][window], label=labels[model_name], color=colors[color_idx])

    # Aggregate curves get a star on the first and the last point of the segment.
    last_pos = len(seg_x) - 1
    for column, legend_label, line_color in aggregate_styles:
        ax.plot(seg_x, losses[column][window], marker="*", markersize=10,
                markevery=[0, last_pos], label=legend_label, color=line_color)

    # Handles and labels are captured while only segments starting at index 0
    # have been drawn; they are reused for the stand-alone legend figure.
    if seg_start == 0:
        legend_fig = ax.get_legend_handles_labels()

    seg_start = seg_end

ax.legend([])

# Ticks are placed at 0 and at every segment end and numbered 0..len(xs).
tick_positions = [0] + [end_idx for end_idx, _tag, _model in xs]
ax.set_xticks(tick_positions, labels=np.arange(len(xs)+1))
ax.set_xlim(-df.shape[0]*0.05, df.shape[0]*1.05)
ax.set_ylim(0, max_loss*1.1)
ax.set_ylabel("$L_T$")
ax.set_xlabel("Week")

ax.grid()
# NOTE(review): file_name still ends in ".csv", so this writes
# ".../exp2.csvloss_alt.pdf" — confirm that this is the intended output name.
fig.savefig(file_name+"loss_alt.pdf", bbox_inches='tight')

In [None]:
import matplotlib as mlp

# Invisible placeholder patch used as the handle of blank legend entries.
r = mlp.patches.Rectangle((0,0), 1, 1, fill=False, edgecolor='none',
                                 visible=False)

# `legend_fig` is the (handles, labels) pair captured by the plotting cell; both
# lists are edited in place so the legend cell sees the padded entries.
legend_handles, legend_texts = legend_fig

# Remove blank padding left by an earlier run of this cell; without this every
# re-run would append three more empty entries to the legend.
while legend_texts and legend_texts[-1] == "":
    legend_handles.pop()
    legend_texts.pop()

# Pad the legend with three blank entries.
legend_handles.extend([r] * 3)
legend_texts.extend([""] * 3)

In [None]:
# Stand-alone figure that holds nothing but the legend built from `legend_fig`.
figl, axl = plt.subplots(figsize=(10, 2))
axl.set_axis_off()

legend_handles, legend_texts = legend_fig
axl.legend(legend_handles, legend_texts, loc="center", bbox_to_anchor=(0.5, 0.5), ncol=3)
# NOTE(review): file_name still ends in ".csv", so this writes
# ".../exp2.csvlegend.pdf" — confirm that this is the intended output name.
figl.savefig(file_name+"legend.pdf", bbox_inches='tight')