# Fine-Tuning Model M1 in Phase II

In [1]:
import arrow
import learn2learn as l2l
import numpy as np
import os
import pickle
import torch
from torch.utils.data import TensorDataset
from sklearn.model_selection import train_test_split
import pandas as pd
import matplotlib.pyplot as plt
import joblib

from utils.evalUtils import print_confusion_matrix

from sklearn.preprocessing import MinMaxScaler
from models.SimpleAutoEncoder import SimpleAutoEncoder

In [2]:
# Execute the shared configuration script; -i runs it in this notebook's namespace.
# NOTE(review): data_path, exp_data_path and exp_models_path are used further down but are
# never assigned in this notebook - presumably setConfigs.py defines them; confirm.
%run -i ./scripts/setConfigs.py

Set configs..


## Prepare the Data

#### Load data

In [3]:
# Training data (file name: simulation years 2020-2021), stored under the HDF5 key 'df'.
# In this notebook it is only used to fit the scaler.
data_fn = os.path.join(
    data_path,
    'simulation_data_y_2020_2021_reduced.h5',
)
df_data_train = pd.read_hdf(data_fn, 'df')
print('Shape of X_train data: {}'.format(df_data_train.shape))

Shape of X_train data: (105216, 17)


In [4]:
# Anomalous evaluation data (file name: year 2022); still contains the 'label' column here.
anomalous_data_fn = os.path.join(
    data_path,
    'anomalous_data_y_2022_reduced.h5',
)
df_x_test = pd.read_hdf(anomalous_data_fn, 'df')
print('Shape of X_test: {}'.format(df_x_test.shape))

Shape of X_test: (35040, 18)


In [5]:
# Drifted data (file name: year 2023); still contains the 'drift_labels' column here.
drifted_data_fn = os.path.join(
    data_path,
    'drifted_data_y_2023_reduced_more_cos_phi.h5',
)
df_x_drifted = pd.read_hdf(drifted_data_fn, 'df')
print('Shape of X_drifted: {}'.format(df_x_drifted.shape))

Shape of X_drifted: (35040, 18)


In [6]:
# Drifted data with anomalies (file name: year 2023); still contains both the
# 'anomaly_labels' and the 'drift_labels' column here.
anomalous_drifted_data_fn = os.path.join(
    data_path,
    'anomalous_drifted_data_y_2023_reduced_more_cos_phi.h5',
)
df_x_anormal_drifted = pd.read_hdf(anomalous_drifted_data_fn, 'df')
print('Shape of X_anomal,drifted : {}'.format(df_x_anormal_drifted.shape))

Shape of X_anomal,drifted : (35040, 19)


#### Split labels

In [7]:
# Binarize the anomaly labels (any value > 0 becomes 1, everything else 0) into a plain
# Python list, then remove the label column from the feature frame in place.
y_anormal = (df_x_test['label'] > 0).astype(int).tolist()
df_x_test.drop(columns='label', inplace=True)

In [8]:
# Keep the drift labels as their own Series, then remove the column from the feature
# frame in place.
s_x_drifted_drift_labels = df_x_drifted['drift_labels']
df_x_drifted.drop(columns=['drift_labels'], inplace=True)
print('Shape of X_drifted : {}'.format(df_x_drifted.shape))

Shape of X_drifted : (35040, 17)


In [9]:
# Binarize the anomaly labels (any value > 0 becomes 1, everything else 0), then remove
# both label columns from the feature frame in place.
y_anormal_drifted = (df_x_anormal_drifted['anomaly_labels'] > 0).astype(int).tolist()
df_x_anormal_drifted.drop(columns=['anomaly_labels', 'drift_labels'], inplace=True)
print('Shape of X_anomal,drifted : {}'.format(df_x_anormal_drifted.shape))

Shape of X_anomal,drifted : (35040, 17)


#### Scale data

In [10]:
print('Scale data..')
# Fit on the bare array (not the DataFrame): every transform() below also receives a bare
# array, and fitting on a DataFrame makes newer scikit-learn versions warn about missing
# feature names on each of those calls. The learned min/max values are identical.
scaler_train = MinMaxScaler((-1, 1))
scaled_train = scaler_train.fit_transform(df_data_train.to_numpy())

# All evaluation / fine-tuning sets are scaled with the statistics of the training data.
# NOTE(review): arrays are matched to the scaler purely by column position - this assumes
# every frame has the same columns in the same order as df_data_train; confirm.
x_anormal = scaler_train.transform(df_x_test.to_numpy())
x_anormal_drifted = scaler_train.transform(df_x_anormal_drifted.to_numpy())
x_drifted = scaler_train.transform(df_x_drifted.to_numpy())

Scale data..


#### Build PyTorch Objects

In [11]:
# Convert the scaled arrays and the drift labels to float32 tensors.
x_anormal_torch_tensor = torch.from_numpy(x_anormal).float()
x_anormal_drifted_torch_tensor = torch.from_numpy(x_anormal_drifted).float()
x_drifted_torch_tensor = torch.from_numpy(x_drifted).float()

drift_label_values = s_x_drifted_drift_labels.to_numpy()
y_drifted_torch_tensor = torch.from_numpy(drift_label_values).float()

In [12]:
# Bundle the drifted samples with their drift labels into one dataset; it is handed to
# FineTuneExperiment below as fine_tune_data_x_drifted.
drifted_torch_dataset = TensorDataset(x_drifted_torch_tensor,y_drifted_torch_tensor)

In [16]:
# NOTE(review): this cell is placed above the cell that reads df_top_3_best_fn from CSV, so
# it raises NameError on "Restart & Run All". The same preview is shown again further down,
# after the models are loaded - consider deleting this cell.
df_top_3_best_fn.head()

Unnamed: 0,model_fn,logreg_fn,Precision_x_ano_drifted
0,/home/torge/dev/masterthesis_code/02_Experimen...,/home/torge/dev/masterthesis_code/02_Experimen...,40.733453
1,/home/torge/dev/masterthesis_code/02_Experimen...,/home/torge/dev/masterthesis_code/02_Experimen...,41.290094
2,/home/torge/dev/masterthesis_code/02_Experimen...,/home/torge/dev/masterthesis_code/02_Experimen...,41.425035


#### Read the 3 best and 3 worst models from Phase I

In [13]:
# Table with the file paths of the three best Phase-I models (semicolon-separated CSV).
# NOTE(review): this path is built from os.getcwd(), while the experiment plan below is read
# relative to exp_data_path - confirm both point at the same 'exp_data' directory.
fn = '20200410_top_3_best_models_by_precision.csv'
selected_models_dir = os.path.join(os.getcwd(), 'exp_data', 'selected_models_fn')
best_fn = os.path.join(selected_models_dir, fn)
df_top_3_best_fn = pd.read_csv(best_fn, delimiter=';')

In [14]:
# Rebuild the three "best" autoencoders from their saved state dicts (rows 0-2 of the
# table) and switch each one to training mode.
best_models = []
for row in range(3):
    net = SimpleAutoEncoder(num_inputs=17, val_lambda=42)
    net.load_state_dict(torch.load(df_top_3_best_fn['model_fn'][row]))
    best_models.append(net.train())  # train() returns the module itself

model_best_4, model_best_5, model_best_6 = best_models

# Last expression: show the architecture of the last loaded model as the cell output.
model_best_6

SimpleAutoEncoder(
  (encoder): Sequential(
    (0): Linear(in_features=17, out_features=12, bias=True)
    (1): ReLU(inplace=True)
    (2): Linear(in_features=12, out_features=8, bias=True)
    (3): Tanh()
  )
  (decoder): Sequential(
    (0): Linear(in_features=8, out_features=12, bias=True)
    (1): ReLU(inplace=True)
    (2): Linear(in_features=12, out_features=17, bias=True)
    (3): Tanh()
  )
)

In [15]:
# Preview the table of selected "best" models: model path, logreg path and the
# Precision_x_ano_drifted value of each.
df_top_3_best_fn.head()

Unnamed: 0,model_fn,logreg_fn,Precision_x_ano_drifted
0,/home/torge/dev/masterthesis_code/02_Experimen...,/home/torge/dev/masterthesis_code/02_Experimen...,40.733453
1,/home/torge/dev/masterthesis_code/02_Experimen...,/home/torge/dev/masterthesis_code/02_Experimen...,41.290094
2,/home/torge/dev/masterthesis_code/02_Experimen...,/home/torge/dev/masterthesis_code/02_Experimen...,41.425035


In [16]:
# Load the classifier stored next to each of the three "best" autoencoders.
# joblib.load unpickles the file, so only load files from a trusted source.
logreg_4, logreg_5, logreg_6 = [
    joblib.load(df_top_3_best_fn['logreg_fn'][row]) for row in range(3)
]

# Keep the scratch name pointing at the last loaded path, as the step-by-step version did.
model_fn = df_top_3_best_fn['logreg_fn'][2]

In [18]:
# Table with the file paths of the three worst Phase-I models (semicolon-separated CSV).
fn = '20200410_top_3_worst_models_by_precision.csv'
selected_models_dir = os.path.join(os.getcwd(), 'exp_data', 'selected_models_fn')
worst_fn = os.path.join(selected_models_dir, fn)
df_top_3_worst_fn = pd.read_csv(worst_fn, delimiter=';')

In [19]:
# Preview the table of selected "worst" models.
# NOTE(review): the Precision_x_ano_drifted values shown for this "worst" table (~42.2) are
# higher than those shown for the "best" table (~40.7-41.4) - confirm the two files are not
# swapped and in which direction the ranking was meant to go.
df_top_3_worst_fn.head()

Unnamed: 0,model_fn,logreg_fn,Precision_x_ano_drifted
0,/home/torge/dev/masterthesis_code/02_Experimen...,/home/torge/dev/masterthesis_code/02_Experimen...,42.272173
1,/home/torge/dev/masterthesis_code/02_Experimen...,/home/torge/dev/masterthesis_code/02_Experimen...,42.24198
2,/home/torge/dev/masterthesis_code/02_Experimen...,/home/torge/dev/masterthesis_code/02_Experimen...,42.231909


In [18]:
# Rebuild the three "worst" autoencoders from their saved state dicts (rows 0-2 of the
# table) and switch each one to training mode.
worst_models = []
for row in range(3):
    net = SimpleAutoEncoder(num_inputs=17, val_lambda=42)
    net.load_state_dict(torch.load(df_top_3_worst_fn['model_fn'][row]))
    worst_models.append(net.train())  # train() returns the module itself

model_worst_1, model_worst_2, model_worst_3 = worst_models

# Last expression: show the architecture of the last loaded model as the cell output.
model_worst_3

SimpleAutoEncoder(
  (encoder): Sequential(
    (0): Linear(in_features=17, out_features=12, bias=True)
    (1): ReLU(inplace=True)
    (2): Linear(in_features=12, out_features=8, bias=True)
    (3): Tanh()
  )
  (decoder): Sequential(
    (0): Linear(in_features=8, out_features=12, bias=True)
    (1): ReLU(inplace=True)
    (2): Linear(in_features=12, out_features=17, bias=True)
    (3): Tanh()
  )
)

In [19]:
# Load the classifier stored next to each of the three "worst" autoencoders.
# joblib.load unpickles the file, so only load files from a trusted source.
logreg_1, logreg_2, logreg_3 = [
    joblib.load(df_top_3_worst_fn['logreg_fn'][row]) for row in range(3)
]

# Keep the scratch name pointing at the last loaded path, as the step-by-step version did.
model_fn = df_top_3_worst_fn['logreg_fn'][2]

In [20]:
# Maps the model id used in the experiment plan to
#   (autoencoder, classifier, autoencoder file path, classifier file path).
# Ids 1-3 are the "worst" models, ids 4-6 the "best" ones; each entry names the source table
# and the row its file paths come from.
_id_layout = [
    (model_worst_1, logreg_1, df_top_3_worst_fn, 0),
    (model_worst_2, logreg_2, df_top_3_worst_fn, 1),
    (model_worst_3, logreg_3, df_top_3_worst_fn, 2),
    (model_best_4, logreg_4, df_top_3_best_fn, 0),
    (model_best_5, logreg_5, df_top_3_best_fn, 1),
    (model_best_6, logreg_6, df_top_3_best_fn, 2),
]
model_mapping_dict = {
    model_id: (net, clf, frame['model_fn'][row], frame['logreg_fn'][row])
    for model_id, (net, clf, frame, row) in enumerate(_id_layout, start=1)
}

In [21]:
# Experiment plan for Phase II ("teilfaktorieller Versuchsplan" is German for fractional
# factorial design): a semicolon-separated CSV.
fn = '20200410_teilfaktorieller_versuchsplan_phase2_m1.csv'
plan_dir = os.path.join(exp_data_path, 'versuchsplan')
full_fn = os.path.join(plan_dir, fn)
versuchsplan = pd.read_csv(full_fn, delimiter=';')

In [22]:
# Preview the plan: the fine-tuning loop below reads k_train, optimizer, num_iter, lr,
# filter_cde and the model id from each row.
versuchsplan.head()

Unnamed: 0,k_train,optimizer,num_iter,lr,filter_cde,model
0,20,Adam,128,0.001,"(1, 3)",6
1,5,Adam,64,0.1,"(1, 3)",5
2,20,Adam,1,0.1,"(1, 2)",3
3,20,Adam,1,0.1,"(1,)",3
4,10,Adam,64,0.01,"(1, 3)",3


In [23]:
# Number of rows in the plan, i.e. how many fine-tuning runs the loop below will start.
len(versuchsplan)

226

## Start Fine-Tuning

In [24]:
from Experiments.FineTuneExperiment import FineTuneExperiment

In [25]:
def _build_filter_list(filter_cde_string):
    l = []

    if '1' in filter_cde_string:
        l.append(1)
    if '2' in filter_cde_string:
        l.append(2)
    if '3' in filter_cde_string:
        l.append(3)
        
    return l

In [26]:
# Show the current local time right before the fine-tuning loop is started.
arrow.now()

<Arrow [2020-04-10T11:59:17.687045+02:00]>

In [27]:
# Run one fine-tuning experiment per row of the experiment plan and log its duration.
for i, vp in versuchsplan.iterrows():
    start_ts = arrow.now()

    # Settings of this run, taken from the plan row.
    optimizer_name = vp['optimizer']
    k = vp['k_train']
    num_iter = vp['num_iter']
    lr = vp['lr']
    filter_cde = _build_filter_list(vp['filter_cde'])
    name_model = vp['model']

    # One lookup for all four entries:
    # (autoencoder, classifier, autoencoder file path, classifier file path).
    # NOTE(review): the same in-memory model object is passed to every run that uses this
    # model id. If FineTuneExperiment updates it in place instead of reloading it from
    # pretrained_model_fn, later runs start from already fine-tuned weights - confirm.
    model, logreg, pretrained_model_fn, logreg_fn = model_mapping_dict[name_model]

    # File names must not contain the decimal point of the learning rate.
    string_lr = str(lr).replace('.', '_')

    # Take the date stamp once so both file names agree even if the run starts right
    # before midnight.
    date_stamp = start_ts.format('YYYYMMDD')

    fine_tuned_model_fn = '{}_FINE_TUNED_MODEL__K_{}_OPTIMIZER_{}_NUM_ITER_{}_LR_{}_FILTER_CDE_{}_MODEL_{}.pt'.format(
        date_stamp, k, optimizer_name, num_iter, string_lr, filter_cde, name_model)
    fine_tuned_model_fn = os.path.join(exp_models_path, 'fine_tuned_models', fine_tuned_model_fn)

    exp_name_fn = '{}_EXPERIMENT_FINE_TUNED_MODEL__K_{}_OPTIMIZER_{}_NUM_ITER_{}_LR_{}_FILTER_CDE_{}_MODEL_{}.csv'.format(
        date_stamp, k, optimizer_name, num_iter, string_lr, filter_cde, name_model)
    exp_name_fn = os.path.join(exp_data_path, 'experiment', 'fine_tuning', exp_name_fn)

    ft_exp = FineTuneExperiment(
        model=model,
        fine_tune_data_x_drifted=drifted_torch_dataset,
        eval_data_x_test=x_anormal_torch_tensor,
        eval_label_x_test=y_anormal,
        eval_data_x_drifted_ano=x_anormal_drifted_torch_tensor,
        eval_label_x_drifted_ano=y_anormal_drifted,
        k=k,
        fine_tune_iterations=num_iter,
        optimizer_name=optimizer_name,
        fine_tune_classes=filter_cde,
        classifier=logreg,
        lr=lr,
        name_pretrained_model=name_model,
        fine_tuned_model_fn=fine_tuned_model_fn,
        pretrained_model_fn=pretrained_model_fn,
        logreg_fn=logreg_fn,
        exp_name_fn=exp_name_fn,
    )

    ft_exp.run()
    end_ts = arrow.now()
    duration = end_ts - start_ts

    # 'HH' is the 24-hour token; 'hh' would print a 12-hour clock without AM/PM, which
    # makes morning and afternoon runs indistinguishable in the log.
    print('Current Iteration: {} of {}, Optimizer: {}, K: {}, LR: {}, Filter CDE: {}, Model Name: {}, Started: {}, Ended: {}, Duration: {}'.format(
        i+1, len(versuchsplan), optimizer_name, k, lr, filter_cde, name_model,
        start_ts.format('HH:mm:ss'), end_ts.format('HH:mm:ss'), duration))

Current Iteration: 1 of 226, Optimizer: Adam, K: 20, LR: 0.001, Filter CDE: [1, 3], Model Name: 6, Started: 10.04 11:17, Ended: 10.04 12:04, Duration: 0:00:47.194977
Current Iteration: 2 of 226, Optimizer: Adam, K: 5, LR: 0.1, Filter CDE: [1, 3], Model Name: 5, Started: 10.04 12:04, Ended: 10.04 12:41, Duration: 0:00:36.644507
Current Iteration: 3 of 226, Optimizer: Adam, K: 20, LR: 0.1, Filter CDE: [1, 2], Model Name: 3, Started: 10.04 12:41, Ended: 10.04 12:25, Duration: 0:00:44.200702
Current Iteration: 4 of 226, Optimizer: Adam, K: 20, LR: 0.1, Filter CDE: [1], Model Name: 3, Started: 10.04 12:25, Ended: 10.04 12:12, Duration: 0:00:46.976701
Current Iteration: 5 of 226, Optimizer: Adam, K: 10, LR: 0.01, Filter CDE: [1, 3], Model Name: 3, Started: 10.04 12:12, Ended: 10.04 12:50, Duration: 0:00:37.352557
Current Iteration: 6 of 226, Optimizer: SGD, K: 10, LR: 0.1, Filter CDE: [2, 3], Model Name: 2, Started: 10.04 12:50, Ended: 10.04 12:36, Duration: 0:00:46.496620
Current Iteration:

In [28]:
# Load the result file of one experiment only: exp_name_fn still holds the path assigned in
# the last iteration of the fine-tuning loop that ran, so this cell depends on that loop
# having been executed in the same kernel session.
exp = pd.read_csv(exp_name_fn, sep=';')

In [29]:
# Inspect the loaded experiment results: run settings, confusion-matrix counts and the
# derived metrics for x_test and the drifted anomalous set.
exp.head()

Unnamed: 0,optimizer,fine_tune_classes,name_pretrained_model,k,fine_tune_iterations,lr,model_fn,pretrained_model_fn,logreg_fn,TP_x_test,TN_x_test,FP_x_test,FN_x_test,TP_x_drifted_ano,TN_x_drifted_ano,FP_x_drifted_ano,FN_x_drifted_ano,Accuracy_x_test,Precision_x_test,Specifity_x_test,Sensitivity_x_test,Accuracy_x_drifted_ano,Precision_x_drifted_ano,Specifity_x_drifted_ano,Sensitivity_x_drifted_ano
0,Adam,[1],6,10,64,0.01,/home/torge/dev/masterthesis_code/02_Experimen...,/home/torge/dev/masterthesis_code/02_Experimen...,/home/torge/dev/masterthesis_code/02_Experimen...,2489,4242,28301,8,2495,3791,28752,2,19.209475,8.083793,63.021839,99.679616,17.939498,7.984767,60.308622,99.919904
