# Fine Tuning Modell M2 in Phase II

In [1]:
import arrow
import learn2learn as l2l
import numpy as np
import os
import pickle
import torch
from torch.utils.data import TensorDataset
from sklearn.model_selection import train_test_split
import pandas as pd
import matplotlib.pyplot as plt
import joblib

from utils.evalUtils import print_confusion_matrix

from sklearn.preprocessing import MinMaxScaler
from models.SimpleAutoEncoder import SimpleAutoEncoder

In [2]:
%run -i ./scripts/setConfigs.py

Set configs..


## Prepare the Data

#### Load data

In [3]:
data_fn = os.path.join(data_path, 'simulation_data_y_2020_2021_reduced.h5')
df_data_train = pd.read_hdf(data_fn, key='df')
print('Shape of X_train data: {}'.format(df_data_train.shape))

Shape of X_train data: (105216, 17)


In [4]:
anomalous_data_fn = os.path.join(data_path, 'anomalous_data_y_2022_reduced.h5')
df_x_test = pd.read_hdf(anomalous_data_fn, key='df')
print('Shape of X_test: {}'.format(df_x_test.shape))

Shape of X_test: (35040, 18)


In [5]:
drifted_data_fn = os.path.join(data_path, 'drifted_data_y_2023_reduced_more_cos_phi.h5')
df_x_drifted = pd.read_hdf(drifted_data_fn, key='df')
print('Shape of X_drifted: {}'.format(df_x_drifted.shape))

Shape of X_drifted: (35040, 18)


In [6]:
anomalous_drifted_data_fn = os.path.join(data_path, 'anomalous_drifted_data_y_2023_reduced_more_cos_phi.h5')
df_x_anormal_drifted = pd.read_hdf(anomalous_drifted_data_fn, key='df')
print('Shape of X_anomal,drifted : {}'.format(df_x_anormal_drifted.shape))

Shape of X_anomal,drifted : (35040, 19)


#### Split labels

In [7]:
y_anormal = [1 if x > 0 else 0 for x in df_x_test['label']]
df_x_test.drop('label', inplace=True, axis=1)

In [8]:
s_x_drifted_drift_labels = df_x_drifted['drift_labels']
df_x_drifted.drop('drift_labels',axis=1, inplace=True)
print('Shape of X_drifted : {}'.format(df_x_drifted.shape))

Shape of X_drifted : (35040, 17)


In [9]:
y_anormal_drifted = [1 if x > 0 else 0 for x in df_x_anormal_drifted['anomaly_labels']]
df_x_anormal_drifted.drop(['anomaly_labels', 'drift_labels'], inplace=True, axis=1)
print('Shape of X_anomal,drifted : {}'.format(df_x_anormal_drifted.shape))

Shape of X_anomal,drifted : (35040, 17)


#### Scale data

In [10]:
print('Scale data..')
scaler_train = MinMaxScaler((-1,1))
scaler_train = scaler_train.fit(df_data_train)
scaled_train = scaler_train.transform(df_data_train.to_numpy())

x_anormal = scaler_train.transform(df_x_test.to_numpy())
x_anormal_drifted = scaler_train.transform(df_x_anormal_drifted.to_numpy())
x_drifted = scaler_train.transform(df_x_drifted.to_numpy())

Scale data..


#### Build PyTorch Objects

In [11]:
x_anormal_torch_tensor = torch.from_numpy(x_anormal).type(torch.FloatTensor)
x_anormal_drifted_torch_tensor = torch.from_numpy(x_anormal_drifted).type(torch.FloatTensor)
x_drifted_torch_tensor = torch.from_numpy(x_drifted).type(torch.FloatTensor)
y_drifted_torch_tensor = torch.from_numpy(s_x_drifted_drift_labels.to_numpy()).type(torch.FloatTensor)

In [12]:
# build pytorch dataset from tensor
drifted_torch_dataset = TensorDataset(x_drifted_torch_tensor,y_drifted_torch_tensor)

#### Read Model M_2 and LogReg

In [13]:
num_inpus = 17
val_lambda = 42 * 0.0
1
model_name = '20200302_firstAE_model.pt'
model_fn = os.path.join(model_bib_path, model_name)

model = SimpleAutoEncoder(num_inputs=num_inpus, val_lambda=val_lambda)
model.load_state_dict(torch.load(model_fn))
model.train()

SimpleAutoEncoder(
  (encoder): Sequential(
    (0): Linear(in_features=17, out_features=12, bias=True)
    (1): ReLU(inplace=True)
    (2): Linear(in_features=12, out_features=8, bias=True)
    (3): Tanh()
  )
  (decoder): Sequential(
    (0): Linear(in_features=8, out_features=12, bias=True)
    (1): ReLU(inplace=True)
    (2): Linear(in_features=12, out_features=17, bias=True)
    (3): Tanh()
  )
)

In [14]:
model_fn = '20200303_LogRegModell.save'
logreg_filename = os.path.join(model_bib_path, model_fn)
logreg = joblib.load(logreg_filename)
print(logreg)

LogisticRegression(C=1.0, class_weight={1: 2.0}, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=42, solver='liblinear', tol=0.0001, verbose=0,
                   warm_start=False)




In [15]:
fn = '20200410_teilfaktorieller_versuchsplan_phase2_m2.csv'
full_fn = os.path.join(exp_data_path, 'versuchsplan', fn)
versuchsplan = pd.read_csv(full_fn, sep=';')

In [16]:
versuchsplan.head()

Unnamed: 0,k_train,optimizer,num_iter,lr,filter_cde
0,5,Adam,128,0.01,"(1,)"
1,20,Adam,1,0.01,"(1, 2, 3)"
2,20,SGD,128,0.01,"(2, 3)"
3,5,SGD,1,0.1,"(1, 3)"
4,10,Adam,1,0.01,"(1,)"


In [17]:
len(versuchsplan)

37

## Start Fine-Tuning

In [18]:
from Experiments.FineTuneExperiment import FineTuneExperiment

In [19]:
def _build_filter_list(filter_cde_string):
    l = []

    if '1' in filter_cde_string:
        l.append(1)
    if '2' in filter_cde_string:
        l.append(2)
    if '3' in filter_cde_string:
        l.append(3)
        
    return l

In [20]:
for i, vp in versuchsplan.iterrows():
    start_ts = arrow.now()
    optimizer_name = vp['optimizer']
    k = vp['k_train']
    num_iter = vp['num_iter']
    lr = vp['lr']
    filter_cde = _build_filter_list(vp['filter_cde'])
    name_model = 'M2'
    
    string_lr = str(lr).replace('.', '_')
    
    fine_tuned_model_fn = '{}_tVPII_M2_FINE_TUNED_MODEL__K_{}_OPTIMIZER_{}_NUM_ITER_{}_LR_{}_FILTER_CDE_{}_MODEL_{}.pt'.format(arrow.now().format('YYYYMMDD'), k, optimizer_name,
                                                                                                                                            num_iter, string_lr, filter_cde, name_model)
    
    fine_tuned_model_fn = os.path.join(exp_models_path, 'fine_tuned_models', fine_tuned_model_fn)
    
    exp_name_fn = '{}_tVPII_M2_EXPERIMENT_FINE_TUNED_MODEL__K_{}_OPTIMIZER_{}_NUM_ITER_{}_LR_{}_FILTER_CDE_{}_MODEL_{}.csv'.format(arrow.now().format('YYYYMMDD'), k, optimizer_name,
                                                                                                                                            num_iter, string_lr, filter_cde, name_model)
    
    exp_name_fn = os.path.join(exp_data_path, 'experiment', 'fine_tuning', exp_name_fn)
    
    
    ft_exp = FineTuneExperiment(model=model, fine_tune_data_x_drifted=drifted_torch_dataset, eval_data_x_test=x_anormal_torch_tensor, eval_label_x_test=y_anormal,
                               eval_data_x_drifted_ano=x_anormal_drifted_torch_tensor, eval_label_x_drifted_ano=y_anormal_drifted, k=k, fine_tune_iterations=num_iter, optimizer_name=optimizer_name,
                               fine_tune_classes=filter_cde, classifier=logreg, lr=lr, name_pretrained_model=name_model, fine_tuned_model_fn=fine_tuned_model_fn, pretrained_model_fn= model_fn,
                               logreg_fn=logreg_filename, exp_name_fn=exp_name_fn)
    
    ft_exp.run()
    end_ts = arrow.now()
    duration = end_ts - start_ts
    print('Current Iteration: {} of {}, Optimizer: {}, K: {}, LR: {}, Filter CDE: {}, Model Name: {}, Num Iter: {}, Started: {}, Ended: {}, Duration: {}'.format(i+1, len(versuchsplan), optimizer_name, k, lr,
                                                                                                                                             filter_cde, name_model, num_iter, start_ts.format('hh:mm:ss'), 
                                                                                                                                             end_ts.format('hh:mm:ss'), duration))

Current Iteration: 1 of 37, Optimizer: Adam, K: 5, LR: 0.01, Filter CDE: [1], Model Name: M2, Started: 02:26:23, Ended: 02:27:17, Duration: 0:00:54.479553
Current Iteration: 2 of 37, Optimizer: Adam, K: 20, LR: 0.01, Filter CDE: [1, 2, 3], Model Name: M2, Started: 02:27:17, Ended: 02:28:11, Duration: 0:00:53.563966
Current Iteration: 3 of 37, Optimizer: SGD, K: 20, LR: 0.01, Filter CDE: [2, 3], Model Name: M2, Started: 02:28:11, Ended: 02:29:12, Duration: 0:01:01.173906
Current Iteration: 4 of 37, Optimizer: SGD, K: 5, LR: 0.1, Filter CDE: [1, 3], Model Name: M2, Started: 02:29:12, Ended: 02:30:08, Duration: 0:00:56.735554
Current Iteration: 5 of 37, Optimizer: Adam, K: 10, LR: 0.01, Filter CDE: [1], Model Name: M2, Started: 02:30:08, Ended: 02:31:02, Duration: 0:00:53.238037
Current Iteration: 6 of 37, Optimizer: SGD, K: 5, LR: 0.01, Filter CDE: [1, 3], Model Name: M2, Started: 02:31:02, Ended: 02:31:46, Duration: 0:00:44.192998
Current Iteration: 7 of 37, Optimizer: Adam, K: 10, LR: 

In [21]:
exp = pd.read_csv(exp_name_fn, sep=';')

In [22]:
exp.head()

Unnamed: 0,optimizer,fine_tune_classes,name_pretrained_model,k,fine_tune_iterations,lr,model_fn,pretrained_model_fn,logreg_fn,TP_x_test,TN_x_test,FP_x_test,FN_x_test,TP_x_drifted_ano,TN_x_drifted_ano,FP_x_drifted_ano,FN_x_drifted_ano,Accuracy_x_test,Precision_x_test,Specifity_x_test,Sensitivity_x_test,Accuracy_x_drifted_ano,Precision_x_drifted_ano,Specifity_x_drifted_ano,Sensitivity_x_drifted_ano
0,Adam,"[2, 3]",M2,10,128,0.001,/home/torge/dev/masterthesis_code/02_Experimen...,20200303_LogRegModell.save,/home/torge/dev/masterthesis_code/02_Experimen...,2493,7068,25475,4,2491,6324,26219,6,27.285959,8.913759,73.925322,99.839808,25.156963,8.676419,71.74135,99.759712
