In [11]:
import numpy as np
import pandas as pd

import pickle

In [12]:
from src.config import config

In [13]:
from sklearn.linear_model import LogisticRegression

# Load data

#### Load train and test matrices

In [14]:
# Root directories for input data and stored models, taken from the project config.
data_dir, model_dir = config.DATA_DIR, config.MODEL_DIR

In [15]:
processed_data_dir = data_dir / "processed/HPC_run_05/experiment_00/"


def _load_pickle(file_name):
    """Unpickle and return the object stored at ``file_name``.

    The file is opened in a ``with`` block so the handle is closed as soon as
    loading finishes, including when unpickling raises.
    """
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(file_name, "rb") as pickle_file:
        return pickle.load(pickle_file)


# Each of the three objects below is indexed by leak location further down.
X_train_per_leak_exp_file_name = processed_data_dir / "X_train_per_leak_exp.p"
X_train_per_leak_exp = _load_pickle(X_train_per_leak_exp_file_name)

X_test_per_leak_exp_file_name = processed_data_dir / "X_test_per_leak_exp.p"
X_test_per_leak_exp = _load_pickle(X_test_per_leak_exp_file_name)

y_train_per_leak_exp_file_name = processed_data_dir / "y_train_per_leak_exp.p"
y_train_per_leak_exp = _load_pickle(y_train_per_leak_exp_file_name)

# Configuration

In [16]:
# Regularisation type and the solver used to fit it ('saga' handles the
# elasticnet penalty).
penalty = "elasticnet"
solver = "saga"

# Hyper-parameter grid: every (l1_ratio, C) combination is trained below.
Cs = [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0]
# Evenly spaced from 0.0 to 1.0 in steps of 0.2.
l1_ratios = [step / 5 for step in range(6)]

# Alternative grids, kept for reference:
#Cs = [1e0]
#l1_ratios = [0.0]
#l1_ratios = [0.2, 0.4, 0.6, 0.8, 1.0]

# Train leak classifier for every leak experiment

In [17]:
# Filled by the training loop: str(l1_ratio) -> str(C) -> leak location -> fitted classifier.
log_reg_models_trained = dict()

In [None]:
%%capture output

%%time

def write_line_to_progress_file(file_name, line):
    """Append ``line`` followed by a newline to the file ``file_name``.

    The file is opened in append mode, so earlier content is kept and the
    file is created when it does not exist yet.

    Parameters
    ----------
    file_name : str or path-like
        Path of the progress file.
    line : str
        Text to append; the trailing newline is added here.
    """
    # The context manager closes the handle even if the write raises.
    with open(file_name, "a") as progress_file:
        progress_file.write(line + "\n")

# Location of the intermediate checkpoint. It does not depend on the loop
# variables, so it is built once instead of on every iteration.
leak_classification_model_dir = model_dir / "HPC_run_05/leak_classification/trained_on_data/experiment_00/"
trained_models_per_leak_exp_file_name = leak_classification_model_dir / "log_reg_models_trained_tmp.p"

# Grid search: for every (l1_ratio, C) pair, fit one classifier per leak
# location and store it under log_reg_models_trained[str(l1_ratio)][str(C)].
for l1_ratio in l1_ratios:
    write_line_to_progress_file("train_progress.txt", "l1 ratio: " + str(l1_ratio))

    log_reg_models_trained[str(l1_ratio)] = {}

    for C in Cs:
        write_line_to_progress_file("train_progress.txt", "C: " + str(C))
        trained_models_per_leak_exp = {}

        for leak_location, X_train in X_train_per_leak_exp.items():
            y_train = y_train_per_leak_exp[leak_location]
            # NOTE(review): no random_state is passed; the 'saga' solver shuffles
            # the data, so fitted coefficients may differ between runs.
            log_reg_clf = LogisticRegression(C=C, penalty=penalty, solver=solver, l1_ratio=l1_ratio)
            log_reg_clf.fit(X_train, y_train)
            trained_models_per_leak_exp[leak_location] = log_reg_clf

        log_reg_models_trained[str(l1_ratio)][str(C)] = trained_models_per_leak_exp

        # Checkpoint everything trained so far after each (l1_ratio, C) pair.
        # The with-block closes the file right away rather than leaking one
        # open handle per iteration.
        with open(trained_models_per_leak_exp_file_name, "wb") as checkpoint_file:
            pickle.dump(log_reg_models_trained, checkpoint_file)

# Write to disk

In [None]:
leak_classification_model_dir = model_dir / "HPC_run_05/leak_classification/trained_on_data/experiment_00/"

# Final result file (the training loop writes its checkpoints to a separate "_tmp" file).
trained_models_per_leak_exp_file_name = leak_classification_model_dir / "log_reg_models_trained.p"
# The with-block guarantees the data is flushed and the handle closed once the dump finishes.
with open(trained_models_per_leak_exp_file_name, "wb") as models_file:
    pickle.dump(log_reg_models_trained, models_file)