In [1]:
import xgboost
import warnings
import itertools

import numpy as np
import pandas as pd

from datetime import datetime

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

from costcla.metrics import cost_loss, savings_score
from costcla.models import BayesMinimumRiskClassifier, ThresholdingOptimization
from costcla.models import CostSensitiveDecisionTreeClassifier, CostSensitiveLogisticRegression
from costcla.models import CostSensitiveRandomForestClassifier, CostSensitiveBaggingClassifier, CostSensitivePastingClassifier, CostSensitiveRandomPatchesClassifier

RANDOM_STATE = 42
N_JOBS = -1

warnings.filterwarnings('ignore')
np.random.seed(RANDOM_STATE)



In [21]:
def create_cost_matrix(df, fp_cost, fn_cost, tp_cost, tn_cost):
    # false positives, false negatives, true positives, true negatives
    def generate_cost(df, cost):
        return df[cost] if type(cost) == str else cost
    
    cost_matrix = np.zeros((df.shape[0], 4))
    
    cost_matrix[:, 0] = generate_cost(df, fp_cost)
    cost_matrix[:, 1] = generate_cost(df, fn_cost)
    cost_matrix[:, 2] = generate_cost(df, tp_cost)
    cost_matrix[:, 3] = generate_cost(df, tn_cost)
    
    return cost_matrix


def generate_rf_models():

    max_depth = [None, 1, 2, 3, 4, 5]
    n_estimatiors = [10, 50, 100, 200, 500]

    rf_models = {
        f'CI-GS_RandomForest-n_est_{n_est}_md_{md}': RandomForestClassifier(
            random_state=RANDOM_STATE,
            n_estimators=n_est,
            max_depth=md,
            n_jobs=N_JOBS
        )
        for n_est, md in itertools.product(n_estimatiors, max_depth)
    }
    return rf_models


def generate_xgb_models():
    
    max_depth = [0, 1, 2, 3, 4, 5]
    subsample = [0.5, 0.75, 1]
    colsample_bytree = [0.5, 0.75, 1]

    xgb_models = {
        f'CI-GS_XGBoost-md_{md}_subs_{subs}_cs_bt_{cs_bt}': xgboost.XGBClassifier(
            random_state=RANDOM_STATE,
            verbosity=0,
            max_depth=md,
            subsample=subs,
            colsample_bytree=cs_bt,
            n_jobs=N_JOBS
        )
        for md, subs, cs_bt in itertools.product(max_depth, subsample, colsample_bytree)
    }
    return xgb_models


def generate_cost_sensitive_ensemble(model, name):
    
    combinations = ['majority_voting', 'weighted_voting', 'stacking', 'stacking_proba', 
                    'stacking_bmr', 'stacking_proba_bmr', 'majority_bmr', 'weighted_bmr']
    n_estimatiors = [10, 20, 30]
    
    cs_ensemble_models = {
        f'ECSDT-GS_{name}_{combination}-n_est_{n_est}': model(
            n_estimators=n_est,
            combination=combination,
            n_jobs=N_JOBS
        )
        for n_est, combination in itertools.product(n_estimatiors, combinations)
    }
    return cs_ensemble_models
    


def generate_models():
    
    csrfc = generate_cost_sensitive_ensemble(CostSensitiveRandomForestClassifier, 
                                             'CostSensitiveRandomForestClassifier')
    csbc = generate_cost_sensitive_ensemble(CostSensitiveBaggingClassifier,
                                           'CostSensitiveBaggingClassifier')
    cspc = generate_cost_sensitive_ensemble(CostSensitivePastingClassifier,
                                           'CostSensitivePastingClassifier')
    csrpc = generate_cost_sensitive_ensemble(CostSensitiveRandomPatchesClassifier,
                                            'CostSensitiveRandomPatchesClassifier')

    gs_rf_models = generate_rf_models()
    gs_xgb_models = generate_xgb_models()
    
    models = {
        'CI-LogisticRegression': LogisticRegression(), 
        'CI-DecisionTree': DecisionTreeClassifier(random_state=RANDOM_STATE), 
        'CI-RandomForest': RandomForestClassifier(random_state=RANDOM_STATE),
        'CI-XGBoost': xgboost.XGBClassifier(random_state=RANDOM_STATE,verbosity=0),
        'CST-CostSensitiveLogisticRegression': CostSensitiveLogisticRegression(),
        'CST-CostSensitiveDecisionTreeClassifier': CostSensitiveDecisionTreeClassifier()
    }
    models.update(csrfc)
    models.update(csbc)
    models.update(cspc)
    models.update(csrpc)
    models.update(gs_rf_models)
    models.update(gs_xgb_models)
    
    return models


def create_model_summary(model, name, X, y, cost_matrix):
    print(name)
    if name.startswith('BMR'):
        # BMR Model
        model, bmr = model
        y_hat_proba = model.predict_proba(X)
        y_hat = bmr.predict(y_hat_proba, cost_matrix)
    elif name.startswith('TO'):
        # Threshold Optimized Model
        model, threshold_opt = model
        y_hat_proba = model.predict_proba(X)
        y_hat = threshold_opt.predict(y_hat_proba)
    elif name.startswith('ECSDT'):
        y_hat = model.predict(X, cost_matrix)
    else:
        y_hat = model.predict(X)
        
        
    return {
        'Name': name,
        'Accuracy': accuracy_score(y, y_hat),
        'Precision': precision_score(y, y_hat),
        'Recall': recall_score(y, y_hat),
        'F1': f1_score(y, y_hat),
        'Cost': cost_loss(y, y_hat, cost_matrix),
        'Savings': savings_score(y, y_hat, cost_matrix)
    }


def create_bmr_model(model, name, X_val, y_val, calibration = True):
    
    y_hat_val_proba = model.predict_proba(X_val)

    bmr = BayesMinimumRiskClassifier(calibration = calibration)
    bmr.fit(y_val, y_hat_val_proba)
    
    prefix = 'BMR' + '_calibration_' if calibration else 'BMR_'
    name = prefix + name
    
    return (name, (model, bmr))


def create_threshold_optimized_model(model, name, X_train, y_train, cost_matrix_train, calibration = True):
        
    y_hat_train_proba = model.predict_proba(X_train)

    threshold_opt = ThresholdingOptimization(calibration = calibration)
    threshold_opt.fit(y_hat_train_proba, cost_matrix_train, y_train)
    
    prefix = 'TO' + '_calibration_' if calibration else 'TO_'
    name = prefix + name
    
    return (name, (model, threshold_opt))

In [3]:
OPERATIONAL_COST = 2.5

In [4]:
df = pd.read_csv('data/creditcard.csv')
cost_matrix = create_cost_matrix(df, OPERATIONAL_COST, 'Amount', OPERATIONAL_COST, 0)

X = df.drop(['Time', 'Amount', 'Class'], axis = 1)
y = df['Class']

In [5]:
X_train, X_test, y_train, y_test, cost_matrix_train, cost_matrix_test = train_test_split(X, y, cost_matrix, train_size = 0.5, stratify = y, random_state = RANDOM_STATE)
X_val, X_test, y_val, y_test, cost_matrix_val, cost_matrix_test = train_test_split(X_test, y_test, cost_matrix_test, train_size = 0.33, stratify = y_test, random_state = RANDOM_STATE)

In [6]:
# Downsampling
"""
X_train = pd.concat([X_train.reset_index(), pd.DataFrame(cost_matrix_train)], axis = 1).set_index('index')
X_train['Class'] = y_train

X_train = pd.concat([
    X_train[X_train['Class'] == 0].sample(frac = 0.05, random_state=RANDOM_STATE),
    X_train[X_train['Class'] == 1]
])

y_train = X_train['Class']
cost_matrix_train = X_train[[0, 1, 2, 3]].values
X_train = X_train.drop(['Class', 0, 1, 2, 3], axis = 1)
"""
pass

In [None]:
models = generate_models()

In [8]:
def filter_model_names(models, value):
    return [name for name in models.keys() if value in name]
    

standard_model_names = filter_model_names(models, 'GS_RandomForest') + ['CI-LogisticRegression', 'CI-DecisionTree', 'CI-RandomForest']
cost_sensitive_model_names = filter_model_names(models, 'CST') + filter_model_names(models, 'ECSDT')
xgb_model_names = filter_model_names(models, 'XGBoost')
calibration_model_names = standard_model_names + xgb_model_names

# Standard model training

for name in standard_model_names:
    print(name)
    models[name].fit(X_train.values, y_train.values)

    
for name in xgb_model_names:
    print(name)
    models[name].fit(
        X_train.values, y_train.values, 
        eval_set = [(X_val.values, y_val.values), (X_train.values, y_train.values)],
        eval_metric = 'aucpr',
        early_stopping_rounds = 50,
        verbose = False
    )       
    
    
for name in cost_sensitive_model_names:
    print(name)
    models[name].fit(X_train.values, y_train.values, cost_matrix_train)
    

CI-LogisticRegression
CI-DecisionTree
CI-RandomForest
CI-XGBoost
CI-GS_XGBoost-md_0_subs_0.5_cs_bt_0.5
CI-GS_XGBoost-md_0_subs_0.5_cs_bt_0.75
CI-GS_XGBoost-md_0_subs_0.5_cs_bt_1
CI-GS_XGBoost-md_0_subs_0.75_cs_bt_0.5
CI-GS_XGBoost-md_0_subs_0.75_cs_bt_0.75
CI-GS_XGBoost-md_0_subs_0.75_cs_bt_1
CI-GS_XGBoost-md_0_subs_1_cs_bt_0.5
CI-GS_XGBoost-md_0_subs_1_cs_bt_0.75
CI-GS_XGBoost-md_0_subs_1_cs_bt_1
CI-GS_XGBoost-md_1_subs_0.5_cs_bt_0.5
CI-GS_XGBoost-md_1_subs_0.5_cs_bt_0.75
CI-GS_XGBoost-md_1_subs_0.5_cs_bt_1
CI-GS_XGBoost-md_1_subs_0.75_cs_bt_0.5
CI-GS_XGBoost-md_1_subs_0.75_cs_bt_0.75
CI-GS_XGBoost-md_1_subs_0.75_cs_bt_1
CI-GS_XGBoost-md_1_subs_1_cs_bt_0.5
CI-GS_XGBoost-md_1_subs_1_cs_bt_0.75
CI-GS_XGBoost-md_1_subs_1_cs_bt_1
CI-GS_XGBoost-md_2_subs_0.5_cs_bt_0.5
CI-GS_XGBoost-md_2_subs_0.5_cs_bt_0.75
CI-GS_XGBoost-md_2_subs_0.5_cs_bt_1
CI-GS_XGBoost-md_2_subs_0.75_cs_bt_0.5
CI-GS_XGBoost-md_2_subs_0.75_cs_bt_0.75
CI-GS_XGBoost-md_2_subs_0.75_cs_bt_1
CI-GS_XGBoost-md_2_subs_1_cs_bt_0.

ECSDT-GS_CostSensitiveRandomPatchesClassifier_stacking_proba_bmr-n_est_30
ECSDT-GS_CostSensitiveRandomPatchesClassifier_majority_bmr-n_est_30
ECSDT-GS_CostSensitiveRandomPatchesClassifier_weighted_bmr-n_est_30


In [9]:
# Threshold Optimization training

for name in calibration_model_names:
    for calibration in [True, False]:
        print(name)
        model = models[name]
        name_threshold_opt, model_threshold_opt = create_threshold_optimized_model(model, name, X_train.values, y_train.values, cost_matrix_train, calibration = calibration)
        models[name_threshold_opt] = model_threshold_opt

CI-LogisticRegression
CI-LogisticRegression
CI-DecisionTree
CI-DecisionTree
CI-RandomForest
CI-RandomForest
CI-XGBoost
CI-XGBoost
CI-GS_XGBoost-md_0_subs_0.5_cs_bt_0.5
CI-GS_XGBoost-md_0_subs_0.5_cs_bt_0.5
CI-GS_XGBoost-md_0_subs_0.5_cs_bt_0.75
CI-GS_XGBoost-md_0_subs_0.5_cs_bt_0.75
CI-GS_XGBoost-md_0_subs_0.5_cs_bt_1
CI-GS_XGBoost-md_0_subs_0.5_cs_bt_1
CI-GS_XGBoost-md_0_subs_0.75_cs_bt_0.5
CI-GS_XGBoost-md_0_subs_0.75_cs_bt_0.5
CI-GS_XGBoost-md_0_subs_0.75_cs_bt_0.75
CI-GS_XGBoost-md_0_subs_0.75_cs_bt_0.75
CI-GS_XGBoost-md_0_subs_0.75_cs_bt_1
CI-GS_XGBoost-md_0_subs_0.75_cs_bt_1
CI-GS_XGBoost-md_0_subs_1_cs_bt_0.5
CI-GS_XGBoost-md_0_subs_1_cs_bt_0.5
CI-GS_XGBoost-md_0_subs_1_cs_bt_0.75
CI-GS_XGBoost-md_0_subs_1_cs_bt_0.75
CI-GS_XGBoost-md_0_subs_1_cs_bt_1
CI-GS_XGBoost-md_0_subs_1_cs_bt_1
CI-GS_XGBoost-md_1_subs_0.5_cs_bt_0.5
CI-GS_XGBoost-md_1_subs_0.5_cs_bt_0.5
CI-GS_XGBoost-md_1_subs_0.5_cs_bt_0.75
CI-GS_XGBoost-md_1_subs_0.5_cs_bt_0.75
CI-GS_XGBoost-md_1_subs_0.5_cs_bt_1
CI-GS_XG

In [10]:
# BMR training

for name in calibration_model_names:
    for calibration in [True, False]:
        print(name)
        model = models[name]
        name_bmr, model_bmr = create_bmr_model(model, name, X_val.values, y_val.values, calibration = calibration)
        models[name_bmr] = model_bmr

CI-LogisticRegression
CI-LogisticRegression
CI-DecisionTree
CI-DecisionTree
CI-RandomForest
CI-RandomForest
CI-XGBoost
CI-XGBoost
CI-GS_XGBoost-md_0_subs_0.5_cs_bt_0.5
CI-GS_XGBoost-md_0_subs_0.5_cs_bt_0.5
CI-GS_XGBoost-md_0_subs_0.5_cs_bt_0.75
CI-GS_XGBoost-md_0_subs_0.5_cs_bt_0.75
CI-GS_XGBoost-md_0_subs_0.5_cs_bt_1
CI-GS_XGBoost-md_0_subs_0.5_cs_bt_1
CI-GS_XGBoost-md_0_subs_0.75_cs_bt_0.5
CI-GS_XGBoost-md_0_subs_0.75_cs_bt_0.5
CI-GS_XGBoost-md_0_subs_0.75_cs_bt_0.75
CI-GS_XGBoost-md_0_subs_0.75_cs_bt_0.75
CI-GS_XGBoost-md_0_subs_0.75_cs_bt_1
CI-GS_XGBoost-md_0_subs_0.75_cs_bt_1
CI-GS_XGBoost-md_0_subs_1_cs_bt_0.5
CI-GS_XGBoost-md_0_subs_1_cs_bt_0.5
CI-GS_XGBoost-md_0_subs_1_cs_bt_0.75
CI-GS_XGBoost-md_0_subs_1_cs_bt_0.75
CI-GS_XGBoost-md_0_subs_1_cs_bt_1
CI-GS_XGBoost-md_0_subs_1_cs_bt_1
CI-GS_XGBoost-md_1_subs_0.5_cs_bt_0.5
CI-GS_XGBoost-md_1_subs_0.5_cs_bt_0.5
CI-GS_XGBoost-md_1_subs_0.5_cs_bt_0.75
CI-GS_XGBoost-md_1_subs_0.5_cs_bt_0.75
CI-GS_XGBoost-md_1_subs_0.5_cs_bt_1
CI-GS_XG

In [23]:
filepath = 'outputs/' + 'Training-results-' + datetime.now().isoformat('-', timespec = 'minutes') + '.csv'


temp = []
for name, model in models.items():
    try:
        temp.append(create_model_summary(model, name, X_test.values, y_test.values, cost_matrix_test))
    except:
        pass
results = pd.DataFrame(temp)


#results = pd.DataFrame([create_model_summary(model, name, X_test.values, y_test.values, cost_matrix_test) for name, model in models.items()])
results.to_csv(filepath, index=False)
results

CI-LogisticRegression
CI-DecisionTree
CI-RandomForest
CI-XGBoost
CST-LogisticRegression
CST-DecisionTreeClassifier
ECSDT-GS_CostSensitiveRandomForestClassifier_majority_voting-n_est_10
ECSDT-GS_CostSensitiveRandomForestClassifier_weighted_voting-n_est_10
ECSDT-GS_CostSensitiveRandomForestClassifier_stacking-n_est_10
ECSDT-GS_CostSensitiveRandomForestClassifier_stacking_proba-n_est_10
ECSDT-GS_CostSensitiveRandomForestClassifier_stacking_bmr-n_est_10
ECSDT-GS_CostSensitiveRandomForestClassifier_stacking_proba_bmr-n_est_10
ECSDT-GS_CostSensitiveRandomForestClassifier_majority_bmr-n_est_10
ECSDT-GS_CostSensitiveRandomForestClassifier_weighted_bmr-n_est_10
ECSDT-GS_CostSensitiveRandomForestClassifier_majority_voting-n_est_20
ECSDT-GS_CostSensitiveRandomForestClassifier_weighted_voting-n_est_20
ECSDT-GS_CostSensitiveRandomForestClassifier_stacking-n_est_20
ECSDT-GS_CostSensitiveRandomForestClassifier_stacking_proba-n_est_20
ECSDT-GS_CostSensitiveRandomForestClassifier_stacking_bmr-n_est_20


CI-GS_XGBoost-md_2_subs_0.5_cs_bt_1
CI-GS_XGBoost-md_2_subs_0.75_cs_bt_0.5
CI-GS_XGBoost-md_2_subs_0.75_cs_bt_0.75
CI-GS_XGBoost-md_2_subs_0.75_cs_bt_1
CI-GS_XGBoost-md_2_subs_1_cs_bt_0.5
CI-GS_XGBoost-md_2_subs_1_cs_bt_0.75
CI-GS_XGBoost-md_2_subs_1_cs_bt_1
CI-GS_XGBoost-md_3_subs_0.5_cs_bt_0.5
CI-GS_XGBoost-md_3_subs_0.5_cs_bt_0.75
CI-GS_XGBoost-md_3_subs_0.5_cs_bt_1
CI-GS_XGBoost-md_3_subs_0.75_cs_bt_0.5
CI-GS_XGBoost-md_3_subs_0.75_cs_bt_0.75
CI-GS_XGBoost-md_3_subs_0.75_cs_bt_1
CI-GS_XGBoost-md_3_subs_1_cs_bt_0.5
CI-GS_XGBoost-md_3_subs_1_cs_bt_0.75
CI-GS_XGBoost-md_3_subs_1_cs_bt_1
CI-GS_XGBoost-md_4_subs_0.5_cs_bt_0.5
CI-GS_XGBoost-md_4_subs_0.5_cs_bt_0.75
CI-GS_XGBoost-md_4_subs_0.5_cs_bt_1
CI-GS_XGBoost-md_4_subs_0.75_cs_bt_0.5
CI-GS_XGBoost-md_4_subs_0.75_cs_bt_0.75
CI-GS_XGBoost-md_4_subs_0.75_cs_bt_1
CI-GS_XGBoost-md_4_subs_1_cs_bt_0.5
CI-GS_XGBoost-md_4_subs_1_cs_bt_0.75
CI-GS_XGBoost-md_4_subs_1_cs_bt_1
CI-GS_XGBoost-md_5_subs_0.5_cs_bt_0.5
CI-GS_XGBoost-md_5_subs_0.5_cs_

BMR_calibration-CI-GS_XGBoost-md_1_subs_1_cs_bt_0.75
BMR-CI-GS_XGBoost-md_1_subs_1_cs_bt_0.75
BMR_calibration-CI-GS_XGBoost-md_1_subs_1_cs_bt_1
BMR-CI-GS_XGBoost-md_1_subs_1_cs_bt_1
BMR_calibration-CI-GS_XGBoost-md_2_subs_0.5_cs_bt_0.5
BMR-CI-GS_XGBoost-md_2_subs_0.5_cs_bt_0.5
BMR_calibration-CI-GS_XGBoost-md_2_subs_0.5_cs_bt_0.75
BMR-CI-GS_XGBoost-md_2_subs_0.5_cs_bt_0.75
BMR_calibration-CI-GS_XGBoost-md_2_subs_0.5_cs_bt_1
BMR-CI-GS_XGBoost-md_2_subs_0.5_cs_bt_1
BMR_calibration-CI-GS_XGBoost-md_2_subs_0.75_cs_bt_0.5
BMR-CI-GS_XGBoost-md_2_subs_0.75_cs_bt_0.5
BMR_calibration-CI-GS_XGBoost-md_2_subs_0.75_cs_bt_0.75
BMR-CI-GS_XGBoost-md_2_subs_0.75_cs_bt_0.75
BMR_calibration-CI-GS_XGBoost-md_2_subs_0.75_cs_bt_1
BMR-CI-GS_XGBoost-md_2_subs_0.75_cs_bt_1
BMR_calibration-CI-GS_XGBoost-md_2_subs_1_cs_bt_0.5
BMR-CI-GS_XGBoost-md_2_subs_1_cs_bt_0.5
BMR_calibration-CI-GS_XGBoost-md_2_subs_1_cs_bt_0.75
BMR-CI-GS_XGBoost-md_2_subs_1_cs_bt_0.75
BMR_calibration-CI-GS_XGBoost-md_2_subs_1_cs_bt_1
BMR-

Unnamed: 0,Accuracy,Cost,F1,Name,Precision,Recall,Savings
0,0.998858,4205.57,0.716883,CI-LogisticRegression,0.627273,0.836364,0.723834
1,0.994235,5087.57,0.325980,CI-DecisionTree,0.204301,0.806061,0.665916
2,0.998962,4495.93,0.733154,CI-RandomForest,0.660194,0.824242,0.704767
3,0.998952,4188.07,0.735450,CI-XGBoost,0.652582,0.842424,0.724983
4,0.475920,127275.55,0.005885,CST-LogisticRegression,0.002952,0.896970,-7.357770
5,0.994749,5533.53,0.281205,CST-DecisionTreeClassifier,0.184211,0.593939,0.636631
6,0.999109,4687.26,0.701754,ECSDT-GS_CostSensitiveRandomForestClassifier_m...,0.833333,0.606061,0.692203
7,0.998920,5013.27,0.628159,ECSDT-GS_CostSensitiveRandomForestClassifier_w...,0.776786,0.527273,0.670795
8,0.998271,15228.41,0.000000,ECSDT-GS_CostSensitiveRandomForestClassifier_s...,0.000000,0.000000,0.000000
9,0.998281,14973.69,0.023810,ECSDT-GS_CostSensitiveRandomForestClassifier_s...,0.666667,0.012121,0.016727


In [24]:
results.sort_values('Savings')

Unnamed: 0,Accuracy,Cost,F1,Name,Precision,Recall,Savings
25,0.002033,249373.08,0.000756,ECSDT-GS_CostSensitiveRandomForestClassifier_s...,0.000378,0.218182,-15.375517
49,0.002819,248527.66,0.001029,ECSDT-GS_CostSensitiveBaggingClassifier_stacki...,0.000515,0.296970,-15.320001
89,0.026213,244121.32,0.000839,ECSDT-GS_CostSensitiveRandomPatchesClassifier_...,0.000420,0.236364,-15.030651
72,0.001698,239786.93,0.003140,ECSDT-GS_CostSensitivePastingClassifier_stacki...,0.001573,0.909091,-14.746025
169,0.001729,238527.50,0.003453,TO-CI-GS_XGBoost-md_0_subs_0.5_cs_bt_1,0.001729,1.000000,-14.663323
172,0.001729,238527.50,0.003453,TO_calibration-CI-GS_XGBoost-md_0_subs_0.75_cs...,0.001729,1.000000,-14.663323
168,0.001729,238527.50,0.003453,TO_calibration-CI-GS_XGBoost-md_0_subs_0.5_cs_...,0.001729,1.000000,-14.663323
167,0.001729,238527.50,0.003453,TO-CI-GS_XGBoost-md_0_subs_0.5_cs_bt_0.75,0.001729,1.000000,-14.663323
173,0.001729,238527.50,0.003453,TO-CI-GS_XGBoost-md_0_subs_0.75_cs_bt_0.75,0.001729,1.000000,-14.663323
174,0.001729,238527.50,0.003453,TO_calibration-CI-GS_XGBoost-md_0_subs_0.75_cs...,0.001729,1.000000,-14.663323


### TODO:
- Second jupyter notebook with results analysis
- Cross Validation (?)
- Rewrite this notebook to script?
- Make whole experiment with respect to differenct Operational Cost