## BIOINFORMATICS THESIS: MULTIMODAL NEURAL NETWORK

# CELL LINE: H1

In [1]:
import pandas as pd 
import numpy as np
import os

import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
from collections import defaultdict, OrderedDict
import pickle

import sqlite3
from sqlalchemy import create_engine

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [2]:
# create a database to store optuna studies with sqlite backend
#engine = create_engine('sqlite:///BIOINF_optuna_tuning.db')

In [3]:
from BIOINF_tesi.data_pipe import CELL_LINES, TASKS

In [4]:
# Pick the cell line analysed in this notebook; the recorded output shows
# that index 2 of CELL_LINES is 'H1'.
cell_line_index = 2
cell_line = CELL_LINES[cell_line_index]
cell_line

'H1'

---

In [5]:
from BIOINF_tesi.data_pipe import Load_Create_Task
from BIOINF_tesi.data_pipe import Build_DataLoader_Pipeline

In [6]:
from BIOINF_tesi.models import FFNN, CNN, CNN_LSTM
from BIOINF_tesi.models.utils import fit, Param_Search, Kfold_CV

In [7]:
# Load the scores saved by earlier runs so this notebook adds to them.
# NOTE(security): pickle.load can execute arbitrary code -- only open a
# results file that was produced by these notebooks.
RESULTS_PATH = 'results_dict.pickle'
if os.path.exists(RESULTS_PATH):
    with open(RESULTS_PATH, 'rb') as fin:
        saved_results = pickle.load(fin)
else:
    # First run on a fresh checkout: no results file yet, start empty
    # instead of failing with FileNotFoundError.
    saved_results = {}

# Nested defaultdict so results_dict[cell_line][task][key] = ... works
# even when the cell line or task has not been seen before.
results_dict = defaultdict(lambda: defaultdict(dict), saved_results)

## 1) ACTIVE ENHANCERS vs INACTIVE ENHANCERS

In [8]:
# First entry of TASKS; the recorded output shows 'active_E_vs_inactive_E'.
task_index = 0
task = TASKS[task_index]
task

'active_E_vs_inactive_E'

In [9]:
# Build the data-loading pipeline from the pickle file named after the current task.
task_pickle = f'{task}.pickle'
pipe_data_load = Build_DataLoader_Pipeline(path_name=task_pickle)

Data Preprocessing Done!


### 1. FFNN

In [10]:
# Use the FFNN class (imported from BIOINF_tesi.models) for the runs below.
model = FFNN

In [11]:
# IMBALANCED task: rebalancing is switched on, with 'smote' as the
# augmentation type passed for the genomic features.
type_augm_genfeatures = 'smote'

kf_CV = Kfold_CV()

# Settings of the 3-fold cross-validation run. The study name and the two
# model paths share the <cell_line>_<task>_<model>_<augmentation> prefix.
cv_settings = dict(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    sequence=False,
    rebalancing=True,
    type_augm_genfeatures=type_augm_genfeatures,
    model=model,
    device=device,
    task=task,
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}_{type_augm_genfeatures}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_{type_augm_genfeatures}_HP',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_{type_augm_genfeatures}_TEST',
)
kf_CV(**cv_settings)

  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-13 10:50:15,109][0m Using an existing study with name 'H1_active_E_vs_inactive_E_FFNN_smote_1' instead of creating a new one.[0m


>>> ITERATION N. 1





Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.14576348237272108
  Params: 
    dropout_l0: 0.0
    dropout_l1: 0.3
    lr: 0.0002792136434678459
    n_layers: 2
    n_units_l0: 256
    n_units_l1: 16
    optimizer: RMSprop
    weight_decay: 0.0027848772546870955

AUPRC test score: 0.145671408407845


>>> ITERATION N. 2



  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-13 10:50:15,410][0m Using an existing study with name 'H1_active_E_vs_inactive_E_FFNN_smote_1_2' instead of creating a new one.[0m


Study statistics: 
  Number of finished trials:  4
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.14033573389992185
  Params: 
    dropout_l0: 0.3
    dropout_l1: 0.3
    dropout_l2: 0.0
    dropout_l3: 0.5
    lr: 0.0002710318165159902
    n_layers: 4
    n_units_l0: 64
    n_units_l1: 128
    n_units_l2: 32
    n_units_l3: 16
    optimizer: Adam
    weight_decay: 0.0005439133210530741

AUPRC test score: 0.13027470138568115


>>> ITERATION N. 3



  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-13 10:50:15,731][0m Using an existing study with name 'H1_active_E_vs_inactive_E_FFNN_smote_1_2_3' instead of creating a new one.[0m


Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.14460005426053532
  Params: 
    dropout_l0: 0.4
    lr: 0.00033098226909741813
    n_layers: 1
    n_units_l0: 128
    optimizer: Nadam
    weight_decay: 0.004682740590130223

AUPRC test score: 0.13717944111635122



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.13771


In [12]:
# File this run's scores under cell line -> task -> '<model>_<augmentation>'.
run_key = f'{model.__name__}_{type_augm_genfeatures}'
results_dict[cell_line][task][run_key] = kf_CV.scores_dict

In [13]:
# Write the accumulated scores back to disk. The outer defaultdict is turned
# into an OrderedDict first (presumably because its lambda default factory
# cannot be pickled).
with open('results_dict.pickle', 'wb') as pickle_file:
    snapshot = OrderedDict(results_dict)
    pickle.dump(snapshot, pickle_file)

In [14]:
# IMBALANCED task: rebalancing is switched on, with 'double' as the
# augmentation type passed for the genomic features.
type_augm_genfeatures = 'double'

kf_CV = Kfold_CV()

# Settings of the 3-fold cross-validation run. The study name and the two
# model paths share the <cell_line>_<task>_<model>_<augmentation> prefix.
cv_settings = dict(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    sequence=False,
    rebalancing=True,
    type_augm_genfeatures=type_augm_genfeatures,
    model=model,
    device=device,
    task=task,
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}_{type_augm_genfeatures}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_{type_augm_genfeatures}_HP',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_{type_augm_genfeatures}_TEST',
)
kf_CV(**cv_settings)

  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-13 10:50:16,877][0m Using an existing study with name 'H1_active_E_vs_inactive_E_FFNN_double_1' instead of creating a new one.[0m
  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-13 10:50:17,002][0m Using an existing study with name 'H1_active_E_vs_inactive_E_FFNN_double_1_2' instead of creating a new one.[0m


>>> ITERATION N. 1

Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.15334383490341033
  Params: 
    dropout_l0: 0.4
    dropout_l1: 0.0
    lr: 1.253072948190913e-05
    n_layers: 2
    n_units_l0: 128
    n_units_l1: 16
    optimizer: Nadam
    weight_decay: 0.0016254301389992494

AUPRC test score: 0.13928131486513817


>>> ITERATION N. 2



  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-13 10:50:17,135][0m Using an existing study with name 'H1_active_E_vs_inactive_E_FFNN_double_1_2_3' instead of creating a new one.[0m


Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.13913836571283802
  Params: 
    dropout_l0: 0.2
    lr: 0.00024924548432218234
    n_layers: 1
    n_units_l0: 128
    optimizer: Nadam
    weight_decay: 0.006280417991924162

AUPRC test score: 0.1372390646023758


>>> ITERATION N. 3

Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.15177493619535515
  Params: 
    dropout_l0: 0.4
    dropout_l1: 0.2
    lr: 0.02100121792008002
    n_layers: 2
    n_units_l0: 64
    n_units_l1: 32
    optimizer: Nadam
    weight_decay: 0.0002659498509965974

AUPRC test score: 0.13038272101745965



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.13563


In [15]:
# File this run's scores under cell line -> task -> '<model>_<augmentation>'.
run_key = f'{model.__name__}_{type_augm_genfeatures}'
results_dict[cell_line][task][run_key] = kf_CV.scores_dict

In [16]:
# Write the accumulated scores back to disk. The outer defaultdict is turned
# into an OrderedDict first (presumably because its lambda default factory
# cannot be pickled).
with open('results_dict.pickle', 'wb') as pickle_file:
    snapshot = OrderedDict(results_dict)
    pickle.dump(snapshot, pickle_file)

---
### 2. CNN

In [None]:
# Use the CNN class (imported from BIOINF_tesi.models) for the runs below.
model = CNN

In [None]:
# IMBALANCED task: rebalancing is switched on for this sequence-based run.
kf_CV = Kfold_CV()

# Settings of the 3-fold cross-validation run. The study name and the two
# model paths share the <cell_line>_<task>_<model> prefix.
cv_settings = dict(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    sequence=True,
    rebalancing=True,
    model=model,
    device=device,
    task=task,
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST',
)
kf_CV(**cv_settings)

In [None]:
# File this run's scores under cell line -> task -> model class name.
model_key = model.__name__
results_dict[cell_line][task][model_key] = kf_CV.scores_dict

In [None]:
# Write the accumulated scores back to disk. The outer defaultdict is turned
# into an OrderedDict first (presumably because its lambda default factory
# cannot be pickled).
with open('results_dict.pickle', 'wb') as pickle_file:
    snapshot = OrderedDict(results_dict)
    pickle.dump(snapshot, pickle_file)

---

## 2) ACTIVE PROMOTERS vs INACTIVE PROMOTERS

In [17]:
# Second entry of TASKS; the recorded output shows 'active_P_vs_inactive_P'.
task_index = 1
task = TASKS[task_index]
task

'active_P_vs_inactive_P'

In [18]:
# Build the data-loading pipeline from the pickle file named after the current task.
task_pickle = f'{task}.pickle'
pipe_data_load = Build_DataLoader_Pipeline(path_name=task_pickle)

Data Preprocessing Done!


---

### 1. FFNN

In [19]:
# Use the FFNN class (imported from BIOINF_tesi.models) for the runs below.
model = FFNN

In [20]:
# 3-fold cross-validation of the selected model on the current task,
# without sequence input and without rebalancing.
kf_CV = Kfold_CV()

# The study name and the two model paths share the <cell_line>_<task>_<model> prefix.
cv_settings = dict(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    sequence=False,
    rebalancing=False,
    model=model,
    device=device,
    task=task,
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST',
)
kf_CV(**cv_settings)

  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-13 10:50:20,943][0m Using an existing study with name 'H1_active_P_vs_inactive_P_FFNN_1' instead of creating a new one.[0m


>>> ITERATION N. 1

Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.3241264730308719
  Params: 
    dropout_l0: 0.0
    dropout_l1: 0.2
    dropout_l2: 0.0
    lr: 0.009037675517652628
    n_layers: 3
    n_units_l0: 64
    n_units_l1: 16
    n_units_l2: 32
    optimizer: Nadam
    weight_decay: 0.0005520243428649026

AUPRC test score: 0.31619000737035524


>>> ITERATION N. 2



  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-13 10:50:21,197][0m Using an existing study with name 'H1_active_P_vs_inactive_P_FFNN_1_2' instead of creating a new one.[0m
  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-13 10:50:21,327][0m Using an existing study with name 'H1_active_P_vs_inactive_P_FFNN_1_2_3' instead of creating a new one.[0m


Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.3032875102016583
  Params: 
    dropout_l0: 0.2
    dropout_l1: 0.3
    lr: 0.0002157302945827545
    n_layers: 2
    n_units_l0: 32
    n_units_l1: 16
    optimizer: RMSprop
    weight_decay: 0.006730649826059255

AUPRC test score: 0.30335188810730684


>>> ITERATION N. 3

Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.31308806893619645
  Params: 
    dropout_l0: 0.0
    dropout_l1: 0.3
    lr: 0.0026357951023454206
    n_layers: 2
    n_units_l0: 64
    n_units_l1: 32
    optimizer: Nadam
    weight_decay: 0.0005683050764990382

AUPRC test score: 0.3031252376140562



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.30756


In [21]:
# File this run's scores under cell line -> task -> model class name.
model_key = model.__name__
results_dict[cell_line][task][model_key] = kf_CV.scores_dict

In [22]:
# Write the accumulated scores back to disk. The outer defaultdict is turned
# into an OrderedDict first (presumably because its lambda default factory
# cannot be pickled).
with open('results_dict.pickle', 'wb') as pickle_file:
    snapshot = OrderedDict(results_dict)
    pickle.dump(snapshot, pickle_file)

---
### 2. CNN

In [23]:
# Use the CNN class (imported from BIOINF_tesi.models) for the runs below.
model = CNN

In [None]:
# 3-fold cross-validation of the selected model on the current task,
# with sequence input and without rebalancing.
kf_CV = Kfold_CV()

# The study name and the two model paths share the <cell_line>_<task>_<model> prefix.
cv_settings = dict(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    sequence=True,
    rebalancing=False,
    model=model,
    device=device,
    task=task,
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST',
)
kf_CV(**cv_settings)

In [None]:
# File this run's scores under cell line -> task -> model class name.
model_key = model.__name__
results_dict[cell_line][task][model_key] = kf_CV.scores_dict

In [None]:
# Write the accumulated scores back to disk. The outer defaultdict is turned
# into an OrderedDict first (presumably because its lambda default factory
# cannot be pickled).
with open('results_dict.pickle', 'wb') as pickle_file:
    snapshot = OrderedDict(results_dict)
    pickle.dump(snapshot, pickle_file)

---

## 3) ACTIVE ENHANCERS vs ACTIVE PROMOTERS

In [23]:
# Third entry of TASKS; the recorded output shows 'active_E_vs_active_P'.
task_index = 2
task = TASKS[task_index]
task

'active_E_vs_active_P'

In [24]:
# Build the data-loading pipeline from the pickle file named after the current task.
task_pickle = f'{task}.pickle'
pipe_data_load = Build_DataLoader_Pipeline(path_name=task_pickle)

Data Preprocessing Done!


---
### 1. FFNN

In [25]:
# Use the FFNN class (imported from BIOINF_tesi.models) for the runs below.
model = FFNN

In [26]:
# 3-fold cross-validation of the selected model on the current task,
# without sequence input and without rebalancing.
kf_CV = Kfold_CV()

# The study name and the two model paths share the <cell_line>_<task>_<model> prefix.
cv_settings = dict(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    sequence=False,
    rebalancing=False,
    model=model,
    device=device,
    task=task,
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST',
)
kf_CV(**cv_settings)

  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-13 10:50:24,845][0m Using an existing study with name 'H1_active_E_vs_active_P_FFNN_1' instead of creating a new one.[0m
  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-13 10:50:24,932][0m Using an existing study with name 'H1_active_E_vs_active_P_FFNN_1_2' instead of creating a new one.[0m


>>> ITERATION N. 1

Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.5215143109913623
  Params: 
    dropout_l0: 0.3
    dropout_l1: 0.3
    lr: 0.00029092106191520785
    n_layers: 2
    n_units_l0: 64
    n_units_l1: 16
    optimizer: Nadam
    weight_decay: 0.0019222588041376612

AUPRC test score: 0.5289442208837357


>>> ITERATION N. 2

Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.4584030939785722
  Params: 
    dropout_l0: 0.0
    dropout_l1: 0.3
    dropout_l2: 0.5
    lr: 2.5385159580588695e-05
    n_layers: 3
    n_units_l0: 256
    n_units_l1: 128
    n_units_l2: 16
    optimizer: Adam
    weight_decay: 0.00010734867803985216

AUPRC test score: 0.5437701611278858


>>> ITERATION N. 3



  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-13 10:50:25,036][0m Using an existing study with name 'H1_active_E_vs_active_P_FFNN_1_2_3' instead of creating a new one.[0m


Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.5857303531652895
  Params: 
    dropout_l0: 0.2
    dropout_l1: 0.2
    dropout_l2: 0.5
    lr: 0.018021377720657743
    n_layers: 3
    n_units_l0: 64
    n_units_l1: 32
    n_units_l2: 64
    optimizer: RMSprop
    weight_decay: 0.001050481055350573

AUPRC test score: 0.4917285704849175



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.52148


In [27]:
# File this run's scores under cell line -> task -> model class name.
model_key = model.__name__
results_dict[cell_line][task][model_key] = kf_CV.scores_dict

In [28]:
# Write the accumulated scores back to disk. The outer defaultdict is turned
# into an OrderedDict first (presumably because its lambda default factory
# cannot be pickled).
with open('results_dict.pickle', 'wb') as pickle_file:
    snapshot = OrderedDict(results_dict)
    pickle.dump(snapshot, pickle_file)

---
### 2. CNN

In [None]:
# Use the CNN class (imported from BIOINF_tesi.models) for the runs below.
model = CNN

In [None]:
# 3-fold cross-validation of the selected model on the current task,
# with sequence input and without rebalancing.
kf_CV = Kfold_CV()

# The study name and the two model paths share the <cell_line>_<task>_<model> prefix.
cv_settings = dict(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    sequence=True,
    rebalancing=False,
    model=model,
    device=device,
    task=task,
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST',
)
kf_CV(**cv_settings)

In [None]:
# File this run's scores under cell line -> task -> model class name.
model_key = model.__name__
results_dict[cell_line][task][model_key] = kf_CV.scores_dict

In [None]:
# Write the accumulated scores back to disk. The outer defaultdict is turned
# into an OrderedDict first (presumably because its lambda default factory
# cannot be pickled).
with open('results_dict.pickle', 'wb') as pickle_file:
    snapshot = OrderedDict(results_dict)
    pickle.dump(snapshot, pickle_file)

---

## 4) INACTIVE ENHANCERS vs INACTIVE PROMOTERS

In [29]:
# Fourth entry of TASKS; the recorded output shows 'inactive_E_vs_inactive_P'.
task_index = 3
task = TASKS[task_index]
task

'inactive_E_vs_inactive_P'

In [30]:
# Build the data-loading pipeline from the pickle file named after the current task.
task_pickle = f'{task}.pickle'
pipe_data_load = Build_DataLoader_Pipeline(path_name=task_pickle)

Data Preprocessing Done!


---
### 1. FFNN

In [31]:
# Use the FFNN class (imported from BIOINF_tesi.models) for the runs below.
model = FFNN

In [32]:
# 3-fold cross-validation of the selected model on the current task,
# without sequence input and without rebalancing.
kf_CV = Kfold_CV()

# The study name and the two model paths share the <cell_line>_<task>_<model> prefix.
cv_settings = dict(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    sequence=False,
    rebalancing=False,
    model=model,
    device=device,
    task=task,
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST',
)
kf_CV(**cv_settings)

  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-13 10:50:28,721][0m Using an existing study with name 'H1_inactive_E_vs_inactive_P_FFNN_1' instead of creating a new one.[0m


>>> ITERATION N. 1

Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.5539316174877712
  Params: 
    dropout_l0: 0.3
    lr: 1.963841167728838e-05
    n_layers: 1
    n_units_l0: 64
    optimizer: RMSprop
    weight_decay: 0.0004610935638552295

AUPRC test score: 0.5539860189418125


>>> ITERATION N. 2


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-13 10:50:28,910][0m Using an existing study with name 'H1_inactive_E_vs_inactive_P_FFNN_1_2' instead of creating a new one.[0m



Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.580835764183374
  Params: 
    dropout_l0: 0.2
    dropout_l1: 0.2
    dropout_l2: 0.0
    dropout_l3: 0.4
    lr: 7.226344950264085e-05
    n_layers: 4
    n_units_l0: 32
    n_units_l1: 128
    n_units_l2: 16
    n_units_l3: 32
    optimizer: Nadam
    weight_decay: 0.0001457182751477659

AUPRC test score: 0.587158240612605


>>> ITERATION N. 3



  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-13 10:50:29,075][0m Using an existing study with name 'H1_inactive_E_vs_inactive_P_FFNN_1_2_3' instead of creating a new one.[0m


Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.5840213111118733
  Params: 
    dropout_l0: 0.0
    dropout_l1: 0.3
    dropout_l2: 0.0
    lr: 0.03678382860526816
    n_layers: 3
    n_units_l0: 64
    n_units_l1: 64
    n_units_l2: 4
    optimizer: Nadam
    weight_decay: 0.0001407247077991582

AUPRC test score: 0.5780008858403396



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.57305


In [33]:
# File this run's scores under cell line -> task -> model class name.
model_key = model.__name__
results_dict[cell_line][task][model_key] = kf_CV.scores_dict

In [34]:
# Write the accumulated scores back to disk. The outer defaultdict is turned
# into an OrderedDict first (presumably because its lambda default factory
# cannot be pickled).
with open('results_dict.pickle', 'wb') as pickle_file:
    snapshot = OrderedDict(results_dict)
    pickle.dump(snapshot, pickle_file)

---
### 2. CNN

In [None]:
# Use the CNN class (imported from BIOINF_tesi.models) for the runs below.
model = CNN

In [None]:
# 3-fold cross-validation of the selected model on the current task,
# with sequence input and without rebalancing.
kf_CV = Kfold_CV()

# The study name and the two model paths share the <cell_line>_<task>_<model> prefix.
cv_settings = dict(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    sequence=True,
    rebalancing=False,
    model=model,
    device=device,
    task=task,
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST',
)
kf_CV(**cv_settings)

In [None]:
# File this run's scores under cell line -> task -> model class name.
model_key = model.__name__
results_dict[cell_line][task][model_key] = kf_CV.scores_dict

In [None]:
# Write the accumulated scores back to disk. The outer defaultdict is turned
# into an OrderedDict first (presumably because its lambda default factory
# cannot be pickled).
with open('results_dict.pickle', 'wb') as pickle_file:
    snapshot = OrderedDict(results_dict)
    pickle.dump(snapshot, pickle_file)

---

## 5) ACTIVE ENHANCERS + ACTIVE PROMOTERS vs INACTIVE REST

In [35]:
# Fifth entry of TASKS; the recorded output shows 'active_EP_vs_inactive_rest'.
task_index = 4
task = TASKS[task_index]
task

'active_EP_vs_inactive_rest'

In [36]:
# Build the data-loading pipeline from the pickle file named after the current task.
task_pickle = f'{task}.pickle'
pipe_data_load = Build_DataLoader_Pipeline(path_name=task_pickle)

Data Preprocessing Done!


---
### 1. FFNN

In [37]:
# Use the FFNN class (imported from BIOINF_tesi.models) for the runs below.
model = FFNN

In [38]:
# 3-fold cross-validation of the selected model on the current task,
# without sequence input and without rebalancing.
kf_CV = Kfold_CV()

# The study name and the two model paths share the <cell_line>_<task>_<model> prefix.
cv_settings = dict(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    sequence=False,
    rebalancing=False,
    model=model,
    device=device,
    task=task,
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST',
)
kf_CV(**cv_settings)

  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-13 10:50:32,935][0m Using an existing study with name 'H1_active_EP_vs_inactive_rest_FFNN_1' instead of creating a new one.[0m


>>> ITERATION N. 1

Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.2675668768085411
  Params: 
    dropout_l0: 0.0
    dropout_l1: 0.2
    lr: 0.05534607967855561
    n_layers: 2
    n_units_l0: 32
    n_units_l1: 128
    optimizer: Adam
    weight_decay: 0.012774346316148447

AUPRC test score: 0.2690144139939069


>>> ITERATION N. 2


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-13 10:50:33,119][0m Using an existing study with name 'H1_active_EP_vs_inactive_rest_FFNN_1_2' instead of creating a new one.[0m



Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.2673123279054717
  Params: 
    dropout_l0: 0.4
    dropout_l1: 0.2
    lr: 0.00015746420748457833
    n_layers: 2
    n_units_l0: 256
    n_units_l1: 16
    optimizer: Nadam
    weight_decay: 0.0020966559930635058



  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-13 10:50:33,263][0m Using an existing study with name 'H1_active_EP_vs_inactive_rest_FFNN_1_2_3' instead of creating a new one.[0m


AUPRC test score: 0.26152603458960505


>>> ITERATION N. 3

Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.27797629861424483
  Params: 
    dropout_l0: 0.2
    dropout_l1: 0.2
    dropout_l2: 0.0
    dropout_l3: 0.0
    lr: 0.0003338933709186161
    n_layers: 4
    n_units_l0: 128
    n_units_l1: 16
    n_units_l2: 4
    n_units_l3: 16
    optimizer: Adam
    weight_decay: 0.0027890642111254786

AUPRC test score: 0.09906993116395493



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.20987


In [39]:
# File this run's scores under cell line -> task -> model class name.
model_key = model.__name__
results_dict[cell_line][task][model_key] = kf_CV.scores_dict

In [40]:
# Write the accumulated scores back to disk. The outer defaultdict is turned
# into an OrderedDict first (presumably because its lambda default factory
# cannot be pickled).
with open('results_dict.pickle', 'wb') as pickle_file:
    snapshot = OrderedDict(results_dict)
    pickle.dump(snapshot, pickle_file)

---
### 2. CNN

In [None]:
# Use the CNN class (imported from BIOINF_tesi.models) for the runs below.
model = CNN

In [None]:
# 3-fold cross-validation of the selected model on the current task,
# with sequence input and without rebalancing.
kf_CV = Kfold_CV()

# The study name and the two model paths share the <cell_line>_<task>_<model> prefix.
cv_settings = dict(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    sequence=True,
    rebalancing=False,
    model=model,
    device=device,
    task=task,
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST',
)
kf_CV(**cv_settings)

In [None]:
# File this run's scores under cell line -> task -> model class name.
model_key = model.__name__
results_dict[cell_line][task][model_key] = kf_CV.scores_dict

In [None]:
# Write the accumulated scores back to disk. The outer defaultdict is turned
# into an OrderedDict first (presumably because its lambda default factory
# cannot be pickled).
with open('results_dict.pickle', 'wb') as pickle_file:
    snapshot = OrderedDict(results_dict)
    pickle.dump(snapshot, pickle_file)