## BIOINFORMATICS THESIS: MULTIMODAL NEURAL NETWORK

# CELL LINE: H1

In [8]:
import pandas as pd 
import numpy as np
import os
# Tell NumPy to ignore (not warn on) floating-point divide-by-zero and invalid-operation errors.
np.seterr(divide='ignore', invalid='ignore')

import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
from collections import defaultdict, OrderedDict
import pickle

import sqlite3
from sqlalchemy import create_engine

# Use the GPU when PyTorch reports CUDA as available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [9]:
# Create a SQLite-backed database to store the Optuna studies (disabled: the create_engine call below is commented out)
#engine = create_engine('sqlite:///BIOINF_optuna_tuning.db')

In [10]:
from BIOINF_tesi.data_pipe import CELL_LINES, TASKS

In [11]:
# Pick the cell line analysed in this notebook: index 2 of CELL_LINES, echoed below as 'H1'.
cell_line = CELL_LINES[2]
cell_line

'H1'

---

In [12]:
from BIOINF_tesi.data_pipe import Load_Create_Task
from BIOINF_tesi.data_pipe import Build_DataLoader_Pipeline

In [13]:
from BIOINF_tesi.models import FFNN, CNN, CNN_LSTM
from BIOINF_tesi.models.utils import fit, Param_Search, Kfold_CV

In [14]:
# Load the scores saved by earlier runs from 'results_dict.pickle'.
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# load a results file that was produced by this project.
if os.path.exists('results_dict.pickle'):
    with open('results_dict.pickle', 'rb') as fin:
        saved_results = pickle.load(fin)
else:
    # First run: there is no results file yet, so start from an empty mapping
    # instead of failing with FileNotFoundError.
    saved_results = {}

# Wrap the loaded mapping so that results_dict[cell_line][task][model_name]
# can be assigned without creating the missing intermediate levels by hand.
results_dict = defaultdict(lambda: defaultdict(dict), saved_results)

## 1) ACTIVE ENHANCERS vs INACTIVE ENHANCERS

In [15]:
# First entry of TASKS, echoed below as 'active_E_vs_inactive_E'.
task = TASKS[0]
task

'active_E_vs_inactive_E'

In [16]:
# Build the data-loading pipeline from the pickle file named after the current task ('<task>.pickle').
pipe_data_load = Build_DataLoader_Pipeline(path_name=f'{task}.pickle')

Data Preprocessing Done!




### 1. FFNN

In [None]:
# Model class for this section; model.__name__ labels the study, the saved model files and the stored scores below.
model=FFNN

In [None]:
# Cross-validate the FFNN on the current task: 3 folds, num_epochs = 100.
kf_CV = Kfold_CV()

# task=task is passed explicitly so this call agrees with the CNN and CNN-LSTM
# calls in this notebook, which all hand the current task to Kfold_CV.
kf_CV(build_dataloader_pipeline=pipe_data_load,
      num_epochs=100,
      n_folds=3,
      cell_line=cell_line,
      task=task,
      sequence=False,
      augmentation=True,
      model=model,
      device=device,
      study_name=f'{cell_line}_{task}_{model.__name__}',
      hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP.pt',
      test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST.pt')

In [None]:
# Store this run's cross-validation scores under cell line -> task -> model name.
results_dict[cell_line][task][model.__name__] = kf_CV.scores_dict

In [None]:
# Write the accumulated scores back to 'results_dict.pickle'. The outer level
# is copied into an OrderedDict first; presumably because the lambda-based
# defaultdict wrapper cannot be pickled directly — TODO confirm.
serializable_results = OrderedDict(results_dict)
with open('results_dict.pickle', 'wb') as fout:
    pickle.dump(serializable_results, fout)

---
### 2. CNN

In [32]:
# Model class for this section; model.__name__ labels the study, the saved model files and the stored scores below.
model=CNN

In [33]:
# Gather the cross-validation settings for the CNN on the current task
# (3 folds, num_epochs = 100, sequence input, augmentation on, 'TPE' sampler).
cv_settings = dict(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    task=task,
    sequence=True,
    augmentation=True,
    model=model,
    device=device,
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP.pt',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST.pt',
)

# Run them through a fresh Kfold_CV instance; its scores_dict is read in the next cell.
kf_CV = Kfold_CV()
kf_CV(**cv_settings)

>>> ITERATION N. 1



  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-01 11:40:19,974][0m A new study created in RDB with name: H1_active_E_vs_inactive_E_CNN_1[0m


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 11:47:45,246][0m Trial 0 finished with value: 0.09625769016971096 and parameters: {'n_layers': 2, 'out_channels_l0': 16, 'kernel_size_l0': 5, 'dropout_l0': 0.2, 'out_channels_l1': 96, 'kernel_size_l1': 5, 'dropout_l1': 0.5, 'optimizer': 'Nadam', 'lr': 1.1465493564666274e-05, 'weight_decay': 0.0009246569089574612}. Best is trial 0 with value: 0.09625769016971096.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 11:53:53,970][0m Trial 1 finished with value: 0.09206570978578021 and parameters: {'n_layers': 2, 'out_channels_l0': 64, 'kernel_size_l0': 15, 'dropout_l0': 0, 'out_channels_l1': 64, 'kernel_size_l1': 5, 'dropout_l1': 0, 'optimizer': 'Adam', 'lr': 0.0021447675032714732, 'weight_decay': 0.010983619369194349}. Best is trial 0 with value: 0.09625769016971096.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 11:57:45,997][0m Trial 2 finished with value: 0.09256784684446241 and parameters: {'n_layers': 1, 'out_channels_l0': 32, 'kernel_size_l0': 11, 'dropout_l0': 0.2, 'optimizer': 'Nadam', 'lr': 0.00030666019560522884, 'weight_decay': 0.0028279063873762047}. Best is trial 0 with value: 0.09625769016971096.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.09625769016971096
  Params: 
    dropout_l0: 0.2
    dropout_l1: 0.5
    kernel_size_l0: 5
    kernel_size_l1: 5
    lr: 1.1465493564666274e-05
    n_layers: 2
    optimizer: Nadam
    out_channels_l0: 16
    out_channels_l1: 96
    weight_decay: 0.0009246569089574612



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.09354446762173997


>>> ITERATION N. 2



  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-01 12:11:58,590][0m A new study created in RDB with name: H1_active_E_vs_inactive_E_CNN_1_2[0m


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 12:21:00,805][0m Trial 0 finished with value: 0.04798415492957746 and parameters: {'n_layers': 1, 'out_channels_l0': 64, 'kernel_size_l0': 11, 'dropout_l0': 0.4, 'optimizer': 'RMSprop', 'lr': 0.023632764060451936, 'weight_decay': 0.014132701763788703}. Best is trial 0 with value: 0.04798415492957746.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 12:25:25,044][0m Trial 1 finished with value: 0.04798415492957748 and parameters: {'n_layers': 1, 'out_channels_l0': 32, 'kernel_size_l0': 5, 'dropout_l0': 0.2, 'optimizer': 'RMSprop', 'lr': 0.09144936920661499, 'weight_decay': 0.01176516229528901}. Best is trial 1 with value: 0.04798415492957748.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 12:35:38,799][0m Trial 2 finished with value: 0.048133802816901404 and parameters: {'n_layers': 2, 'out_channels_l0': 16, 'kernel_size_l0': 11, 'dropout_l0': 0.2, 'out_channels_l1': 96, 'kernel_size_l1': 15, 'dropout_l1': 0, 'optimizer': 'RMSprop', 'lr': 0.016709426400280756, 'weight_decay': 0.032184745676596065}. Best is trial 2 with value: 0.048133802816901404.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.048133802816901404
  Params: 
    dropout_l0: 0.2
    dropout_l1: 0
    kernel_size_l0: 11
    kernel_size_l1: 15
    lr: 0.016709426400280756
    n_layers: 2
    optimizer: RMSprop
    out_channels_l0: 16
    out_channels_l1: 96
    weight_decay: 0.032184745676596065



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-01 12:43:52,699][0m A new study created in RDB with name: H1_active_E_vs_inactive_E_CNN_1_2_3[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.04773336643495529


>>> ITERATION N. 3



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 12:48:05,502][0m Trial 0 finished with value: 0.10383787266835554 and parameters: {'n_layers': 1, 'out_channels_l0': 64, 'kernel_size_l0': 5, 'dropout_l0': 0.4, 'optimizer': 'Nadam', 'lr': 0.0034196905881384073, 'weight_decay': 0.00017740089252482645}. Best is trial 0 with value: 0.10383787266835554.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 12:54:34,638][0m Trial 1 finished with value: 0.08121615442064589 and parameters: {'n_layers': 2, 'out_channels_l0': 16, 'kernel_size_l0': 11, 'dropout_l0': 0.4, 'out_channels_l1': 64, 'kernel_size_l1': 15, 'dropout_l1': 0, 'optimizer': 'Adam', 'lr': 0.00044156705581681446, 'weight_decay': 0.009688752946197641}. Best is trial 0 with value: 0.10383787266835554.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 13:05:48,909][0m Trial 2 finished with value: 0.077431832283111 and parameters: {'n_layers': 2, 'out_channels_l0': 16, 'kernel_size_l0': 11, 'dropout_l0': 0, 'out_channels_l1': 96, 'kernel_size_l1': 5, 'dropout_l1': 0, 'optimizer': 'RMSprop', 'lr': 0.0009489979362388785, 'weight_decay': 0.004639222794243578}. Best is trial 0 with value: 0.10383787266835554.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.10383787266835554
  Params: 
    dropout_l0: 0.4
    kernel_size_l0: 5
    lr: 0.0034196905881384073
    n_layers: 1
    optimizer: Nadam
    out_channels_l0: 64
    weight_decay: 0.00017740089252482645



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.06389892808835214



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.06839


In [34]:
# Store this run's cross-validation scores under cell line -> task -> model name.
results_dict[cell_line][task][model.__name__] = kf_CV.scores_dict

In [35]:
# Write the accumulated scores back to 'results_dict.pickle'. The outer level
# is copied into an OrderedDict first; presumably because the lambda-based
# defaultdict wrapper cannot be pickled directly — TODO confirm.
serializable_results = OrderedDict(results_dict)
with open('results_dict.pickle', 'wb') as fout:
    pickle.dump(serializable_results, fout)

---
### 3. CNN-LSTM

In [None]:
# Model class for this section; model.__name__ labels the study, the saved model files and the stored scores below.
model=CNN_LSTM

In [None]:
# Gather the cross-validation settings for the CNN-LSTM on the current task
# (3 folds, num_epochs = 100, sequence input, augmentation off, 'TPE' sampler).
cv_settings = dict(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    task=task,
    sequence=True,
    augmentation=False,
    model=model,
    device=device,
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP.pt',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST.pt',
)

# Run them through a fresh Kfold_CV instance; its scores_dict is read in the next cell.
kf_CV = Kfold_CV()
kf_CV(**cv_settings)

In [None]:
# Store this run's cross-validation scores under cell line -> task -> model name.
results_dict[cell_line][task][model.__name__] = kf_CV.scores_dict

In [None]:
# Write the accumulated scores back to 'results_dict.pickle'. The outer level
# is copied into an OrderedDict first; presumably because the lambda-based
# defaultdict wrapper cannot be pickled directly — TODO confirm.
serializable_results = OrderedDict(results_dict)
with open('results_dict.pickle', 'wb') as fout:
    pickle.dump(serializable_results, fout)

---

## 2) ACTIVE PROMOTERS vs INACTIVE PROMOTERS

In [36]:
# Second entry of TASKS, echoed below as 'active_P_vs_inactive_P'.
task = TASKS[1]
task

'active_P_vs_inactive_P'

In [37]:
# Rebuild the data-loading pipeline from the pickle file named after the current task ('<task>.pickle').
pipe_data_load = Build_DataLoader_Pipeline(path_name=f'{task}.pickle')

Data Preprocessing Done!




---

### 1. FFNN

In [None]:
# Model class for this section; model.__name__ labels the study, the saved model files and the stored scores below.
model=FFNN

In [None]:
# Cross-validate the FFNN on the current task: 3 folds, num_epochs = 100.
kf_CV = Kfold_CV()

# task=task is passed explicitly so this call agrees with the CNN and CNN-LSTM
# calls in this notebook, which all hand the current task to Kfold_CV.
kf_CV(build_dataloader_pipeline=pipe_data_load,
      num_epochs=100,
      n_folds=3,
      cell_line=cell_line,
      task=task,
      sequence=False,
      augmentation=False,
      model=model,
      device=device,
      study_name=f'{cell_line}_{task}_{model.__name__}',
      hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP.pt',
      test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST.pt')

In [None]:
# Store this run's cross-validation scores under cell line -> task -> model name.
results_dict[cell_line][task][model.__name__] = kf_CV.scores_dict

In [None]:
# Write the accumulated scores back to 'results_dict.pickle'. The outer level
# is copied into an OrderedDict first; presumably because the lambda-based
# defaultdict wrapper cannot be pickled directly — TODO confirm.
serializable_results = OrderedDict(results_dict)
with open('results_dict.pickle', 'wb') as fout:
    pickle.dump(serializable_results, fout)

---
### 2. CNN

In [38]:
# Model class for this section; model.__name__ labels the study, the saved model files and the stored scores below.
model=CNN

In [39]:
# Gather the cross-validation settings for the CNN on the current task
# (3 folds, num_epochs = 100, sequence input, augmentation off, 'TPE' sampler).
cv_settings = dict(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    task=task,
    sequence=True,
    augmentation=False,
    model=model,
    device=device,
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP.pt',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST.pt',
)

# Run them through a fresh Kfold_CV instance; its scores_dict is read in the next cell.
kf_CV = Kfold_CV()
kf_CV(**cv_settings)

  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-01 13:13:32,141][0m A new study created in RDB with name: H1_active_P_vs_inactive_P_CNN_1[0m


>>> ITERATION N. 1



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 13:24:20,018][0m Trial 0 finished with value: 0.13448336091193236 and parameters: {'n_layers': 1, 'out_channels_l0': 64, 'kernel_size_l0': 15, 'dropout_l0': 0, 'optimizer': 'Nadam', 'lr': 0.0072956909925684064, 'weight_decay': 0.04418167630659099}. Best is trial 0 with value: 0.13448336091193236.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 13:30:09,582][0m Trial 1 finished with value: 0.13959267695571628 and parameters: {'n_layers': 1, 'out_channels_l0': 32, 'kernel_size_l0': 11, 'dropout_l0': 0.2, 'optimizer': 'Adam', 'lr': 1.1866650377699289e-05, 'weight_decay': 0.0001403406387809047}. Best is trial 1 with value: 0.13959267695571628.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 13:38:53,157][0m Trial 2 finished with value: 0.16608732667995563 and parameters: {'n_layers': 1, 'out_channels_l0': 32, 'kernel_size_l0': 15, 'dropout_l0': 0, 'optimizer': 'Nadam', 'lr': 0.0005516445082442662, 'weight_decay': 0.001301118849981746}. Best is trial 2 with value: 0.16608732667995563.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.16608732667995563
  Params: 
    dropout_l0: 0
    kernel_size_l0: 15
    lr: 0.0005516445082442662
    n_layers: 1
    optimizer: Nadam
    out_channels_l0: 32
    weight_decay: 0.001301118849981746



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-01 13:47:38,543][0m A new study created in RDB with name: H1_active_P_vs_inactive_P_CNN_1_2[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.16383131326251466


>>> ITERATION N. 2



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 14:01:33,153][0m Trial 0 finished with value: 0.1547374031416341 and parameters: {'n_layers': 4, 'out_channels_l0': 32, 'kernel_size_l0': 15, 'dropout_l0': 0, 'out_channels_l1': 96, 'kernel_size_l1': 11, 'dropout_l1': 0, 'out_channels_l2': 96, 'kernel_size_l2': 15, 'dropout_l2': 0.5, 'out_channels_l3': 128, 'kernel_size_l3': 5, 'dropout_l3': 0.5, 'optimizer': 'Nadam', 'lr': 0.08291074138225955, 'weight_decay': 0.0014609757165821677}. Best is trial 0 with value: 0.1547374031416341.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 14:10:07,810][0m Trial 1 finished with value: 0.15563833150290746 and parameters: {'n_layers': 1, 'out_channels_l0': 64, 'kernel_size_l0': 11, 'dropout_l0': 0.3, 'optimizer': 'Adam', 'lr': 0.00015524335404584936, 'weight_decay': 0.00014885666001690376}. Best is trial 1 with value: 0.15563833150290746.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 14:15:35,072][0m Trial 2 finished with value: 0.14273814672030893 and parameters: {'n_layers': 1, 'out_channels_l0': 16, 'kernel_size_l0': 15, 'dropout_l0': 0, 'optimizer': 'Nadam', 'lr': 1.1473178529867119e-05, 'weight_decay': 0.010407020629212074}. Best is trial 1 with value: 0.15563833150290746.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.15563833150290746
  Params: 
    dropout_l0: 0.3
    kernel_size_l0: 11
    lr: 0.00015524335404584936
    n_layers: 1
    optimizer: Adam
    out_channels_l0: 64
    weight_decay: 0.00014885666001690376



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-01 14:26:54,811][0m A new study created in RDB with name: H1_active_P_vs_inactive_P_CNN_1_2_3[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.15264495000223485


>>> ITERATION N. 3



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 14:34:23,871][0m Trial 0 finished with value: 0.1346175767604339 and parameters: {'n_layers': 1, 'out_channels_l0': 64, 'kernel_size_l0': 5, 'dropout_l0': 0, 'optimizer': 'Nadam', 'lr': 0.047053934541099525, 'weight_decay': 0.0774196240302065}. Best is trial 0 with value: 0.1346175767604339.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 14:40:46,228][0m Trial 1 finished with value: 0.22473176942367348 and parameters: {'n_layers': 2, 'out_channels_l0': 64, 'kernel_size_l0': 15, 'dropout_l0': 0.3, 'out_channels_l1': 32, 'kernel_size_l1': 11, 'dropout_l1': 0, 'optimizer': 'Nadam', 'lr': 4.739550963035682e-05, 'weight_decay': 0.015518033673366122}. Best is trial 1 with value: 0.22473176942367348.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 14:48:30,686][0m Trial 2 finished with value: 0.13554150054150052 and parameters: {'n_layers': 1, 'out_channels_l0': 32, 'kernel_size_l0': 5, 'dropout_l0': 0, 'optimizer': 'RMSprop', 'lr': 7.051489803658327e-05, 'weight_decay': 0.00037903806020044803}. Best is trial 1 with value: 0.22473176942367348.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.22473176942367348
  Params: 
    dropout_l0: 0.3
    dropout_l1: 0
    kernel_size_l0: 15
    kernel_size_l1: 11
    lr: 4.739550963035682e-05
    n_layers: 2
    optimizer: Nadam
    out_channels_l0: 64
    out_channels_l1: 32
    weight_decay: 0.015518033673366122



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.2189019667140438



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.17846


In [40]:
# Store this run's cross-validation scores under cell line -> task -> model name.
results_dict[cell_line][task][model.__name__] = kf_CV.scores_dict

In [41]:
# Write the accumulated scores back to 'results_dict.pickle'. The outer level
# is copied into an OrderedDict first; presumably because the lambda-based
# defaultdict wrapper cannot be pickled directly — TODO confirm.
serializable_results = OrderedDict(results_dict)
with open('results_dict.pickle', 'wb') as fout:
    pickle.dump(serializable_results, fout)

---
### 3. CNN-LSTM

In [None]:
# Model class for this section; model.__name__ labels the study, the saved model files and the stored scores below.
model=CNN_LSTM

In [None]:
# Gather the cross-validation settings for the CNN-LSTM on the current task
# (3 folds, num_epochs = 100, sequence input, augmentation off, 'TPE' sampler).
cv_settings = dict(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    task=task,
    sequence=True,
    augmentation=False,
    model=model,
    device=device,
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP.pt',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST.pt',
)

# Run them through a fresh Kfold_CV instance; its scores_dict is read in the next cell.
kf_CV = Kfold_CV()
kf_CV(**cv_settings)

In [None]:
# Store this run's cross-validation scores under cell line -> task -> model name.
results_dict[cell_line][task][model.__name__] = kf_CV.scores_dict

In [None]:
# Write the accumulated scores back to 'results_dict.pickle'. The outer level
# is copied into an OrderedDict first; presumably because the lambda-based
# defaultdict wrapper cannot be pickled directly — TODO confirm.
serializable_results = OrderedDict(results_dict)
with open('results_dict.pickle', 'wb') as fout:
    pickle.dump(serializable_results, fout)

---

## 3) ACTIVE ENHANCERS vs ACTIVE PROMOTERS

In [42]:
# Third entry of TASKS, echoed below as 'active_E_vs_active_P'.
task = TASKS[2]
task

'active_E_vs_active_P'

In [43]:
# Rebuild the data-loading pipeline from the pickle file named after the current task ('<task>.pickle').
pipe_data_load = Build_DataLoader_Pipeline(path_name=f'{task}.pickle')

Data Preprocessing Done!




---
### 1. FFNN

In [None]:
# Model class for this section; model.__name__ labels the study, the saved model files and the stored scores below.
model=FFNN

In [None]:
# Cross-validate the FFNN on the current task: 3 folds, num_epochs = 100,
# random_state = 32.
kf_CV = Kfold_CV()

# task=task is passed explicitly so this call agrees with the CNN and CNN-LSTM
# calls in this notebook, which all hand the current task to Kfold_CV.
kf_CV(build_dataloader_pipeline=pipe_data_load,
      num_epochs=100,
      n_folds=3,
      cell_line=cell_line,
      task=task,
      sequence=False,
      augmentation=False,
      random_state=32,
      model=model,
      device=device,
      study_name=f'{cell_line}_{task}_{model.__name__}',
      hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP.pt',
      test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST.pt')

In [None]:
# Store this run's cross-validation scores under cell line -> task -> model name.
results_dict[cell_line][task][model.__name__] = kf_CV.scores_dict

In [None]:
# Write the accumulated scores back to 'results_dict.pickle'. The outer level
# is copied into an OrderedDict first; presumably because the lambda-based
# defaultdict wrapper cannot be pickled directly — TODO confirm.
serializable_results = OrderedDict(results_dict)
with open('results_dict.pickle', 'wb') as fout:
    pickle.dump(serializable_results, fout)

---
### 2. CNN

In [44]:
# Model class for this section; model.__name__ labels the study and the saved model files below.
model=CNN

In [45]:
# Gather the cross-validation settings for the CNN on the current task
# (3 folds, num_epochs = 100, sequence input, augmentation off, 'TPE' sampler).
cv_settings = dict(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    task=task,
    sequence=True,
    augmentation=False,
    model=model,
    device=device,
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP.pt',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST.pt',
)

# Run them through a fresh Kfold_CV instance.
kf_CV = Kfold_CV()
kf_CV(**cv_settings)

  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-01 14:58:08,200][0m A new study created in RDB with name: H1_active_E_vs_active_P_CNN_1[0m


>>> ITERATION N. 1



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 14:59:19,055][0m Trial 0 finished with value: 0.1959601706970128 and parameters: {'n_layers': 3, 'out_channels_l0': 64, 'kernel_size_l0': 5, 'dropout_l0': 0.4, 'out_channels_l1': 64, 'kernel_size_l1': 5, 'dropout_l1': 0, 'out_channels_l2': 96, 'kernel_size_l2': 5, 'dropout_l2': 0.4, 'optimizer': 'Nadam', 'lr': 0.00013014290070108728, 'weight_decay': 0.0029446545107655897}. Best is trial 0 with value: 0.1959601706970128.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 15:00:22,134][0m Trial 1 finished with value: 0.33711665737827695 and parameters: {'n_layers': 3, 'out_channels_l0': 32, 'kernel_size_l0': 15, 'dropout_l0': 0, 'out_channels_l1': 64, 'kernel_size_l1': 5, 'dropout_l1': 0.4, 'out_channels_l2': 96, 'kernel_size_l2': 15, 'dropout_l2': 0, 'optimizer': 'Nadam', 'lr': 0.0025515339805708193, 'weight_decay': 0.0018416757825418667}. Best is trial 1 with value: 0.33711665737827695.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 15:02:15,506][0m Trial 2 finished with value: 0.30405126745944494 and parameters: {'n_layers': 4, 'out_channels_l0': 32, 'kernel_size_l0': 5, 'dropout_l0': 0.4, 'out_channels_l1': 64, 'kernel_size_l1': 11, 'dropout_l1': 0.4, 'out_channels_l2': 256, 'kernel_size_l2': 15, 'dropout_l2': 0.4, 'out_channels_l3': 256, 'kernel_size_l3': 11, 'dropout_l3': 0.4, 'optimizer': 'Adam', 'lr': 0.032874802770563955, 'weight_decay': 0.007484002933554625}. Best is trial 1 with value: 0.33711665737827695.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.33711665737827695
  Params: 
    dropout_l0: 0
    dropout_l1: 0.4
    dropout_l2: 0
    kernel_size_l0: 15
    kernel_size_l1: 5
    kernel_size_l2: 15
    lr: 0.0025515339805708193
    n_layers: 3
    optimizer: Nadam
    out_channels_l0: 32
    out_channels_l1: 64
    out_channels_l2: 96
    weight_decay: 0.0018416757825418667



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-01 15:05:57,108][0m A new study created in RDB with name: H1_active_E_vs_active_P_CNN_1_2[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.41314218882986353


>>> ITERATION N. 2



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 15:07:10,960][0m Trial 0 finished with value: 0.5442957294577445 and parameters: {'n_layers': 2, 'out_channels_l0': 64, 'kernel_size_l0': 5, 'dropout_l0': 0.2, 'out_channels_l1': 96, 'kernel_size_l1': 11, 'dropout_l1': 0.5, 'optimizer': 'Adam', 'lr': 0.0011664818820017037, 'weight_decay': 0.0017223377843116829}. Best is trial 0 with value: 0.5442957294577445.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 15:08:41,204][0m Trial 1 finished with value: 0.49554123181232934 and parameters: {'n_layers': 3, 'out_channels_l0': 32, 'kernel_size_l0': 11, 'dropout_l0': 0.4, 'out_channels_l1': 96, 'kernel_size_l1': 11, 'dropout_l1': 0.4, 'out_channels_l2': 64, 'kernel_size_l2': 5, 'dropout_l2': 0.4, 'optimizer': 'RMSprop', 'lr': 0.00508213481160198, 'weight_decay': 0.0003348993686063453}. Best is trial 0 with value: 0.5442957294577445.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 15:11:47,567][0m Trial 2 finished with value: 0.18827169274537695 and parameters: {'n_layers': 3, 'out_channels_l0': 32, 'kernel_size_l0': 11, 'dropout_l0': 0.2, 'out_channels_l1': 64, 'kernel_size_l1': 15, 'dropout_l1': 0.4, 'out_channels_l2': 96, 'kernel_size_l2': 15, 'dropout_l2': 0, 'optimizer': 'Nadam', 'lr': 0.07692197328662466, 'weight_decay': 0.00018449310783021992}. Best is trial 0 with value: 0.5442957294577445.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.5442957294577445
  Params: 
    dropout_l0: 0.2
    dropout_l1: 0.5
    kernel_size_l0: 5
    kernel_size_l1: 11
    lr: 0.0011664818820017037
    n_layers: 2
    optimizer: Adam
    out_channels_l0: 64
    out_channels_l1: 96
    weight_decay: 0.0017223377843116829



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-01 15:13:39,403][0m A new study created in RDB with name: H1_active_E_vs_active_P_CNN_1_2_3[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.5551891242492616


>>> ITERATION N. 3



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 15:14:58,825][0m Trial 0 finished with value: 0.20583924881161783 and parameters: {'n_layers': 4, 'out_channels_l0': 32, 'kernel_size_l0': 11, 'dropout_l0': 0.4, 'out_channels_l1': 32, 'kernel_size_l1': 15, 'dropout_l1': 0, 'out_channels_l2': 64, 'kernel_size_l2': 5, 'dropout_l2': 0.5, 'out_channels_l3': 128, 'kernel_size_l3': 15, 'dropout_l3': 0, 'optimizer': 'Adam', 'lr': 3.835802105956803e-05, 'weight_decay': 0.00011682287246153977}. Best is trial 0 with value: 0.20583924881161783.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 15:16:28,045][0m Trial 1 finished with value: 0.18903047091412742 and parameters: {'n_layers': 3, 'out_channels_l0': 16, 'kernel_size_l0': 15, 'dropout_l0': 0.2, 'out_channels_l1': 32, 'kernel_size_l1': 11, 'dropout_l1': 0.5, 'out_channels_l2': 128, 'kernel_size_l2': 15, 'dropout_l2': 0, 'optimizer': 'Nadam', 'lr': 0.019996944953380377, 'weight_decay': 0.0353017881140434}. Best is trial 0 with value: 0.20583924881161783.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 15:18:02,419][0m Trial 2 finished with value: 0.19912742382271464 and parameters: {'n_layers': 3, 'out_channels_l0': 64, 'kernel_size_l0': 11, 'dropout_l0': 0.4, 'out_channels_l1': 96, 'kernel_size_l1': 15, 'dropout_l1': 0.5, 'out_channels_l2': 128, 'kernel_size_l2': 5, 'dropout_l2': 0.4, 'optimizer': 'Nadam', 'lr': 0.0952626634356558, 'weight_decay': 0.0005726713937469667}. Best is trial 0 with value: 0.20583924881161783.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.20583924881161783
  Params: 
    dropout_l0: 0.4
    dropout_l1: 0
    dropout_l2: 0.5
    dropout_l3: 0
    kernel_size_l0: 11
    kernel_size_l1: 15
    kernel_size_l2: 5
    kernel_size_l3: 15
    lr: 3.835802105956803e-05
    n_layers: 4
    optimizer: Adam
    out_channels_l0: 32
    out_channels_l1: 32
    out_channels_l2: 64
    out_channels_l3: 128
    weight_decay: 0.00011682287246153977



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.2090264000336895



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.39245


In [46]:
# File the scores dictionary exposed by the cross-validation object under
# cell line -> task -> model name.
cv_scores = kf_CV.scores_dict
results_dict[cell_line][task][model.__name__] = cv_scores

In [47]:
# Checkpoint the accumulated results to disk. The outer mapping is copied into an
# OrderedDict before pickling -- presumably because results_dict is rebuilt at load
# time as a defaultdict with a lambda factory, which pickle cannot serialise.
with open('results_dict.pickle', 'wb') as results_file:
    snapshot = OrderedDict(results_dict)
    pickle.dump(snapshot, results_file)

---

## 4) INACTIVE ENHANCERS vs INACTIVE PROMOTERS

In [17]:
# Select the task for this section; per the cell output, TASKS[3] is
# 'inactive_E_vs_inactive_P'. The bare `task` line echoes it as the cell result.
task = TASKS[3]
task

'inactive_E_vs_inactive_P'

In [18]:
# Instantiate the project's Build_DataLoader_Pipeline on this task's pickle file
# ('<task>.pickle'); the object is later handed to Kfold_CV as build_dataloader_pipeline.
pipe_data_load = Build_DataLoader_Pipeline(path_name=f'{task}.pickle')

Data Preprocessing Done!


---
### 1. FFNN

In [None]:
# Model used by the cells of this subsection; its __name__ is embedded in the
# study name, the checkpoint paths and the results_dict key.
model=FFNN

In [None]:
# 3-fold cross-validation of the FFNN on the current task (no sequence input,
# no augmentation).
kf_CV = Kfold_CV()

kf_CV(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    # Added for consistency: every executed Kfold_CV call in this notebook (the CNN
    # runs) passes the task name. NOTE(review): confirm whether `task` is required.
    task=task,
    sequence=False,
    augmentation=False,
    model=model,
    device=device,
    study_name=f'{cell_line}_{task}_{model.__name__}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP.pt',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST.pt',
)

In [None]:
# File the scores dictionary exposed by the cross-validation object under
# cell line -> task -> model name.
cv_scores = kf_CV.scores_dict
results_dict[cell_line][task][model.__name__] = cv_scores

In [None]:
# Checkpoint the accumulated results to disk. The outer mapping is copied into an
# OrderedDict before pickling -- presumably because results_dict is rebuilt at load
# time as a defaultdict with a lambda factory, which pickle cannot serialise.
with open('results_dict.pickle', 'wb') as results_file:
    snapshot = OrderedDict(results_dict)
    pickle.dump(snapshot, results_file)

---
### 2. CNN

In [19]:
# Model used by the cells of this subsection; its __name__ is embedded in the
# study name, the checkpoint paths and the results_dict key.
model=CNN

In [20]:
# 3-fold cross-validation of the selected model on sequence input. Per the log of this
# cell, every iteration reports an Optuna study and then an "AUPRC test score".
kf_CV = Kfold_CV()

kf_CV(
    # data and experiment identity
    build_dataloader_pipeline=pipe_data_load,
    cell_line=cell_line,
    task=task,
    # model and input settings
    model=model,
    sequence=True,
    augmentation=False,
    device=device,
    # training / search settings
    num_epochs=100,
    n_folds=3,
    sampler='TPE',
    # base study name (the log shows an iteration suffix being appended) and model paths
    study_name=f'{cell_line}_{task}_{model.__name__}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP.pt',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST.pt',
)

>>> ITERATION N. 1



  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-01 16:19:46,784][0m Using an existing study with name 'H1_inactive_E_vs_inactive_P_CNN_1' instead of creating a new one.[0m


Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.5985109560594046
  Params: 
    dropout_l0: 0
    dropout_l1: 0
    dropout_l2: 0.5
    dropout_l3: 0
    kernel_size_l0: 15
    kernel_size_l1: 5
    kernel_size_l2: 11
    kernel_size_l3: 15
    lr: 0.0006177288276810144
    n_layers: 4
    optimizer: Adam
    out_channels_l0: 16
    out_channels_l1: 64
    out_channels_l2: 96
    out_channels_l3: 256
    weight_decay: 0.011506970047176721



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

  return torch.max_pool1d(input, kernel_size, stride, padding, dilation, ceil_mode)


EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-01 16:37:52,977][0m A new study created in RDB with name: H1_inactive_E_vs_inactive_P_CNN_1_2[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.5764858683549248


>>> ITERATION N. 2



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 16:50:01,133][0m Trial 0 finished with value: 0.40722838137472284 and parameters: {'n_layers': 1, 'out_channels_l0': 16, 'kernel_size_l0': 11, 'dropout_l0': 0, 'optimizer': 'RMSprop', 'lr': 0.008372478638155996, 'weight_decay': 0.0034651741670167105}. Best is trial 0 with value: 0.40722838137472284.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 16:58:02,680][0m Trial 1 finished with value: 0.5757595975625552 and parameters: {'n_layers': 1, 'out_channels_l0': 16, 'kernel_size_l0': 5, 'dropout_l0': 0.3, 'optimizer': 'Adam', 'lr': 3.370179556588162e-05, 'weight_decay': 0.0042142139300556155}. Best is trial 1 with value: 0.5757595975625552.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 17:07:22,554][0m Trial 2 finished with value: 0.524019811094785 and parameters: {'n_layers': 2, 'out_channels_l0': 64, 'kernel_size_l0': 15, 'dropout_l0': 0.4, 'out_channels_l1': 32, 'kernel_size_l1': 11, 'dropout_l1': 0.4, 'optimizer': 'Adam', 'lr': 0.0850942058803178, 'weight_decay': 0.007635450933450643}. Best is trial 1 with value: 0.5757595975625552.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.5757595975625552
  Params: 
    dropout_l0: 0.3
    kernel_size_l0: 5
    lr: 3.370179556588162e-05
    n_layers: 1
    optimizer: Adam
    out_channels_l0: 16
    weight_decay: 0.0042142139300556155



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-01 17:21:22,062][0m A new study created in RDB with name: H1_inactive_E_vs_inactive_P_CNN_1_2_3[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.5820401718614242


>>> ITERATION N. 3



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 17:29:25,637][0m Trial 0 finished with value: 0.5606868518262608 and parameters: {'n_layers': 1, 'out_channels_l0': 16, 'kernel_size_l0': 15, 'dropout_l0': 0.3, 'optimizer': 'Adam', 'lr': 1.1165022479715584e-05, 'weight_decay': 0.006514138516176246}. Best is trial 0 with value: 0.5606868518262608.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

	add(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add(Tensor other, *, Number alpha) (Triggered internally at  /opt/conda/conda-bld/pytorch_1623448265233/work/torch/csrc/utils/python_arg_parser.cpp:1025.)
  grad = grad.add(group['weight_decay'], p.data)


EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 17:45:21,170][0m Trial 1 finished with value: 0.5693037198442992 and parameters: {'n_layers': 2, 'out_channels_l0': 16, 'kernel_size_l0': 11, 'dropout_l0': 0.4, 'out_channels_l1': 64, 'kernel_size_l1': 15, 'dropout_l1': 0.5, 'optimizer': 'Nadam', 'lr': 0.0038336073622503852, 'weight_decay': 0.0015880269793153365}. Best is trial 1 with value: 0.5693037198442992.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 18:09:39,543][0m Trial 2 finished with value: 0.40851164079822616 and parameters: {'n_layers': 1, 'out_channels_l0': 32, 'kernel_size_l0': 5, 'dropout_l0': 0.4, 'optimizer': 'RMSprop', 'lr': 0.044464418176693235, 'weight_decay': 0.00024650865115092847}. Best is trial 1 with value: 0.5693037198442992.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.5693037198442992
  Params: 
    dropout_l0: 0.4
    dropout_l1: 0.5
    kernel_size_l0: 11
    kernel_size_l1: 15
    lr: 0.0038336073622503852
    n_layers: 2
    optimizer: Nadam
    out_channels_l0: 16
    out_channels_l1: 64
    weight_decay: 0.0015880269793153365



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.5853241468121042



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.58128


In [21]:
# File the scores dictionary exposed by the cross-validation object under
# cell line -> task -> model name.
cv_scores = kf_CV.scores_dict
results_dict[cell_line][task][model.__name__] = cv_scores

In [22]:
# Checkpoint the accumulated results to disk. The outer mapping is copied into an
# OrderedDict before pickling -- presumably because results_dict is rebuilt at load
# time as a defaultdict with a lambda factory, which pickle cannot serialise.
with open('results_dict.pickle', 'wb') as results_file:
    snapshot = OrderedDict(results_dict)
    pickle.dump(snapshot, results_file)

---

## 5) ACTIVE ENHANCERS + ACTIVE PROMOTERS vs INACTIVE REST

In [23]:
# Select the task for this section; per the cell output, TASKS[4] is
# 'active_EP_vs_inactive_rest'. The bare `task` line echoes it as the cell result.
task = TASKS[4]
task

'active_EP_vs_inactive_rest'

In [24]:
# Instantiate the project's Build_DataLoader_Pipeline on this task's pickle file
# ('<task>.pickle'); the object is later handed to Kfold_CV as build_dataloader_pipeline.
pipe_data_load = Build_DataLoader_Pipeline(path_name=f'{task}.pickle')

Data Preprocessing Done!




---
### 1. FFNN

In [None]:
# This FFNN subsection has no `model=FFNN` cell of its own. Without the assignment
# below, `model` would still be whatever an earlier cell left in the kernel (the CNN
# of the previous section on a top-to-bottom run), so the run would train the wrong
# architecture with sequence=False and name its study/checkpoints after it.
model = FFNN

# 3-fold cross-validation of the FFNN on the current task (no sequence input,
# no augmentation).
kf_CV = Kfold_CV()

kf_CV(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    # Added for consistency: every executed Kfold_CV call in this notebook (the CNN
    # runs) passes the task name. NOTE(review): confirm whether `task` is required.
    task=task,
    sequence=False,
    augmentation=False,
    model=model,
    device=device,
    study_name=f'{cell_line}_{task}_{model.__name__}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP.pt',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST.pt',
)

In [None]:
# File the scores dictionary exposed by the cross-validation object under
# cell line -> task -> model name.
cv_scores = kf_CV.scores_dict
results_dict[cell_line][task][model.__name__] = cv_scores

In [None]:
# Checkpoint the accumulated results to disk. The outer mapping is copied into an
# OrderedDict before pickling -- presumably because results_dict is rebuilt at load
# time as a defaultdict with a lambda factory, which pickle cannot serialise.
with open('results_dict.pickle', 'wb') as results_file:
    snapshot = OrderedDict(results_dict)
    pickle.dump(snapshot, results_file)

---
### 2. CNN

In [25]:
# Model used by the cells of this subsection; its __name__ is embedded in the
# study name, the checkpoint paths and the results_dict key.
model=CNN

In [26]:
# 3-fold cross-validation of the selected model on sequence input. Per the log of this
# cell, every iteration runs an Optuna study and then reports an "AUPRC test score".
kf_CV = Kfold_CV()

kf_CV(
    # data and experiment identity
    build_dataloader_pipeline=pipe_data_load,
    cell_line=cell_line,
    task=task,
    # model and input settings
    model=model,
    sequence=True,
    augmentation=False,
    device=device,
    # training / search settings
    num_epochs=100,
    n_folds=3,
    sampler='TPE',
    # base study name (the log shows an iteration suffix being appended) and model paths
    study_name=f'{cell_line}_{task}_{model.__name__}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP.pt',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST.pt',
)

  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),


>>> ITERATION N. 1



[32m[I 2021-10-01 18:38:14,777][0m A new study created in RDB with name: H1_active_EP_vs_inactive_rest_CNN_1[0m


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 18:47:10,136][0m Trial 0 finished with value: 0.1024703332784021 and parameters: {'n_layers': 1, 'out_channels_l0': 16, 'kernel_size_l0': 11, 'dropout_l0': 0.3, 'optimizer': 'RMSprop', 'lr': 2.6196700583316394e-05, 'weight_decay': 0.0074563135020834235}. Best is trial 0 with value: 0.1024703332784021.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 18:59:52,858][0m Trial 1 finished with value: 0.09894859377910226 and parameters: {'n_layers': 2, 'out_channels_l0': 32, 'kernel_size_l0': 15, 'dropout_l0': 0.2, 'out_channels_l1': 96, 'kernel_size_l1': 15, 'dropout_l1': 0.4, 'optimizer': 'RMSprop', 'lr': 0.0029812021527358826, 'weight_decay': 0.00807190380783155}. Best is trial 0 with value: 0.1024703332784021.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 19:09:12,101][0m Trial 2 finished with value: 0.09953225986376535 and parameters: {'n_layers': 2, 'out_channels_l0': 32, 'kernel_size_l0': 11, 'dropout_l0': 0.4, 'out_channels_l1': 32, 'kernel_size_l1': 15, 'dropout_l1': 0.5, 'optimizer': 'Nadam', 'lr': 0.0909909989936248, 'weight_decay': 0.00022823502710114376}. Best is trial 0 with value: 0.1024703332784021.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.1024703332784021
  Params: 
    dropout_l0: 0.3
    kernel_size_l0: 11
    lr: 2.6196700583316394e-05
    n_layers: 1
    optimizer: RMSprop
    out_channels_l0: 16
    weight_decay: 0.0074563135020834235



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-01 19:22:36,126][0m A new study created in RDB with name: H1_active_EP_vs_inactive_rest_CNN_1_2[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.1033010095781835


>>> ITERATION N. 2



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 19:42:58,385][0m Trial 0 finished with value: 0.10175242130750604 and parameters: {'n_layers': 3, 'out_channels_l0': 32, 'kernel_size_l0': 5, 'dropout_l0': 0.3, 'out_channels_l1': 32, 'kernel_size_l1': 15, 'dropout_l1': 0.4, 'out_channels_l2': 128, 'kernel_size_l2': 15, 'dropout_l2': 0, 'optimizer': 'Nadam', 'lr': 7.751065093065405e-05, 'weight_decay': 0.00015526170583804132}. Best is trial 0 with value: 0.10175242130750604.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 20:03:44,752][0m Trial 1 finished with value: 0.11952412780693837 and parameters: {'n_layers': 1, 'out_channels_l0': 16, 'kernel_size_l0': 5, 'dropout_l0': 0, 'optimizer': 'Adam', 'lr': 4.4946131492384865e-05, 'weight_decay': 0.005984387460470812}. Best is trial 1 with value: 0.11952412780693837.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 20:12:39,733][0m Trial 2 finished with value: 0.11283656205862666 and parameters: {'n_layers': 1, 'out_channels_l0': 16, 'kernel_size_l0': 11, 'dropout_l0': 0, 'optimizer': 'Adam', 'lr': 1.559105114199656e-05, 'weight_decay': 0.0016508261074933098}. Best is trial 1 with value: 0.11952412780693837.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.11952412780693837
  Params: 
    dropout_l0: 0
    kernel_size_l0: 5
    lr: 4.4946131492384865e-05
    n_layers: 1
    optimizer: Adam
    out_channels_l0: 16
    weight_decay: 0.005984387460470812



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-01 20:28:16,077][0m A new study created in RDB with name: H1_active_EP_vs_inactive_rest_CNN_1_2_3[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.12255407536407693


>>> ITERATION N. 3



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 21:00:12,676][0m Trial 0 finished with value: 0.10185192137055482 and parameters: {'n_layers': 4, 'out_channels_l0': 64, 'kernel_size_l0': 15, 'dropout_l0': 0.3, 'out_channels_l1': 32, 'kernel_size_l1': 5, 'dropout_l1': 0, 'out_channels_l2': 96, 'kernel_size_l2': 15, 'dropout_l2': 0.5, 'out_channels_l3': 512, 'kernel_size_l3': 15, 'dropout_l3': 0, 'optimizer': 'Nadam', 'lr': 0.0030815265026179783, 'weight_decay': 0.00024299658218821776}. Best is trial 0 with value: 0.10185192137055482.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 21:09:39,598][0m Trial 1 finished with value: 0.11379471746021276 and parameters: {'n_layers': 1, 'out_channels_l0': 32, 'kernel_size_l0': 11, 'dropout_l0': 0, 'optimizer': 'Nadam', 'lr': 0.0004306828345584087, 'weight_decay': 0.0009460647752329262}. Best is trial 1 with value: 0.11379471746021276.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 21:28:28,240][0m Trial 2 finished with value: 0.10425835032663001 and parameters: {'n_layers': 1, 'out_channels_l0': 32, 'kernel_size_l0': 5, 'dropout_l0': 0.3, 'optimizer': 'Nadam', 'lr': 0.0005268921593099583, 'weight_decay': 0.0010525079234122083}. Best is trial 1 with value: 0.11379471746021276.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.11379471746021276
  Params: 
    dropout_l0: 0
    kernel_size_l0: 11
    lr: 0.0004306828345584087
    n_layers: 1
    optimizer: Nadam
    out_channels_l0: 32
    weight_decay: 0.0009460647752329262



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.11402469784651957



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.11329


In [27]:
# File the scores dictionary exposed by the cross-validation object under
# cell line -> task -> model name.
cv_scores = kf_CV.scores_dict
results_dict[cell_line][task][model.__name__] = cv_scores

In [28]:
# Checkpoint the accumulated results to disk. The outer mapping is copied into an
# OrderedDict before pickling -- presumably because results_dict is rebuilt at load
# time as a defaultdict with a lambda factory, which pickle cannot serialise.
with open('results_dict.pickle', 'wb') as results_file:
    snapshot = OrderedDict(results_dict)
    pickle.dump(snapshot, results_file)