## BIOINFORMATICS THESIS: MULTIMODAL NEURAL NETWORK

# CELL LINE: HEK293

In [10]:
import pandas as pd 
import numpy as np
import os

import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
from collections import defaultdict, OrderedDict
import pickle

import sqlite3
from sqlalchemy import create_engine

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [11]:
# create a database to store optuna studies with sqlite backend
#engine = create_engine('sqlite:///BIOINF_optuna_tuning.db')

In [12]:
from BIOINF_tesi.data_pipe import CELL_LINES, TASKS

In [13]:
# Entry 3 of CELL_LINES is the cell line analysed in this notebook
# (it evaluates to 'HEK293', as the cell output shows).
cell_line = CELL_LINES[3]
cell_line

'HEK293'

---

In [14]:
from BIOINF_tesi.data_pipe import Load_Create_Task
from BIOINF_tesi.data_pipe import Build_DataLoader_Pipeline

In [15]:
from BIOINF_tesi.models import FFNN, CNN, CNN_LSTM
from BIOINF_tesi.models.utils import fit, Param_Search, Kfold_CV

In [49]:
# Reload the scores accumulated by earlier sessions, or start empty when the
# results file does not exist yet (first run / fresh checkout).
# NOTE: pickle.load can execute arbitrary code; only open a results file that
# was written by this notebook.
results_path = 'results_dict.pickle'
if os.path.exists(results_path):
    with open(results_path, 'rb') as fin:
        saved_results = pickle.load(fin)
else:
    saved_results = {}

# The outer level auto-creates a per-key defaultdict(dict), so
# results_dict[cell_line][task][model_name] can be assigned without
# initialising the intermediate keys first.
results_dict = defaultdict(lambda: defaultdict(dict), saved_results)

In [50]:
# Show, for every top-level entry of results_dict, which keys it already holds.
for per_cell_line in results_dict.values():
    display(per_cell_line.keys())

dict_keys(['active_E_vs_inactive_E', 'active_P_vs_inactive_P', 'active_E_vs_active_P', 'inactive_E_vs_inactive_P', 'active_EP_vs_inactive_rest'])

dict_keys(['active_E_vs_inactive_E', 'active_P_vs_inactive_P', 'active_E_vs_active_P', 'inactive_E_vs_inactive_P', 'active_EP_vs_inactive_rest'])

dict_keys(['active_E_vs_inactive_E', 'active_P_vs_inactive_P', 'active_E_vs_active_P', 'inactive_E_vs_inactive_P', 'active_EP_vs_inactive_rest'])

dict_keys(['active_E_vs_inactive_E', 'active_P_vs_inactive_P', 'active_E_vs_active_P', 'inactive_E_vs_inactive_P', 'active_EP_vs_inactive_rest'])

## 1) ACTIVE ENHANCERS vs INACTIVE ENHANCERS

In [17]:
# Select the first entry of TASKS as the task for this section and display it.
task = TASKS[0]
task

'active_E_vs_inactive_E'

In [18]:
# Build the data-loading pipeline from the file named after the current task.
pipe_data_load = Build_DataLoader_Pipeline(path_name='{}.pickle'.format(task))

Data Preprocessing Done!


### 1. FFNN

In [None]:
# Model evaluated in this subsection.
model = FFNN

In [None]:
# Fresh cross-validation driver for the selected model.
kf_CV = Kfold_CV()

# task is passed explicitly, as in the CNN and CNN-LSTM calls of this section.
# NOTE(review): the executed CNN call also sets sampler='TPE' and uses checkpoint
# paths without the '.pt' suffix — confirm whether this call should match.
kf_CV(build_dataloader_pipeline = pipe_data_load,
                num_epochs = 100,
                n_folds=3,
                cell_line=cell_line,
                task=task,
                sequence=False,
                augmentation=False,
                model = model,
                device = device,
                study_name = f'{cell_line}_{task}_{model.__name__}',
                hp_model_path = f'{cell_line}_{task}_{model.__name__}_HP.pt',
                test_model_path = f'{cell_line}_{task}_{model.__name__}_TEST.pt')

In [None]:
# File this run's scores under cell line -> task -> model name.
cv_scores = kf_CV.scores_dict
results_dict[cell_line][task][model.__name__] = cv_scores

In [None]:
# Persist everything collected so far. The outer defaultdict is converted to an
# OrderedDict before pickling — presumably because its lambda factory cannot be pickled.
with open('results_dict.pickle', mode='wb') as fout:
    snapshot = OrderedDict(results_dict)
    pickle.dump(snapshot, fout)

---
### 2. CNN

In [22]:
# Model evaluated in this subsection.
model = CNN

In [23]:
# Fresh cross-validation driver for the selected model.
kf_CV = Kfold_CV()

# Common prefix shared by the study name and both checkpoint paths.
run_tag = f'{cell_line}_{task}_{model.__name__}'

kf_CV(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    task=task,
    sequence=True,
    augmentation=False,
    model=model,
    device=device,
    sampler='TPE',
    study_name=run_tag,
    hp_model_path=run_tag + '_HP',
    test_model_path=run_tag + '_TEST',
)

>>> ITERATION N. 1



  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-01 22:21:30,676][0m Using an existing study with name 'HEK293_active_E_vs_inactive_E_CNN_1' instead of creating a new one.[0m


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

  return torch.max_pool1d(input, kernel_size, stride, padding, dilation, ceil_mode)
	add(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add(Tensor other, *, Number alpha) (Triggered internally at  /opt/conda/conda-bld/pytorch_1623448265233/work/torch/csrc/utils/python_arg_parser.cpp:1025.)
  grad = grad.add(group['weight_decay'], p.data)


EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 22:27:11,381][0m Trial 2 finished with value: 0.12051997545101485 and parameters: {'n_layers': 4, 'out_channels_l0': 16, 'kernel_size_l0': 5, 'dropout_l0': 0.4, 'out_channels_l1': 64, 'kernel_size_l1': 15, 'dropout_l1': 0.4, 'out_channels_l2': 128, 'kernel_size_l2': 11, 'dropout_l2': 0, 'out_channels_l3': 128, 'kernel_size_l3': 5, 'dropout_l3': 0.5, 'optimizer': 'Nadam', 'lr': 0.0007658954304628522, 'weight_decay': 0.0005653234495199956}. Best is trial 2 with value: 0.12051997545101485.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 22:32:49,230][0m Trial 3 finished with value: 0.10621478873239439 and parameters: {'n_layers': 2, 'out_channels_l0': 64, 'kernel_size_l0': 11, 'dropout_l0': 0.2, 'out_channels_l1': 64, 'kernel_size_l1': 15, 'dropout_l1': 0, 'optimizer': 'RMSprop', 'lr': 0.037081404401245265, 'weight_decay': 0.004831128628882234}. Best is trial 2 with value: 0.12051997545101485.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 22:40:06,810][0m Trial 4 finished with value: 0.10561619718309868 and parameters: {'n_layers': 2, 'out_channels_l0': 32, 'kernel_size_l0': 11, 'dropout_l0': 0.3, 'out_channels_l1': 32, 'kernel_size_l1': 15, 'dropout_l1': 0.4, 'optimizer': 'RMSprop', 'lr': 0.001239011137014999, 'weight_decay': 0.007327389380643972}. Best is trial 2 with value: 0.12051997545101485.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  5
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.12051997545101485
  Params: 
    dropout_l0: 0.4
    dropout_l1: 0.4
    dropout_l2: 0
    dropout_l3: 0.5
    kernel_size_l0: 5
    kernel_size_l1: 15
    kernel_size_l2: 11
    kernel_size_l3: 5
    lr: 0.0007658954304628522
    n_layers: 4
    optimizer: Nadam
    out_channels_l0: 16
    out_channels_l1: 64
    out_channels_l2: 128
    out_channels_l3: 128
    weight_decay: 0.0005653234495199956



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-01 22:46:27,555][0m A new study created in RDB with name: HEK293_active_E_vs_inactive_E_CNN_1_2[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.13502023680697123


>>> ITERATION N. 2



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 22:53:10,149][0m Trial 0 finished with value: 0.10542253521126763 and parameters: {'n_layers': 2, 'out_channels_l0': 16, 'kernel_size_l0': 11, 'dropout_l0': 0.2, 'out_channels_l1': 64, 'kernel_size_l1': 11, 'dropout_l1': 0.5, 'optimizer': 'Nadam', 'lr': 0.06926012474949482, 'weight_decay': 0.002547532129970606}. Best is trial 0 with value: 0.10542253521126763.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 23:07:23,647][0m Trial 1 finished with value: 0.1057218309859155 and parameters: {'n_layers': 3, 'out_channels_l0': 32, 'kernel_size_l0': 5, 'dropout_l0': 0, 'out_channels_l1': 96, 'kernel_size_l1': 11, 'dropout_l1': 0, 'out_channels_l2': 96, 'kernel_size_l2': 11, 'dropout_l2': 0.4, 'optimizer': 'RMSprop', 'lr': 0.00961244288326145, 'weight_decay': 0.004637936410139126}. Best is trial 1 with value: 0.1057218309859155.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 23:13:45,963][0m Trial 2 finished with value: 0.10572183098591546 and parameters: {'n_layers': 2, 'out_channels_l0': 32, 'kernel_size_l0': 11, 'dropout_l0': 0, 'out_channels_l1': 96, 'kernel_size_l1': 11, 'dropout_l1': 0, 'optimizer': 'Adam', 'lr': 0.022646430007709713, 'weight_decay': 0.00013987980637909596}. Best is trial 1 with value: 0.1057218309859155.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.1057218309859155
  Params: 
    dropout_l0: 0
    dropout_l1: 0
    dropout_l2: 0.4
    kernel_size_l0: 5
    kernel_size_l1: 11
    kernel_size_l2: 11
    lr: 0.00961244288326145
    n_layers: 3
    optimizer: RMSprop
    out_channels_l0: 32
    out_channels_l1: 96
    out_channels_l2: 96
    weight_decay: 0.004637936410139126



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-01 23:22:45,784][0m A new study created in RDB with name: HEK293_active_E_vs_inactive_E_CNN_1_2_3[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.10433217477656409


>>> ITERATION N. 3



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 23:32:30,654][0m Trial 0 finished with value: 0.10433978873239436 and parameters: {'n_layers': 4, 'out_channels_l0': 32, 'kernel_size_l0': 11, 'dropout_l0': 0.2, 'out_channels_l1': 32, 'kernel_size_l1': 5, 'dropout_l1': 0.5, 'out_channels_l2': 96, 'kernel_size_l2': 11, 'dropout_l2': 0, 'out_channels_l3': 256, 'kernel_size_l3': 15, 'dropout_l3': 0, 'optimizer': 'Adam', 'lr': 9.423054870813818e-05, 'weight_decay': 0.0024671390409695686}. Best is trial 0 with value: 0.10433978873239436.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 23:36:40,976][0m Trial 1 finished with value: 0.10539716238608121 and parameters: {'n_layers': 1, 'out_channels_l0': 64, 'kernel_size_l0': 5, 'dropout_l0': 0, 'optimizer': 'Nadam', 'lr': 0.001664999155366735, 'weight_decay': 0.009674917856759164}. Best is trial 1 with value: 0.10539716238608121.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-01 23:40:21,918][0m Trial 2 finished with value: 0.13107826152595628 and parameters: {'n_layers': 2, 'out_channels_l0': 16, 'kernel_size_l0': 5, 'dropout_l0': 0.4, 'out_channels_l1': 32, 'kernel_size_l1': 15, 'dropout_l1': 0, 'optimizer': 'Nadam', 'lr': 1.4657684411959107e-05, 'weight_decay': 0.000291528602349809}. Best is trial 2 with value: 0.13107826152595628.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.13107826152595628
  Params: 
    dropout_l0: 0.4
    dropout_l1: 0
    kernel_size_l0: 5
    kernel_size_l1: 15
    lr: 1.4657684411959107e-05
    n_layers: 2
    optimizer: Nadam
    out_channels_l0: 16
    out_channels_l1: 32
    weight_decay: 0.000291528602349809



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.13105092640936397



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.12347


In [24]:
# File this run's scores under cell line -> task -> model name.
cv_scores = kf_CV.scores_dict
results_dict[cell_line][task][model.__name__] = cv_scores

In [25]:
# Persist everything collected so far. The outer defaultdict is converted to an
# OrderedDict before pickling — presumably because its lambda factory cannot be pickled.
with open('results_dict.pickle', mode='wb') as fout:
    snapshot = OrderedDict(results_dict)
    pickle.dump(snapshot, fout)

---
### 3. CNN-LSTM

In [None]:
# Model evaluated in this subsection.
model = CNN_LSTM

In [None]:
# Fresh cross-validation driver for the selected model.
kf_CV = Kfold_CV()

# Common prefix shared by the study name and both checkpoint paths.
run_tag = f'{cell_line}_{task}_{model.__name__}'

kf_CV(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    task=task,
    sequence=True,
    augmentation=False,
    model=model,
    device=device,
    sampler='TPE',
    study_name=run_tag,
    hp_model_path=run_tag + '_HP.pt',
    test_model_path=run_tag + '_TEST.pt',
)

In [None]:
# File this run's scores under cell line -> task -> model name.
cv_scores = kf_CV.scores_dict
results_dict[cell_line][task][model.__name__] = cv_scores

In [None]:
# Persist everything collected so far. The outer defaultdict is converted to an
# OrderedDict before pickling — presumably because its lambda factory cannot be pickled.
with open('results_dict.pickle', mode='wb') as fout:
    snapshot = OrderedDict(results_dict)
    pickle.dump(snapshot, fout)

---

## 2) ACTIVE PROMOTERS vs INACTIVE PROMOTERS

In [26]:
# Select the second entry of TASKS as the task for this section and display it.
task = TASKS[1]
task

'active_P_vs_inactive_P'

In [27]:
# Build the data-loading pipeline from the file named after the current task.
pipe_data_load = Build_DataLoader_Pipeline(path_name='{}.pickle'.format(task))

Data Preprocessing Done!




---

### 1. FFNN

In [None]:
# Model evaluated in this subsection.
model = FFNN

In [None]:
# Fresh cross-validation driver for the selected model.
kf_CV = Kfold_CV()

# task is passed explicitly, as in the CNN and CNN-LSTM calls of this section.
# NOTE(review): the executed CNN call also sets sampler='TPE' — confirm whether
# this call should match.
kf_CV(build_dataloader_pipeline = pipe_data_load,
                num_epochs = 100,
                n_folds=3,
                cell_line=cell_line,
                task=task,
                sequence=False,
                augmentation=False,
                model = model,
                device = device,
                study_name = f'{cell_line}_{task}_{model.__name__}',
                hp_model_path = f'{cell_line}_{task}_{model.__name__}_HP',
                test_model_path = f'{cell_line}_{task}_{model.__name__}_TEST')

In [None]:
# File this run's scores under cell line -> task -> model name.
cv_scores = kf_CV.scores_dict
results_dict[cell_line][task][model.__name__] = cv_scores

In [None]:
# Persist everything collected so far. The outer defaultdict is converted to an
# OrderedDict before pickling — presumably because its lambda factory cannot be pickled.
with open('results_dict.pickle', mode='wb') as fout:
    snapshot = OrderedDict(results_dict)
    pickle.dump(snapshot, fout)

---
### 2. CNN

In [28]:
# Model evaluated in this subsection.
model = CNN

In [29]:
# Fresh cross-validation driver for the selected model.
kf_CV = Kfold_CV()

# Common prefix shared by the study name and both checkpoint paths.
run_tag = f'{cell_line}_{task}_{model.__name__}'

kf_CV(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    task=task,
    sequence=True,
    augmentation=False,
    model=model,
    device=device,
    sampler='TPE',
    study_name=run_tag,
    hp_model_path=run_tag + '_HP',
    test_model_path=run_tag + '_TEST',
)

  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-01 23:46:16,062][0m A new study created in RDB with name: HEK293_active_P_vs_inactive_P_CNN_1[0m


>>> ITERATION N. 1



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-02 00:24:01,025][0m Trial 0 finished with value: 0.1348483177054606 and parameters: {'n_layers': 4, 'out_channels_l0': 32, 'kernel_size_l0': 5, 'dropout_l0': 0.3, 'out_channels_l1': 32, 'kernel_size_l1': 15, 'dropout_l1': 0.4, 'out_channels_l2': 96, 'kernel_size_l2': 11, 'dropout_l2': 0, 'out_channels_l3': 512, 'kernel_size_l3': 15, 'dropout_l3': 0.4, 'optimizer': 'Nadam', 'lr': 0.014453172898488085, 'weight_decay': 0.08938146525880786}. Best is trial 0 with value: 0.1348483177054606.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-02 00:34:19,715][0m Trial 1 finished with value: 0.15993396154342923 and parameters: {'n_layers': 3, 'out_channels_l0': 64, 'kernel_size_l0': 5, 'dropout_l0': 0.2, 'out_channels_l1': 64, 'kernel_size_l1': 5, 'dropout_l1': 0, 'out_channels_l2': 64, 'kernel_size_l2': 5, 'dropout_l2': 0.4, 'optimizer': 'Nadam', 'lr': 0.003444163555305609, 'weight_decay': 0.0006946038569441946}. Best is trial 1 with value: 0.15993396154342923.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-02 00:41:59,118][0m Trial 2 finished with value: 0.134853833425262 and parameters: {'n_layers': 1, 'out_channels_l0': 64, 'kernel_size_l0': 11, 'dropout_l0': 0.3, 'optimizer': 'Nadam', 'lr': 0.01563861488805426, 'weight_decay': 0.020967696618913553}. Best is trial 1 with value: 0.15993396154342923.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.15993396154342923
  Params: 
    dropout_l0: 0.2
    dropout_l1: 0
    dropout_l2: 0.4
    kernel_size_l0: 5
    kernel_size_l1: 5
    kernel_size_l2: 5
    lr: 0.003444163555305609
    n_layers: 3
    optimizer: Nadam
    out_channels_l0: 64
    out_channels_l1: 64
    out_channels_l2: 64
    weight_decay: 0.0006946038569441946



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-02 00:51:19,645][0m A new study created in RDB with name: HEK293_active_P_vs_inactive_P_CNN_1_2[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.13170707927218417


>>> ITERATION N. 2



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-02 00:57:27,400][0m Trial 0 finished with value: 0.24874301536739837 and parameters: {'n_layers': 3, 'out_channels_l0': 64, 'kernel_size_l0': 15, 'dropout_l0': 0.2, 'out_channels_l1': 64, 'kernel_size_l1': 5, 'dropout_l1': 0.5, 'out_channels_l2': 64, 'kernel_size_l2': 5, 'dropout_l2': 0, 'optimizer': 'RMSprop', 'lr': 9.881344997164725e-05, 'weight_decay': 0.002224377681466876}. Best is trial 0 with value: 0.24874301536739837.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-02 01:03:48,880][0m Trial 1 finished with value: 0.14450639511311414 and parameters: {'n_layers': 2, 'out_channels_l0': 16, 'kernel_size_l0': 15, 'dropout_l0': 0, 'out_channels_l1': 96, 'kernel_size_l1': 15, 'dropout_l1': 0.5, 'optimizer': 'Nadam', 'lr': 1.6764624967699944e-05, 'weight_decay': 0.0013341525723783428}. Best is trial 0 with value: 0.24874301536739837.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-02 01:09:42,406][0m Trial 2 finished with value: 0.1489976578590501 and parameters: {'n_layers': 1, 'out_channels_l0': 32, 'kernel_size_l0': 5, 'dropout_l0': 0.2, 'optimizer': 'Adam', 'lr': 0.004560253829265837, 'weight_decay': 0.00012000505848428214}. Best is trial 0 with value: 0.24874301536739837.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.24874301536739837
  Params: 
    dropout_l0: 0.2
    dropout_l1: 0.5
    dropout_l2: 0
    kernel_size_l0: 15
    kernel_size_l1: 5
    kernel_size_l2: 5
    lr: 9.881344997164725e-05
    n_layers: 3
    optimizer: RMSprop
    out_channels_l0: 64
    out_channels_l1: 64
    out_channels_l2: 64
    weight_decay: 0.002224377681466876



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-02 01:26:40,188][0m A new study created in RDB with name: HEK293_active_P_vs_inactive_P_CNN_1_2_3[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.18421773204096384


>>> ITERATION N. 3



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-02 01:40:35,536][0m Trial 0 finished with value: 0.15039599727236305 and parameters: {'n_layers': 4, 'out_channels_l0': 64, 'kernel_size_l0': 15, 'dropout_l0': 0.3, 'out_channels_l1': 96, 'kernel_size_l1': 11, 'dropout_l1': 0, 'out_channels_l2': 256, 'kernel_size_l2': 5, 'dropout_l2': 0, 'out_channels_l3': 256, 'kernel_size_l3': 11, 'dropout_l3': 0.5, 'optimizer': 'Nadam', 'lr': 0.00043511212493037684, 'weight_decay': 0.009350404044320703}. Best is trial 0 with value: 0.15039599727236305.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-02 01:46:38,755][0m Trial 1 finished with value: 0.13511215296929585 and parameters: {'n_layers': 4, 'out_channels_l0': 32, 'kernel_size_l0': 5, 'dropout_l0': 0.3, 'out_channels_l1': 32, 'kernel_size_l1': 15, 'dropout_l1': 0.4, 'out_channels_l2': 64, 'kernel_size_l2': 5, 'dropout_l2': 0.5, 'out_channels_l3': 128, 'kernel_size_l3': 11, 'dropout_l3': 0, 'optimizer': 'RMSprop', 'lr': 0.0002340749727282356, 'weight_decay': 0.0037992147096698042}. Best is trial 0 with value: 0.15039599727236305.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-02 01:53:11,794][0m Trial 2 finished with value: 0.13615643472786332 and parameters: {'n_layers': 1, 'out_channels_l0': 64, 'kernel_size_l0': 15, 'dropout_l0': 0.4, 'optimizer': 'Nadam', 'lr': 0.000919321652340543, 'weight_decay': 0.018459367261671456}. Best is trial 0 with value: 0.15039599727236305.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.15039599727236305
  Params: 
    dropout_l0: 0.3
    dropout_l1: 0
    dropout_l2: 0
    dropout_l3: 0.5
    kernel_size_l0: 15
    kernel_size_l1: 11
    kernel_size_l2: 5
    kernel_size_l3: 11
    lr: 0.00043511212493037684
    n_layers: 4
    optimizer: Nadam
    out_channels_l0: 64
    out_channels_l1: 96
    out_channels_l2: 256
    out_channels_l3: 256
    weight_decay: 0.009350404044320703



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.1850664837674507



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.167


In [30]:
# File this run's scores under cell line -> task -> model name.
cv_scores = kf_CV.scores_dict
results_dict[cell_line][task][model.__name__] = cv_scores

In [31]:
# Persist everything collected so far. The outer defaultdict is converted to an
# OrderedDict before pickling — presumably because its lambda factory cannot be pickled.
with open('results_dict.pickle', mode='wb') as fout:
    snapshot = OrderedDict(results_dict)
    pickle.dump(snapshot, fout)

---
### 3. CNN-LSTM

In [None]:
# Model evaluated in this subsection.
model = CNN_LSTM

In [None]:
# Fresh cross-validation driver for the selected model.
kf_CV = Kfold_CV()

# Common prefix shared by the study name and both checkpoint paths.
run_tag = f'{cell_line}_{task}_{model.__name__}'

kf_CV(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    task=task,
    sequence=True,
    augmentation=False,
    model=model,
    device=device,
    sampler='TPE',
    study_name=run_tag,
    hp_model_path=run_tag + '_HP.pt',
    test_model_path=run_tag + '_TEST.pt',
)

In [None]:
# File this run's scores under cell line -> task -> model name.
cv_scores = kf_CV.scores_dict
results_dict[cell_line][task][model.__name__] = cv_scores

In [None]:
# Persist everything collected so far. The outer defaultdict is converted to an
# OrderedDict before pickling — presumably because its lambda factory cannot be pickled.
with open('results_dict.pickle', mode='wb') as fout:
    snapshot = OrderedDict(results_dict)
    pickle.dump(snapshot, fout)

---

## 3) ACTIVE ENHANCERS vs ACTIVE PROMOTERS

In [32]:
# Select the third entry of TASKS as the task for this section and display it.
task = TASKS[2]
task

'active_E_vs_active_P'

In [33]:
# Build the data-loading pipeline from the file named after the current task.
pipe_data_load = Build_DataLoader_Pipeline(path_name='{}.pickle'.format(task))

Data Preprocessing Done!




---
### 1. FFNN

In [None]:
# Model evaluated in this subsection.
model = FFNN

In [None]:
# Fresh cross-validation driver for the selected model.
kf_CV = Kfold_CV()

# task is passed explicitly, as in the CNN call of this section.
# NOTE(review): the executed CNN call also sets sampler='TPE', gives no random_state
# and uses a test checkpoint path without the '.pt' suffix — confirm whether this
# call should match.
kf_CV(build_dataloader_pipeline = pipe_data_load,
                num_epochs = 100,
                n_folds=3,
                cell_line=cell_line,
                task=task,
                sequence=False,
                augmentation=False,
                random_state=32,
                model = model,
                device = device,
                study_name = f'{cell_line}_{task}_{model.__name__}',
                hp_model_path = f'{cell_line}_{task}_{model.__name__}_HP.pt',
                test_model_path = f'{cell_line}_{task}_{model.__name__}_TEST.pt')

In [None]:
# File this run's scores under cell line -> task -> model name.
cv_scores = kf_CV.scores_dict
results_dict[cell_line][task][model.__name__] = cv_scores

In [None]:
# Persist everything collected so far. The outer defaultdict is converted to an
# OrderedDict before pickling — presumably because its lambda factory cannot be pickled.
with open('results_dict.pickle', mode='wb') as fout:
    snapshot = OrderedDict(results_dict)
    pickle.dump(snapshot, fout)

---
### 2. CNN

In [34]:
# Model evaluated in this subsection.
model = CNN

In [35]:
# Fresh cross-validation driver for the selected model.
kf_CV = Kfold_CV()

# Both checkpoint paths carry no extension or trailing punctuation, matching the
# other CNN runs in this notebook.
kf_CV(build_dataloader_pipeline = pipe_data_load,
                num_epochs = 100,
                n_folds=3,
                cell_line=cell_line,
                task=task,
                sequence=True,
                augmentation=False,
                model = model,
                device = device,
                sampler = 'TPE',
                study_name = f'{cell_line}_{task}_{model.__name__}',
                hp_model_path = f'{cell_line}_{task}_{model.__name__}_HP',
                test_model_path = f'{cell_line}_{task}_{model.__name__}_TEST')

  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-02 02:09:00,384][0m A new study created in RDB with name: HEK293_active_E_vs_active_P_CNN_1[0m


>>> ITERATION N. 1



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-02 02:10:55,751][0m Trial 0 finished with value: 0.3416048237476809 and parameters: {'n_layers': 4, 'out_channels_l0': 32, 'kernel_size_l0': 11, 'dropout_l0': 0.3, 'out_channels_l1': 32, 'kernel_size_l1': 5, 'dropout_l1': 0, 'out_channels_l2': 64, 'kernel_size_l2': 11, 'dropout_l2': 0.5, 'out_channels_l3': 256, 'kernel_size_l3': 11, 'dropout_l3': 0.4, 'optimizer': 'RMSprop', 'lr': 0.0005172348566156131, 'weight_decay': 0.0004725984529469053}. Best is trial 0 with value: 0.3416048237476809.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-02 02:13:53,277][0m Trial 1 finished with value: 0.770604796974354 and parameters: {'n_layers': 4, 'out_channels_l0': 64, 'kernel_size_l0': 5, 'dropout_l0': 0.3, 'out_channels_l1': 32, 'kernel_size_l1': 15, 'dropout_l1': 0.4, 'out_channels_l2': 256, 'kernel_size_l2': 15, 'dropout_l2': 0.4, 'out_channels_l3': 512, 'kernel_size_l3': 15, 'dropout_l3': 0.4, 'optimizer': 'Adam', 'lr': 0.003256589123571609, 'weight_decay': 0.04804195778057813}. Best is trial 1 with value: 0.770604796974354.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-02 02:16:18,964][0m Trial 2 finished with value: 0.7403634698756568 and parameters: {'n_layers': 2, 'out_channels_l0': 32, 'kernel_size_l0': 15, 'dropout_l0': 0, 'out_channels_l1': 32, 'kernel_size_l1': 5, 'dropout_l1': 0, 'optimizer': 'Nadam', 'lr': 0.0008722628287783382, 'weight_decay': 0.007206544516534481}. Best is trial 1 with value: 0.770604796974354.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.770604796974354
  Params: 
    dropout_l0: 0.3
    dropout_l1: 0.4
    dropout_l2: 0.4
    dropout_l3: 0.4
    kernel_size_l0: 5
    kernel_size_l1: 15
    kernel_size_l2: 15
    kernel_size_l3: 15
    lr: 0.003256589123571609
    n_layers: 4
    optimizer: Adam
    out_channels_l0: 64
    out_channels_l1: 32
    out_channels_l2: 256
    out_channels_l3: 512
    weight_decay: 0.04804195778057813



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-02 02:20:43,443][0m A new study created in RDB with name: HEK293_active_E_vs_active_P_CNN_1_2[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.6975254467516503


>>> ITERATION N. 2



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-02 02:26:07,122][0m Trial 0 finished with value: 0.7898011216336006 and parameters: {'n_layers': 4, 'out_channels_l0': 64, 'kernel_size_l0': 11, 'dropout_l0': 0.2, 'out_channels_l1': 32, 'kernel_size_l1': 5, 'dropout_l1': 0.5, 'out_channels_l2': 256, 'kernel_size_l2': 11, 'dropout_l2': 0, 'out_channels_l3': 256, 'kernel_size_l3': 15, 'dropout_l3': 0.5, 'optimizer': 'Adam', 'lr': 0.0018157676164454644, 'weight_decay': 0.00034867882951880114}. Best is trial 0 with value: 0.7898011216336006.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-02 02:28:03,010][0m Trial 1 finished with value: 0.5373633241680079 and parameters: {'n_layers': 1, 'out_channels_l0': 32, 'kernel_size_l0': 5, 'dropout_l0': 0.2, 'optimizer': 'RMSprop', 'lr': 0.0033362895713983055, 'weight_decay': 0.00016246619876023692}. Best is trial 0 with value: 0.7898011216336006.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-02 02:29:16,192][0m Trial 2 finished with value: 0.7459951435011315 and parameters: {'n_layers': 2, 'out_channels_l0': 16, 'kernel_size_l0': 15, 'dropout_l0': 0, 'out_channels_l1': 96, 'kernel_size_l1': 15, 'dropout_l1': 0, 'optimizer': 'Adam', 'lr': 2.446564053917732e-05, 'weight_decay': 0.00042505427953386276}. Best is trial 0 with value: 0.7898011216336006.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.7898011216336006
  Params: 
    dropout_l0: 0.2
    dropout_l1: 0.5
    dropout_l2: 0
    dropout_l3: 0.5
    kernel_size_l0: 11
    kernel_size_l1: 5
    kernel_size_l2: 11
    kernel_size_l3: 15
    lr: 0.0018157676164454644
    n_layers: 4
    optimizer: Adam
    out_channels_l0: 64
    out_channels_l1: 32
    out_channels_l2: 256
    out_channels_l3: 256
    weight_decay: 0.00034867882951880114



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-02 02:34:31,884][0m A new study created in RDB with name: HEK293_active_E_vs_active_P_CNN_1_2_3[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.7057594755333825


>>> ITERATION N. 3



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-02 02:36:23,864][0m Trial 0 finished with value: 0.5199626755894378 and parameters: {'n_layers': 4, 'out_channels_l0': 16, 'kernel_size_l0': 5, 'dropout_l0': 0, 'out_channels_l1': 96, 'kernel_size_l1': 11, 'dropout_l1': 0, 'out_channels_l2': 64, 'kernel_size_l2': 5, 'dropout_l2': 0, 'out_channels_l3': 128, 'kernel_size_l3': 15, 'dropout_l3': 0, 'optimizer': 'Adam', 'lr': 0.0014518893181585247, 'weight_decay': 0.0003793346291951941}. Best is trial 0 with value: 0.5199626755894378.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-02 02:37:34,714][0m Trial 1 finished with value: 0.6806505749636211 and parameters: {'n_layers': 1, 'out_channels_l0': 32, 'kernel_size_l0': 11, 'dropout_l0': 0.2, 'optimizer': 'Nadam', 'lr': 0.01102311917927527, 'weight_decay': 0.00406789022283829}. Best is trial 1 with value: 0.6806505749636211.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-02 02:38:40,462][0m Trial 2 finished with value: 0.6657845210609502 and parameters: {'n_layers': 1, 'out_channels_l0': 16, 'kernel_size_l0': 15, 'dropout_l0': 0.3, 'optimizer': 'Nadam', 'lr': 1.7337110473716872e-05, 'weight_decay': 0.00017521267344899094}. Best is trial 1 with value: 0.6806505749636211.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.6806505749636211
  Params: 
    dropout_l0: 0.2
    kernel_size_l0: 11
    lr: 0.01102311917927527
    n_layers: 1
    optimizer: Nadam
    out_channels_l0: 32
    weight_decay: 0.00406789022283829



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.697818353130553



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.70037


In [36]:
# Store the scores collected by the Kfold_CV run above under
# results_dict[cell line][task][model class name].
# NOTE(review): scores_dict presumably holds the per-fold test scores — confirm in BIOINF_tesi.models.utils.
results_dict[cell_line][task][model.__name__] = kf_CV.scores_dict

In [37]:
# Persist the results collected so far. results_dict was rebuilt at the top of
# the notebook as a defaultdict with a lambda factory, and lambdas cannot be
# pickled, so the outer container is copied into an OrderedDict before dumping.
with open('results_dict.pickle', mode='wb') as fout:
    pickle.dump(OrderedDict(results_dict), fout)

---

## 4) INACTIVE ENHANCERS vs INACTIVE PROMOTERS

In [38]:
# Select the task for this section; TASKS[3] is 'inactive_E_vs_inactive_P'
# (shown in the cell output). The bare name on the last line displays it.
task = TASKS[3]
task

'inactive_E_vs_inactive_P'

In [39]:
# Build the data-loading pipeline for the current task from '<task>.pickle'.
# NOTE(review): the path is relative, so the file is presumably expected in the working directory — confirm.
pipe_data_load = Build_DataLoader_Pipeline(path_name=f'{task}.pickle')

Data Preprocessing Done!




---
### 1. FFNN

In [None]:
# Model class used by the cross-validation cells of this subsection.
model = FFNN

In [None]:
# Cross-validate the currently selected model (FFNN, set in the previous cell)
# on the current task: n_folds=3, num_epochs=100, no sequence input and no
# augmentation.
kf_CV = Kfold_CV()

kf_CV(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    # Passed explicitly so this call matches the CNN runs in this notebook,
    # which all hand the task name to Kfold_CV.
    task=task,
    sequence=False,
    augmentation=False,
    model=model,
    device=device,
    study_name=f'{cell_line}_{task}_{model.__name__}',
    # NOTE(review): the CNN runs pass these two paths without the '.pt'
    # suffix — confirm which form Kfold_CV expects.
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP.pt',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST.pt',
)

In [None]:
# Store the scores collected by the Kfold_CV run above under
# results_dict[cell line][task][model class name].
# NOTE(review): scores_dict presumably holds the per-fold test scores — confirm in BIOINF_tesi.models.utils.
results_dict[cell_line][task][model.__name__] = kf_CV.scores_dict

In [None]:
# Persist the results collected so far. results_dict was rebuilt at the top of
# the notebook as a defaultdict with a lambda factory, and lambdas cannot be
# pickled, so the outer container is copied into an OrderedDict before dumping.
with open('results_dict.pickle', mode='wb') as fout:
    pickle.dump(OrderedDict(results_dict), fout)

---
### 2. CNN

In [40]:
# Model class used by the cross-validation cells of this subsection.
model = CNN

In [41]:
# Cross-validate the currently selected model (CNN) on the current task:
# n_folds=3, num_epochs=100, sequence input enabled, no augmentation, and the
# 'TPE' sampler. The study name and both model paths share the
# '<cell_line>_<task>_<model name>' prefix.
kf_CV = Kfold_CV()

kf_CV(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    task=task,
    sequence=True,
    augmentation=False,
    model=model,
    device=device,
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST',
)

  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-02 02:40:43,041][0m A new study created in RDB with name: HEK293_inactive_E_vs_inactive_P_CNN_1[0m


>>> ITERATION N. 1



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-02 02:55:04,762][0m Trial 0 finished with value: 0.5875682201309683 and parameters: {'n_layers': 2, 'out_channels_l0': 64, 'kernel_size_l0': 11, 'dropout_l0': 0, 'out_channels_l1': 64, 'kernel_size_l1': 15, 'dropout_l1': 0.5, 'optimizer': 'RMSprop', 'lr': 0.0014793191250024433, 'weight_decay': 0.0003765887943208693}. Best is trial 0 with value: 0.5875682201309683.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-02 03:03:22,170][0m Trial 1 finished with value: 0.46191943451372425 and parameters: {'n_layers': 2, 'out_channels_l0': 64, 'kernel_size_l0': 11, 'dropout_l0': 0, 'out_channels_l1': 32, 'kernel_size_l1': 5, 'dropout_l1': 0.4, 'optimizer': 'Nadam', 'lr': 0.07748557830470171, 'weight_decay': 0.0337589453595448}. Best is trial 0 with value: 0.5875682201309683.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-02 03:14:10,327][0m Trial 2 finished with value: 0.5518515786439718 and parameters: {'n_layers': 2, 'out_channels_l0': 16, 'kernel_size_l0': 11, 'dropout_l0': 0.2, 'out_channels_l1': 32, 'kernel_size_l1': 11, 'dropout_l1': 0, 'optimizer': 'RMSprop', 'lr': 1.2467366574391965e-05, 'weight_decay': 0.01779208859437995}. Best is trial 0 with value: 0.5875682201309683.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.5875682201309683
  Params: 
    dropout_l0: 0
    dropout_l1: 0.5
    kernel_size_l0: 11
    kernel_size_l1: 15
    lr: 0.0014793191250024433
    n_layers: 2
    optimizer: RMSprop
    out_channels_l0: 64
    out_channels_l1: 64
    weight_decay: 0.0003765887943208693



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-02 03:38:30,756][0m A new study created in RDB with name: HEK293_inactive_E_vs_inactive_P_CNN_1_2[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.5846348267929703


>>> ITERATION N. 2



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-02 03:52:04,994][0m Trial 0 finished with value: 0.5790889988288377 and parameters: {'n_layers': 4, 'out_channels_l0': 32, 'kernel_size_l0': 15, 'dropout_l0': 0.3, 'out_channels_l1': 96, 'kernel_size_l1': 5, 'dropout_l1': 0, 'out_channels_l2': 96, 'kernel_size_l2': 15, 'dropout_l2': 0.4, 'out_channels_l3': 128, 'kernel_size_l3': 15, 'dropout_l3': 0.4, 'optimizer': 'Nadam', 'lr': 0.0014276226130755707, 'weight_decay': 0.0006602937008089652}. Best is trial 0 with value: 0.5790889988288377.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-02 04:01:17,810][0m Trial 1 finished with value: 0.5747010387176346 and parameters: {'n_layers': 1, 'out_channels_l0': 64, 'kernel_size_l0': 5, 'dropout_l0': 0.2, 'optimizer': 'Nadam', 'lr': 0.0003703047333770227, 'weight_decay': 0.0012706494490601185}. Best is trial 0 with value: 0.5790889988288377.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-02 04:14:18,136][0m Trial 2 finished with value: 0.39216220238095234 and parameters: {'n_layers': 1, 'out_channels_l0': 16, 'kernel_size_l0': 5, 'dropout_l0': 0.2, 'optimizer': 'Nadam', 'lr': 0.03387959779405331, 'weight_decay': 0.03605942048329626}. Best is trial 0 with value: 0.5790889988288377.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.5790889988288377
  Params: 
    dropout_l0: 0.3
    dropout_l1: 0
    dropout_l2: 0.4
    dropout_l3: 0.4
    kernel_size_l0: 15
    kernel_size_l1: 5
    kernel_size_l2: 15
    kernel_size_l3: 15
    lr: 0.0014276226130755707
    n_layers: 4
    optimizer: Nadam
    out_channels_l0: 32
    out_channels_l1: 96
    out_channels_l2: 96
    out_channels_l3: 128
    weight_decay: 0.0006602937008089652



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-02 04:34:38,741][0m A new study created in RDB with name: HEK293_inactive_E_vs_inactive_P_CNN_1_2_3[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.5894249158758661


>>> ITERATION N. 3



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-02 04:43:01,058][0m Trial 0 finished with value: 0.551413081128876 and parameters: {'n_layers': 2, 'out_channels_l0': 16, 'kernel_size_l0': 11, 'dropout_l0': 0.2, 'out_channels_l1': 64, 'kernel_size_l1': 5, 'dropout_l1': 0.4, 'optimizer': 'Adam', 'lr': 1.0767901988909999e-05, 'weight_decay': 0.00012669650397020672}. Best is trial 0 with value: 0.551413081128876.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-02 04:55:25,601][0m Trial 1 finished with value: 0.47161540461975066 and parameters: {'n_layers': 3, 'out_channels_l0': 64, 'kernel_size_l0': 5, 'dropout_l0': 0, 'out_channels_l1': 96, 'kernel_size_l1': 5, 'dropout_l1': 0.4, 'out_channels_l2': 128, 'kernel_size_l2': 11, 'dropout_l2': 0, 'optimizer': 'RMSprop', 'lr': 0.058655539340234146, 'weight_decay': 0.00046975249775440555}. Best is trial 0 with value: 0.551413081128876.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-02 05:12:07,287][0m Trial 2 finished with value: 0.39998992787157167 and parameters: {'n_layers': 3, 'out_channels_l0': 32, 'kernel_size_l0': 11, 'dropout_l0': 0.2, 'out_channels_l1': 64, 'kernel_size_l1': 11, 'dropout_l1': 0.4, 'out_channels_l2': 256, 'kernel_size_l2': 15, 'dropout_l2': 0, 'optimizer': 'Nadam', 'lr': 3.587678379348472e-05, 'weight_decay': 0.00010737479902211135}. Best is trial 0 with value: 0.551413081128876.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.551413081128876
  Params: 
    dropout_l0: 0.2
    dropout_l1: 0.4
    kernel_size_l0: 11
    kernel_size_l1: 5
    lr: 1.0767901988909999e-05
    n_layers: 2
    optimizer: Adam
    out_channels_l0: 16
    out_channels_l1: 64
    weight_decay: 0.00012669650397020672



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.554318851393717



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.57613


In [42]:
# Store the scores collected by the Kfold_CV run above under
# results_dict[cell line][task][model class name].
# NOTE(review): scores_dict presumably holds the per-fold test scores — confirm in BIOINF_tesi.models.utils.
results_dict[cell_line][task][model.__name__] = kf_CV.scores_dict

In [43]:
# Persist the results collected so far. results_dict was rebuilt at the top of
# the notebook as a defaultdict with a lambda factory, and lambdas cannot be
# pickled, so the outer container is copied into an OrderedDict before dumping.
with open('results_dict.pickle', mode='wb') as fout:
    pickle.dump(OrderedDict(results_dict), fout)

---

## 5) ACTIVE ENHANCERS + ACTIVE PROMOTERS vs INACTIVE REST

In [44]:
# Select the task for this section; TASKS[4] is 'active_EP_vs_inactive_rest'
# (shown in the cell output). The bare name on the last line displays it.
task = TASKS[4]
task

'active_EP_vs_inactive_rest'

In [45]:
# Build the data-loading pipeline for the current task from '<task>.pickle'.
# NOTE(review): the path is relative, so the file is presumably expected in the working directory — confirm.
pipe_data_load = Build_DataLoader_Pipeline(path_name=f'{task}.pickle')

Data Preprocessing Done!




---
### 1. FFNN

In [None]:
# This FFNN subsection has no model-selection cell of its own. On a
# top-to-bottom run `model` would still be CNN from the previous section, so
# the study, the saved models and the recorded results would all be labelled
# (and trained) as CNN. Select the FFNN explicitly here.
model = FFNN

# Cross-validate the FFNN on the current task: n_folds=3, num_epochs=100,
# no sequence input and no augmentation.
kf_CV = Kfold_CV()

kf_CV(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    # Passed explicitly so this call matches the CNN runs in this notebook,
    # which all hand the task name to Kfold_CV.
    task=task,
    sequence=False,
    augmentation=False,
    model=model,
    device=device,
    study_name=f'{cell_line}_{task}_{model.__name__}',
    # NOTE(review): the CNN runs pass these two paths without the '.pt'
    # suffix — confirm which form Kfold_CV expects.
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP.pt',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST.pt',
)

In [None]:
# Store the scores collected by the Kfold_CV run above under
# results_dict[cell line][task][model class name].
# NOTE(review): scores_dict presumably holds the per-fold test scores — confirm in BIOINF_tesi.models.utils.
results_dict[cell_line][task][model.__name__] = kf_CV.scores_dict

In [None]:
# Persist the results collected so far. results_dict was rebuilt at the top of
# the notebook as a defaultdict with a lambda factory, and lambdas cannot be
# pickled, so the outer container is copied into an OrderedDict before dumping.
with open('results_dict.pickle', mode='wb') as fout:
    pickle.dump(OrderedDict(results_dict), fout)

---
### 2. CNN

In [None]:
# Model class used by the cross-validation cells of this subsection.
model = CNN

In [46]:
# Cross-validate the currently selected model (CNN) on the current task:
# n_folds=3, num_epochs=100, sequence input enabled, no augmentation, and the
# 'TPE' sampler. The study name and both model paths share the
# '<cell_line>_<task>_<model name>' prefix.
kf_CV = Kfold_CV()

kf_CV(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    task=task,
    sequence=True,
    augmentation=False,
    model=model,
    device=device,
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST',
)

  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-02 05:25:28,695][0m A new study created in RDB with name: HEK293_active_EP_vs_inactive_rest_CNN_1[0m


>>> ITERATION N. 1



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-02 05:35:03,663][0m Trial 0 finished with value: 0.11765738498789352 and parameters: {'n_layers': 2, 'out_channels_l0': 16, 'kernel_size_l0': 5, 'dropout_l0': 0.4, 'out_channels_l1': 64, 'kernel_size_l1': 11, 'dropout_l1': 0.4, 'optimizer': 'Adam', 'lr': 0.05889370551019785, 'weight_decay': 0.004895775338496469}. Best is trial 0 with value: 0.11765738498789352.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-02 06:03:16,112][0m Trial 1 finished with value: 0.11759173030359474 and parameters: {'n_layers': 4, 'out_channels_l0': 64, 'kernel_size_l0': 15, 'dropout_l0': 0.4, 'out_channels_l1': 96, 'kernel_size_l1': 11, 'dropout_l1': 0, 'out_channels_l2': 256, 'kernel_size_l2': 5, 'dropout_l2': 0.5, 'out_channels_l3': 128, 'kernel_size_l3': 5, 'dropout_l3': 0.4, 'optimizer': 'Adam', 'lr': 1.4323626369719555e-05, 'weight_decay': 0.0725499288971147}. Best is trial 0 with value: 0.11765738498789352.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-02 06:14:27,133][0m Trial 2 finished with value: 0.11863148794386562 and parameters: {'n_layers': 1, 'out_channels_l0': 32, 'kernel_size_l0': 11, 'dropout_l0': 0, 'optimizer': 'Adam', 'lr': 1.724907740895119e-05, 'weight_decay': 0.04403562160783262}. Best is trial 2 with value: 0.11863148794386562.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.11863148794386562
  Params: 
    dropout_l0: 0
    kernel_size_l0: 11
    lr: 1.724907740895119e-05
    n_layers: 1
    optimizer: Adam
    out_channels_l0: 32
    weight_decay: 0.04403562160783262



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-02 06:31:24,674][0m A new study created in RDB with name: HEK293_active_EP_vs_inactive_rest_CNN_1_2[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.12442547852474324


>>> ITERATION N. 2



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-02 06:59:00,145][0m Trial 0 finished with value: 0.12278403799590237 and parameters: {'n_layers': 4, 'out_channels_l0': 64, 'kernel_size_l0': 15, 'dropout_l0': 0.3, 'out_channels_l1': 96, 'kernel_size_l1': 15, 'dropout_l1': 0.4, 'out_channels_l2': 256, 'kernel_size_l2': 11, 'dropout_l2': 0.5, 'out_channels_l3': 256, 'kernel_size_l3': 15, 'dropout_l3': 0, 'optimizer': 'RMSprop', 'lr': 0.00012509716440146348, 'weight_decay': 0.006725193604002217}. Best is trial 0 with value: 0.12278403799590237.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-02 07:10:47,494][0m Trial 1 finished with value: 0.12448113942020868 and parameters: {'n_layers': 3, 'out_channels_l0': 64, 'kernel_size_l0': 5, 'dropout_l0': 0.4, 'out_channels_l1': 96, 'kernel_size_l1': 5, 'dropout_l1': 0.5, 'out_channels_l2': 96, 'kernel_size_l2': 11, 'dropout_l2': 0.5, 'optimizer': 'Nadam', 'lr': 0.0002512269075709799, 'weight_decay': 0.0005146365692615669}. Best is trial 1 with value: 0.12448113942020868.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-02 07:20:20,825][0m Trial 2 finished with value: 0.17666618468057904 and parameters: {'n_layers': 2, 'out_channels_l0': 16, 'kernel_size_l0': 11, 'dropout_l0': 0.4, 'out_channels_l1': 64, 'kernel_size_l1': 5, 'dropout_l1': 0.4, 'optimizer': 'Adam', 'lr': 0.00018727578989975044, 'weight_decay': 0.002728103802992829}. Best is trial 2 with value: 0.17666618468057904.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.17666618468057904
  Params: 
    dropout_l0: 0.4
    dropout_l1: 0.4
    kernel_size_l0: 11
    kernel_size_l1: 5
    lr: 0.00018727578989975044
    n_layers: 2
    optimizer: Adam
    out_channels_l0: 16
    out_channels_l1: 64
    weight_decay: 0.002728103802992829



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-02 07:34:47,778][0m A new study created in RDB with name: HEK293_active_EP_vs_inactive_rest_CNN_1_2_3[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.17836668822038831


>>> ITERATION N. 3



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-02 07:45:49,490][0m Trial 0 finished with value: 0.12226777813979516 and parameters: {'n_layers': 3, 'out_channels_l0': 32, 'kernel_size_l0': 5, 'dropout_l0': 0.4, 'out_channels_l1': 32, 'kernel_size_l1': 11, 'dropout_l1': 0, 'out_channels_l2': 64, 'kernel_size_l2': 5, 'dropout_l2': 0, 'optimizer': 'Nadam', 'lr': 0.003211958210465616, 'weight_decay': 0.0003660619325397926}. Best is trial 0 with value: 0.12226777813979516.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-02 07:55:52,497][0m Trial 1 finished with value: 0.15470304237009863 and parameters: {'n_layers': 2, 'out_channels_l0': 32, 'kernel_size_l0': 15, 'dropout_l0': 0.3, 'out_channels_l1': 96, 'kernel_size_l1': 5, 'dropout_l1': 0, 'optimizer': 'RMSprop', 'lr': 7.887983736378855e-05, 'weight_decay': 0.026545012591882536}. Best is trial 1 with value: 0.15470304237009863.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-02 08:17:00,614][0m Trial 2 finished with value: 0.12047619047619053 and parameters: {'n_layers': 4, 'out_channels_l0': 16, 'kernel_size_l0': 11, 'dropout_l0': 0.3, 'out_channels_l1': 96, 'kernel_size_l1': 11, 'dropout_l1': 0.5, 'out_channels_l2': 96, 'kernel_size_l2': 11, 'dropout_l2': 0.4, 'out_channels_l3': 256, 'kernel_size_l3': 15, 'dropout_l3': 0, 'optimizer': 'Adam', 'lr': 0.0019583020710715222, 'weight_decay': 0.04489594159264728}. Best is trial 1 with value: 0.15470304237009863.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.15470304237009863
  Params: 
    dropout_l0: 0.3
    dropout_l1: 0
    kernel_size_l0: 15
    kernel_size_l1: 5
    lr: 7.887983736378855e-05
    n_layers: 2
    optimizer: RMSprop
    out_channels_l0: 32
    out_channels_l1: 96
    weight_decay: 0.026545012591882536



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.15223539197959984



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.15168


In [47]:
# Store the scores collected by the Kfold_CV run above under
# results_dict[cell line][task][model class name].
# NOTE(review): scores_dict presumably holds the per-fold test scores — confirm in BIOINF_tesi.models.utils.
results_dict[cell_line][task][model.__name__] = kf_CV.scores_dict

In [48]:
# Persist the results collected so far. results_dict was rebuilt at the top of
# the notebook as a defaultdict with a lambda factory, and lambdas cannot be
# pickled, so the outer container is copied into an OrderedDict before dumping.
with open('results_dict.pickle', mode='wb') as fout:
    pickle.dump(OrderedDict(results_dict), fout)