In [1]:
from pathlib import Path
import pickle


# --- 2022 chromosome split ---------------------------------------------------
# The pickle holds a dict that maps a split name to a collection of files.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load this split file if it comes from a trusted source.
path = '/home/jovyan/RNAModif/rnamodif/util_notebooks/2022_chr_split.pickle'
with open(path, mode='rb') as handle:
    split_dict = pickle.load(handle)

# chr1 entries end up in the validation dictionaries built further down.
pos_files_5eu_chr1, neg_files_5eu_chr1 = (
    split_dict['5eu_2022_nia_chr1'],
    split_dict['UNM_2022_nia_chr1'],
)

# chr2-X entries are the ones handed to the datamodule as training files.
pos_files_5eu_chr2X, neg_files_5eu_chr2X = (
    split_dict['5eu_2022_nia_chr2-X'],
    split_dict['UNM_2022_nia_chr2-X'],
)

print('5eu_2022_nia_chr2-X', len(pos_files_5eu_chr2X))
print('UNM_2022_nia_chr2-X', len(neg_files_5eu_chr2X))

# --- Nanoid runs: every *.fast5 found recursively below each run directory ---
fiveEU_nanoid_path = Path('/home/jovyan/local_store/nanoid/20180514_1054_K562_5EU_1440_labeled_run')
fiveEU_nanoid_path_neg = Path('/home/jovyan/local_store/nanoid/20180403_1208_K562_5EU_0_unlabeled_III_run')
pos_files_nanoid_24h = [*fiveEU_nanoid_path.rglob('*.fast5')]
neg_files_nanoid = [*fiveEU_nanoid_path_neg.rglob('*.fast5')]

# --- 2020 experiments: same recursive *.fast5 search --------------------------
pos_2020_path = Path('/home/jovyan/local_store/store/seq/ont/experiments/20201016_hsa_dRNASeq_HeLa_5EU_polyA_REL5_short_1/runs')
neg_2020_path = Path('/home/jovyan/local_store/store/seq/ont/experiments/20201016_hsa_dRNASeq_HeLa_dmso_polyA_REL5_short_1/runs')
pos_files_2020 = [*pos_2020_path.rglob('*.fast5')]
neg_files_2020 = [*neg_2020_path.rglob('*.fast5')]

# Validation sets, keyed by the experiment label that is printed below.
valid_exp_to_files_pos = {
    '5eu_2020_pos': pos_files_2020,
    'Nanoid_pos': pos_files_nanoid_24h,
    '5eu_2022_chr1_pos': pos_files_5eu_chr1,
}
valid_exp_to_files_neg = {
    'UNM_2020': neg_files_2020,
    'Nanoid_neg': neg_files_nanoid,
    '5eu_2022_chr1_neg': neg_files_5eu_chr1,
}

# Report the number of files per validation set: positives first, then negatives.
for k, v in [*valid_exp_to_files_pos.items(), *valid_exp_to_files_neg.items()]:
    print(k, len(v))



5eu_2022_nia_chr2-X 891131
UNM_2022_nia_chr2-X 1758977
5eu_2020_pos 150
Nanoid_pos 177088
5eu_2022_chr1_pos 73607
UNM_2020 203
Nanoid_neg 216906
5eu_2022_chr1_neg 176093


In [None]:
import os

from rnamodif.architectures.rodan_seq_5eu import RodanPretrainedSeqcaller5eu
from rnamodif.data_utils.dataloading_5eu import nanopore_datamodule_5eu
from rnamodif.data_utils.split_methods import get_kfold_splits, get_fullvalid_split, get_valid_portions, get_5eu_chr_split
import pytorch_lightning as pl
from pytorch_lightning.loggers import CometLogger
from pytorch_lightning.callbacks import ModelCheckpoint


# Model under training. With freeze=False / fr_layers=0 the run log reports
# "FREEZING 0 layers" and every parameter is listed as trainable.
model = RodanPretrainedSeqcaller5eu(lr=1e-3, warmup_steps=3000, freeze=False, fr_layers=0, gru_layers=1, gru_dropout=0.5, gru_hidden=128)

# Training uses the chr2-X split; validation uses the per-experiment file
# dictionaries built in the previous cell (this cell depends on that cell
# having been run first).
# TODO: convert the input files to multi-fast5 files for faster dataloading.
dm = nanopore_datamodule_5eu(
    train_pos_files=pos_files_5eu_chr2X,
    train_neg_files=neg_files_5eu_chr2X,
    valid_exp_to_files_pos=valid_exp_to_files_pos,
    valid_exp_to_files_neg=valid_exp_to_files_neg,
    batch_size=64,
    window=4096,
    per_dset_read_limit=250,  # the run log shows 250 reported per validation set
    shuffle_valid=True,
    workers=16,
)

# The experiment name is shared by the checkpoint directory and the Comet run.
experiment_name = '5eu_2022_unfrozen_gru_dropout+pools'
checkpoint_callback = ModelCheckpoint(
    dirpath=f"/home/jovyan/RNAModif/rnamodif/checkpoints_pl/{experiment_name}",
    save_top_k=2,
    monitor="valid_loss",
    save_last=True,
    save_weights_only=False
)

# SECURITY: the Comet API key used to be hardcoded here as a string literal.
# It is now read from the COMET_API_KEY environment variable so the secret is
# no longer stored in the notebook. The previously committed key should be
# treated as leaked and revoked in the Comet account settings.
# When the variable is unset, None is passed; per the CometLogger documentation
# the logger then tries Comet's own configuration lookup -- TODO confirm this
# fallback for the installed pytorch_lightning version.
comet_api_key = os.environ.get('COMET_API_KEY')
logger = CometLogger(api_key=comet_api_key, project_name='RNAModif', experiment_name=experiment_name)

trainer = pl.Trainer(
    max_steps=1000000, logger=logger, accelerator='gpu',
    auto_lr_find=False, val_check_interval=1000,
    log_every_n_steps=1000, benchmark=True, precision=16,
    callbacks=[checkpoint_callback],
    # To continue an interrupted run, uncomment and point at an existing checkpoint:
    # resume_from_checkpoint=f'/home/jovyan/RNAModif/rnamodif/checkpoints_pl/{experiment_name}/lastX.ckpt'
)


trainer.fit(model, dm)

CometLogger will be initialized in online mode


FREEZING 0 layers


Using 16bit native Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Generating valid dataset


 17%|█▋        | 1/6 [00:01<00:08,  1.66s/it]

5eu_2020_pos 250


 33%|███▎      | 2/6 [00:25<00:57, 14.44s/it]

Nanoid_pos 250


 50%|█████     | 3/6 [00:36<00:38, 12.92s/it]

5eu_2022_chr1_pos 250


 67%|██████▋   | 4/6 [00:37<00:16,  8.40s/it]

UNM_2020 250


 83%|████████▎ | 5/6 [01:06<00:15, 15.73s/it]

Nanoid_neg 250


100%|██████████| 6/6 [01:30<00:00, 15.02s/it]

5eu_2022_chr1_neg 250



LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name            | Type              | Params
------------------------------------------------------
0 | trainable_rodan | network           | 10.7 M
1 | head            | Sequential        | 712 K 
2 | acc             | BinaryAccuracy    | 0     
3 | ce              | BCEWithLogitsLoss | 0     
------------------------------------------------------
11.4 M    Trainable params
0         Non-trainable params
11.4 M    Total params
22.765    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Found array with 0 sample(s) (shape=(0,)) while a minimum of 1 is required.
Found array with 0 sample(s) (shape=(0,)) while a minimum of 1 is required.
Only one class present in y_true. ROC AUC score is not defined in that case.
Only one class present in y_true. ROC AUC score is not defined in that case.
Found array with 0 sample(s) (shape=(0,)) while a minimum of 1 is required.
Found array with 0 sample(s) (shape=(0,)) while a minimum of 1 is required.




Training: 0it [00:00, ?it/s]

COMET INFO: Experiment is live on comet.ml https://www.comet.com/vlasta/rnamodif/c88cb0631ce042c9a23c7a40a037fbf0



Validation: 0it [00:00, ?it/s]



Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]