In [1]:
from pathlib import Path
from rnamodif.data_utils.data_paths import name_to_files

# Training split of the 2022 experiment: the "pos" and "neg" file lists are
# taken from the project-wide `name_to_files` registry, in that order.
train_pos_files, train_neg_files = (
    name_to_files[dataset]['train']
    for dataset in ('nia_2022_pos', 'nia_2022_neg')
)

# Validation sets, keyed by the experiment label passed on to the datamodule.
# Each label maps to the 'test' split of one registry entry; insertion order
# is kept identical to the hand-written dict it replaces.
valid_exp_to_files_pos = {
    label: name_to_files[dataset]['test']
    for label, dataset in (
        ('5eu_2020_pos', 'nia_2020_pos'),
        ('Nanoid_pos', 'nano_pos_1'),
        ('5eu_2022_chr1_pos', 'nia_2022_pos'),
    )
}

valid_exp_to_files_neg = {
    label: name_to_files[dataset]['test']
    for label, dataset in (
        ('UNM_2020', 'nia_2020_neg'),
        ('Nanoid_neg', 'nano_neg_1'),
        ('5eu_2022_chr1_neg', 'nia_2022_neg'),
    )
}

In [4]:
from rnamodif.rodan_seq_5eu import RodanPretrained
from rnamodif.data_utils.dataloading_5eu import TrainingDatamodule
import pytorch_lightning as pl
from pytorch_lightning.loggers import CometLogger
from pytorch_lightning.callbacks import ModelCheckpoint


import os  # cell-local: only needed to read the Comet credential from the environment

# Model under training. All hyperparameters are passed straight through to
# the project's RodanPretrained class (frozen_layers=0 -> presumably the whole
# backbone is trainable; the run summary reports 0 non-trainable params).
model = RodanPretrained(
    lr=1e-4,
    warmup_steps=3000,
    frozen_layers=0,
    gru_layers=1,
    gru_dropout=0.5,
    gru_hidden=32,
)

# Datamodule wiring the file lists selected in the previous cell.
dm = TrainingDatamodule(
    train_pos_files=train_pos_files,
    train_neg_files=train_neg_files,
    valid_exp_to_files_pos=valid_exp_to_files_pos,
    valid_exp_to_files_neg=valid_exp_to_files_neg,
    batch_size=64,
    window=4096,
    per_dset_read_limit=250,
    shuffle_valid=True,
    workers=8,
)

# The experiment name is used both for the checkpoint directory and for the
# Comet run, so the two can be matched up later.
experiment_name = '5eu_2022_unfrozen_gru32_lowLR_smakesplit'
checkpoint_callback = ModelCheckpoint(
    dirpath=f"/home/jovyan/RNAModif/rnamodif/checkpoints_pl/{experiment_name}",
    save_top_k=2,          # keep the two best checkpoints according to `monitor`
    monitor="valid_loss",
    save_last=True,        # additionally keep the most recent checkpoint
    save_weights_only=False,
)

# SECURITY: the Comet API key must never be written into the notebook.
# It is read from the COMET_API_KEY environment variable instead; when the
# variable is unset, None is passed and CometLogger falls back to its own
# configuration lookup (e.g. ~/.comet.config).
# NOTE(review): a key was previously committed in this cell - revoke and
# rotate it in the Comet account settings, it remains in version history.
comet_api_key = os.environ.get("COMET_API_KEY")
logger = CometLogger(
    api_key=comet_api_key,
    project_name='RNAModif',
    experiment_name=experiment_name,
)

trainer = pl.Trainer(
    max_steps=1000000,
    logger=logger,
    accelerator='gpu',
    auto_lr_find=False,
    val_check_interval=1000,   # run validation every 1000 training batches
    log_every_n_steps=1000,
    benchmark=True,
    precision=16,              # 16-bit native mixed precision
    callbacks=[checkpoint_callback],
    # resume_from_checkpoint=f'/home/jovyan/RNAModif/rnamodif/checkpoints_pl/{experiment_name}/lastX.ckpt'
)


trainer.fit(model, dm)

CometLogger will be initialized in online mode
Using 16bit native Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Generating valid dataset


 17%|█▋        | 1/6 [00:02<00:11,  2.23s/it]

5eu_2020_pos 250


 33%|███▎      | 2/6 [00:04<00:09,  2.47s/it]

Nanoid_pos 250


 50%|█████     | 3/6 [00:07<00:07,  2.38s/it]

5eu_2022_chr1_pos 250


 67%|██████▋   | 4/6 [00:09<00:04,  2.39s/it]

UNM_2020 250


 83%|████████▎ | 5/6 [00:12<00:02,  2.56s/it]

Nanoid_neg 250


100%|██████████| 6/6 [00:14<00:00,  2.41s/it]

5eu_2022_chr1_neg 250



LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name            | Type              | Params
------------------------------------------------------
0 | trainable_rodan | network           | 10.7 M
1 | head            | Sequential        | 159 K 
2 | acc             | BinaryAccuracy    | 0     
3 | ce              | BCEWithLogitsLoss | 0     
------------------------------------------------------
10.8 M    Trainable params
0         Non-trainable params
10.8 M    Total params
21.659    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Found array with 0 sample(s) (shape=(0,)) while a minimum of 1 is required.
Found array with 0 sample(s) (shape=(0,)) while a minimum of 1 is required.
Only one class present in y_true. ROC AUC score is not defined in that case.
Only one class present in y_true. ROC AUC score is not defined in that case.
Found array with 0 sample(s) (shape=(0,)) while a minimum of 1 is required.
Found array with 0 sample(s) (shape=(0,)) while a minimum of 1 is required.




Training: 0it [00:00, ?it/s]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")
