In [1]:
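# The local imports (`model`, `modules`) and the relative data path used below
# assume the working directory is `basenji/`. Uncomment these two lines if the
# kernel was started from the parent directory.
# import os
# os.chdir('basenji')

## Expected project layout:
## basenji
## ├── data
## │   └── MDA_MB_231_LM2.entire-gene.is-exon.dataset.required-cols.pkl
## ├── model.py
## ├── modules.py
## └── train-basenji.ipynb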

In [2]:
import torch
import numpy as np
import random

from model import Basenji
from modules import Trainer

In [3]:
# Number of threads PyTorch may use for intra-op parallelism on the CPU.
num_threads = 4
torch.set_num_threads(num_threads)

# Reproducibility: seed the PyTorch, NumPy and Python RNGs with one shared value.
# random.seed() stays last among the calls so the cell's final expression
# returns None and nothing is echoed as cell output.
random_seed = 2022
torch.manual_seed(random_seed)
np.random.seed(random_seed)
random.seed(random_seed)

# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [4]:
# Location of the dataset file (.pkl), relative to the notebook's working directory.
data_path = "data/MDA_MB_231_LM2.entire-gene.is-exon.dataset.required-cols.pkl"

In [5]:
# Run configuration shared by the model and trainer cells below.
# Trailing comments record the alternative values left in the original notebook.
param_vals = dict(
    # --- task and optimisation ---
    mode="regression",            # alt: "classification"
    optimizer="Adam",
    init_lr=1e-3,
    optimizer_momentum=0.9,
    weight_decay=1e-3,
    loss="binomial",              # alt: "bce"
    loss_reduction="mean",
    num_targets=1,
    # --- sequence geometry (all powers of two) ---
    seq_len=2 ** 15,              # alt: 128*128*8
    min_seq_len=2 ** 12,
    max_seq_len=2 ** 16,
    target_window=32 * 1024,
    # --- data loading and schedule ---
    batch_size=64,                # alt: 8
    train_frac=0.8,               # alt: "cut": 0.8
    val_frac=0.1,
    shuffle_data=False,
    num_workers=4,                # alt: 8
    num_epochs=100,
)

In [6]:
# Instantiate the network; its maximum input length is taken from the shared config.
model = Basenji(max_seq_len=param_vals["max_seq_len"], debug=False)
# NOTE(review): compile() is defined in model.py (not shown here); presumably it
# prepares the model for `device` -- confirm against model.py.
model.compile(device)

In [None]:
# Build the training driver from the config dict, the model and the dataset path.
# NOTE(review): Trainer is defined in modules.py (not shown here); how it consumes
# `param_vals` and `data_path` should be confirmed there.
trainer = Trainer(param_vals, model, data_path)
# Launch training with the trainer's debug flag switched off.
trainer.train(debug=False)