### Modelling

In [1]:
import torch.nn as nn
import pickle
from scipy import sparse
import torch.optim as optim
import pytorch_lightning as pl
import logging
from torch.utils.data import TensorDataset, DataLoader
from dnn import BaseNet
from datamodule import DataModule

from pytorch_lightning.trainer import Trainer
from pytorch_lightning.callbacks import TQDMProgressBar, EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger

  from .autonotebook import tqdm as notebook_tqdm


#### Load the Data

In [2]:
# Root directory that holds every input/target file used by this notebook.
# Change this one constant to point the notebook at a different data location.
DATA_ROOT = '/arc/project/st-jiaruid-1/yinian/multiome'
SVD_DIR = f'{DATA_ROOT}/svd-comp'        # SVD-compressed inputs (.pkl)
SPARSE_DIR = f'{DATA_ROOT}/sparse-data'  # sparse targets and column indices (.npz)


def _modality_paths(svd_name, sparse_name):
    """Return the file-path dict for one modality.

    Args:
        svd_name: modality tag used in the SVD input file names
            (``train_input_<svd_name>_svd128.pkl``).
        sparse_name: modality tag used in the sparse-data file names
            (``train_<sparse_name>_targets_values.sparse.npz``).

    Returns:
        dict with keys ``x``, ``y``, ``x_test``, ``x_cols``, ``y_cols``
        and ``x_test_cols`` mapping to absolute file paths (str).
    """
    return {
        'x': f'{SVD_DIR}/train_input_{svd_name}_svd128.pkl',
        'y': f'{SPARSE_DIR}/train_{sparse_name}_targets_values.sparse.npz',
        'x_test': f'{SVD_DIR}/test_input_{svd_name}_svd128.pkl',
        'x_cols': f'{SPARSE_DIR}/train_{sparse_name}_inputs_idxcol.npz',
        'y_cols': f'{SPARSE_DIR}/train_{sparse_name}_targets_idxcol.npz',
        'x_test_cols': f'{SPARSE_DIR}/test_{sparse_name}_inputs_idxcol.npz',
    }


# Per-modality file locations. The multiome files use two different tags:
# "multiome" in the SVD file names and "multi" in the sparse-data file names.
paths = {
    'multiome': _modality_paths(svd_name='multiome', sparse_name='multi'),
    'cite': _modality_paths(svd_name='cite', sparse_name='cite'),
}

In [3]:
# Which dataset to train on; must be one of the top-level keys of `paths`
# ('multiome' or 'cite').
modality = "cite"

In [4]:
# Build the data module from the train inputs, train targets and test inputs
# of the selected modality.
modality_paths = paths[modality]
dm = DataModule(
    x_path=modality_paths["x"],
    y_path=modality_paths["y"],
    x_test_path=modality_paths["x_test"],
    batch_size=1000,
)

#### Random Architectures

In [5]:
# NOTE(review): both dimensions are hard-coded. 128 presumably matches the
# "svd128" input files and 140 the target width of the 'cite' modality --
# TODO confirm, and update output_dim if `modality` is changed.
model = BaseNet(
    input_dim=128,
    output_dim=140,
)

In [6]:
# TensorBoard event files are written under output/temp.
tb_logger = TensorBoardLogger(save_dir="output/temp")

training_callbacks = [
    # Redraw the progress bar only every 1000 batches to keep cell output small.
    TQDMProgressBar(refresh_rate=1000),
    # Stop once "val/pcc" (higher is better) has not improved for 20 checks.
    # NOTE(review): with max_epochs=2 below, this patience looks unreachable;
    # raise max_epochs for a real training run.
    EarlyStopping(monitor="val/pcc", mode="max", patience=20),
]

trainer = Trainer(
    logger=tb_logger,
    callbacks=training_callbacks,
    max_epochs=2,
    num_sanity_val_steps=0,  # skip the pre-training validation sanity check
)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [7]:
# Train `model` on the loaders supplied by the data module.
trainer.fit(model, datamodule=dm)


  | Name  | Type       | Params
-------------------------------------
0 | stack | Sequential | 739 K 
1 | loss  | MSELoss    | 0     
-------------------------------------
739 K     Trainable params
0         Non-trainable params
739 K     Total params
2.957     Total estimated model params size (MB)
  rank_zero_warn(
  rank_zero_warn(


Epoch 0:   0%|          | 0/72 [00:00<?, ?it/s] 
Validation: 0it [00:00, ?it/s][A
Validation:   0%|          | 0/15 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/15 [00:00<?, ?it/s][A
Epoch 0: 100%|██████████| 72/72 [00:13<00:00,  5.22it/s, loss=4.78, v_num=13, val/pcc=0.779]
Epoch 1:   0%|          | 0/72 [00:00<?, ?it/s, loss=4.78, v_num=13, val/pcc=0.779, train/pcc=0.782]         
Validation: 0it [00:00, ?it/s][A
Validation:   0%|          | 0/15 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/15 [00:00<?, ?it/s][A
Epoch 1: 100%|██████████| 72/72 [00:05<00:00, 12.19it/s, loss=4.22, v_num=13, val/pcc=0.826, train/pcc=0.782]
Epoch 1: 100%|██████████| 72/72 [00:05<00:00, 12.02it/s, loss=4.22, v_num=13, val/pcc=0.826, train/pcc=0.829]

`Trainer.fit` stopped: `max_epochs=2` reached.


Epoch 1: 100%|██████████| 72/72 [00:06<00:00, 11.94it/s, loss=4.22, v_num=13, val/pcc=0.826, train/pcc=0.829]


In [None]:
# Number of entries stored in model.pcc_storage per split, shown as (train, val).
tuple(len(model.pcc_storage[split]) for split in ('train', 'val'))