In [1]:
%load_ext autoreload
%autoreload 2
%load_ext tensorboard

import sys
import os
# Append the parent of the current working directory to sys.path (once),
# presumably so the `project` package imported below can be resolved.
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)

In [2]:
from project.datasets import Dataset, CTRPDataModule
from project.film_model import FiLMNetwork

In [10]:
import pandas as pd
import numpy as np
import joblib
from pathlib import Path
from sklearn import model_selection
from pytorch_lightning import Trainer, seed_everything
from pytorch_lightning.callbacks import EarlyStopping

In [4]:
# Directory holding the processed artifacts, relative to the notebook's working directory.
data_path = Path("../../film-gex-data/processed/")

# Pickled column lists; both are later handed to CTRPDataModule and sized for FiLMNetwork.
input_cols = joblib.load(data_path / "input_cols.pkl")
cond_cols = joblib.load(data_path / "cond_cols.pkl")

# Gzip-compressed pickled table that cv() splits into train/validation folds.
data = pd.read_pickle(data_path / "train_sub.pkl.gz")

In [5]:
# Cross-validation settings read as globals by cv().
n_splits: int = 5  # number of GroupKFold folds
target: str = "cpd_avg_pv"  # column of `data` used as `y` and passed as `target` to the data module
group: str = "stripped_cell_line_name"  # column of `data` whose values define the CV groups

In [11]:
def cv():
    """Train one FiLMNetwork per GroupKFold split and save each fold's validation indices.

    Reads the notebook-level globals ``data``, ``input_cols``, ``cond_cols``,
    ``n_splits``, ``target`` and ``group``. Rows that share a value in the
    ``group`` column always fall on the same side of a split.

    After each fold is fitted, the positional validation indices (``val_idx``)
    are dumped with joblib as ``val_idx.fold_<fold>`` inside the log directory
    of the run that was just trained.

    Returns:
        None
    """
    gkf = model_selection.GroupKFold(n_splits=n_splits)
    splits = gkf.split(X=data,
                       y=data[target].to_numpy(),
                       groups=data[group].to_numpy())

    for fold, (train_idx, val_idx) in enumerate(splits):
        # Explicit copies: the frames are handed to the data module, and assigning
        # into an un-copied slice is what triggers pandas' SettingWithCopyWarning.
        train = data.iloc[train_idx].copy()
        val = data.iloc[val_idx].copy()

        dm = CTRPDataModule(train,
                            val,
                            input_cols,
                            cond_cols,
                            target=target,
                            batch_size=1280)
        model = FiLMNetwork(len(input_cols), len(cond_cols))
        trainer = Trainer(max_epochs=3,
                          gpus=-1,
                          early_stop_callback=EarlyStopping(monitor='val_loss'),
                          distributed_backend='dp')
        trainer.fit(model, dm)

        # The logger's version number is auto-incremented across runs and is NOT the
        # fold number (e.g. fold 0 can be logged as version_5), so ask the logger for
        # the directory of this run instead of formatting "version_<fold>" by hand.
        log_dir = Path(trainer.logger.log_dir)
        log_dir.mkdir(parents=True, exist_ok=True)
        joblib.dump(val_idx, str(log_dir.joinpath("val_idx.fold_{}".format(fold))))

In [12]:
# Run the grouped cross-validation: one training run per fold (long-running, uses all visible GPUs).
cv()

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value[:, i].tolist())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value[:, i].tolist())

  | Name       | Type          | Params
---------------------------------------------
0 | metric     | R2Score       | 0     
1 | inputs_emb | LinearBlock   | 677 K 
2 | conds_emb  | LinearBlock   | 16 K  
3 | film_1     | FiLMGenerator | 2 K   
4 | block_1    | LinearBlock   | 864   


Epoch 0:  79%|███████▊  | 37/47 [00:07<00:01,  5.02it/s, loss=0.101, v_num=5]
Validating: 0it [00:00, ?it/s][A
Epoch 0:  81%|████████  | 38/47 [00:10<00:02,  3.69it/s, loss=0.101, v_num=5]
Epoch 0:  83%|████████▎ | 39/47 [00:10<00:02,  3.75it/s, loss=0.101, v_num=5]
Epoch 0:  87%|████████▋ | 41/47 [00:10<00:01,  3.87it/s, loss=0.101, v_num=5]
Epoch 0:  91%|█████████▏| 43/47 [00:10<00:01,  4.00it/s, loss=0.101, v_num=5]
Epoch 0:  96%|█████████▌| 45/47 [00:10<00:00,  4.11it/s, loss=0.101, v_num=5]
Epoch 0: 100%|██████████| 47/47 [00:11<00:00,  4.22it/s, loss=0.101, v_num=5]



Epoch 0: 100%|██████████| 47/47 [00:11<00:00,  4.07it/s, loss=0.101, v_num=5]
Epoch 1:  79%|███████▊  | 37/47 [00:07<00:02,  4.93it/s, loss=0.062, v_num=5]
Validating: 0it [00:00, ?it/s][A
Epoch 1:  81%|████████  | 38/47 [00:10<00:02,  3.64it/s, loss=0.062, v_num=5]
Epoch 1:  85%|████████▌ | 40/47 [00:10<00:01,  3.76it/s, loss=0.062, v_num=5]
Epoch 1:  89%|████████▉ | 42/47 [00:10<00:01,  3.88it/s, loss=0.062, v_num=5]
Epoch 1:  94%|█████████▎| 44/47 [00:11<00:00,  4.00it/s, loss=0.062, v_num=5]
Epoch 1:  98%|█████████▊| 46/47 [00:11<00:00,  4.11it/s, loss=0.062, v_num=5]
Validating: 100%|██████████| 10/10 [00:03<00:00,  1.75it/s][A



Epoch 1: 100%|██████████| 47/47 [00:11<00:00,  4.03it/s, loss=0.062, v_num=5]
Epoch 2:  79%|███████▊  | 37/47 [00:07<00:01,  5.09it/s, loss=0.053, v_num=5]
Validating: 0it [00:00, ?it/s][A
Epoch 2:  81%|████████  | 38/47 [00:10<00:02,  3.72it/s, loss=0.053, v_num=5]
Epoch 2:  85%|████████▌ | 40/47 [00:10<00:01,  3.85it/s, loss=0.053, v_num=5]
Epoch 2:  89%|████████▉ | 42/47 [00:10<00:01,  3.97it/s, loss=0.053, v_num=5]
Epoch 2:  94%|█████████▎| 44/47 [00:10<00:00,  4.09it/s, loss=0.053, v_num=5]
Epoch 2:  98%|█████████▊| 46/47 [00:10<00:00,  4.21it/s, loss=0.053, v_num=5]
Validating: 100%|██████████| 10/10 [00:03<00:00,  1.75it/s][A



Epoch 2: 100%|██████████| 47/47 [00:11<00:00,  4.14it/s, loss=0.053, v_num=5]
                                                           [A

Saving latest checkpoint..


Epoch 2: 100%|██████████| 47/47 [00:11<00:00,  4.14it/s, loss=0.053, v_num=5]


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value[:, i].tolist())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value[:, i].tolist())

  | Name       | Type          | Params
---------------------------------------------
0 | metric     | R2Score       | 0     
1 | inputs_emb | LinearBlock   | 677 K 
2 | conds_emb  | LinearBlock   | 16 K  
3 | film_1     | FiLMGenerator | 2 K   
4 | block_1    | LinearBlock   | 864   


Epoch 0:  79%|███████▊  | 37/47 [00:07<00:02,  4.64it/s, loss=0.135, v_num=6]
Validating: 0it [00:00, ?it/s][A
Epoch 0:  81%|████████  | 38/47 [00:10<00:02,  3.47it/s, loss=0.135, v_num=6]
Epoch 0:  85%|████████▌ | 40/47 [00:11<00:01,  3.59it/s, loss=0.135, v_num=6]
Epoch 0:  89%|████████▉ | 42/47 [00:11<00:01,  3.71it/s, loss=0.135, v_num=6]
Epoch 0:  94%|█████████▎| 44/47 [00:11<00:00,  3.83it/s, loss=0.135, v_num=6]
Epoch 0:  98%|█████████▊| 46/47 [00:11<00:00,  3.94it/s, loss=0.135, v_num=6]



Epoch 0: 100%|██████████| 47/47 [00:12<00:00,  3.87it/s, loss=0.135, v_num=6]
Epoch 1:  79%|███████▊  | 37/47 [00:07<00:02,  4.99it/s, loss=0.064, v_num=6]
Validating: 0it [00:00, ?it/s][A
Epoch 1:  81%|████████  | 38/47 [00:10<00:02,  3.64it/s, loss=0.064, v_num=6]
Epoch 1:  85%|████████▌ | 40/47 [00:10<00:01,  3.76it/s, loss=0.064, v_num=6]
Epoch 1:  89%|████████▉ | 42/47 [00:10<00:01,  3.88it/s, loss=0.064, v_num=6]
Epoch 1:  94%|█████████▎| 44/47 [00:10<00:00,  4.00it/s, loss=0.064, v_num=6]
Epoch 1:  98%|█████████▊| 46/47 [00:11<00:00,  4.12it/s, loss=0.064, v_num=6]
Validating: 100%|██████████| 10/10 [00:03<00:00,  1.71it/s][A



Epoch 1: 100%|██████████| 47/47 [00:11<00:00,  4.04it/s, loss=0.064, v_num=6]
Epoch 2:  79%|███████▊  | 37/47 [00:07<00:02,  4.96it/s, loss=0.060, v_num=6]
Validating: 0it [00:00, ?it/s][A
Epoch 2:  81%|████████  | 38/47 [00:10<00:02,  3.64it/s, loss=0.060, v_num=6]
Validating:  20%|██        | 2/10 [00:03<00:16,  2.10s/it][A
Epoch 2:  85%|████████▌ | 40/47 [00:10<00:01,  3.73it/s, loss=0.060, v_num=6]
Epoch 2:  89%|████████▉ | 42/47 [00:10<00:01,  3.85it/s, loss=0.060, v_num=6]
Epoch 2:  94%|█████████▎| 44/47 [00:11<00:00,  3.97it/s, loss=0.060, v_num=6]
Epoch 2:  98%|█████████▊| 46/47 [00:11<00:00,  4.09it/s, loss=0.060, v_num=6]



Epoch 2: 100%|██████████| 47/47 [00:11<00:00,  4.00it/s, loss=0.060, v_num=6]
                                                          [A

Saving latest checkpoint..


Epoch 2: 100%|██████████| 47/47 [00:11<00:00,  4.00it/s, loss=0.060, v_num=6]


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value[:, i].tolist())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value[:, i].tolist())

  | Name       | Type          | Params
---------------------------------------------
0 | metric     | R2Score       | 0     
1 | inputs_emb | LinearBlock   | 677 K 
2 | conds_emb  | LinearBlock   | 16 K  
3 | film_1     | FiLMGenerator | 2 K   
4 | block_1    | LinearBlock   | 864   


Epoch 0:  40%|████      | 19/47 [00:05<00:08,  3.50it/s, loss=0.831, v_num=7]

Saving latest checkpoint..


Epoch 0:  40%|████      | 19/47 [00:05<00:08,  3.29it/s, loss=0.831, v_num=7]


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]


KeyboardInterrupt: 

In [15]:
# Launch TensorBoard inline on the Lightning log directory (relative to the notebook's cwd).
%tensorboard --logdir ./lightning_logs --port 9066

In [22]:
# NOTE(review): in the visible cells `trainer` is only assigned inside cv(), so nothing
# defines it at notebook scope; this cell presumably relies on leftover kernel state and
# would raise NameError after Restart & Run All — TODO confirm and expose it explicitly.
trainer.default_root_dir

'/srv/home/wconnell/github/film-gex/notebooks'

In [8]:
import torch

In [9]:
# NOTE(review): `val` and `model` are only assigned inside cv() in the visible cells, so
# this cell presumably depends on leftover kernel state — TODO confirm.
# Build a float tensor from the `cond_cols` columns of `val`.
foo = torch.FloatTensor(val[cond_cols].to_numpy())
# Run the tensor through the model. The displayed result carries a grad_fn, i.e. autograd
# is recording here. NOTE(review): FiLMNetwork is constructed from both len(input_cols) and
# len(cond_cols), yet only the cond columns are passed — verify against forward()'s signature.
model.forward(foo)

tensor([[ -5.2721,  -5.0805,   1.7124,  ...,  -5.4074,   1.7200, -13.4092],
        [ -0.8518,   0.4557,  -1.1743,  ...,   0.7598,   1.3746,  -2.2434],
        [ -1.5418,   1.0390,  -1.1317,  ...,   1.3517,   2.7206,  -1.4814],
        ...,
        [ -1.4564,   1.6760,   0.2275,  ...,   1.7220,   1.8293,  -1.2928],
        [ -2.3295,  -1.4681,  -0.4503,  ...,  -2.5142,   2.1346,  -8.3439],
        [ -2.1074,  -1.6664,   0.6190,  ...,  -1.4646,   1.0616,  -6.0193]],
       grad_fn=<AddmmBackward>)