In [1]:
! pip install pytorch_lightning --upgrade

Collecting pytorch_lightning
  Downloading pytorch_lightning-1.3.1-py3-none-any.whl (805 kB)
[K     |████████████████████████████████| 805 kB 5.3 MB/s eta 0:00:01
Collecting pyDeprecate==0.3.0
  Downloading pyDeprecate-0.3.0-py3-none-any.whl (10 kB)
Collecting fsspec[http]>=2021.4.0
  Downloading fsspec-2021.4.0-py3-none-any.whl (108 kB)
[K     |████████████████████████████████| 108 kB 15.3 MB/s eta 0:00:01
Collecting tensorboard!=2.5.0,>=2.2.0
  Downloading tensorboard-2.4.1-py3-none-any.whl (10.6 MB)
[K     |████████████████████████████████| 10.6 MB 10.9 MB/s eta 0:00:01
Collecting torchmetrics>=0.2.0
  Downloading torchmetrics-0.3.2-py3-none-any.whl (274 kB)
[K     |████████████████████████████████| 274 kB 85.5 MB/s eta 0:00:01
Installing collected packages: pyDeprecate, fsspec, tensorboard, torchmetrics, pytorch-lightning
  Attempting uninstall: fsspec
    Found existing installation: fsspec 0.8.5
    Uninstalling fsspec-0.8.5:
      Successfully uninstalled fsspec-0.8.5
  Atte

In [20]:
import argparse
import os
import random
import shutil
import subprocess
import tarfile
import tempfile
import time

import torch
import yaml
import pytorch_lightning as ptl
from pytorch_lightning import Trainer

import boda


In [2]:
# Record the library versions this notebook was run with (one per line).
print(torch.__version__, ptl.__version__, sep='\n')

1.7.1
1.3.1


In [3]:
def main(args):
    """Instantiate the selected BODA modules, train the model and save artifacts.

    Args:
        args: Dict mapping argument-group titles to ``argparse.Namespace``
            objects. This function reads the ``'Main args'`` group (module
            names) and the ``'pl.Trainer'`` group (Trainer options); the whole
            dict is forwarded to each module's ``process_args``.

    Returns:
        Tuple ``(data_module, model_module, graph_module, model, trainer, args)``
        where the first three are the resolved classes, ``model`` is the
        trained instance, and ``args`` is the unchanged input dict.
    """
    main_args = args['Main args']
    data_module = getattr(boda.data, main_args.data_module)
    model_module = getattr(boda.model, main_args.model_module)
    graph_module = getattr(boda.graph, main_args.graph_module)

    data = data_module(**vars(data_module.process_args(args)))
    model = model_module(**vars(model_module.process_args(args)))

    # Re-class the model instance so it also inherits from the graph module;
    # the graph module's processed args become class attributes.
    model.__class__ = type(
        'BODA_module',
        (model_module, graph_module),
        vars(graph_module.process_args(args))
    )

    os.makedirs('/tmp/output/artifacts', exist_ok=True)
    trainer = Trainer.from_argparse_args(args['pl.Trainer'])

    trainer.fit(model, data)

    # Pass the full args dict: the _save_model in effect indexes
    # args['pl.Trainer'] and args['Main args'] itself, so handing it only the
    # 'Main args' Namespace raises a TypeError.
    _save_model(data_module, model_module, graph_module,
                model, trainer, args)

    return data_module, model_module, graph_module, model, trainer, args

In [4]:
def _save_model(data_module, model_module, graph_module, 
                model, trainer, main_args):
    local_dir = '/tmp/output/artifacts'
    save_dict = {
        'data_module'  : data_module.__name__,
        'data_hparams' : data_module.process_args(args),
        'model_module' : model_module.__name__,
        'model_hparams': model_module.process_args(args),
        'graph_module' : graph_module.__name__,
        'graph_hparams': graph_module.process_args(args),
        'model_state_dict': model.state_dict(),
        'timestamp'    : time.strftime("%Y%m%d_%H%M%S")
    }
    torch.save(save_dict, os.path.join(local_dir,'torch_checkpoint.pt'))
    
    rtag = random.randint(1000,9999)
    filename=f'model_artifacts__{save_dict["timestamp"]}__{rtag}.tar.gz'
    with tarfile.open(os.path.join('/tmp/output/',filename), 'w:gz') as tar:
        tar.add(local_dir,arcname='artifacts')
    
    if 'gs://' in main_args.artifact_path:
        subprocess.check_call(
            ['gsutil', 'cp', os.path.join('/tmp/output/',filename), main_args.artifact_path]
        )
    else:
        os.makedirs(main_args.artifact_path, exist_ok=True)
        shutil.copy(os.path.join('/tmp/output/',filename), main_args.artifact_path)


In [18]:
def _save_model(data_module, model_module, graph_module, 
                model, trainer, args):
    local_dir = args['pl.Trainer'].default_root_dir
    save_dict = {
        'data_module'  : data_module.__name__,
        'data_hparams' : data_module.process_args(args),
        'model_module' : model_module.__name__,
        'model_hparams': model_module.process_args(args),
        'graph_module' : graph_module.__name__,
        'graph_hparams': graph_module.process_args(args),
        'model_state_dict': model.state_dict(),
        'timestamp'    : time.strftime("%Y%m%d_%H%M%S")
    }
    torch.save(save_dict, os.path.join(local_dir,'torch_checkpoint.pt'))
    
    rtag = random.randint(1000,9999)
    filename=f'model_artifacts__{save_dict["timestamp"]}__{rtag}.tar.gz'
    with tempfile.TemporaryDirectory() as tmpdirname:
        with tarfile.open(os.path.join(tmpdirname,filename), 'w:gz') as tar:
            tar.add(local_dir,arcname='artifacts')

        if 'gs://' in args['Main args'].artifact_path:
            subprocess.check_call(
                ['gsutil', 'cp', os.path.join(tmpdirname,filename), args['Main args'].artifact_path]
            )
        else:
            os.makedirs(args['Main args'].artifact_path, exist_ok=True)
            shutil.copy(os.path.join(tmpdirname,filename), args['Main args'].artifact_path)


In [5]:
def model_fn(model_dir):
    """Rebuild a model from the ``torch_checkpoint.pt`` stored in ``model_dir``.

    Args:
        model_dir: Directory containing ``torch_checkpoint.pt`` as written by
            ``_save_model``.

    Returns:
        An instance of the checkpointed model class with its weights loaded.

    Raises:
        AttributeError: If the stored class name is found neither in
            ``boda.model`` nor at the top level of ``boda``.
    """
    # NOTE(review): torch.load unpickles arbitrary objects; only load
    # checkpoints from a trusted source.
    # map_location='cpu' lets a checkpoint saved on GPU load on a CPU-only host.
    checkpoint = torch.load(
        os.path.join(model_dir,'torch_checkpoint.pt'),
        map_location='cpu'
    )

    # Training resolves model classes from boda.model; fall back to the
    # top-level package in case the class is exposed there instead.
    model_name = checkpoint['model_module']
    model_module = getattr(boda.model, model_name, None)
    if model_module is None:
        model_module = getattr(boda, model_name)

    # _save_model stores the raw process_args() result, which training unpacks
    # with vars(); convert to a dict here so ** unpacking works.
    hparams = checkpoint['model_hparams']
    if not isinstance(hparams, dict):
        hparams = vars(hparams)

    model = model_module(**hparams)
    model.load_state_dict(checkpoint['model_state_dict'])
    print(f'Loaded model from {checkpoint["timestamp"]}')
    return model


# Process runtime arguments

## Command line args to use for testing

In [17]:
# Test command line, grouped by the module each set of flags belongs to.
cmd_str = (
    # data module
    '--data_module BODA2_DataModule '
    '--dataFile_path  ../../data/BODA.MPRA.txt '
    '--ValSize_pct  5 --TestSize_pct  5 '
    '--batchSize  32 --paddedSeqLen 600 --numWorkers 1 '
    # model module
    '--model_module Basset '
    '--n_outputs  3 --loss_criterion MSELoss '
    # graph module
    '--graph_module CNNBasicTraining '
    # trainer and output options
    '--gpus 1 --min_epochs 5 --max_epochs 5 --default_root_dir /tmp/output/artifacts '
    '--artifact_path /opt/ml/output '
)

# Display the equivalent shell invocation of the training script.
f'python /home/ubuntu/boda2/src/main.py {cmd_str}'

'python /home/ubuntu/boda2/src/main.py --data_module BODA2_DataModule --dataFile_path  ../../data/BODA.MPRA.txt --ValSize_pct  5 --TestSize_pct  5 --batchSize  32 --paddedSeqLen 600 --numWorkers 1 --model_module Basset --n_outputs  3 --loss_criterion MSELoss --graph_module CNNBasicTraining --gpus 1 --min_epochs 5 --max_epochs 5 --default_root_dir /tmp/output/artifacts --artifact_path /opt/ml/output '

## Set base args for script

Basic arguments to identify which submodules are used and where data will be saved

In [7]:
# Base parser: automatic help is disabled here (add_help=False).
parser = argparse.ArgumentParser(description="BODA trainer", add_help=False)
group = parser.add_argument_group('Main args')

# The three module selectors share the same shape: required string options.
for flag, help_text in (
    ('--data_module', 'BODA data module to process dataset.'),
    ('--model_module', 'BODA model module to fit dataset.'),
    ('--graph_module', 'BODA graph module to define computations.'),
):
    group.add_argument(flag, type=str, required=True, help=help_text)

group.add_argument(
    '--artifact_path',
    type=str,
    default='/opt/ml/checkpoints/',
    help='Path where model artifacts are deposited.',
)
group.add_argument('--pretrained_weights', type=str, help='Pretrained weights.')



_StoreAction(option_strings=['--pretrained_weights'], dest='pretrained_weights', nargs=None, const=None, default=None, type=<class 'str'>, choices=None, help='Pretrained weights.', metavar=None)

In [8]:
# First pass: only the 'Main args' options are known; all other flags are
# returned untouched in leftover_args.
cli_tokens = cmd_str.split()
known_args, leftover_args = parser.parse_known_args(cli_tokens)

In [9]:
# Display the Namespace produced by the first parsing pass.
known_args

Namespace(artifact_path='/opt/ml/output', data_module='BODA2_DataModule', graph_module='CNNBasicTraining', model_module='Basset', pretrained_weights=None)

## Extract first-order submodule args

Get submodule specific arguments.

In [10]:
# Resolve the classes named on the command line.
Data, Model, Graph = (
    getattr(boda.data, known_args.data_module),
    getattr(boda.model, known_args.model_module),
    getattr(boda.graph, known_args.graph_module),
)

# Let each class register its own options on the shared parser, in order.
for register in (
    Data.add_data_specific_args,
    Model.add_model_specific_args,
    Graph.add_graph_specific_args,
):
    parser = register(parser)

# Second pass: re-parse now that the module-specific options are known.
known_args, leftover_args = parser.parse_known_args(cmd_str.split())


In [11]:
# Display the Namespace after the module-specific options were added and re-parsed.
known_args

Namespace(TestSize_pct=5.0, ValSize_pct=5.0, activityColumns=['K562', 'HepG2', 'SKNSH'], artifact_path='/opt/ml/output', batchSize=32, conv1_channels=300, conv1_kernel_size=19, conv2_channels=200, conv2_kernel_size=11, conv3_channels=200, conv3_kernel_size=7, dataFile_path='../../data/BODA.MPRA.txt', data_module='BODA2_DataModule', dropout_p=0.3, graph_module='CNNBasicTraining', linear1_channels=1000, linear2_channels=1000, loss_criterion='MSELoss', model_module='Basset', n_outputs=3, numWorkers=1, optimizer='Adam', paddedSeqLen=600, pretrained_weights=None, scheduler=None, scheduler_interval='epoch', scheduler_monitor=None, sequenceColumn='nt.sequence', use_batch_norm=True, use_weight_norm=False)

## Extract second-order submodule args

Get another set of submodule-specific arguments based on preliminary choices (e.g., optional arguments for the optimizer of choice).

In [12]:
# Options that depend on earlier choices are registered by each class in turn.
for submodule in (Data, Model, Graph):
    parser = submodule.add_conditional_args(parser, known_args)

# Add the Trainer options, and a help flag (the parser was built with add_help=False).
parser = Trainer.add_argparse_args(parser)
parser.add_argument('--help', '-h', action='help')

# Final strict parse, then regroup the flat Namespace via organize_args.
args = boda.common.utils.organize_args(
    parser,
    parser.parse_args(cmd_str.split()),
)



In [13]:
# Display the argument dict returned by organize_args.
args

{'positional arguments': Namespace(),
 'optional arguments': Namespace(help=None),
 'Main args': Namespace(artifact_path='/opt/ml/output', data_module='BODA2_DataModule', graph_module='CNNBasicTraining', model_module='Basset', pretrained_weights=None),
 'Data Module args': Namespace(TestSize_pct=5.0, ValSize_pct=5.0, activityColumns=['K562', 'HepG2', 'SKNSH'], batchSize=32, dataFile_path='../../data/BODA.MPRA.txt', numWorkers=1, paddedSeqLen=600, sequenceColumn='nt.sequence'),
 'Model Module args': Namespace(conv1_channels=300, conv1_kernel_size=19, conv2_channels=200, conv2_kernel_size=11, conv3_channels=200, conv3_kernel_size=7, dropout_p=0.3, linear1_channels=1000, linear2_channels=1000, loss_criterion='MSELoss', n_outputs=3, use_batch_norm=True, use_weight_norm=False),
 'Graph Module args': Namespace(optimizer='Adam', scheduler=None, scheduler_interval='epoch', scheduler_monitor=None),
 'Optimizer args': Namespace(amsgrad=False, beta1=0.9, beta2=0.999, eps=1e-08, lr=0.001, weight_d

# Run training
Do line-by-line first to debug

In [14]:
# Run the full pipeline; main() returns the resolved classes, the trained
# model, the trainer, and the same args dict it was given.
data_module, model_module, graph_module, model, trainer, args = main(args)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores


Padding sequences and converting to one-hot tensors...
10000/27719 sequences padded and tokenized...
20000/27719 sequences padded and tokenized...


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

   | Name      | Type          | Params
---------------------------------------------
0  | pad1      | ConstantPad1d | 0     
1  | conv1     | Conv1dNorm    | 23.7 K
2  | pad2      | ConstantPad1d | 0     
3  | conv2     | Conv1dNorm    | 660 K 
4  | pad3      | ConstantPad1d | 0     
5  | conv3     | Conv1dNorm    | 280 K 
6  | pad4      | ConstantPad1d | 0     
7  | maxpool_3 | MaxPool1d     | 0     
8  | maxpool_4 | MaxPool1d     | 0     
9  | linear1   | LinearNorm    | 2.6 M 
10 | linear2   | LinearNorm    | 1.0 M 
11 | output    | Linear        | 3.0 K 
12 | nonlin    | ReLU          | 0     
13 | dropout   | Dropout       | 0     
14 | criterion | MSELoss       | 0     
---------------------------------------------
4.6 M     Trainable params
0         Non-trainable params
4.6 M     Total params
18.296    Total estimated model params size (MB)


Found 4573903 parameters


Validation sanity check: |          | 0/? [00:00<?, ?it/s]



Training: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

In [21]:
# Export the checkpoint and artifact archive using the objects returned by main().
_save_model(data_module, model_module, graph_module, 
            model, trainer, args)

## Import relevant submodules

In [None]:
# Resolve the data, model and graph classes named in the 'Main args' group.
main_args = args['Main args']
data_module = getattr(boda.data, main_args.data_module)
model_module = getattr(boda.model, main_args.model_module)
graph_module = getattr(boda.graph, main_args.graph_module)

In [None]:
# Build the data module from its processed arguments.
data_hparams = vars(data_module.process_args(args))
data = data_module(**data_hparams)

# Build the model the same way.
model_hparams = vars(model_module.process_args(args))
model = model_module(**model_hparams)

In [None]:
# Create a class inheriting from both the model and graph classes, with the
# graph module's processed arguments as class attributes, and re-class the
# existing model instance to it.
graph_attrs = vars(graph_module.process_args(args))
combined_bases = (model_module, graph_module)
model.__class__ = type('BODA_module', combined_bases, graph_attrs)


In [None]:
os.makedirs('/tmp/output/artifacts', exist_ok=True)
# Build the Trainer from the 'pl.Trainer' group, as main() does. The
# 'optional arguments' group only holds `help`, so using it would silently
# discard --gpus, --max_epochs, --default_root_dir, etc.
trainer = Trainer.from_argparse_args(args['pl.Trainer'])

In [None]:
# Sanity-check the forward pass on a random tensor of shape (2, 4, 600).
# NOTE(review): presumably (batch, channels, length), with length matching
# --paddedSeqLen 600 — confirm against the model definition.
model(torch.randn(2,4,600))

In [None]:
# Train the model on the data module with the Trainer built above.
trainer.fit(model, data)

In [None]:
# Write the checkpoint and copy the artifact archive to the configured artifact path.
_save_model(data_module, model_module, graph_module, 
            model, trainer, args)


In [None]:
# Single-cell version of the training pipeline, kept in line with main().

# Resolve classes from the same sub-packages main() uses.
data_module = getattr(boda.data, args['Main args'].data_module)
model_module= getattr(boda.model, args['Main args'].model_module)
graph_module= getattr(boda.graph, args['Main args'].graph_module)

# process_args() results are unpacked with vars(), as main() does; passing
# them directly to ** or to type() raises TypeError.
data = data_module(**vars(data_module.process_args(args)))
model= model_module(**vars(model_module.process_args(args)))

model.__class__ = type(
    'BODA_module',
    (model_module,graph_module),
    vars(graph_module.process_args(args))
)

os.makedirs('/tmp/output/artifacts', exist_ok=True)
# Trainer options live in the 'pl.Trainer' group ('positional arguments' is
# empty). The root dir is forced via a keyword override instead of assigning
# to trainer.default_root_dir after construction.
trainer = Trainer.from_argparse_args(
    args['pl.Trainer'],
    default_root_dir='/tmp/output/artifacts'
)

trainer.fit(model, data)

_save_model(data_module, model_module, graph_module, 
            model, trainer, args)