In [1]:
import os
import sys
import time
import yaml
import shutil
import argparse
import tarfile
import random
import tempfile
import subprocess

import torch
import pytorch_lightning as ptl
from pytorch_lightning import Trainer

import boda


In [2]:
# Record the versions of the core libraries this run was executed with
# (torch first, then pytorch_lightning).
for _lib in (torch, ptl):
    print(_lib.__version__)

1.7.1
1.3.0


In [3]:
def main(args):
    """Build the data/model objects selected in ``args`` and fit the model.

    Args:
        args: Mapping of argument-group title to a namespace. This function
            reads ``args['Main args']`` (``data_module``, ``model_module``,
            ``graph_module`` names) and ``args['pl.Trainer']``; the full
            mapping is also handed to each module's ``process_args``.

    Returns:
        Tuple ``(data_module, model_module, graph_module, model, trainer,
        args)``: the three resolved classes, the fitted model instance, the
        trainer, and the unchanged ``args`` mapping.

    Note:
        Nothing is saved here; the objects are returned so that the caller
        can pass them to ``_save_model`` as a separate step.
    """
    selection = args['Main args']

    # Resolve the classes named on the command line.
    data_module = getattr(boda.data, selection.data_module)
    model_module = getattr(boda.model, selection.model_module)
    graph_module = getattr(boda.graph, selection.graph_module)

    # Each class extracts its own constructor arguments from ``args``.
    data_kwargs = vars(data_module.process_args(args))
    data = data_module(**data_kwargs)
    model_kwargs = vars(model_module.process_args(args))
    model = model_module(**model_kwargs)

    # Re-class the model instance as a dynamically created subclass of both
    # the model and graph classes; the graph module's processed arguments
    # become class attributes of that new type.
    graph_attrs = vars(graph_module.process_args(args))
    combined_cls = type('BODA_module', (model_module, graph_module), graph_attrs)
    model.__class__ = combined_cls

    os.makedirs('/tmp/output/artifacts', exist_ok=True)
    trainer = Trainer.from_argparse_args(args['pl.Trainer'])
    trainer.fit(model, data)

    return data_module, model_module, graph_module, model, trainer, args

In [4]:
def _save_model(data_module, model_module, graph_module, 
                model, trainer, args):
    """Write a checkpoint into the run directory and deposit it as a tarball.

    A ``torch_checkpoint.pt`` file is written into the trainer's
    ``default_root_dir``. That directory is then packed into
    ``model_artifacts__<timestamp>__<random_tag>.tar.gz`` (archive root
    ``artifacts``) and the archive is delivered to ``artifact_path``.

    Args:
        data_module: Data class; its ``__name__`` and ``process_args(args)``
            result are stored in the checkpoint.
        model_module: Model class; stored the same way.
        graph_module: Graph class; stored the same way.
        model: Object whose ``state_dict()`` is stored in the checkpoint.
        trainer: Not used by this function; kept for interface compatibility.
        args: Mapping of argument-group title to namespace. Reads
            ``args['pl.Trainer'].default_root_dir`` and
            ``args['Main args'].artifact_path``.

    Raises:
        subprocess.CalledProcessError: If the ``gsutil cp`` upload exits with
            a non-zero status.
    """
    local_dir = args['pl.Trainer'].default_root_dir
    artifact_path = args['Main args'].artifact_path

    save_dict = {
        'data_module'  : data_module.__name__,
        'data_hparams' : data_module.process_args(args),
        'model_module' : model_module.__name__,
        'model_hparams': model_module.process_args(args),
        'graph_module' : graph_module.__name__,
        'graph_hparams': graph_module.process_args(args),
        'model_state_dict': model.state_dict(),
        'timestamp'    : time.strftime("%Y%m%d_%H%M%S"),
        'random_tag'   : random.randint(100000,999999)
    }
    torch.save(save_dict, os.path.join(local_dir,'torch_checkpoint.pt'))

    filename=f'model_artifacts__{save_dict["timestamp"]}__{save_dict["random_tag"]}.tar.gz'
    # Stage the archive in a real temporary directory. Previously the name was
    # overwritten with '/tmp/output', which left the temp dir unused and the
    # tarball behind in a fixed, shared location next to the run directory.
    with tempfile.TemporaryDirectory() as tmpdirname:
        archive_path = os.path.join(tmpdirname, filename)
        with tarfile.open(archive_path, 'w:gz') as tar:
            tar.add(local_dir,arcname='artifacts')

        # Only a path that *starts* with the scheme is a bucket URL; a plain
        # substring test would also match local paths containing 'gs://'.
        if artifact_path.startswith('gs://'):
            cloud_target = artifact_path.rstrip('/') + '/' + filename
            subprocess.check_call(
                ['gsutil', 'cp', archive_path, cloud_target]
            )
        else:
            os.makedirs(artifact_path, exist_ok=True)
            shutil.copy(archive_path, artifact_path)


In [5]:
def model_fn(model_dir):
    """Rebuild a model from the ``torch_checkpoint.pt`` file in ``model_dir``.

    Args:
        model_dir: Directory containing ``torch_checkpoint.pt``. The
            checkpoint must hold the keys ``model_module``, ``model_hparams``,
            ``model_state_dict`` and ``timestamp``.

    Returns:
        An instance of the checkpointed model class with its state dict
        loaded.

    Raises:
        AttributeError: If the stored class name is found neither on
            ``boda.model`` nor on ``boda``.
    """
    # NOTE(review): torch.load unpickles arbitrary objects; only load
    # checkpoints from a trusted source.
    # map_location='cpu' lets a checkpoint written on a GPU host be read on a
    # CPU-only host; the values are copied into the new model below anyway.
    checkpoint = torch.load(
        os.path.join(model_dir,'torch_checkpoint.pt'),
        map_location='cpu'
    )

    # The training entry point resolves model classes from ``boda.model``;
    # the top-level ``boda`` namespace is kept as a fallback.
    module_name  = checkpoint['model_module']
    model_module = getattr(boda.model, module_name, None)
    if model_module is None:
        model_module = getattr(boda, module_name)

    # Hyper-parameters may be stored as a namespace-like object rather than a
    # mapping; ``**`` only accepts mappings, so convert when needed.
    hparams = checkpoint['model_hparams']
    if not hasattr(hparams, 'keys'):
        hparams = vars(hparams)

    model = model_module(**hparams)
    model.load_state_dict(checkpoint['model_state_dict'])
    print(f'Loaded model from {checkpoint["timestamp"]}')
    return model


# Process runtime arguments

## Command line args to use for testing

In [6]:
# Test command line, grouped by the component each set of flags configures.
# Adjacent string literals are concatenated by the parser, so no `+\` needed.
cmd_str = (
    # data module
    '--data_module BODA2_DataModule '
    '--datafile_path gs://syrgoth/data/BODA.MPRA.txt '
    '--valid_pct 5 --test_pct 5 '
    '--batch_size  32 --padded_seq_len 600 --num_workers 1 '
    # model module
    '--model_module Basset '
    '--n_outputs  3 --loss_criterion MSELoss '
    # graph module
    '--graph_module CNNBasicTraining '
    # trainer
    '--gpus 1 --min_epochs 5 --max_epochs 5 --default_root_dir /tmp/output/artifacts '
    # artifact destination
    '--artifact_path gs://haddath/sgosai/deposit_test '
)

# Equivalent shell invocation, shown as the cell output.
f'python /home/ubuntu/boda2/src/main.py {cmd_str}'

'python /home/ubuntu/boda2/src/main.py --data_module BODA2_DataModule --datafile_path gs://syrgoth/data/BODA.MPRA.txt --valid_pct  5 --test_pct  5 --batch_size  32 --padded_seq_len 600 --num_workers 1 --model_module Basset --n_outputs  3 --loss_criterion MSELoss --graph_module CNNBasicTraining --gpus 1 --min_epochs 5 --max_epochs 5 --default_root_dir /tmp/output/artifacts --artifact_path gs://haddath/sgosai/deposit_test '

## Set base args for script

Basic arguments that identify which submodules are used and where model artifacts will be saved.

In [7]:
# Base parser. Built-in help is disabled here; a --help flag is added later,
# once every argument group has been registered.
parser = argparse.ArgumentParser(description="BODA trainer", add_help=False)
group = parser.add_argument_group('Main args')

# The three module selectors are all required strings and differ only in
# flag name and help text.
for _flag, _help in (
    ('--data_module',  'BODA data module to process dataset.'),
    ('--model_module', 'BODA model module to fit dataset.'),
    ('--graph_module', 'BODA graph module to define computations.'),
):
    group.add_argument(_flag, type=str, required=True, help=_help)

group.add_argument(
    '--artifact_path',
    type=str,
    default='/opt/ml/checkpoints/',
    help='Path where model artifacts are deposited.',
)
group.add_argument('--pretrained_weights', type=str, help='Pretrained weights.')



_StoreAction(option_strings=['--pretrained_weights'], dest='pretrained_weights', nargs=None, const=None, default=None, type=<class 'str'>, choices=None, help='Pretrained weights.', metavar=None)

In [8]:
# First pass: only the 'Main args' flags are registered so far, so every other
# flag lands in `leftover_args`. A bare split() already ignores leading and
# trailing whitespace.
known_args, leftover_args = parser.parse_known_args(cmd_str.split())

In [9]:
known_args

Namespace(artifact_path='gs://haddath/sgosai/deposit_test', data_module='BODA2_DataModule', graph_module='CNNBasicTraining', model_module='Basset', pretrained_weights=None)

## Extract first-order submodule args

Get submodule-specific arguments.

In [10]:
# Resolve the data, model, and graph classes named in the first parsing pass.
Data, Model, Graph = (
    getattr(_package, _name)
    for _package, _name in (
        (boda.data,  known_args.data_module),
        (boda.model, known_args.model_module),
        (boda.graph, known_args.graph_module),
    )
)

# Let each class register its own arguments on the shared parser, in
# data -> model -> graph order.
for _register in (
    Data.add_data_specific_args,
    Model.add_model_specific_args,
    Graph.add_graph_specific_args,
):
    parser = _register(parser)

# Re-parse so that the newly registered flags are recognised.
known_args, leftover_args = parser.parse_known_args(cmd_str.split())


In [11]:
known_args

Namespace(activity_columns=['K562', 'HepG2.neon', 'SKNSH'], artifact_path='gs://haddath/sgosai/deposit_test', batch_size=32, conv1_channels=300, conv1_kernel_size=19, conv2_channels=200, conv2_kernel_size=11, conv3_channels=200, conv3_kernel_size=7, data_module='BODA2_DataModule', datafile_path='gs://syrgoth/data/BODA.MPRA.txt', dropout_p=0.3, graph_module='CNNBasicTraining', linear1_channels=1000, linear2_channels=1000, loss_criterion='MSELoss', model_module='Basset', n_outputs=3, num_workers=1, optimizer='Adam', padded_seq_len=600, pretrained_weights=None, scheduler=None, scheduler_interval='epoch', scheduler_monitor=None, sequence_column='nt.sequence', test_pct=5.0, use_batch_norm=True, use_weight_norm=False, valid_pct=5.0)

## Extract second-order submodule args

Get another set of submodule-specific arguments based on the preliminary choices (e.g., optional arguments for the optimizer of choice).

In [12]:
# Second pass: each class may register further arguments that depend on the
# values parsed so far.
for _submodule in (Data, Model, Graph):
    parser = _submodule.add_conditional_args(parser, known_args)

# Add the Lightning Trainer flags, then the help flag that was left off the
# base parser.
parser = Trainer.add_argparse_args(parser)
parser.add_argument('--help', '-h', action='help')

# Final strict parse of the full command line, then regroup the parsed values
# with the project helper (keyed by argument-group title).
args = boda.common.utils.organize_args(
    parser,
    parser.parse_args(cmd_str.split()),
)



In [13]:
args

{'positional arguments': Namespace(),
 'optional arguments': Namespace(help=None),
 'Main args': Namespace(artifact_path='gs://haddath/sgosai/deposit_test', data_module='BODA2_DataModule', graph_module='CNNBasicTraining', model_module='Basset', pretrained_weights=None),
 'Data Module args': Namespace(activity_columns=['K562', 'HepG2.neon', 'SKNSH'], batch_size=32, datafile_path='gs://syrgoth/data/BODA.MPRA.txt', num_workers=1, padded_seq_len=600, sequence_column='nt.sequence', test_pct=5.0, valid_pct=5.0),
 'Model Module args': Namespace(conv1_channels=300, conv1_kernel_size=19, conv2_channels=200, conv2_kernel_size=11, conv3_channels=200, conv3_kernel_size=7, dropout_p=0.3, linear1_channels=1000, linear2_channels=1000, loss_criterion='MSELoss', n_outputs=3, use_batch_norm=True, use_weight_norm=False),
 'Graph Module args': Namespace(optimizer='Adam', scheduler=None, scheduler_interval='epoch', scheduler_monitor=None),
 'Optimizer args': Namespace(amsgrad=False, beta1=0.9, beta2=0.999,

# Run training
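Use the modified `main` runner, which returns the modules, model, and trainer so that the model can be saved in a separate step.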

In [14]:
# Fit the model; keep every returned object so the save step can run separately.
(data_module, model_module, graph_module,
 model, trainer, args) = main(args)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores


Padding sequences and converting to one-hot tensors...
10000/27719 sequences padded and tokenized...
20000/27719 sequences padded and tokenized...


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

   | Name      | Type          | Params
---------------------------------------------
0  | pad1      | ConstantPad1d | 0     
1  | conv1     | Conv1dNorm    | 23.7 K
2  | pad2      | ConstantPad1d | 0     
3  | conv2     | Conv1dNorm    | 660 K 
4  | pad3      | ConstantPad1d | 0     
5  | conv3     | Conv1dNorm    | 280 K 
6  | pad4      | ConstantPad1d | 0     
7  | maxpool_3 | MaxPool1d     | 0     
8  | maxpool_4 | MaxPool1d     | 0     
9  | linear1   | LinearNorm    | 2.6 M 
10 | linear2   | LinearNorm    | 1.0 M 
11 | output    | Linear        | 3.0 K 
12 | nonlin    | ReLU          | 0     
13 | dropout   | Dropout       | 0     
14 | criterion | MSELoss       | 0     
---------------------------------------------
4.6 M     Trainable params
0         Non-trainable params
4.6 M     Total params
18.296    Total estimated model params size (MB)


Found 4573903 parameters


Validation sanity check: 0it [00:00, ?it/s]




------------------------------------------------------------------------------------------------------------------------------------
| arithmetic_mean_loss: 3.16676 | harmonic_mean_loss: 2.87380 | prediction_mean_pearson: -0.04345 | entropy_mean_pearson: -0.03769 |
------------------------------------------------------------------------------------------------------------------------------------





Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]


----------------------------------------------------------------------------------------------------------------------------------
| arithmetic_mean_loss: 1.30195 | harmonic_mean_loss: 1.24278 | prediction_mean_pearson: 0.37595 | entropy_mean_pearson: 0.14144 |
----------------------------------------------------------------------------------------------------------------------------------



Validating: 0it [00:00, ?it/s]


----------------------------------------------------------------------------------------------------------------------------------
| arithmetic_mean_loss: 1.21013 | harmonic_mean_loss: 1.14407 | prediction_mean_pearson: 0.34851 | entropy_mean_pearson: 0.16916 |
----------------------------------------------------------------------------------------------------------------------------------



Validating: 0it [00:00, ?it/s]


----------------------------------------------------------------------------------------------------------------------------------
| arithmetic_mean_loss: 1.17637 | harmonic_mean_loss: 1.11274 | prediction_mean_pearson: 0.44279 | entropy_mean_pearson: 0.21071 |
----------------------------------------------------------------------------------------------------------------------------------



Validating: 0it [00:00, ?it/s]


----------------------------------------------------------------------------------------------------------------------------------
| arithmetic_mean_loss: 1.54664 | harmonic_mean_loss: 1.44587 | prediction_mean_pearson: 0.42229 | entropy_mean_pearson: 0.20462 |
----------------------------------------------------------------------------------------------------------------------------------



Validating: 0it [00:00, ?it/s]


----------------------------------------------------------------------------------------------------------------------------------
| arithmetic_mean_loss: 9.84945 | harmonic_mean_loss: 9.02783 | prediction_mean_pearson: 0.37639 | entropy_mean_pearson: 0.17420 |
----------------------------------------------------------------------------------------------------------------------------------



In [15]:
# Checkpoint the fitted model and deposit the artifact tarball.
_save_model(
    data_module=data_module,
    model_module=model_module,
    graph_module=graph_module,
    model=model,
    trainer=trainer,
    args=args,
)