# Use a custom model quickly

Reuse the data(loader) from the "ML pipe"

Import the libraries needed

In [7]:
import sys
import os
from loguru import logger

from src.run_training import parse_args_to_dict
from src.training.experiment import define_experiment_data
from src.utils.config_utils import import_config

If you run this notebook on Colab, you need to install the virtual environment with Poetry yourself (as far as I understand):

In [None]:
# Detect Colab by checking whether the string form of the active IPython shell
# mentions 'google.colab'.
running_in_colab = 'google.colab' in str(get_ipython())
if running_in_colab:
    logger.info('You are running this on COLAB so installing the environment here')
    # Clone the repository under /content and install its dependencies with Poetry.
    os.chdir("/content")    
    !git clone https://github.com/petteriTeikari/minivess_mlops.git
    !pip install poetry
    # Run the remaining Poetry commands from inside the cloned repository.
    os.chdir("/content/minivess_mlops")
    # Ask Poetry to create the virtualenv inside the project directory.
    !poetry config virtualenvs.in-project true
    !poetry install
    # NOTE(review): `poetry shell` starts a subshell; it presumably does not change
    # the interpreter of the already-running notebook kernel - TODO confirm that the
    # imports below actually resolve against the installed environment on Colab.
    !poetry shell
else:
    # Nothing is installed here: the kernel is assumed to already have the dependencies.
    logger.info('Assuming that you are runnign this from IDE, '
                'or some other environment where you have your Jupyter kernel created from Poetry files')

Define helper function(s)

In [8]:
def get_dataloaders(experim_dataloaders: dict):
    """Return the training and validation dataloaders of the first fold.

    Args:
        experim_dataloaders: Nested mapping keyed by fold name. Only
            ``experim_dataloaders['fold0']['TRAIN']`` and
            ``experim_dataloaders['fold0']['VAL']['MINIVESS']`` are read.

    Returns:
        Tuple ``(train, val)`` with the two entries listed above.

    Raises:
        IOError: If the fold ``'fold0'`` is missing, or if the ``'TRAIN'`` /
            ``'VAL'`` -> ``'MINIVESS'`` entries cannot be looked up in it.
    """
    fold_name = 'fold0'

    # Look the fold up with .get() *before* indexing into it, so that a missing
    # fold produces the descriptive IOError below instead of a bare KeyError.
    fold = experim_dataloaders.get(fold_name)
    if fold is None:
        raise IOError('Fold name = "{}" not found in the dataloaders dictionary'.format(fold_name))

    try:
        train = fold['TRAIN']
        val = fold['VAL']['MINIVESS']
    except (LookupError, TypeError) as e:
        # Missing split/dataset key, or an entry that is not subscriptable.
        raise IOError('Could not get the dataloaders from the dictionary, error = {}'.format(e)) from e

    return train, val

Input arguments for the training (you can add all the input arguments supported by `run_training.py` here)

In [None]:
# Command-line style arguments, in the form accepted by run_training.py
input_args = ['-c', 'tutorials/train_demo']

# The Jupyter kernel fills sys.argv with its own launcher arguments, so replace
# it with a placeholder program name followed by the arguments above. This lets
# the argument parser behave as it does when run_training.py is run directly.
sys.argv = ['notebook_run', *input_args]
args = parse_args_to_dict()

Create the config with Hydra from the .yaml file(s)

In [None]:
# The task config file name comes from the parsed arguments
task_cfg_name = args['task_config_file']
config, exp_run = import_config(args=args, task_cfg_name=task_cfg_name)

Import the dataloaders (now the data augmentations are here as well as data transformations)

In [None]:
# define_experiment_data() returns four values; only the dataloaders (third)
# and the updated exp_run (fourth) are used in this notebook.
experiment_data = define_experiment_data(config=config, exp_run=exp_run)
_, _, experim_dataloaders, exp_run = experiment_data

# Pick the "train" and "validation" dataloaders out of the nested dictionary
train, val = get_dataloaders(experim_dataloaders)

Now you are ready to train a new model that you just want to test quickly,
without having to battle with the config .yaml files.
TODO: maybe add a fastai demo with MLflow autologging here:
https://github.com/mlflow/mlflow/blob/master/examples/fastai/train.py

In [None]:
# Demo loop: iterate both dataloaders for a few epochs without training anything.
# Replace the loop bodies with your own forward/backward pass and metrics.
no_of_epochs = 3
logger.info('Training for {} epochs'.format(no_of_epochs))
for epoch in range(no_of_epochs):

    logger.info('Epoch {}/{}'.format(epoch, no_of_epochs - 1))

    # Train
    logger.info('train with {} batches'.format(len(train)))
    for batch in train:
        # Each batch is indexed by the 'image' and 'label' keys
        images, mask = batch['image'], batch['label']

    # Validation
    # Report the size of the validation loader (this previously logged len(train))
    logger.info('validate with {} batches'.format(len(val)))
    for batch in val:
        images, mask = batch['image'], batch['label']

logger.info('Training done!')