# Train an LSTM Model with MLflow

In [None]:
from argparse import ArgumentParser
import os
import re

from loguru import logger
import mlflow
from mlflow.tracking import MlflowClient
import numpy as np
import pandas as pd
import pytorch_lightning as pl
import seaborn as sns
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader

from models import HarLSTM, ModelUtils
from pl_data import HarDataModule
from utils import FeatUtils

# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Apply seaborn's "dark" theme to subsequent plots.
sns.set_theme(style="dark")

# 1. Prepare features

In [None]:
# Dataset location and the batch size handed to the data module.
data_dir_path = "./data/har_dataset"
batch_size = 16

# "std" is forwarded as the data module's `normalize` option.
data_module = HarDataModule(
    data_dir_path,
    batch_size=batch_size,
    normalize="std",
)

# 2. Define Network parameters

In [None]:
# Command-line style arguments consumed by the Lightning Trainer.
#
# The GPU count is derived from the runtime instead of being hard-coded to 1,
# so the notebook also runs on CPU-only machines (a later cell explicitly
# handles the "No GPU available" case). The batch size reuses the
# notebook-level `batch_size` so the value is defined in one place only.
lstr_args = [
    '--max_epochs', '10',
    '--gpus', '1' if torch.cuda.is_available() else '0',
    '--batch_size', str(batch_size),
    '--stochastic_weight_avg', 'True',
    '--gradient_clip_val', '5',
    '--gradient_clip_algorithm', 'norm',
    # DEBUGGING https://pytorch-lightning.readthedocs.io/en/latest/common/debugging.html
    # Don't forget to turn these off after debugging; they slow things down a lot.
    # '--profiler', 'pytorch', # issue no.3
    # '--log_gpu_memory', 'all',
    # '--limit_train_batches', '3',
    # '--limit_predict_batches', '3',
    # '--overfit_batches', '3',
    # Inspect gradient norms (about a 10% performance hit); uncomment to enable.
    # '--track_grad_norm', '2',
]

# Extend a plain parser with every Trainer flag, then parse the list above
# rather than sys.argv (which belongs to the notebook kernel).
parser = ArgumentParser()
parser.add_argument('--batch_size', default=batch_size, type=int)
parser = pl.Trainer.add_argparse_args(parser)
args = parser.parse_args(lstr_args)

In [None]:
# Detect CUDA once and report which device training will run on.
use_gpu = torch.cuda.is_available()
print(
    'Training on GPU!'
    if use_gpu
    else 'No GPU available, training on CPU; consider making n_epochs very small.'
)

In [None]:
# Network dimensions passed to HarLSTM when the model is built.
input_size, output_size = 9, 6
n_hidden, n_layers = 128, 2

# Training hyperparameters.
# NOTE(review): `epochs` and `lr` are not referenced by any other visible
# cell; the Trainer's epoch count comes from `--max_epochs` — confirm whether
# these are still needed.
epochs = 50
lr = 1e-4

In [None]:
# Build the LSTM classifier and print its structure.
net = HarLSTM(input_size, output_size, n_hidden=n_hidden, n_layers=n_layers)
print("Model information:", net, sep="\n")

# Create the Lightning Trainer from the parsed argument namespace.
trainer = pl.Trainer.from_argparse_args(args)

# 3. Train the model with MLflow

In [None]:
# Define helper functions
def log_model_params_step(net):
    """Log the model's class and architecture hyperparameters to MLflow.

    Reads ``n_layers``, ``n_hidden``, ``drop_prob`` and ``input_size`` from
    ``net`` and records each one with ``mlflow.log_param`` under a parameter
    of the same name, after logging ``type(net)`` as ``model_type``.
    """
    mlflow.log_param("model_type", type(net))
    for attr_name in ("n_layers", "n_hidden", "drop_prob", "input_size"):
        mlflow.log_param(attr_name, getattr(net, attr_name))

def save_scaler_step(scaler, scaler_path="scaler.pkl"):
    """Persist the feature scaler and attach it to the MLflow run as an artifact.

    The scaler is written to ``scaler_path`` with
    ``FeatUtils.save_feat_scaler``, uploaded under the ``model`` artifact
    directory, and the local file is then deleted.

    Args:
        scaler: Scaler object to persist.
        scaler_path: Temporary local file used for the upload.
    """
    FeatUtils.save_feat_scaler(scaler, scaler_path)
    try:
        mlflow.log_artifact(scaler_path, artifact_path="model")
    finally:
        # Remove the temporary file even when the upload fails, so a failed
        # run does not leave a stray scaler file in the working directory.
        os.remove(scaler_path)
    
def test_model_step(net, data_module, batch_size, use_gpu=True):
    """Evaluate ``net`` on the test split and log its accuracy to MLflow.

    Args:
        net: Model to evaluate; its ``criterion`` attribute is used as the loss.
        data_module: Provides the loader through ``test_dataloader()``.
        batch_size: Forwarded to ``ModelUtils.test_net``.
        use_gpu: Forwarded to ``ModelUtils.test_net``.
    """
    loader = data_module.test_dataloader()
    # Only the second value returned by test_net is logged, as metric "acc".
    _loss, accuracy = ModelUtils.test_net(
        net, net.criterion, loader, batch_size, use_gpu=use_gpu
    )
    mlflow.log_metric("acc", accuracy)

In [None]:
# MLflow tracking server and the experiment that groups this notebook's runs.
experiment_name = "HAR_LSTM_Experiment"
mlflow_uri = "http://mlflow_tracker:5000"

mlflow.set_tracking_uri(mlflow_uri)
mlflow.set_experiment(experiment_name)

# Read the URI back from MLflow and print it.
tracking_uri = mlflow.get_tracking_uri()
print(f"Current tracking uri: {tracking_uri}")

In [None]:
mlflow_run_name = "HAR_LSTM_Training"

# Enable MLflow's PyTorch autologging before the run starts.
mlflow.pytorch.autolog()

# Everything inside the context is logged to a single MLflow run.
with mlflow.start_run(run_name=mlflow_run_name) as run:
    artifact_uri = mlflow.get_artifact_uri()
    print(f"Current artifact uri: {artifact_uri}")

    # Model architecture parameters.
    log_model_params_step(net)

    # Data-related parameters.
    mlflow.log_param("batch_size", batch_size)
    mlflow.log_param("train_val_ratio", data_module.train_val_ratio)
    mlflow.log_param(
        "scaler",
        None if data_module.scaler is None else type(data_module.scaler),
    )

    # Fit, then run the Trainer's test loop with ckpt_path="best".
    trainer.fit(net, datamodule=data_module)
    trainer.test(ckpt_path="best", datamodule=data_module)

    # Log test accuracy as the "acc" metric.
    test_model_step(net, data_module, batch_size, use_gpu=use_gpu)

# 4. Test inference with a loaded model

In [None]:
# Build a fresh network with the same architecture and load weights into it.
# NOTE(review): `model_path` is not defined in any visible cell of this
# notebook — set it to the saved weights file before running this cell.
loaded_net = HarLSTM(input_size, output_size, n_hidden=n_hidden, n_layers=n_layers)
ModelUtils.load_model_weight(loaded_net, model_path)

In [None]:
# Evaluate the reloaded model on the test split. The loss function and loader
# are taken from the model and the data module (as test_model_step does),
# because no notebook-level `criterion` or `test_loader` is defined.
test_loss, test_acc = ModelUtils.test_net(
    loaded_net,
    loaded_net.criterion,
    data_module.test_dataloader(),
    batch_size,
    use_gpu=use_gpu,
)