### Hyperparameter tuning for Language Model

In [1]:
import nuclio

In [2]:
# nuclio: start-code

In [3]:
# Function settings: container image to run in, and runtime kind ("job")
%nuclio config spec.image = "mlrun/ml-models-gpu"
%nuclio config kind = "job"

%nuclio: setting spec.image to 'mlrun/ml-models-gpu'
%nuclio: setting kind to 'job'


In [4]:
from os import path
import pandas as pd
import numpy as np
import random
import fastai
import json
from fastai.text import *
from fastai.callbacks import *
# Make CUDA device 0 the current device for everything below.
# `torch` is not imported explicitly; presumably it is exposed by the fastai
# star imports above -- confirm.
# NOTE(review): this runs at import time, so a GPU is presumably required just
# to load this code -- confirm.
torch.cuda.set_device(0)

def get_min_grad(model, num_it=200):
    """Run the learning-rate finder and return the suggested learning rate.

    Args:
        model: Learner-like object exposing ``lr_find(num_it=...)`` and a
            ``recorder`` that offers ``plot(suggestion=True)`` and a
            ``min_grad_lr`` attribute.
        num_it: Number of iterations handed to ``lr_find``. Defaults to 200,
            the value that used to be hard-coded here.

    Returns:
        Whatever ``model.recorder.min_grad_lr`` holds after the plot call.
    """
    model.lr_find(num_it=num_it)
    # min_grad_lr is read only after plot(suggestion=True) has run; presumably
    # that call is what populates it -- confirm against the fastai docs.
    model.recorder.plot(suggestion=True)
    return model.recorder.min_grad_lr

def train_lm_model(context):
    """Train an AWD-LSTM language model for one hyperparameter combination.

    Reads the run parameters from ``context``, loads the saved language-model
    data, trains in two phases (a single cycle first, then ``epochs`` cycles
    after ``unfreeze()``), and logs the best accuracy together with the
    parameters that were used.

    Parameters read through ``context.get_param``:
        bs: Batch size (cast to ``int``).
        drop_mult: Dropout multiplier handed to ``language_model_learner``
            (cast to ``float``).
        epochs: Number of epochs for the second training phase (cast to
            ``int``).
        num_samples: Only used to build the checkpoint name (cast to ``int``).
        data_lm_path: File name passed to ``load_data``.

    Logged outputs:
        result ``accuracy``: Largest first-metric value found in
            ``learn_lm.recorder.metrics`` once the final fit has finished.
        model ``best_params``: JSON with ``bs``, ``drop_mult`` and ``epochs``,
            stored as ``params.json`` under the ``models`` artifact subpath.

    Args:
        context: Run context providing ``get_param``, ``log_result``,
            ``log_model`` and ``artifact_subpath``.
    """
    # Load params
    bs = int(context.get_param('bs'))
    drop_mult = float(context.get_param('drop_mult'))
    epochs = int(context.get_param('epochs'))
    num_samples = int(context.get_param('num_samples'))
    data_lm_path = context.get_param('data_lm_path')

    # Load data
    version = f"sample_{num_samples}"
    data_lm = load_data("", data_lm_path, bs=bs)

    # Define metrics (accuracy is the only one, so it sits at index 0 below)
    metrics = [accuracy]

    # Define model callbacks: early stopping that monitors accuracy
    early_stop = partial(EarlyStoppingCallback,
                         monitor='accuracy',
                         mode="max",
                         min_delta=0.01,
                         patience=5)
    callback_fns = [early_stop]

    # Define language model (switched to fp16 via to_fp16())
    learn_lm = language_model_learner(data_lm,
                                      AWD_LSTM,
                                      drop_mult=drop_mult,
                                      metrics=metrics,
                                      callback_fns=callback_fns).to_fp16()

    # Define training callback: save a checkpoint named after the
    # hyperparameters whenever the monitored accuracy improves
    model_name = f"learn_lm_{version}_bs_{bs}_dropmult_{drop_mult}"
    save_best = SaveModelCallback(learn_lm,
                                  every='improvement',
                                  monitor="accuracy",
                                  mode="max",
                                  name=model_name)
    callbacks = [save_best]

    # Train, phase 1: one cycle at the learning rate suggested by the finder
    min_grad = get_min_grad(learn_lm)
    learn_lm.fit_one_cycle(1, min_grad, callbacks=callbacks)
    learn_lm.unfreeze()

    # Train, phase 2: re-run the finder after unfreezing, then train with a
    # learning-rate slice spanning min_grad .. min_grad*100
    min_grad = get_min_grad(learn_lm)
    learn_lm.fit_one_cycle(epochs, slice(min_grad, min_grad*100), moms=(0.8, 0.7), callbacks=callbacks)

    # Get accuracy: first metric of every recorded entry, best one wins
    best_acc = max(recorded[0] for recorded in learn_lm.recorder.metrics).item()

    params = {"bs": bs, "drop_mult" : drop_mult, "epochs" : epochs}

    context.log_result('accuracy', best_acc)
    context.log_model("best_params", body=json.dumps(params, indent=2).encode('utf-8'),
                      artifact_path=context.artifact_subpath('models'),
                      model_file="params.json")

In [5]:
# nuclio: end-code

In [6]:
from mlrun import mlconf
import os
from os import path

# Target location for storing pipeline artifacts, resolved to an absolute path
# (relative to the current working directory of the notebook)
artifact_path = path.abspath('../jobs')
# MLRun DB path or API service URL: keep whatever is already configured and
# fall back to 'http://mlrun-api:8080' only when the current value is falsy
mlconf.dbpath = mlconf.dbpath or 'http://mlrun-api:8080'

# Echo both settings so the effective configuration shows up in the cell output
print(f'Artifacts path: {artifact_path}\nMLRun DB path: {mlconf.dbpath}')

Artifacts path: /User/nlp/components/jobs
MLRun DB path: http://mlrun-api:8080


In [7]:
from mlrun import code_to_function 
# create job function object from notebook code
# (no filename is passed; presumably the code between the nuclio start-code /
# end-code markers of this notebook is what gets packaged -- confirm in MLRun docs)
fn = code_to_function("hyper_param_lm_search")

# add metadata (for templates and reuse)
# default_handler names the training function defined in the code section above
fn.spec.default_handler = "train_lm_model"
fn.spec.description = "hyperparameter tuning for language model"
fn.metadata.categories = ["hyperparameter", "ml"]
fn.metadata.labels = {"author": "nschenone"}
# set the resource limits to a single 'nvidia.com/gpu'
fn.spec.resources["limits"] = {'nvidia.com/gpu' : 1}
# write the function spec to YAML; as the last expression of the cell, the
# value returned by export() is what gets displayed
fn.export("../yaml/hyper_lm.yaml")

> 2020-08-13 19:01:31,981 [info] function spec saved to path: ../yaml/hyper_lm.yaml


<mlrun.runtimes.kubejob.KubejobRuntime at 0x7f8a8cd9c160>