### Hyperparameter tuning for Classification Model

In [1]:
import nuclio

In [2]:
# nuclio: start-code

In [3]:
# Function settings declared through the nuclio magics: the container image
# to run in and the runtime kind (the magic echoes each value it sets).
%nuclio config spec.image = "mlrun/ml-models-gpu"
%nuclio config kind = "job"

%nuclio: setting spec.image to 'mlrun/ml-models-gpu'
%nuclio: setting kind to 'job'


In [4]:
from os import path
import pandas as pd
import numpy as np
import random
import fastai
import json
from fastai.text import *
from fastai.callbacks import *
# Select CUDA device 0 as the current device at import time. `torch` is not
# imported by name in this cell; presumably it is re-exported by the fastai
# star imports above -- TODO confirm.
torch.cuda.set_device(0)

def get_min_grad(model, num_it=200):
    """Run the learning-rate finder and return its suggested learning rate.

    Args:
        model: Learner-like object exposing ``lr_find`` and, once the sweep
            has run, a ``recorder`` with ``plot`` and ``min_grad_lr``.
        num_it: Number of iterations forwarded to ``lr_find``. Defaults to
            200, the value that used to be hard-coded here.

    Returns:
        The ``min_grad_lr`` attribute of ``model.recorder``.

    Raises:
        AttributeError: If the recorder has no ``min_grad_lr`` after
            plotting, i.e. no suggestion is available to return.
    """
    model.lr_find(num_it=num_it)

    # The recorder is looked up only after the sweep has run.
    recorder = model.recorder
    # Presumably plotting with suggestion=True is what populates
    # ``min_grad_lr`` -- keep the plot call ahead of the attribute read.
    recorder.plot(suggestion=True)

    try:
        return recorder.min_grad_lr
    except AttributeError as err:
        # Same exception type as before, but with a message that points at
        # the actual cause instead of a bare missing-attribute error.
        raise AttributeError(
            "learning-rate finder produced no suggestion "
            "(recorder.min_grad_lr is not set); consider a larger num_it"
        ) from err

def train_clas_model(context):
    """Train the text classifier for one hyperparameter set and log the outcome.

    Reads ``bs``, ``thresh``, ``drop_mult``, ``epochs``, ``num_samples``,
    ``encodings`` and ``data_clas_path`` from ``context``, builds an
    ``AWD_LSTM`` text classifier, trains it first frozen (one cycle) and then
    unfrozen (``epochs`` cycles), and finally logs:

    * result ``fbeta``: the largest second entry found in
      ``recorder.metrics`` after training;
    * model artifact ``best_params``: the hyperparameters as ``params.json``.

    Args:
        context: Run context providing ``get_param``, ``log_result``,
            ``log_model`` and ``artifact_subpath``.
    """
    # --- Hyperparameters and locations supplied by the run context ---------
    bs = int(context.get_param('bs'))
    thresh = float(context.get_param('thresh'))
    drop_mult = float(context.get_param('drop_mult'))
    epochs = int(context.get_param('epochs'))
    num_samples = int(context.get_param('num_samples'))
    encoder_name = context.get_param('encodings')
    databunch_path = context.get_param('data_clas_path')

    # --- Data ---------------------------------------------------------------
    version = f"sample_{num_samples}.pkl"
    databunch = load_data("", databunch_path, bs=bs, num_workers=0)

    # --- Metrics (threshold bound in) and early stopping --------------------
    metrics = [
        partial(accuracy_thresh, thresh=thresh),
        partial(fbeta, thresh=thresh),
    ]
    stop_early = partial(
        EarlyStoppingCallback,
        monitor='fbeta',
        mode="max",
        min_delta=0.001,
        patience=5,
    )

    # --- Classifier: built, switched to fp16, encoder loaded, then frozen ---
    learner = text_classifier_learner(
        databunch,
        AWD_LSTM,
        drop_mult=drop_mult,
        metrics=metrics,
        callback_fns=[stop_early],
    )
    learner = learner.to_fp16()
    learner.load_encoder(encoder_name)
    learner.freeze()

    # --- Checkpointing: one callback instance is shared by both stages ------
    model_name = f"learn_clas_{version}_bs_{bs}_thresh_{thresh}_dropmult_{drop_mult}"
    callbacks = [
        SaveModelCallback(
            learner,
            every='improvement',
            monitor="fbeta",
            mode="max",
            name=model_name,
        )
    ]

    # --- Stage 1: frozen model, single cycle at the suggested rate ----------
    frozen_lr = get_min_grad(learner)
    learner.fit_one_cycle(1, frozen_lr, callbacks=callbacks)
    learner.unfreeze()

    # --- Stage 2: unfrozen model, rate slice from the suggestion to 100x ----
    unfrozen_lr = get_min_grad(learner)
    lr_range = slice(unfrozen_lr, unfrozen_lr * 100)
    learner.fit_one_cycle(epochs, lr_range, moms=(0.8,0.7), callbacks=callbacks)

    # --- Report -------------------------------------------------------------
    # Index 1 of each recorded row matches the position of the fbeta metric
    # in ``metrics`` above.
    best_fbeta = max(row[1] for row in learner.recorder.metrics).item()

    params = {"bs": bs, "thresh" : thresh, "drop_mult" : drop_mult, "epochs" : epochs}
    params_blob = json.dumps(params, indent=2).encode('utf-8')

    context.log_result('fbeta', best_fbeta)
    context.log_model(
        "best_params",
        body=params_blob,
        artifact_path=context.artifact_subpath('models'),
        model_file="params.json",
    )

In [5]:
# nuclio: end-code

In [6]:
from mlrun import mlconf
import os
from os import path

# Target location for storing pipeline artifacts. The relative '../jobs' is
# turned into an absolute path, so it depends on the current working directory.
artifact_path = path.abspath('../jobs')
# MLRun DB path or API service URL: keep whatever value is already set and
# fall back to 'http://mlrun-api:8080' only when it is empty/unset.
mlconf.dbpath = mlconf.dbpath or 'http://mlrun-api:8080'

# Echo both settings so the cell output records what this session uses.
print(f'Artifacts path: {artifact_path}\nMLRun DB path: {mlconf.dbpath}')

Artifacts path: /User/nlp/components/jobs
MLRun DB path: http://mlrun-api:8080


In [7]:
from mlrun import code_to_function 
# create job function object from notebook code
# (only a name is passed; presumably the code comes from the current
# notebook's nuclio start-code/end-code section -- TODO confirm)
fn = code_to_function("hyper_param_clas_search")

# add metadata (for templates and reuse)
# The default handler is the name of the training function defined in the code cell.
fn.spec.default_handler = "train_clas_model"
fn.spec.description = "hyperparameter tuning for classification model"
fn.metadata.categories = ["hyperparameter", "ml"]
fn.metadata.labels = {"author": "nschenone"}
# Set the resource limits to a single 'nvidia.com/gpu'; this assignment
# replaces any "limits" entry that was already present.
fn.spec.resources["limits"] = {'nvidia.com/gpu' : 1}
# Save the function spec as YAML; being the last expression of the cell, the
# call's return value is also displayed as the cell output.
fn.export("../yaml/hyper_clas.yaml")

> 2020-08-13 19:01:00,305 [info] function spec saved to path: ../yaml/hyper_clas.yaml


<mlrun.runtimes.kubejob.KubejobRuntime at 0x7f33ba3d4048>