In [None]:
import time
import subprocess
import sys
import threading
from queue import Queue, Empty

from functools import partial

import mlflow
import mlflow.sklearn

from cuml.metrics.accuracy import accuracy_score as cuml_acc
from cuml.preprocessing.model_selection import train_test_split as cuml_split
from cuml.ensemble import RandomForestClassifier as cu_RF

from sklearn.model_selection import train_test_split as sk_split
from sklearn.metrics import accuracy_score as sk_acc
from sklearn.ensemble import RandomForestClassifier as sk_RF
import pandas as pd

from hyperopt import fmin, tpe, hp, Trials, STATUS_OK

### Define some environment helpers

In [None]:
import os

# Short experiment name; it forms the last path component of the MLflow
# experiment path built below.
experiment = "rapids_mlflow"

# Settings that set_databricks_env() copies into os.environ. The bracketed
# values are placeholders that have to be filled in before running.
dbvars = dict(
    MLFLOW_EXPERIMENT_NAME=f"/Users/[USER_NAME]/{experiment}",
    MLFLOW_TRACKING_URI="databricks",
    DATABRICKS_HOST="https://[ACCOUNT_ID].cloud.databricks.com",
    DATABRICKS_TOKEN="[ACCESS TOKEN]",
)

def set_databricks_env():
    """Export the Databricks/MLflow settings and select the MLflow experiment.

    Every entry of the module-level ``dbvars`` mapping is copied into
    ``os.environ``; the experiment named by ``MLFLOW_EXPERIMENT_NAME`` is then
    activated through ``mlflow.set_experiment``.
    """
    os.environ.update(dbvars)

    # Reuse the exact path that was exported as MLFLOW_EXPERIMENT_NAME. The
    # previous hand-built f-string omitted the "/" between the user folder and
    # the experiment name, so it pointed at a different experiment than the
    # one advertised through the environment.
    mlflow.set_experiment(dbvars["MLFLOW_EXPERIMENT_NAME"])
     
# Apply the settings right away so the cells below run with them in place.
set_databricks_env()

### Define a data loading helper

In [None]:
def _local_path(fpath):
    """Return a plain filesystem path, unwrapping a ``file:`` URI if one is given."""
    if fpath.startswith('file:'):
        from urllib.parse import unquote, urlparse

        return unquote(urlparse(fpath).path)
    return fpath


def load_data(fpath, compute_type):
    """
    Load the dataset at ``fpath`` into a CPU (pandas) or GPU (cuDF) DataFrame.

    :param fpath: Location of the data. A name ending in ``.orc`` (any case) is read as ORC,
                  everything else as parquet. CPU reads only accept local locations, i.e. a
                  path starting with ``/`` or a ``file:`` URI.
    :param compute_type: String containing ``CPU`` or ``GPU`` (any case). ``CPU`` wins when both appear.
    :return: ``pandas.DataFrame`` for CPU, ``cudf.DataFrame`` for GPU.
    :raises NotImplementedError: CPU read requested for a non-local ``fpath``.
    :raises ValueError: ``compute_type`` names neither CPU nor GPU.
    """
    is_orc = fpath.lower().endswith('.orc')
    # Compare case-insensitively so 'gpu'/'cpu' behave like 'GPU'/'CPU'.
    target = compute_type.upper()

    if 'CPU' in target:
        # Reject remote locations first. This used to `raise NotImplemented(...)`,
        # which is a TypeError because NotImplemented is not an exception class.
        if not fpath.startswith(('/', 'file:')):
            raise NotImplementedError("CPU Remote read not implemented")

        # open() cannot handle a "file:" URI, so reduce it to a plain path.
        local_path = _local_path(fpath)

        if is_orc:
            try:
                from pyarrow import orc
            except Exception as error:
                print(f'! CPU import error : {error}')
                # Re-raise: continuing would only fail later with a confusing
                # unbound-name error.
                raise

            with open(local_path, mode='rb') as reader:
                return orc.ORCFile(reader).read().to_pandas()

        return pd.read_parquet(local_path)

    if 'GPU' in target:
        try:
            import cudf
        except Exception as error:
            print(f'! GPU import error : {error}')
            raise

        read = cudf.read_orc if is_orc else cudf.read_parquet
        return read(fpath)

    # Previously this fell through to `return df` and died with UnboundLocalError.
    raise ValueError(
        f"Unknown compute_type {compute_type!r}. Choose between [CPU, GPU].")

### Define our training routine, and Hyperopt entry points.

In [None]:
def _train(params, fpath, mode='GPU', log_to_mlflow=None, hyperopt=False):
    """
    Fit and score one random-forest classifier for a single hyperparameter set.

    :param params: Sequence ``(max_depth, max_features, n_estimators)``; the values are coerced
                   to ``int``, ``float`` and ``int`` respectively.
    :param fpath: Dataset location handed to ``load_data``. The ``ArrDelayBinary`` column is
                  used as the label, all remaining columns as features.
    :param mode: ``"GPU"`` (cuML) or ``"CPU"`` (scikit-learn), case-insensitive.
    :param log_to_mlflow: When truthy, log the hyperparameters and the accuracy to MLflow.
    :param hyperopt: When truthy, return a Hyperopt result dict instead of the fitted model.
    :return: The fitted model, or ``{'loss': -accuracy, 'status': STATUS_OK}`` when ``hyperopt`` is set.
    :raises RuntimeError: ``mode`` is neither CPU nor GPU.
    """
    max_depth, max_features, n_estimators = params
    max_depth, max_features, n_estimators = int(max_depth), float(max_features), int(n_estimators)

    # Validate the backend before loading any data, and hand load_data the
    # normalised spelling so that e.g. mode='gpu' is accepted end to end.
    backend = mode.upper()
    if backend not in ("GPU", "CPU"):
        raise RuntimeError("Unknown option. Choose between [CPU, GPU].")

    df = load_data(fpath, compute_type=backend)

    # Promote int32 feature columns to float32 before training.
    for col in df.columns:
        if (df[col].dtype == 'int32'):
            df[col] = df[col].astype('float32')

    X = df.drop(["ArrDelayBinary"], axis=1)
    y = df["ArrDelayBinary"].astype('int32')

    if backend == "GPU":
        X_train, X_test, y_train, y_test = cuml_split(X, y, test_size=0.2)
        mod = cu_RF(max_depth=max_depth, max_features=max_features, n_estimators=n_estimators)
        acc_scorer = cuml_acc
    else:
        X_train, X_test, y_train, y_test = sk_split(X, y, test_size=0.2)
        mod = sk_RF(max_depth=max_depth, max_features=max_features, n_estimators=n_estimators)
        acc_scorer = sk_acc

    mod.fit(X_train, y_train)
    preds = mod.predict(X_test)
    acc = acc_scorer(y_test, preds)

    if (log_to_mlflow):
        mlparams = {"max_depth": str(max_depth),
                    "max_features": str(max_features),
                    "n_estimators": str(n_estimators),
                    "mode": str(mode)}
        mlflow.log_params(mlparams)

        mlmetrics = {"accuracy": acc}
        mlflow.log_metrics(mlmetrics)

    if (not hyperopt):
        return mod

    # Hyperopt *minimises* the reported loss. Returning the raw accuracy made
    # the search prefer the worst model, so report the negated accuracy.
    return {'loss': -acc, 'status': STATUS_OK}


def train(params, fpath, mode='GPU', log_to_mlflow=None, hyperopt=False):
    """
    Objective entry point: delegate to ``_train``, optionally inside a nested MLflow run.
    This method will be passed to `hyperopt.fmin()`.

    :param params: hyperparameters, forwarded unchanged to ``_train``.
    :param fpath: dataset location, forwarded unchanged to ``_train``.
    :param mode: compute backend selector, forwarded unchanged to ``_train``.
    :param log_to_mlflow: when truthy, the call is wrapped in ``mlflow.start_run(nested=True)``.
    :param hyperopt: forwarded unchanged to ``_train``.
    :return: whatever ``_train`` returns.
    """
    # Guard clause: no MLflow logging requested, so no run needs to be opened.
    if not log_to_mlflow:
        return _train(params, fpath, mode, log_to_mlflow, hyperopt)

    with mlflow.start_run(nested=True):
        result = _train(params, fpath, mode, log_to_mlflow, hyperopt)
    return result

### Implement our MLflow training loop and save our best model to the tracking server.

In [None]:
# --- Run configuration (bracketed values are placeholders) -------------------
algorithm = 'tpe'
conda_env = 'https://[PATH_TO_CONDA_CONFIG]/conda.yaml'
fpath     = 'https://[PATH_TO_AIRLINE_DATASET]/airline_100000.orc'
mode      = 'GPU'

# Positional search space: the order here is the order in which `_train`
# unpacks its `params` argument.
search_space = [
    hp.uniform('max_depth', 5, 20),
    hp.uniform('max_features', 0., 1.0),
    hp.uniform('n_estimators', 150, 1000)
]
# Labels of the search space, in the same positional order.
param_names = ('max_depth', 'max_features', 'n_estimators')

trials = Trials()
algorithm = tpe.suggest if algorithm == 'tpe' else None
fn = partial(train, fpath=fpath, mode=mode, log_to_mlflow=True, hyperopt=True)
experid = 0

with mlflow.start_run():
    mlflow.set_tag("mlflow.runName", "RAPIDS-Hyperopt-Databricks")
    argmin = fmin(fn=fn,
                  space=search_space,
                  algo=algorithm,
                  max_evals=2,
                  trials=trials)

    print("===========")
    # Retrain once with the best hyperparameters to obtain the model object.
    fn = partial(train, fpath=fpath, mode=mode, log_to_mlflow=False, hyperopt=False)
    # `argmin` is a dict keyed by label. Look the values up by name instead of
    # relying on the dict's iteration order matching the positional order.
    final_model = fn(tuple(argmin[name] for name in param_names))

    # Fetch the conda environment definition, from a URL or a local file.
    conda_data = ""
    if (conda_env.startswith("http")):
        import requests

        resp = requests.get(conda_env)
        # Do not write an HTTP error page into conda.yaml.
        resp.raise_for_status()
        conda_data = str(resp.text)
    else:
        with open(conda_env, 'r') as reader:
            conda_data = reader.read()

    with open("conda.yaml", 'w') as writer:
        writer.write(conda_data)

    mlflow.sklearn.log_model(final_model,
                             artifact_path="rapids_mlflow_test",
                             registered_model_name="rapids_mlflow_test",
                             conda_env='conda.yaml')

    client = mlflow.tracking.MlflowClient()
    # Pick the highest version explicitly rather than trusting the first
    # search result to be the newest one.
    model_versions = client.search_model_versions("name='rapids_mlflow_test'")
    latest_model = dict(max(model_versions, key=lambda mv: int(mv.version)))
    latest_model_source = latest_model['source']

    # Two attempts, each preceded by a 10 s pause.
    for _attempt in range(2):
        # We need to wait for the model to be registered
        time.sleep(10)
        try:
            client.transition_model_version_stage(
                name="rapids_mlflow_test",
                version=latest_model['version'],
                stage="Production")
        except Exception as e:
            print(e, flush=True)
        else:
            print(f"Successfully registered model version {latest_model['version']}, as production.")
            break
    else:
        raise RuntimeError("Failed to update registered model status.")

### Helper to track our server output.

In [None]:
def queue_descriptor_output(out, queue):
    """
    Copy every line read from ``out`` onto ``queue`` and close ``out`` afterwards.

    :param out: binary stream; reading stops when ``readline()`` returns ``b''``.
    :param queue: object with a ``put`` method that receives each raw line.
    """
    try:
        for line in iter(out.readline, b''):
            queue.put(line)
    finally:
        # Close the stream even if reading or queueing fails.
        out.close()


def follow_subprocess(cmd, timeout=1000, line_timeout=60.00, poll_interval=2):
    """
    Run ``cmd`` and echo its combined stdout/stderr to ``sys.stdout``.

    The child's output is read on a daemon thread and relayed from a queue.
    Following stops when the child exits, when ``timeout`` seconds of polling
    have accumulated, or when no line has arrived for ``line_timeout`` seconds.
    A child that is still running at that point is killed.

    :param cmd: argument list passed to ``subprocess.Popen``.
    :param timeout: upper bound on the accumulated polling sleep time, in seconds.
    :param line_timeout: maximum silence between two output lines, in seconds.
    :param poll_interval: seconds to sleep between polls of the child (default 2).
    :raises KeyboardInterrupt: re-raised after killing the child.
    """
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    q = Queue()
    t = threading.Thread(target=queue_descriptor_output, args=(p.stdout, q))
    t.daemon = True
    t.start()

    def echo_queued_lines():
        """Write all queued lines to stdout; return the time of the last write, or None."""
        last_write = None
        while True:
            try:
                line = q.get(timeout=0.1)
            except Empty:
                return last_write
            # The queue holds raw bytes; decode before writing to the text
            # stream. Undecodable bytes are replaced rather than aborting.
            sys.stdout.write(line.decode(errors='replace'))
            last_write = time.perf_counter()

    elapsed = 0
    line_elapsed = 0
    last_line_time = time.perf_counter()
    try:
        while (p.poll() is None and elapsed < timeout and line_elapsed < line_timeout):
            time.sleep(poll_interval)
            elapsed += poll_interval

            last_write = echo_queued_lines()
            if last_write is not None:
                last_line_time = last_write
            line_elapsed = time.perf_counter() - last_line_time
    except KeyboardInterrupt:
        sys.stderr.write("\nCaught ctrl+c, killing subprocess ({})\n".format(' '.join(cmd)))
        p.kill()
        raise

    # Stop a child that outlived the loop and reap it so no zombie is left.
    if p.poll() is None:
        try:
            p.kill()
            p.wait()
        except OSError:
            # The child vanished between poll() and kill(); nothing left to do.
            pass

    t.join(2)

    ## Drain any remaining text. This path used to write the raw bytes
    ## straight to sys.stdout, which raises TypeError on a text stream.
    echo_queued_lines()

### Begin serving our trained model using MLflow
### Note: the serving thread will continue to run in this cell. Select the cell and click 'interrupt the kernel' to stop it.

In [None]:
import socket

port = 55754
host = 'localhost'
# Build the argument list directly. Splitting a formatted string would break
# the command apart if the model source ever contained whitespace.
command = ["mlflow", "models", "serve",
           "-m", str(latest_model_source),
           "-p", str(port),
           "-h", host]
# Infinite timeouts: keep following the server output until the process exits.
kwargs = { "cmd": command, "timeout":float('Inf'), "line_timeout": float('Inf') }

threading.Thread(target=follow_subprocess, kwargs=kwargs).start()

## Wait for service to come up.
# Poll the port instead of sleeping a fixed 10 s, because start-up time is not
# fixed. Give up after STARTUP_TIMEOUT_S and warn rather than fail, matching
# the best-effort nature of the original pause.
STARTUP_TIMEOUT_S = 120
startup_deadline = time.monotonic() + STARTUP_TIMEOUT_S
while time.monotonic() < startup_deadline:
    try:
        with socket.create_connection((host, port), timeout=1):
            break
    except OSError:
        time.sleep(1)
else:
    print(f"Warning: nothing is listening on {host}:{port} after "
          f"{STARTUP_TIMEOUT_S}s; requests against the model may fail.")

### Make requests against the deployed model

In [None]:
import json
import requests

headers = {
    "Content-Type": "application/json",
    "format": "pandas-split"
}

# A single example row in pandas "split" orientation: column names plus rows.
data = { 
    "columns": ["Year", "Month", "DayofMonth", "DayofWeek", "CRSDepTime", "CRSArrTime", "UniqueCarrier", "FlightNum", "ActualElapsedTime", "Origin", "Dest", "Distance", "Diverted"],
    "data": [[1987, 10, 1, 4, 1, 556, 0, 190, 247, 202, 162, 1846, 0]]
}

resp = requests.post(url=f"http://{host}:{port}/invocations", data=json.dumps(data), headers=headers, timeout=60)
# Fail loudly on an HTTP error. Comparing the raw body with "[0.0]" used to
# report every error response as "LATE".
resp.raise_for_status()

# Compare the parsed prediction numerically so that "[0]" and "[0.0]" are
# treated alike.
predictions = resp.json()
is_on_time = float(predictions[0]) == 0.0
print(f'Classification: {"ON-Time" if is_on_time else "LATE"}')