## Load in data

In [None]:
import pandas as pd
import numpy as np

# Taken from https://gist.github.com/dcrankshaw/f851ea2fee582f544288d36ae97ef86d
def load_digits(digits_location, digits_filename):
    """Read a header-less, comma-separated digits file and split labels from features.

    Args:
        digits_location: Directory that contains the data file.
        digits_filename: Name of the data file inside ``digits_location``.

    Returns:
        A tuple ``(X, y)`` where ``y`` is the first column of the file and
        ``X`` holds all remaining columns, both as NumPy arrays.
    """
    digits_path = digits_location + "/" + digits_filename
    # print() with a pre-formatted string renders the same on Python 2 and 3;
    # the bare print statement is a SyntaxError on Python 3.
    print("Source file: {}".format(digits_path))
    df = pd.read_csv(digits_path, sep=",", header=None)
    data = df.values
    print("Number of image files: {}".format(len(data)))
    y = data[:, 0]
    X = data[:, 1:]
    return (X, y)

def normalize_digits(X):
    """Standardize ``X`` along axis 0 using statistics computed from ``X`` itself.

    Each column has its mean subtracted and is divided by its standard
    deviation. Columns whose variance is not positive (for example constant
    columns) are only mean-centered: their divisor is 1 so nothing is divided
    by zero.

    Args:
        X: Array-like of numeric values; statistics are taken along axis 0.

    Returns:
        A NumPy array with the same shape as ``X`` holding the normalized values.
    """
    print("Normalizing data")
    X = np.asarray(X)
    mu = np.mean(X, 0)
    sigma = np.var(X, 0)
    # Vectorized replacement for the per-column Python loop; unlike iterating
    # over sigma, this also works when sigma is 0-d (i.e. X is one-dimensional).
    scale = np.where(sigma > 0, np.sqrt(sigma), 1.)
    return (X - mu) / scale

Provide the location of the MNIST data. It can be downloaded here: https://www.dropbox.com/s/n3jqkdg5ukx7ku5/mnist.zip?dl=0

In [None]:
# Set this to the path of the folder enclosing the .data files.
digits_location = "mnist_data"
# File names of the training and test sets inside digits_location.
train_data_fname = "train-mnist-dense-with-labels.data"
test_data_fname = "test-mnist-dense-with-labels.data"

Load in the training and test data

In [None]:
# Each call returns (features, labels): load_digits takes the first column of the
# file as the label and every remaining column as features.
train_x, train_y = load_digits(digits_location, train_data_fname)
test_x, test_y = load_digits(digits_location, test_data_fname)

## Select subset of data to train on

In [None]:
import random

NUM_DATAPOINTS = 1000
# Fixed seed so that "Restart & Run All" selects the same training subset every time.
SUBSET_SEED = 42
n, d = train_x.shape

# Draw NUM_DATAPOINTS distinct row indices (capped at n) from a private generator,
# instead of shuffling all n indices with the global RNG and keeping a prefix.
subset_rng = random.Random(SUBSET_SEED)
indices = np.array(subset_rng.sample(range(n), min(NUM_DATAPOINTS, n)), dtype=int)

# Apply the same row selection to features and labels so they stay aligned.
train_x_short = train_x[indices, :]
train_y_short = train_y[indices]

## Pre-process data and train model

In [None]:
from sklearn import linear_model as lm
def train_sklearn_model(m, train_x, train_y):
    """Fit ``m`` on the given data and return that same object.

    Args:
        m: Object exposing a ``fit(train_x, train_y)`` method.
        train_x: Training features, passed to ``fit`` unchanged.
        train_y: Training labels, passed to ``fit`` unchanged.

    Returns:
        ``m`` itself, after ``fit`` has been called on it.
    """
    m.fit(train_x, train_y)
    return m
# Standardize the sampled training rows, then fit a LogisticRegression (constructed
# with its default arguments) on the standardized features.
normalized_training_x_short = normalize_digits(train_x_short)
lr_model = train_sklearn_model(lm.LogisticRegression(), normalized_training_x_short, train_y_short)

## Define prediction function

In [None]:
# Freeze the normalization statistics of the training subset so that inputs seen at
# prediction time are scaled exactly like the data lr_model was fitted on.
# Re-deriving the statistics from each incoming batch would turn any single-row
# request into all zeros (the row equals its own mean and every variance is 0),
# which makes the prediction independent of the input.
train_feature_mean = np.mean(train_x_short, 0)
train_feature_var = np.var(train_x_short, 0)
# Columns with non-positive variance are only mean-centered (divisor 1).
train_feature_scale = np.where(train_feature_var > 0, np.sqrt(train_feature_var), 1.)

def predict(data):
    """Predict labels for ``data`` with ``lr_model``.

    Args:
        data: Array-like of feature rows with the same number of columns as
            the training data.

    Returns:
        Whatever ``lr_model.predict`` returns for the normalized rows.
    """
    normalized_data = (np.asarray(data) - train_feature_mean) / train_feature_scale
    preds = lr_model.predict(normalized_data)
    return preds

In [None]:
def get_prediction_func_score(prediction_func, test_x, test_y):
    """Measure how often ``prediction_func`` reproduces the expected labels.

    Args:
        prediction_func: Callable that takes ``test_x`` and returns predictions.
        test_x: Two-dimensional array of inputs; its first dimension is the
            number of examples.
        test_y: Expected labels, compared element-wise with the predictions.

    Returns:
        A tuple ``(accuracy, preds)``: the fraction of predictions equal to
        their label, and the predictions as returned by ``prediction_func``.
    """
    num_examples, _ = test_x.shape
    preds = prediction_func(test_x)
    # A prediction matches its label exactly when their difference is zero.
    num_correct = sum(1 for delta in np.subtract(preds, test_y) if delta == 0)
    return float(num_correct) / num_examples, preds

In [None]:
# Score predict() on the full test set; accuracy is a fraction in [0, 1], so it is
# multiplied by 100 for the percentage shown in the message.
accuracy, preds = get_prediction_func_score(predict, test_x, test_y)
print("Prediction function has a {}% accuracy".format(accuracy * 100))

## Deploy the function to Clipper

In [None]:
import sys
import os
# Make the ../../management/ directory (resolved against the current working
# directory) importable; presumably that is where clipper_manager lives. This must
# run before the import below.
sys.path.append(os.path.abspath('../../management/'))
import clipper_manager
# Change the username if necessary
user = ""
# Set the path to the SSH key
key = ""
# Set the SSH host
host = "localhost"
clipper = clipper_manager.Clipper(host, user, key)

# NOTE(review): start() presumably brings up Clipper on `host`; its exact behavior is
# defined in clipper_manager and is not visible in this notebook.
clipper.start()

In [None]:
# Name under which the application is registered; it is also used in the request URL
# built by get_prediction() and passed to get_app_info().
app_name = "mnist_predict_func_demo"
# The model name/version listed here must match what is passed to
# deploy_predict_function() in the next cell.
candidate_models = [
    {"model_name": "predict_func_model", "model_version": 1},
]

# NOTE(review): the positional arguments "ints" and "EXP4" presumably name the input
# type and the selection policy, and slo_micros looks like a latency objective in
# microseconds (20000 would be 20 ms) — confirm against clipper_manager.
clipper.register_application(
    app_name,
    candidate_models,
    "ints",
    "EXP4",
    slo_micros=20000)

In [None]:
# Deploy the predict() function under the same model name and version that were
# listed in candidate_models when the application was registered.
# NOTE(review): the two list arguments and "ints" presumably are labels, a
# second list of names, and the input type — confirm against
# clipper_manager.deploy_predict_function.
model_added = clipper.deploy_predict_function(
    "predict_func_model",
    1,
    predict,
    ["mnist", "predict_func"],
    ["noop"],
    "ints",
    num_containers=1
)

print("Model deploy successful? {success}".format(success=model_added))

In [None]:
# Query Clipper for the application registered under app_name; as the last expression
# in the cell, the returned value is displayed as the cell output.
clipper.get_app_info(app_name)

In [None]:
import json
import requests
def get_prediction(host, app, x, uid=0):
    """POST one input to an application's predict endpoint and return the reply body.

    The request goes to ``http://<host>:1337/<app>/predict`` as a JSON object
    with the keys ``uid`` and ``input``.

    Args:
        host: Host name or address of the server to query.
        app: Application name used in the URL path.
        x: Iterable of numbers (for example a one-dimensional NumPy array).
        uid: Value sent in the ``uid`` field of the request; defaults to 0.

    Returns:
        The response body as text.
    """
    url = "http://%s:1337/%s/predict" % (host, app)
    # ndarray.tolist() yields native Python numbers. list(ndarray) would keep NumPy
    # scalars such as np.int64, which json.dumps cannot serialize on Python 3.
    input_values = x.tolist() if hasattr(x, "tolist") else list(x)
    req_json = json.dumps({'uid': uid, 'input': input_values})
    headers = {'Content-type': 'application/json'}
    r = requests.post(url, headers=headers, data=req_json)
    return r.text

In [None]:
# Send a single test row to the deployed application as a smoke test. The row is
# cast to int64 before get_prediction() serializes it as the JSON "input" field.
# Index 5020 has no special meaning visible in this notebook.
test_datapoint = test_x[5020]
get_prediction(host, app_name, test_datapoint.astype(np.int64))

In [None]:
import seaborn as sns
import cifar_utils
# sns.set_style("whitegrid")
# sns.despine()
import matplotlib as mpl
%matplotlib notebook
# NOTE(review): run_serving_workload comes from cifar_utils although this demo uses
# the MNIST test set; what it sends and plots is defined there, not in this notebook.
cifar_utils.run_serving_workload(host, app_name, test_x, test_y)