In [70]:
import os
os.environ['VERTA_DEV_KEY'] = '3f679a2a-140f-49a2-93f7-7814c93276f8'
os.environ['VERTA_EMAIL'] = 'conrado@verta.ai'

# Census Demo

In [71]:
import warnings
from sklearn.exceptions import ConvergenceWarning
warnings.filterwarnings("ignore", category=ConvergenceWarning)

import itertools
import time
from multiprocessing import Pool

import numpy as np
import pandas as pd

from sklearn import model_selection
from sklearn import linear_model
from sklearn import metrics
import sklearn
import six
import verta
from verta.utils import ModelAPI
from verta._demo_utils import DeployedModel

In [72]:
df_train = pd.read_csv("census-train.csv")
X_train = df_train.iloc[:,:-1]
y_train = df_train.iloc[:, -1]
df_test = pd.read_csv("census-test.csv")
X_test = df_test.iloc[:,:-1].values
hyperparam_candidates = {
    'C': [1e-4, 1e-6],
    'solver': ['lbfgs'],
    'max_iter': [28, 32],
}
hyperparam_sets = [dict(zip(hyperparam_candidates.keys(), values))
                   for values
                   in itertools.product(*hyperparam_candidates.values())]

# Log Workflow

## Connect to the platform

In [82]:
from verta import Client
client = Client("http://3.226.122.162:30080/")

set email from environment
set developer key from environment
connection successfully established


## Version a dataset

In [74]:
dataset = client.set_dataset(name="Census dataset - train", workspace="Demos")
dataset.create_version('small-census-train.csv')
train_version = dataset.create_version('census-train.csv')

dataset = client.set_dataset(name="Census dataset - test", workspace="Demos")
dataset.create_version('small-census-test.csv')
test_version = dataset.create_version('census-test.csv')

created new Dataset: Census dataset - train in workspace: Demos
created new DatasetVersion: 1
created new DatasetVersion: 2
created new Dataset: Census dataset - test in workspace: Demos
created new DatasetVersion: 1
created new DatasetVersion: 2


## Version a model

In [84]:
proj = client.set_project("Census - Demo", workspace="Demos")
expt = client.set_experiment("Linear regression")

set existing Project: Census - Demo from workspace: Demos
set existing Experiment: Linear regression


### Log the 4 components of a model

In [86]:
runs = []
for i, hyperparams in enumerate(hyperparam_sets):
    run = client.set_experiment_run("Iteration %d" % i)
    runs.append(run)
    
    # Config
    run.log_hyperparameters(hyperparams)
    # Code
    run.log_code()
    # Environment
    run.log_requirements(['scikit-learn'])
    # Dataset
    run.log_dataset_version('train', train_version)
    run.log_dataset_version('test', test_version)

set existing ExperimentRun: Iteration 0
set existing ExperimentRun: Iteration 1
set existing ExperimentRun: Iteration 2
set existing ExperimentRun: Iteration 3


In [87]:
print(run)

name: Iteration 3
url: http://3.226.122.162:30080/Demos/projects/eb3bcabb-df4b-42f4-bf97-c433c3113c17/exp-runs/608002ec-26ba-4bc8-9dcc-c57bf9fc335c
description: 
tags: []
attributes: {}
id: 608002ec-26ba-4bc8-9dcc-c57bf9fc335c
experiment id: f67ea01e-c88b-4dd5-8ccd-ba8b83071db1
project id: eb3bcabb-df4b-42f4-bf97-c433c3113c17
hyperparameters: {'C': 1e-06, 'solver': 'lbfgs', 'max_iter': 32}
observations: {}
metrics: {'val_acc': 0.7871752349364289}
artifact keys: ['requirements.txt', 'custom_modules', 'model.pkl', 'model_api.json', 'train_data']


### Train and log the results

In [78]:
(X_val_train, X_val_test,
 y_val_train, y_val_test) = model_selection.train_test_split(X_train, y_train,
                                                             test_size=0.2,
                                                             shuffle=True)

model = linear_model.LogisticRegression(**hyperparams)
model.fit(X_train, y_train)

val_acc = model.score(X_val_test, y_val_test)
run.log_metric("val_acc", val_acc)

### Log information for deployment

In [79]:
model_api = ModelAPI(X_train, model.predict(X_train))
run.log_model(model, model_api=model_api)
run.log_training_data(X_train, y_train)

upload complete (custom_modules.zip)
upload complete (model.pkl)
upload complete (model_api.json)
upload complete (train_data.csv)


## Deploy a model

In [88]:
deployed_model = run.get_deployed_model()
print(deployed_model)

<DeployedModel at http://3.226.122.162:30080/api/v1/predict/608002ec-26ba-4bc8-9dcc-c57bf9fc335c>


In [89]:
for x in itertools.cycle(X_test):
    print(deployed_model.predict([x.tolist()]))
    time.sleep(0.1)

[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[1]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[1]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[1]
[1]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[1]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[1]
[1]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[1]
[0]
[0]
[0]
[0]
[0]
[0]


KeyboardInterrupt: 