## Vectice Configuration

In [2]:
#Install Vectice Python library 
# In this notebook we will do code versioning using github, we also support gitlab
# and bitbucket: !pip install -q "vectice[github, gitlab, bitbucket]"
!pip install --q vectice[github]==22.3.5.1

[0m

In [3]:
#Verify if Vectice python library was installed
!pip3 show vectice

Name: vectice
Version: 22.3.5.1
Summary: Vectice Python library
Home-page: https://www.vectice.com
Author: Vectice Inc.
Author-email: sdk@vectice.com
License: Apache License 2.0
Location: /opt/conda/lib/python3.7/site-packages
Requires: python-dotenv, requests, urllib3
Required-by: 


In [4]:
import json
f = open('DSP_JDN.json',)
DSP_JDN_key = json.load(f)

In [5]:
#Import the required packages
from vectice import Experiment
from vectice.api.json import ModelType
from vectice.api.json import JobType
from vectice.api.json import JobArtifactType
from vectice.api.json import ModelVersionStatus
from vectice.api.json import VersionStrategy
import logging
import os
logging.basicConfig(level=logging.INFO)

# Specify the API endpoint for Vectice.
# You can specify your API endpoint here in the notebook, but we recommand you to add it to a .env file
os.environ['VECTICE_API_ENDPOINT']= "app.vectice.com"

# To use the Vectice Python library, you first need to authenticate your account using an API token.
# You can generate an API token from the Vectice UI, by going to the "API Tokens" section in the "My Profile" section
# which is located under your profile picture.
# You can specify your API Token here in the notebook, but we recommend you to add it to a .env file
os.environ['VECTICE_API_TOKEN'] = DSP_JDN_key['key']

# Add you project id. The project id can be found in the project settings page in the Vectice UI
project_id = 4734

## Setup environment for ML

In [6]:
import os
import pandas as pd
from matplotlib import pyplot as plt
%matplotlib inline

INFO:numexpr.utils:NumExpr defaulting to 2 threads.


In [7]:

import string
from math import sqrt

# Load scikit-learn packages
from sklearn.model_selection import train_test_split  # Model Selection
from sklearn.metrics import mean_absolute_error, mean_squared_error  # Model Evaluation
from sklearn.linear_model import LinearRegression  # Linear Regression
from sklearn.tree import DecisionTreeRegressor, plot_tree  # Decision Tree Regression
from sklearn.ensemble import RandomForestRegressor  # Random Forest Regression

## Load Cleaned Training Data and Take a Peek

In [10]:
# Once your file is loaded you can view your dataset in a Pandas dataframe.
df = pd.read_csv('data/train_cleaned_kc_house_data.csv')

In [11]:
df.head()

Unnamed: 0,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15
0,510000.0,4,3.0,3610,18948,2.0,0,0,3,10,3610,0,1993,0,98023,47.2911,-122.342,3568,18948
1,555000.0,3,2.0,2080,7020,1.0,0,0,4,7,1040,1040,1951,0,98115,47.6768,-122.285,1920,7000
2,469500.0,4,2.5,2090,7241,1.0,0,0,4,7,1140,950,2001,0,98034,47.726,-122.221,1510,7402
3,530000.0,3,2.5,3150,21893,2.0,0,0,3,9,3150,0,2006,0,98014,47.6455,-121.901,2280,21886
4,525000.0,3,2.0,1540,7800,1.0,0,0,3,8,1540,0,2004,0,98125,47.7041,-122.288,1510,7800


### 1. Create a Vectice job for the modeling stage

In [14]:
# We create our second experiment for modeling and specify the workspace and the project we will be working on
# Each experiment only contains one job. Each invokation of the job is called a run.
# autocode = True enables you to track your git changes for your code automatically every time you execute a run (see below).
experiment = Experiment(job="Modeling - train", project=project_id, job_type=JobType.TRAINING, auto_code=True)

INFO:Client:Successfully authenticated. You'll be working on Project: Predicting house prices in King County, Washington part of Workspace: .jnorman
INFO:Project:Job with id: 10767 successfully created.


In [None]:
#create a code checkpoint for this version of the notebook
input_code = experiment.add_code_version_uri(git_uri="https://github.com/stbiadmin/vectice-examples",
                                             entrypoint="demo/demo_dataprep.ipynb") 

## Let's benchmark first: Linear regression 

In [15]:
#set some run properties to help us make sense of what is happening
technique = ["Approach", "Linreg"]
citation = ["wiki","https://en.wikipedia.org/wiki/Linear_regression"]

In [16]:
# we declare the dataset versions and code to use as inputs of our run
experiment.start(inputs=[input_code, train_ds_version, test_ds_version],
                run_properties={technique[0]: technique[1], citation[0]:citation[1]})

# Linear regression model
lr_rg = LinearRegression()
lr_rg.fit(X_train, y_train)
lr_pred = lr_rg.predict(X_test)

# Evaluate Metrics
MAE = round(mean_absolute_error(lr_pred, y_test),3)
RMSE = round(sqrt(mean_squared_error(lr_pred, y_test)),3)

print("Root Mean Squared Error: ", RMSE)
print("Mean Absolute Error: ", MAE)

# Let's log the model we trained along with its metrics, as a new version 
# of the "Regressor" model in Vectice.
metrics = {"RMSE": RMSE, "MAE": MAE}
model_version = experiment.add_model_version(model="Regressor", algorithm="Linear Regression", metrics=metrics)

# We complete the current experiment's run 
## The created model version will be automatically attached as output of the run
experiment.complete()

NameError: name 'input_code' is not defined