#### The purpose of this notebook is to demonstrate a notional ML application, indended for deployment via and MLOps pipeline

In [2]:
import pandas as pd
import numpy as np
import sklearn
import random as rand
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.model_selection import train_test_split
from pandas.io.json import json_normalize
from IPython.display import clear_output
import ipywidgets as widgets

In [3]:
data = pd.read_csv("data/kc_house_data.csv")

In [4]:
labels = data['price']
conv_dates = [1 if values == 2014 else 0 for values in data.date ]
data['date'] = conv_dates
train1 = data.drop(['id', 'price'],axis=1)

In [6]:
data.head()

Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,...,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15
0,7129300520,0,221900.0,3,1.0,1180,5650,1.0,0,0,...,7,1180,0,1955,0,98178,47.5112,-122.257,1340,5650
1,6414100192,0,538000.0,3,2.25,2570,7242,2.0,0,0,...,7,2170,400,1951,1991,98125,47.721,-122.319,1690,7639
2,5631500400,0,180000.0,2,1.0,770,10000,1.0,0,0,...,6,770,0,1933,0,98028,47.7379,-122.233,2720,8062
3,2487200875,0,604000.0,4,3.0,1960,5000,1.0,0,0,...,7,1050,910,1965,0,98136,47.5208,-122.393,1360,5000
4,1954400510,0,510000.0,3,2.0,1680,8080,1.0,0,0,...,8,1680,0,1987,0,98074,47.6168,-122.045,1800,7503


In [6]:
x_train, x_test, y_train, y_test = train_test_split(train1, labels, test_size=0.10, random_state=2)


In [7]:
col_imp = ["grade", "lat", "long", "sqft_living", "waterfront", "yr_built"]

In [8]:
clf = GradientBoostingRegressor(n_estimators=400, max_depth=5, min_samples_split=2,
          learning_rate=0.1, loss='ls')

In [9]:
clf.fit(train1[col_imp], labels)



GradientBoostingRegressor(loss='ls', max_depth=5, n_estimators=400)

In [32]:
def predict(dict_values, col_imp=col_imp, clf=clf):
    x = np.array([float(dict_values[col]) for col in col_imp])
    x = x.reshape(1,-1)
    y_pred = clf.predict(x)[0]
    y_pred_string = "$"+ str(round(y_pred,2))
    return y_pred_string

In [41]:
test = x_test[col_imp].iloc[rand.randint(1,2100)].T.replace('},{', '} {')
test

grade             6.0000
lat              47.5767
long           -122.4080
sqft_living    1220.0000
waterfront        0.0000
yr_built       1905.0000
Name: 8584, dtype: float64

In [42]:
predict(test)

  "X does not have valid feature names, but"


'$495919.86'

In [52]:
score_btn = widgets.Button(description='Predict Price', 
                           button_style='primary',
                           layout = widgets.Layout(width='auto', height='40px'))
btn_select_random = widgets.Button(description = 'Select Random House', 
                                   layout = widgets.Layout(width='auto', height='auto'))
out_select = widgets.Output(layout={'border': '1px solid black'})
out_score = widgets.Output(layout={'border': '1px solid black'})


widgets.Layout(width = 'auto')

def btn_select_random_eventhandler(obj):
    with out_select:
        global x_test_json
        x_test_json = x_test[col_imp].iloc[rand.randint(1,2100)].T.replace('},{', '} {')
        clear_output()
        print(x_test_json)
    with out_score:
        clear_output()
          
        
        
def btn_score_eventhandler(obj):
    with out_score:
        clear_output()
        print("Predicted Price: $",predict(x_test_json) )

In [53]:
display(widgets.VBox([btn_select_random,out_select,score_btn,out_score]))

btn_select_random.on_click(btn_select_random_eventhandler)
score_btn.on_click(btn_score_eventhandler)

VBox(children=(Button(description='Select Random House', layout=Layout(height='auto', width='auto'), style=But…

### Render notebook as web app using Viola