## Lighthouse Labs
### W07D2 Deployment of ML Models
Instructor: Socorro Dominguez  
June 08, 2021

Let's create a super fast model for predicting Boston's house pricing.

In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import metrics

#importing dataset from sklearn
from sklearn.datasets import load_boston
boston_data = load_boston()

In [14]:
# initializing dataset
data_ = pd.DataFrame(boston_data.data)

### Top five rows of dataset
data_.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33


In [15]:
# Adding names to our columns
data_.columns = boston_data.feature_names
data_.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33


In [16]:
# Target feature of Boston Housing data
data_['PRICE'] = boston_data.target

In [17]:
# creating feature and target variable 
X = data_.drop(['PRICE'], axis=1)
y = data_['PRICE']

In [19]:
X.head(1)

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98


In [20]:
# splitting into training and testing set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2, random_state=1)
print("X training shape : ", X_train.shape)
print("X test shape : ", X_test.shape )
print("y training shape :", y_train.shape )
print("y test shape :", y_test.shape )

X training shape :  (404, 13)
X test shape :  (102, 13)
y training shape : (404,)
y test shape : (102,)


In [21]:
 # creating model
from sklearn.ensemble import RandomForestRegressor
regressor = RandomForestRegressor()
regressor.fit(X_train, y_train)

RandomForestRegressor()

In [22]:
# Model evaluation for training data
prediction = regressor.predict(X_train)
print("r^2 : ", metrics.r2_score(y_train, prediction))
print("Mean Absolute Error: ", metrics.mean_absolute_error(y_train, prediction))
print("Mean Squared Error: ", metrics.mean_squared_error(y_train, prediction))
print("Root Mean Squared Error : ", np.sqrt(metrics.mean_squared_error(y_train, prediction)))

r^2 :  0.9826702669109675
Mean Absolute Error:  0.8056262376237603
Mean Squared Error:  1.399919839108909
Root Mean Squared Error :  1.1831820819759353


In [9]:
# Model evaluation for testing data
prediction_test = regressor.predict(X_test)
print("r^2 : ", metrics.r2_score(y_test, prediction_test))
print("Mean Absolute Error : ", metrics.mean_absolute_error(y_test, prediction_test))
print("Mean Squared Error : ", metrics.mean_squared_error(y_test, prediction_test))
print("Root Mean Absolute Error : ", np.sqrt(metrics.mean_squared_error(y_test, prediction_test)))

r^2 :  0.9077066477425952
Mean Absolute Error :  2.350656862745097
Mean Squared Error :  9.12111106862745
Root Mean Absolute Error :  3.0201177242994106


In [10]:
y_test

307    28.2
343    23.9
47     16.6
67     22.0
362    20.8
       ... 
92     22.9
224    44.8
110    21.7
426    10.2
443    15.4
Name: PRICE, Length: 102, dtype: float64

In [11]:
prediction_test

array([29.477, 27.605, 19.205, 20.461, 19.442, 20.114, 28.121, 19.146,
       20.542, 23.076, 29.224, 31.246, 20.789, 20.361, 20.503, 24.455,
       11.901, 41.285, 24.49 , 14.799, 20.417, 16.244, 24.706, 23.877,
       25.865,  9.244, 14.853, 19.853, 42.973, 12.396, 26.933, 19.283,
       47.681, 15.965, 23.307, 20.737, 15.856, 33.04 , 13.179, 19.684,
       24.339, 23.028, 25.524, 16.381, 15.103, 10.594, 47.581, 11.431,
       22.148, 18.6  , 23.249, 21.522, 24.986, 21.196, 10.924, 23.663,
       11.937, 23.109, 18.858, 42.519, 14.522, 26.905, 13.126, 15.201,
       17.413, 32.954, 42.104, 25.45 , 21.596, 20.458, 23.974,  6.919,
       18.301, 21.153, 19.361, 20.742, 43.135, 24.481, 28.781, 33.154,
       17.285, 20.598, 34.035, 11.904, 25.191, 25.637, 14.882, 24.534,
       19.994, 17.161, 27.881, 45.377, 16.484, 21.229, 15.964, 20.807,
       24.798, 23.669, 42.386, 20.879, 15.406, 15.927])

In [12]:
# saving the model
import pickle

# saving the columns
model_columns = list(X.columns)
with open('model_columns.pkl','wb') as file:
    pickle.dump(model_columns, file)
    
    
pickle.dump(regressor, open('regressor.pkl', 'wb'))

## Running our Work on an API
### Checking on Postman

1. Let's review how to do a script and do the script in your favourite IDE.
2. From Terminal, navigate to the root folder of your app and run:
`python3 app.py`
3. An HTML link will pop up, copy it.
4. Open Postman. In the URL section, paste the link. You will see the greeting.
5. Append to the link `/predict` and in the JSON copy the following Examples:



Example 1

```
[
    {
    "CRIM" : 0.0063,
    "ZN" : 10.0,
    "INDUS" : 2.31,
    "CHAS" : 0.0,
    "NOX" : 0.0538,
    "RM" : 6.575,
    "AGE" : 65.2,
    "DIS" : 4.0900,
    "RAD" : 1.0,
    "TAX" : 296.0,
    "PTRATIO" : 15.3,
    "B" : 369.90,
    "LSTAT": 0
    }
]
```

Example 2
```
[
    {
    "CRIM" : 0.00,
    "ZN" : 0.00,
    "INDUS" : 0.00,
    "CHAS" : 0.00,
    "NOX" : 0.00,
    "RM" : 0.00,
    "AGE" : 0.00,
    "DIS" : 0.00,
    "RAD" : 0.00,
    "TAX" : 0.00,
    "PTRATIO" : 0.00,
    "B" : 0.00,
    "LSTAT": 0.00
    }
]
```