## Lighthouse Labs
### W07D2 Deployment of ML Models
Instructor: Socorro Dominguez  
April 13, 2021

Let's quickly build a simple model for predicting Boston house prices.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import metrics

# importing dataset from sklearn
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell presumably requires an older, pinned scikit-learn — confirm the
# environment before re-running.
from sklearn.datasets import load_boston
boston_data = load_boston()

In [2]:
# Build a DataFrame from the raw feature matrix; the columns keep pandas'
# default integer labels until names are assigned in a later cell.
data_ = pd.DataFrame(data=boston_data.data)

# Preview the first five rows
data_.head(n=5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33


In [3]:
# Adding names to our columns
# Assigning to .columns relabels data_ in place (no new frame is created),
# replacing the default integer labels with the dataset's feature names.
data_.columns = boston_data.feature_names
data_.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33


In [4]:
# Target feature of Boston Housing data
# Stored in data_ (in place) as the 'PRICE' column alongside the feature columns.
data_['PRICE'] = boston_data.target

In [5]:
# Separate the label from the predictors:
# y is the PRICE column, X is every other column of data_.
y = data_['PRICE']
X = data_.drop(columns=['PRICE'])

In [6]:
# Hold out 20% of the rows for testing; random_state pins the shuffle so the
# same rows land in each split on every run.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1
)

# Report the shape of each piece of the split
for label, part in (
    ("X training shape : ", X_train),
    ("X test shape : ", X_test),
    ("y training shape :", y_train),
    ("y test shape :", y_test),
):
    print(label, part.shape)

X training shape :  (404, 13)
X test shape :  (102, 13)
y training shape : (404,)
y test shape : (102,)


In [7]:
 # creating model
from sklearn.ensemble import RandomForestRegressor

# A random forest is stochastic (bootstrap sampling and feature sub-sampling),
# so fix its seed: without random_state the metrics below and the pickled model
# change on every Restart & Run All. The seed matches the one used for the split.
regressor = RandomForestRegressor(random_state=1)
regressor.fit(X_train, y_train)

RandomForestRegressor()

In [8]:
# Score the fitted model on the rows it was trained on
prediction = regressor.predict(X_train)

# MSE is computed once and reused for the RMSE line
train_mse = metrics.mean_squared_error(y_train, prediction)
train_scores = (
    ("r^2 : ", metrics.r2_score(y_train, prediction)),
    ("Mean Absolute Error: ", metrics.mean_absolute_error(y_train, prediction)),
    ("Mean Squared Error: ", train_mse),
    ("Root Mean Squared Error : ", np.sqrt(train_mse)),
)
for label, score in train_scores:
    print(label, score)

r^2 :  0.9825331877979274
Mean Absolute Error:  0.8050173267326721
Mean Squared Error:  1.4109932797029692
Root Mean Squared Error :  1.1878523812759603


In [9]:
# Model evaluation for testing data
prediction_test = regressor.predict(X_test)

# Compute the MSE once; the RMSE reported last is its square root.
test_mse = metrics.mean_squared_error(y_test, prediction_test)
print("r^2 : ", metrics.r2_score(y_test, prediction_test))
print("Mean Absolute Error : ", metrics.mean_absolute_error(y_test, prediction_test))
print("Mean Squared Error : ", test_mse)
# Label corrected: the value is sqrt(MSE), i.e. the root mean *squared* error,
# not a "root mean absolute error".
print("Root Mean Squared Error : ", np.sqrt(test_mse))

r^2 :  0.9104349718288314
Mean Absolute Error :  2.342882352941176
Mean Squared Error :  8.851477921568632
Root Mean Absolute Error :  2.9751433447093993


In [10]:
# Display the held-out PRICE values (the test part of the split)
y_test

307    28.2
343    23.9
47     16.6
67     22.0
362    20.8
       ... 
92     22.9
224    44.8
110    21.7
426    10.2
443    15.4
Name: PRICE, Length: 102, dtype: float64

In [11]:
# Display the model's predictions for the test split
prediction_test

array([30.282, 27.601, 20.187, 20.823, 20.012, 19.572, 27.758, 18.641,
       20.606, 23.338, 27.889, 30.764, 20.663, 20.176, 20.314, 23.66 ,
       12.042, 41.47 , 24.168, 14.179, 19.975, 16.766, 24.616, 23.735,
       25.956,  9.574, 14.76 , 19.846, 43.79 , 12.283, 26.597, 20.058,
       48.263, 16.185, 23.318, 20.618, 15.575, 32.998, 13.661, 19.806,
       24.382, 23.306, 25.997, 16.142, 16.342, 10.776, 47.223, 11.64 ,
       21.75 , 18.687, 23.753, 21.38 , 25.382, 20.662, 11.363, 23.766,
       11.855, 23.074, 18.624, 42.538, 13.728, 26.379, 12.67 , 14.732,
       17.881, 32.756, 41.944, 24.808, 21.54 , 20.524, 23.861,  6.845,
       18.48 , 21.395, 19.539, 20.577, 41.872, 24.376, 27.64 , 32.593,
       17.108, 20.857, 34.251, 11.497, 25.05 , 26.353, 14.708, 24.865,
       19.794, 17.529, 26.52 , 45.806, 16.449, 21.472, 15.086, 20.924,
       23.847, 23.54 , 42.663, 20.82 , 15.887, 15.811])

In [12]:
# saving the model
import pickle

# saving the columns
# The feature names are pickled in training order — presumably so the serving
# script can line incoming JSON fields up with the model's inputs (verify
# against app.py).
model_columns = list(X.columns)
with open('model_columns.pkl', 'wb') as file:
    pickle.dump(model_columns, file)

# Persist the fitted regressor. The context manager guarantees the handle is
# flushed and closed; passing a bare open(...) to pickle.dump leaves the file
# open until garbage collection.
with open('regressor.pkl', 'wb') as model_file:
    pickle.dump(regressor, model_file)

## Running our Work on an API
### Checking on Postman

1. Let's review how to write a script, then write the app script (`app.py`) in your favourite IDE.
2. From Terminal, navigate to the root folder of your app and run:
`python3 app.py`
3. A URL will be printed in the terminal; copy it.
4. Open Postman, paste the URL into the request URL field, and send the request. You should see the greeting.
5. Append `/predict` to the URL and paste one of the following examples into the request body as raw JSON:



Example 1

```
[
    {
    "CRIM" : 0.0063,
    "ZN" : 10.0,
    "INDUS" : 2.31,
    "CHAS" : 0.0,
    "NOX" : 0.0538,
    "RM" : 6.575,
    "AGE" : 65.2,
    "DIS" : 4.0900,
    "RAD" : 1.0,
    "TAX" : 296.0,
    "PTRATIO" : 15.3,
    "B" : 369.90,
    "LSTAT": 0
    }
]
```

Example 2
```
[
    {
    "CRIM" : 0.00,
    "ZN" : 0.00,
    "INDUS" : 0.00,
    "CHAS" : 0.00,
    "NOX" : 0.00,
    "RM" : 0.00,
    "AGE" : 0.00,
    "DIS" : 0.00,
    "RAD" : 0.00,
    "TAX" : 0.00,
    "PTRATIO" : 0.00,
    "B" : 0.00,
    "LSTAT": 0.00
    }
]
```