<p align="center"><img width="50%" src="https://aimodelsharecontent.s3.amazonaws.com/aimodshare_banner.jpg" /></p>


---




<p align="center"><h1 align="center">Boston Housing Prices Regression Tutorial</h1> <h3 align="center">(Prepare to deploy model and preprocessor to REST API/Web Dashboard in four easy steps...)</h3></p>
<p align="center"><img width="80%" src="https://aimodelsharecontent.s3.amazonaws.com/ModelandPreprocessorObjectPreparation.jpeg" /></p>


---



## **(1) Preprocessor Function & Setup**




In [None]:
! pip install scikit-learn --upgrade # Load newest version of sklearn.

In [1]:
import pandas as pd
from sklearn import datasets, linear_model

# Load the Boston Housing Prices dataset and split it into a labelled feature
# frame (X) and the target vector (y).
# NOTE(review): `load_boston` is only available in scikit-learn < 1.2 — confirm the installed version.
boston = datasets.load_boston()
X = pd.DataFrame(boston.data, columns=boston.feature_names)
y = boston.target  # Price, i.e. the median value of a house to be predicted.

# Peek at the first two rows only.
X.head(2)

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14


In [2]:
# Hold out 20% of the rows as a test set; the fixed seed keeps the split repeatable.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=1987,
)

# Show the training shapes and the feature names, one item per line.
print(X_train.shape, y_train.shape, X.columns.tolist(), sep="\n")

(404, 13)
(404,)
['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT']


In [3]:
import pickle

from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer, make_column_transformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer

# Numeric columns: replace missing values with the column median, then standardize.
numeric_features = ['CRIM', 'ZN', 'INDUS', 'NOX', 'RM', 'AGE', 'DIS',  'TAX', 'PTRATIO', 'B', 'LSTAT']
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())])

# Categorical columns: replace missing values with the modal value, then one-hot encode.
# `handle_unknown='ignore'` keeps transform() from raising on categories not seen during fit.
categorical_features = ['CHAS', 'RAD']
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))])

# Combine both pipelines column-wise. The object is deliberately NOT called
# `preprocessor`: a later cell defines a function with that name, and sharing the
# name meant that re-running this cell silently replaced the function with a
# (non-callable) ColumnTransformer.
column_transformer = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)])

# Fit on the training rows only, so no test-set statistics leak into the transform.
prediction_input_preprocessor = column_transformer.fit(X_train)

# Persist the fitted transformer; the context manager guarantees the file is
# flushed and closed (the bare open() used before never closed its handle).
with open("preprocessor.pkl", "wb") as f:
    pickle.dump(prediction_input_preprocessor, f)

### **Write a Preprocessor Function**

In [4]:
# This is our preprocessor function to save using ai.export_preprocessor()...
def preprocessor(data):
    """Apply the fitted preprocessing pipeline to raw input rows.

    Args:
        data: Input handed unchanged to the ``transform`` method of the
            module-level ``prediction_input_preprocessor``.

    Returns:
        Whatever ``prediction_input_preprocessor.transform(data)`` returns.
    """
    return prediction_input_preprocessor.transform(data)

## **(2) Build Your Model Using `sklearn`**

In [5]:
# Ridge...
from sklearn.linear_model import Ridge
import numpy as np

# Transform the training rows once and reuse the result for fitting and scoring.
X_train_prepared = preprocessor(X_train)

# Ridge regression with a light penalty (alpha=0.1).
model = Ridge(alpha=0.1)
ridge01 = model.fit(X_train_prepared, y_train)

# Report the training-set score and how many coefficients are non-zero.
train_score = ridge01.score(X_train_prepared, y_train)
n_features_used = np.count_nonzero(ridge01.coef_)
print("Training Set Score: {:.2f}".format(train_score))
print("Number of Features Used: {}".format(n_features_used))

Training Set Score: 0.76
Number of Features Used: 22


In [6]:
# Preprocess the held-out rows, then predict their target values with the fitted model.
X_test_prepared = preprocessor(X_test)
y_pred = model.predict(X_test_prepared)

y_pred

array([26.25463487, 23.99629382, 13.69158579, 26.52789063, 19.481455  ,
       27.061102  , 19.84509809, 17.06704589, 37.7724682 , 18.3741863 ,
       21.720748  , 23.0831141 , 19.44467694, 13.40660907, 18.96488121,
       22.84565596, 12.67247963, 25.75084874, 19.44797971, 25.77103517,
       19.40479138, 32.96789011, 16.90885073, 23.53019707, 30.63033334,
       29.09652461, 19.50024896, 23.6724921 , 18.30087037, 17.47380127,
       26.7869328 , 17.59872458, 18.79516165, 30.71429253, 19.48999806,
       17.69167317, 15.24538483, 23.80891891, 11.09866049, 16.93745326,
       28.35236255, 23.1522718 , 23.83119052, 17.0438004 , 24.07431787,
       18.00364799, 24.97364705, 26.34816236, 26.01149912, 14.32372355,
       26.16256073, 22.81466219, 20.51861221, 19.7251867 , 20.93974742,
       13.11764962, 14.61425139, 39.75610896, 18.87160765, 24.47477101,
       17.77435009, 24.680246  , 35.7646022 , 38.31317439, 28.55902541,
       26.55579488, 27.08438021, 19.03856845, 20.36184636, 19.30

In [7]:
from sklearn.metrics import mean_squared_error

# Take the square root explicitly instead of passing `squared=False`: that keyword
# was deprecated in scikit-learn 1.4 and removed in 1.6, while this form works on
# every release and yields the same value.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print("RMSE: {:.2f}".format(rmse))

RMSE: 4.66


## **(3) Save Preprocessor**

In [None]:
# ! pip3 install aimodelshare

In [8]:
def export_preprocessor(preprocessor_function, filepath):
    """Serialize a preprocessor function to a file with dill.

    Args:
        preprocessor_function: Object to serialize; passed straight to ``dill.dump``.
        filepath: Destination path. It is opened in binary write mode, so an
            existing file at that path is replaced.
    """
    import dill  # local import: dill is only needed when an export is performed

    with open(filepath, "wb") as handle:
        dill.dump(preprocessor_function, handle)

# Once `aimodelshare` can be deployed, its exporter is meant to be used in lieu of the call below:
# import aimodelshare as ai
# ai.export_preprocessor(preprocessor, "preprocessor.pkl")

# Write the preprocessor function to disk with the local helper.
# Note: this replaces the "preprocessor.pkl" written earlier with pickle.
export_preprocessor(preprocessor_function=preprocessor, filepath="preprocessor.pkl")

## **(4) Save `sklearn` Model to ONNX File Format**

In [None]:
! pip3 install skl2onnx

In [10]:
# Convert into ONNX format...

from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType

# Read the input width from the fitted model instead of hard-coding 22: the number
# of one-hot columns depends on which categories appear in the training split, so
# the literal would go stale if the split or the feature lists change.
n_model_inputs = model.coef_.shape[-1]

# A single float32 input named 'float_input' with a variable batch dimension (None).
initial_type = [('float_input', FloatTensorType([None, n_model_inputs]))]
onx = convert_sklearn(model, initial_types=initial_type)

# Save model to local .onnx file...
with open("my_model.onnx", "wb") as f:
    f.write(onx.SerializeToString())

In [None]:
# Test the ONNX Model. (Optional.)

In [None]:
! pip3 install onnxruntime

In [12]:
# Test...

import onnxruntime as rt

# Load the model from the same relative path the conversion cell wrote it to.
# The previous absolute '/content/...' path only resolved when the working
# directory happened to be /content.
sess = rt.InferenceSession("my_model.onnx")  # ONNX model pathway.

# Names of the model's first input and first output, needed to call sess.run().
input_name = sess.get_inputs()[0].name
output_name = sess.get_outputs()[0].name

In [14]:
# Run the ONNX model on the preprocessed test rows; the input is cast to float32
# because the model was converted with a FloatTensorType input.
sess_pred = sess.run([output_name], {input_name: preprocessor(X_test).astype(np.float32)})  # dtype=float32.

# Flatten to one prediction per row. reshape(-1) infers the length, so this keeps
# working if the test-set size changes (the previous literal 102 did not).
onx_pred = np.asarray(sess_pred, dtype=np.float32).reshape(-1)  # Align shape.

In [16]:
# Take the square root explicitly instead of passing `squared=False`: that keyword
# was deprecated in scikit-learn 1.4 and removed in 1.6, while this form works on
# every release. Despite its name, `onnx_mse` holds the *root* mean squared error.
onnx_mse = np.sqrt(mean_squared_error(y_test, onx_pred))

print('*** Onnx RMSE: {:.2f}\n'.format(onnx_mse)) # Cf. pre-ONNX metric.

*** Onnx RMSE: 4.66

