# [3 Ways to Deploy Machine Learning Models in Production](<https://towardsdatascience.com/3-ways-to-deploy-machine-learning-models-in-production-cdba15b00e>)

## Deploy as web services with three steps

### Step one: create model

In [14]:
# import all libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns
import re

import sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import scale
from sklearn.feature_selection import RFE
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import make_pipeline

import plotly
import plotly.express as px

import warnings # supress warnings
warnings.filterwarnings('ignore')

In [15]:
# import Housing.csv
housing = pd.read_csv('./data/Housing.csv')

In [16]:
# data preparation

# list of all the "yes-no" binary categorical variables
# we'll map yes to 1 and no to 0
binary_vars_list =  ['mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'prefarea']

# defining the map function
def binary_map(x):
    return x.map({'yes': 1, "no": 0})

# applying the function to the housing variables list
housing[binary_vars_list] = housing[binary_vars_list].apply(binary_map)
housing.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,1,0,0,0,1,2,1,furnished
1,12250000,8960,4,4,4,1,0,0,0,1,3,0,furnished
2,12250000,9960,3,2,2,1,0,1,0,0,2,1,semi-furnished
3,12215000,7500,4,2,2,1,0,1,0,1,3,1,furnished
4,11410000,7420,4,1,2,1,1,1,0,1,2,0,furnished


In [17]:
# 'dummy' variables
# get dummy variables for 'furnishingstatus' 
# also, drop the first column of the resulting df (since n-1 dummy vars suffice)
status = pd.get_dummies(housing['furnishingstatus'], drop_first = True)
# status.head()
# concat the dummy variable df with the main df
housing = pd.concat([housing, status], axis = 1)
# housing.head()
# 'furnishingstatus' since we alreday have the dummy vars
housing.drop(['furnishingstatus'], axis = 1, inplace = True)
housing.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,semi-furnished,unfurnished
0,13300000,7420,4,2,3,1,0,0,0,1,2,1,0,0
1,12250000,8960,4,4,4,1,0,0,0,1,3,0,0,0
2,12250000,9960,3,2,2,1,0,1,0,0,2,1,1,0
3,12215000,7500,4,2,2,1,0,1,0,1,3,1,0,0
4,11410000,7420,4,1,2,1,1,1,0,1,2,0,0,0


In [18]:
# train-test 70-30 split
df_train, df_test = train_test_split(housing, 
                                     train_size = 0.7, 
                                     test_size = 0.3, 
                                     random_state = 100)

# rescale the features
scaler = MinMaxScaler()

# apply scaler() to all the numeric columns 
numeric_vars = ['area', 'bedrooms', 'bathrooms', 'stories', 'parking','price']
df_train[numeric_vars] = scaler.fit_transform(df_train[numeric_vars])
df_train.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,semi-furnished,unfurnished
359,0.169697,0.155227,0.4,0.0,0.0,1,0,0,0,0,0.333333,0,0,1
19,0.615152,0.403379,0.4,0.5,0.333333,1,0,0,0,1,0.333333,1,1,0
159,0.321212,0.115628,0.4,0.5,0.0,1,1,1,0,1,0.0,0,0,0
35,0.548133,0.454417,0.4,0.5,1.0,1,0,0,0,1,0.666667,0,0,0
28,0.575758,0.538015,0.8,0.5,0.333333,1,0,1,1,0,0.666667,0,0,1


In [19]:
# apply rescaling to the test set also
df_test[numeric_vars] = scaler.fit_transform(df_test[numeric_vars])
df_test.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,semi-furnished,unfurnished
265,0.247651,0.084536,0.333333,0.0,0.333333,1,0,0,0,0,0.0,1,1,0
54,0.530201,0.298969,0.333333,0.333333,0.333333,1,1,0,0,1,0.333333,0,1,0
171,0.328859,0.592371,0.333333,0.0,0.0,1,0,0,0,0,0.333333,1,1,0
244,0.261745,0.252234,0.333333,0.0,0.333333,1,1,1,0,0,0.0,1,1,0
268,0.245638,0.226804,0.666667,0.0,0.333333,1,0,0,0,1,0.0,0,1,0


In [20]:
# divide into X_train, y_train, X_test, y_test
y_train = df_train.pop('price')
X_train = df_train

y_test = df_test.pop('price')
X_test = df_test

In [21]:
# first model with an arbitrary choice of n_features
# running RFE with number of features=10

lm = LinearRegression()
lm.fit(X_train, y_train)

rfe = RFE(lm, n_features_to_select=10)             
rfe = rfe.fit(X_train, y_train)

In [30]:
input0 = (X_train.iloc[0])
input0 = dict(input0)
input0

{'area': 0.15522703273495247,
 'bedrooms': 0.4000000000000001,
 'bathrooms': 0.0,
 'stories': 0.0,
 'mainroad': 1.0,
 'guestroom': 0.0,
 'basement': 0.0,
 'hotwaterheating': 0.0,
 'airconditioning': 0.0,
 'parking': 0.3333333333333333,
 'prefarea': 0.0,
 'semi-furnished': 0.0,
 'unfurnished': 1.0}

### Step 2: persist model

In [23]:
import joblib

joblib.dump(lm, 'classifier.pkl')

['classifier.pkl']

### Step 3: serve the persisted model using Flask

In [47]:

from flask import Flask, jsonify
from flask import request
import pandas as pd
# from json import jsonify

app = Flask(__name__)

@app.route('/predict', methods=['POST'])
def predict():
     json_ = request.json
     print(json_)
     query_df = pd.DataFrame(json_)
     # print('query_df')
     # print(query_df)
     # query = pd.get_dummies(query_df)
     # print('query')
     # print(query)
     classifier = joblib.load('classifier.pkl')
     prediction = classifier.predict(query_df)
     # prediction = classifier.predict(json_)
     print('prediction')
     print(prediction)
     return jsonify({'prediction': list(prediction)})
     # return jsonify({'prediction': prediction})


if __name__ == '__main__':
     app.run(port=8080)

 * Serving Flask app '__main__' (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on http://127.0.0.1:8080/ (Press CTRL+C to quit)
[2022-01-08 16:20:14,215] ERROR in app: Exception on /predict [POST]
Traceback (most recent call last):
  File "/home/mikec/Desktop/GithubCloud/Kaggle/venv/lib/python3.8/site-packages/flask/app.py", line 2073, in wsgi_app
    response = self.full_dispatch_request()
  File "/home/mikec/Desktop/GithubCloud/Kaggle/venv/lib/python3.8/site-packages/flask/app.py", line 1518, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "/home/mikec/Desktop/GithubCloud/Kaggle/venv/lib/python3.8/site-packages/flask/app.py", line 1516, in full_dispatch_request
    rv = self.dispatch_request()
  File "/home/mikec/Desktop/GithubCloud/Kaggle/venv/lib/python3.8/site-packages/flask/app.py", line 1502, in dispatch_request
    return self.ensure_sync(self.view_functions[rule.endpoint])(**req.view_args)
  File "/tmp/ipykernel_3385084/4292672965.py", line 12, in predict
    query_df = pd.DataFrame(json_)
  File "/home/mikec/Desktop/Git

{'area': 0.15522703273495247, 'bedrooms': 0.4000000000000001, 'bathrooms': 0.0, 'stories': 0.0, 'mainroad': 1.0, 'guestroom': 0.0, 'basement': 0.0, 'hotwaterheating': 0.0, 'airconditioning': 0.0, 'parking': 0.3333333333333333, 'prefarea': 0.0, 'semi-furnished': 0.0, 'unfurnished': 1.0}


127.0.0.1 - - [08/Jan/2022 16:20:43] "POST /predict HTTP/1.1" 200 -


{'area': [0.15522703273495247], 'bedrooms': [0.4000000000000001], 'bathrooms': [0.0], 'stories': [0.0], 'mainroad': [1.0], 'guestroom': [0.0], 'basement': [0.0], 'hotwaterheating': [0.0], 'airconditioning': [0.0], 'parking': [0.3333333333333333], 'prefarea': [0.0], 'semi-furnished': [0.0], 'unfurnished': [1.0]}
prediction
[0.11483372]


127.0.0.1 - - [08/Jan/2022 16:20:48] "POST /predict HTTP/1.1" 200 -


{'area': [0.15522703273495247], 'bedrooms': [0.4000000000000001], 'bathrooms': [0.0], 'stories': [0.0], 'mainroad': [1.0], 'guestroom': [0.0], 'basement': [0.0], 'hotwaterheating': [0.0], 'airconditioning': [0.0], 'parking': [0.3333333333333333], 'prefarea': [0.0], 'semi-furnished': [0.0], 'unfurnished': [1.0]}
prediction
[0.11483372]
