**Deploying ML Models with Flask**
---

In [1]:
# imports
import pandas as pd
from sklearn.datasets import load_wine

from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline, FeatureUnion

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest

from sklearn.ensemble import RandomForestClassifier

import pickle

In [2]:
# load toy dataset from sklearn
data = load_wine()
df = pd.DataFrame(data['data'])
df.columns = data['feature_names']
y = data['target']
df.head()

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0
1,13.2,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050.0
2,13.16,2.36,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185.0
3,14.37,1.95,2.5,16.8,113.0,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480.0
4,13.24,2.59,2.87,21.0,118.0,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735.0


In [3]:
#During the model creation, we will work on following tasks:\
#Filter own columns for PCA
#Scaling
#PCA
#SelectKBest
#Random Forest Regressor
#and put them all to one pipeline.

In [None]:
# Filter Own Columns
#Firstly, we will create our own class to keep only features we want in our pipeline. \
#We don't want to run PCA on all features but only on the sample so we create own class that \
#filters the features in the original dataframe. We can put our own classes into the pipelines, \
#as long as they have following methods:

#.fit()
#.transform()
#.fit_transform()
#

In [4]:
# own class that can be inserted to pipeline as any other sklearn object.
class RawFeats:
    def __init__(self, feats):
        self.feats = feats

    def fit(self, X, y=None):
        pass


    def transform(self, X, y=None):
        return X[self.feats]

    def fit_transform(self, X, y=None):
        self.fit(X)
        return self.transform(X)


# features we want to keep for PCA
feats = ['alcohol','malic_acid','ash','alcalinity_of_ash','magnesium',
         'total_phenols','flavanoids','nonflavanoid_phenols']
# creating class object with indexes we want to keep.
raw_feats = RawFeats(feats)

In [5]:
# Scaling with PCA
#We will create a pipeline with two steps: StandardScaler and PCA. \

# StandardScaler will scale the data and PCA will reduce the dimensionality of the data. \

scaller = StandardScaler()
pca = PCA(n_components=2)

In [7]:
# select K best
selection = SelectKBest(k=4)

In [8]:
# random forest
rf = RandomForestClassifier()

In [9]:
# pipeline
# we will apply two different feature extraction techniques: PCA and SelectKBest
#and combine them with FeatureUnion. The small difference is that we will use only sample of features for PCA.

# pipeline
PCA_pipeline = Pipeline([('raw_feats', raw_feats),
                  ('scaler', scaller), 
                  ('pca', pca)
                  ])

kbest_pipeline = Pipeline([("kBest", selection)])

# combine pipelines
all_features = FeatureUnion([('pca', PCA_pipeline),
                                ('kbest', kbest_pipeline)
                                ])

# final pipeline
pipe = Pipeline([('all', all_features),
                    ('rf', rf)
                    ])


In [11]:
#Let's apply grid search to tune the parameters properly:

# grid search
param_grid = {'all__pca__pca__n_components': [1, 2, 3],
                'all__kbest__kBest__k': [1, 2, 3],
                'rf__n_estimators': [2, 5, 10, 15, 25],
                'rf__max_depth': [2, 5, 10, 15, 25]
                }

# create a Grid Search object
grid_search = GridSearchCV(pipe, param_grid=param_grid, n_jobs=-1, verbose=10, refit=True)

# fit the model and tune parameters
grid_search.fit(df, y)

Fitting 5 folds for each of 225 candidates, totalling 1125 fits
[CV 1/5; 1/225] START all__kbest__kBest__k=1, all__pca__pca__n_components=1, rf__max_depth=2, rf__n_estimators=2
[CV 3/5; 1/225] START all__kbest__kBest__k=1, all__pca__pca__n_components=1, rf__max_depth=2, rf__n_estimators=2
[CV 2/5; 1/225] START all__kbest__kBest__k=1, all__pca__pca__n_components=1, rf__max_depth=2, rf__n_estimators=2
[CV 1/5; 1/225] END all__kbest__kBest__k=1, all__pca__pca__n_components=1, rf__max_depth=2, rf__n_estimators=2;, score=0.722 total time=   0.0s
[CV 3/5; 1/225] END all__kbest__kBest__k=1, all__pca__pca__n_components=1, rf__max_depth=2, rf__n_estimators=2;, score=0.500 total time=   0.0s
[CV 2/5; 1/225] END all__kbest__kBest__k=1, all__pca__pca__n_components=1, rf__max_depth=2, rf__n_estimators=2;, score=0.722 total time=   0.0s
[CV 5/5; 1/225] START all__kbest__kBest__k=1, all__pca__pca__n_components=1, rf__max_depth=2, rf__n_estimators=2
[CV 4/5; 1/225] START all__kbest__kBest__k=1, all__p

In [12]:
print(grid_search.best_params_)

{'all__kbest__kBest__k': 3, 'all__pca__pca__n_components': 2, 'rf__max_depth': 25, 'rf__n_estimators': 15}


In [14]:
pickle.dump( grid_search, open("model.p", "wb"))

**Sending requests***
---

In [15]:
json_data = {'alcohol': 14.23,
 'malic_acid': 1.71,
 'ash': 2.43,
 'alcalinity_of_ash': 15.6,
 'magnesium': 127.0,
 'total_phenols': 2.8,
 'flavanoids': 3.06,
 'nonflavanoid_phenols': 0.28,
 'proanthocyanins': 2.29,
 'color_intensity': 5.64,
 'hue': 1.04,
 'od280/od315_of_diluted_wines': 3.92,
 'proline': 1065.0}

In [16]:
import requests
URL = "http://127.0.0.1:888/scoring"
# sending get request and saving the response as response object 
r = requests.post(url = URL, json = json_data) 

In [17]:
print(r.json())

[[1.0, 0.0, 0.0]]
