<a href="https://colab.research.google.com/github/sandeep92134/The-Data-Science-Workshop-By-Packt/blob/master/module%2018/Exercise18.03%3A%20Adding%20Data%20Processing%20Steps%20into%20a%20Web%20API.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import joblib
from sklearn.ensemble import RandomForestClassifier

In [2]:
file_url = 'https://raw.githubusercontent.com/sandeep92134/The-Data-Science-Workshop-By-Packt/master/module%2011/datasets/breast-cancer-wisconsin.data'

In [3]:
col_names = ['Sample code number','Clump Thickness','Uniformity of Cell Size','Uniformity of Cell Shape','Marginal Adhesion','Single Epithelial Cell Size',
'Bare Nuclei','Bland Chromatin','Normal Nucleoli','Mitoses','Class']

In [4]:
df = pd.read_csv(file_url, header=None, names=col_names, na_values='?')

In [5]:
y = df.pop('Class')

In [6]:
df.drop('Sample code number', axis=1, inplace=True)

In [7]:
training_rows = int(df.shape[0] * 0.7)
training_rows

489

In [8]:
X_train = df[:training_rows]
y_train = y[:training_rows]
X_test = df[training_rows:]
y_test = y[training_rows:]

In [9]:
X_train.isna().sum()

Clump Thickness                 0
Uniformity of Cell Size         0
Uniformity of Cell Shape        0
Marginal Adhesion               0
Single Epithelial Cell Size     0
Bare Nuclei                    15
Bland Chromatin                 0
Normal Nucleoli                 0
Mitoses                         0
dtype: int64

In [10]:
num_columns = [col for col in X_train.columns if X_train[col].dtype != 'object']
num_columns

['Clump Thickness',
 'Uniformity of Cell Size',
 'Uniformity of Cell Shape',
 'Marginal Adhesion',
 'Single Epithelial Cell Size',
 'Bare Nuclei',
 'Bland Chromatin',
 'Normal Nucleoli',
 'Mitoses']

In [11]:
column_mean = {}
for col in num_columns:
  column_mean[col] = X_train[col].mean()
column_mean

{'Bare Nuclei': 4.0042194092827,
 'Bland Chromatin': 3.61758691206544,
 'Clump Thickness': 4.644171779141105,
 'Marginal Adhesion': 2.9529652351738243,
 'Mitoses': 1.7198364008179958,
 'Normal Nucleoli': 3.1533742331288344,
 'Single Epithelial Cell Size': 3.462167689161554,
 'Uniformity of Cell Shape': 3.4478527607361964,
 'Uniformity of Cell Size': 3.347648261758691}

In [12]:
import pickle
pickle.dump(column_mean, open("columns_mean.pkl", "wb" ) )

In [13]:
for col in num_columns:
  X_train[col].fillna(column_mean[col], inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  downcast=downcast,


In [14]:
rf_model = RandomForestClassifier(random_state=1)
rf_model.fit(X_train, y_train)
joblib.dump(rf_model, "model.pkl") 

['model.pkl']

In [15]:
import socket
import threading
import requests
import json
from flask import Flask, jsonify, request
import numpy as np

In [16]:
app = Flask(__name__)

In [17]:
trained_model = joblib.load("model.pkl")
var_means = pickle.load(open("columns_mean.pkl", "rb" ) )

In [18]:
@app.route('/api', methods=['POST'])
def predict():
  data = request.get_json()
  df_test = pd.DataFrame(data, index=[0])
  for col, avg_value in var_means.items():
    df_test[col].fillna(avg_value, inplace=True)
  prediction = trained_model.predict(df_test)
  str_pred = np.array2string(prediction)
  return jsonify(str_pred)

In [19]:
flask_thread = threading.Thread(target=app.run, kwargs={'host':'0.0.0.0','port':80})
flask_thread.start()

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on http://0.0.0.0:80/ (Press CTRL+C to quit)


In [20]:
record = X_test[X_test['Bare Nuclei'].isna()].iloc[0].to_json()
record

'{"Clump Thickness":1.0,"Uniformity of Cell Size":1.0,"Uniformity of Cell Shape":1.0,"Marginal Adhesion":1.0,"Single Epithelial Cell Size":1.0,"Bare Nuclei":null,"Bland Chromatin":1.0,"Normal Nucleoli":1.0,"Mitoses":1.0}'

In [21]:
headers = {'content-type': 'application/json', 'Accept-Charset': 'UTF-8'}
ip_address = socket.gethostbyname(socket.gethostname())

In [22]:
r = requests.post(f"http://{ip_address}/api", data=record, headers=headers)
r.text

172.28.0.2 - - [19/Feb/2021 12:35:41] "[37mPOST /api HTTP/1.1[0m" 200 -


'"[2]"\n'