In [1]:
import numpy as np
import pandas as pd
import sqlalchemy as sa
import pickle, json, requests, base64


## Build a scikit-learn model

In [2]:
from sklearn import datasets

iris = datasets.load_iris()
X = iris.data  
Y = iris.target
# print(iris.DESCR)

In [3]:

from sklearn.neural_network import MLPClassifier
clf = MLPClassifier()
clf.fit(X, Y)



MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [4]:
#clf.__dict__

## Generate SQL Code from the Model

In [5]:

def test_ws_sql_gen(pickle_data):
    WS_URL="http://192.168.88.88:1888/model"
    b64_data = base64.b64encode(pickle_data)
    data={"Name":"model1", "PickleData":b64_data , "SQLDialect":"postgresql"}
    r = requests.post(WS_URL, json=data)
    content = r.json()
    # print(content)
    lSQL = content["model"]["SQLGenrationResult"][0]["SQL"]
    return lSQL;


In [6]:
pickle_data = pickle.dumps(clf)
lSQL = test_ws_sql_gen(pickle_data)
print(lSQL[0:2000])

WITH "IL" AS 
(SELECT "ADS"."KEY" AS "KEY", "ADS"."Feature_0" AS "Feature_0", "ADS"."Feature_1" AS "Feature_1", "ADS"."Feature_2" AS "Feature_2", "ADS"."Feature_3" AS "Feature_3" 
FROM "INPUT_DATA" AS "ADS"), 
"HL_BA_1" AS 
(SELECT "IL"."KEY" AS "KEY", 0.129470833268 * "IL"."Feature_0" + 0.145805854121 * "IL"."Feature_1" + 0.0196195693018 * "IL"."Feature_2" + 0.0239472625367 * "IL"."Feature_3" + -0.0708238333359 AS "NEUR_1_1", 0.00044248222873 * "IL"."Feature_0" + -0.0475999283489 * "IL"."Feature_1" + -0.0246817684864 * "IL"."Feature_2" + -0.0398641929058 * "IL"."Feature_3" + -0.206079889609 AS "NEUR_1_2", 0.0286537578307 * "IL"."Feature_0" + 0.110673940466 * "IL"."Feature_1" + -0.145804623149 * "IL"."Feature_2" + -0.123462231147 * "IL"."Feature_3" + 0.269744391633 AS "NEUR_1_3", 0.238337884561 * "IL"."Feature_0" + 0.215642901729 * "IL"."Feature_1" + 0.0890370404872 * "IL"."Feature_2" + -0.160671021 * "IL"."Feature_3" + -0.106701480758 AS "NEUR_1_4", -0.0984113032436 * "IL"."Feature_0"

## Execute the SQL Code

In [7]:
# save the dataset in a database table

#engine = sa.create_engine('sqlite://' , echo=False)
engine = sa.create_engine("postgresql://db:db@localhost/db?port=5432", echo=False)
conn = engine.connect()

lTable = pd.DataFrame(X);
lTable.columns = ['Feature_0', 'Feature_1', 'Feature_2', 'Feature_3']
lTable['KEY'] = range(lTable.shape[0])
lTable.to_sql("INPUT_DATA" , conn,   if_exists='replace', index=False)


In [8]:
sql_output = pd.read_sql(lSQL , conn);


In [9]:
sql_output.sample(12)

Unnamed: 0,KEY,Score_0,Score_1,Score_2,Proba_0,Proba_1,Proba_2,LogProba_0,LogProba_1,LogProba_2,Decision
144,144,,,,0.333333,0.333333,0.333333,-1.098612,-1.098612,-1.098612,2
148,148,,,,0.333333,0.333333,0.333333,-1.098612,-1.098612,-1.098612,2
76,76,,,,0.333333,0.333333,0.333333,-1.098612,-1.098612,-1.098612,1
137,137,,,,0.333333,0.333333,0.333333,-1.098612,-1.098612,-1.098612,2
58,58,,,,0.333333,0.333333,0.333333,-1.098612,-1.098612,-1.098612,1
18,18,,,,0.333333,0.333333,0.333333,-1.098612,-1.098612,-1.098612,0
101,101,,,,0.333333,0.333333,0.333333,-1.098612,-1.098612,-1.098612,2
20,20,,,,0.333333,0.333333,0.333333,-1.098612,-1.098612,-1.098612,0
100,100,,,,0.333333,0.333333,0.333333,-1.098612,-1.098612,-1.098612,2
85,85,,,,0.333333,0.333333,0.333333,-1.098612,-1.098612,-1.098612,1


In [10]:
sql_output.Decision.value_counts()

2    54
0    50
1    46
Name: Decision, dtype: int64

## Scikit-learn Prediction

In [11]:
skl_outputs = pd.DataFrame()
skl_output_key = pd.DataFrame(list(range(X.shape[0])), columns=['KEY']);
skl_output_score = pd.DataFrame(columns=['Score_0', 'Score_1', 'Score_2']);
skl_output_proba = pd.DataFrame(clf.predict_proba(X), columns=['Proba_0', 'Proba_1', 'Proba_2'])
skl_output_log_proba = pd.DataFrame(clf.predict_log_proba(X), columns=['LogProba_0', 'LogProba_1', 'LogProba_2'])
skl_output_decision = pd.DataFrame(clf.predict(X), columns=['Decision'])
skl_output = pd.concat([skl_output_key, skl_output_score, skl_output_proba, skl_output_log_proba, skl_output_decision] , axis=1)
skl_output.sample(12)


Unnamed: 0,KEY,Score_0,Score_1,Score_2,Proba_0,Proba_1,Proba_2,LogProba_0,LogProba_1,LogProba_2,Decision
131,131,,,,0.002926,0.437083,0.559991,-5.834184,-0.827631,-0.579835,2
96,96,,,,0.035859,0.678685,0.285456,-3.328166,-0.387598,-1.253668,1
0,0,,,,0.971356,0.028196,0.000448,-0.029062,-3.568581,-7.710054,0
86,86,,,,0.02152,0.700187,0.278293,-3.838769,-0.356408,-1.279079,1
116,116,,,,0.003336,0.322319,0.674345,-5.703117,-1.132214,-0.394013,2
56,56,,,,0.023402,0.652885,0.323713,-3.754928,-0.426355,-1.127897,1
85,85,,,,0.034078,0.67256,0.293362,-3.379103,-0.396664,-1.226347,1
147,147,,,,0.004271,0.344273,0.651456,-5.455864,-1.066321,-0.428545,2
30,30,,,,0.931234,0.066962,0.001804,-0.071245,-2.703632,-6.317662,0
23,23,,,,0.918791,0.078874,0.002335,-0.084696,-2.539905,-6.059784,0


## Comparing the SQL and Scikit-learn Predictions

In [12]:
sql_skl_join = skl_output.join(sql_output , how='left', on='KEY', lsuffix='_skl', rsuffix='_sql')

In [13]:
sql_skl_join.sample(12)

Unnamed: 0,KEY_skl,Score_0_skl,Score_1_skl,Score_2_skl,Proba_0_skl,Proba_1_skl,Proba_2_skl,LogProba_0_skl,LogProba_1_skl,LogProba_2_skl,...,Score_0_sql,Score_1_sql,Score_2_sql,Proba_0_sql,Proba_1_sql,Proba_2_sql,LogProba_0_sql,LogProba_1_sql,LogProba_2_sql,Decision_sql
8,8,,,,0.930187,0.067531,0.002281,-0.072369,-2.695165,-6.082997,...,,,,0.333333,0.333333,0.333333,-1.098612,-1.098612,-1.098612,0
116,116,,,,0.003336,0.322319,0.674345,-5.703117,-1.132214,-0.394013,...,,,,0.333333,0.333333,0.333333,-1.098612,-1.098612,-1.098612,2
18,18,,,,0.969173,0.030452,0.000375,-0.031312,-3.491606,-7.88964,...,,,,0.333333,0.333333,0.333333,-1.098612,-1.098612,-1.098612,0
90,90,,,,0.019818,0.560056,0.420125,-3.921147,-0.579718,-0.867202,...,,,,0.333333,0.333333,0.333333,-1.098612,-1.098612,-1.098612,1
4,4,,,,0.973081,0.026506,0.000413,-0.027288,-3.630378,-7.792503,...,,,,0.333333,0.333333,0.333333,-1.098612,-1.098612,-1.098612,0
100,100,,,,0.000572,0.106971,0.892457,-7.467025,-2.235194,-0.113777,...,,,,0.333333,0.333333,0.333333,-1.098612,-1.098612,-1.098612,2
27,27,,,,0.967396,0.032096,0.000509,-0.033148,-3.439029,-7.584036,...,,,,0.333333,0.333333,0.333333,-1.098612,-1.098612,-1.098612,0
37,37,,,,0.949032,0.04986,0.001109,-0.052313,-2.998541,-6.804698,...,,,,0.333333,0.333333,0.333333,-1.098612,-1.098612,-1.098612,0
51,51,,,,0.030679,0.718871,0.25045,-3.484189,-0.330073,-1.384495,...,,,,0.333333,0.333333,0.333333,-1.098612,-1.098612,-1.098612,1
140,140,,,,0.001327,0.193052,0.805621,-6.625021,-1.644796,-0.216141,...,,,,0.333333,0.333333,0.333333,-1.098612,-1.098612,-1.098612,2


In [14]:
condition = (sql_skl_join.Decision_sql != sql_skl_join.Decision_skl)
sql_skl_join[condition]


Unnamed: 0,KEY_skl,Score_0_skl,Score_1_skl,Score_2_skl,Proba_0_skl,Proba_1_skl,Proba_2_skl,LogProba_0_skl,LogProba_1_skl,LogProba_2_skl,...,Score_0_sql,Score_1_sql,Score_2_sql,Proba_0_sql,Proba_1_sql,Proba_2_sql,LogProba_0_sql,LogProba_1_sql,LogProba_2_sql,Decision_sql
