In [1]:
import numpy as np
import pandas as pd
import sqlalchemy as sa
import pickle, json, requests, base64


## Build a scikit-learn model

In [2]:
from sklearn import datasets

iris = datasets.load_iris()
X = iris.data  
Y = iris.target
# print(iris.DESCR)

In [3]:

from sklearn.neural_network import MLPClassifier
clf = MLPClassifier()
clf.fit(X, Y)



MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [4]:
#clf.__dict__

## Generate SQL Code from the Model

In [5]:

def test_ws_sql_gen(pickle_data):
    WS_URL="http://192.168.88.88:1888/model" # "https://sklearn2sql.herokuapp.com/"
    b64_data = base64.b64encode(pickle_data)
    data={"Name":"model1", "PickleData":b64_data , "SQLDialect":"postgresql"}
    r = requests.post(WS_URL, json=data)
    content = r.json()
    # print(content)
    lSQL = content["model"]["SQLGenrationResult"][0]["SQL"]
    return lSQL;


In [6]:
pickle_data = pickle.dumps(clf)
lSQL = test_ws_sql_gen(pickle_data)
print(lSQL[0:2000])

WITH "IL" AS 
(SELECT "ADS"."KEY" AS "KEY", "ADS"."Feature_0" AS "Feature_0", "ADS"."Feature_1" AS "Feature_1", "ADS"."Feature_2" AS "Feature_2", "ADS"."Feature_3" AS "Feature_3" 
FROM "INPUT_DATA" AS "ADS"), 
"HL_BA_1" AS 
(SELECT "IL"."KEY" AS "KEY", 0.140200319383 * "IL"."Feature_0" + 0.222892221809 * "IL"."Feature_1" + -0.242908480813 * "IL"."Feature_2" + 0.0211305110946 * "IL"."Feature_3" + -0.101800133331 AS "NEUR_1_1", -0.0973534214545 * "IL"."Feature_0" + 0.0388225444728 * "IL"."Feature_1" + 0.000469637525458 * "IL"."Feature_2" + 0.00133596277314 * "IL"."Feature_3" + -0.0349337285355 AS "NEUR_1_2", 0.0401791783467 * "IL"."Feature_0" + 0.0135730444765 * "IL"."Feature_1" + -0.146018796192 * "IL"."Feature_2" + 0.174994391266 * "IL"."Feature_3" + 0.0739128376935 AS "NEUR_1_3", 0.0882127467853 * "IL"."Feature_0" + 0.0288537381013 * "IL"."Feature_1" + 0.283151015044 * "IL"."Feature_2" + -0.0520126920017 * "IL"."Feature_3" + -0.0704550056921 AS "NEUR_1_4", 0.13945486256 * "IL"."Featur

## Execute the SQL Code

In [7]:
# save the dataset in a database table

#engine = sa.create_engine('sqlite://' , echo=False)
engine = sa.create_engine("postgresql://db:db@localhost/db?port=5432", echo=False)
conn = engine.connect()

lTable = pd.DataFrame(X);
lTable.columns = ['Feature_0', 'Feature_1', 'Feature_2', 'Feature_3']
lTable['KEY'] = range(lTable.shape[0])
lTable.to_sql("INPUT_DATA" , conn,   if_exists='replace', index=False)


In [8]:
sql_output = pd.read_sql(lSQL , conn);


In [9]:
sql_output.sample(12, random_state=1960)

Unnamed: 0,KEY,Score_0,Score_1,Score_2,Proba_0,Proba_1,Proba_2,LogProba_0,LogProba_1,LogProba_2,Decision
114,114,,,,0.000632,0.056516,0.942851,-7.366313,-2.873224,-0.058847,2
74,74,,,,0.03295,0.85392,0.113131,-3.412777,-0.157918,-2.179212,1
9,9,,,,0.94403,0.055226,0.000744,-0.057597,-2.896319,-7.203641,0
88,88,,,,0.040707,0.800465,0.158828,-3.201355,-0.222563,-1.839933,1
25,25,,,,0.921194,0.077602,0.001204,-0.082084,-2.556165,-6.722067,0
5,5,,,,0.956889,0.042715,0.000395,-0.044067,-3.153198,-7.83594,0
48,48,,,,0.96845,0.031277,0.000273,-0.032058,-3.464878,-8.205885,0
117,117,,,,0.000799,0.174846,0.824356,-7.132562,-1.743852,-0.193153,2
83,83,,,,0.004873,0.295878,0.699249,-5.324048,-1.217807,-0.357749,2
105,105,,,,0.000322,0.080286,0.919391,-8.040042,-2.522156,-0.084043,2


In [10]:
sql_output.Decision.value_counts()

2    54
0    50
1    46
Name: Decision, dtype: int64

## Scikit-learn Prediction

In [11]:
skl_outputs = pd.DataFrame()
skl_output_key = pd.DataFrame(list(range(X.shape[0])), columns=['KEY']);
skl_output_score = pd.DataFrame(columns=['Score_0', 'Score_1', 'Score_2']);
skl_output_proba = pd.DataFrame(clf.predict_proba(X), columns=['Proba_0', 'Proba_1', 'Proba_2'])
skl_output_log_proba = pd.DataFrame(clf.predict_log_proba(X), columns=['LogProba_0', 'LogProba_1', 'LogProba_2'])
skl_output_decision = pd.DataFrame(clf.predict(X), columns=['Decision'])
skl_output = pd.concat([skl_output_key, skl_output_score, skl_output_proba, skl_output_log_proba, skl_output_decision] , axis=1)
skl_output.sample(12, random_state=1960)


Unnamed: 0,KEY,Score_0,Score_1,Score_2,Proba_0,Proba_1,Proba_2,LogProba_0,LogProba_1,LogProba_2,Decision
114,114,,,,0.000632,0.056516,0.942851,-7.366313,-2.873224,-0.058847,2
74,74,,,,0.03295,0.85392,0.113131,-3.412777,-0.157918,-2.179212,1
9,9,,,,0.94403,0.055226,0.000744,-0.057597,-2.896319,-7.203641,0
88,88,,,,0.040707,0.800465,0.158828,-3.201355,-0.222563,-1.839933,1
25,25,,,,0.921194,0.077602,0.001204,-0.082084,-2.556165,-6.722067,0
5,5,,,,0.956889,0.042715,0.000395,-0.044067,-3.153198,-7.83594,0
48,48,,,,0.96845,0.031277,0.000273,-0.032058,-3.464878,-8.205885,0
117,117,,,,0.000799,0.174846,0.824356,-7.132562,-1.743852,-0.193153,2
83,83,,,,0.004873,0.295878,0.699249,-5.324048,-1.217807,-0.357749,2
105,105,,,,0.000322,0.080286,0.919391,-8.040042,-2.522156,-0.084043,2


## Comparing the SQL and Scikit-learn Predictions

In [12]:
sql_skl_join = skl_output.join(sql_output , how='left', on='KEY', lsuffix='_skl', rsuffix='_sql')

In [13]:
sql_skl_join.sample(12)

Unnamed: 0,KEY_skl,Score_0_skl,Score_1_skl,Score_2_skl,Proba_0_skl,Proba_1_skl,Proba_2_skl,LogProba_0_skl,LogProba_1_skl,LogProba_2_skl,...,Score_0_sql,Score_1_sql,Score_2_sql,Proba_0_sql,Proba_1_sql,Proba_2_sql,LogProba_0_sql,LogProba_1_sql,LogProba_2_sql,Decision_sql
42,42,,,,0.947152,0.051988,0.00086,-0.054296,-2.956745,-7.058285,...,,,,0.947152,0.051988,0.00086,-0.054296,-2.956745,-7.058285,0
87,87,,,,0.019307,0.687639,0.293054,-3.94727,-0.374492,-1.227399,...,,,,0.019307,0.687639,0.293054,-3.94727,-0.374492,-1.227399,1
116,116,,,,0.003061,0.262888,0.734052,-5.789105,-1.336028,-0.309176,...,,,,0.003061,0.262888,0.734052,-5.789105,-1.336028,-0.309176,2
56,56,,,,0.020289,0.734222,0.245489,-3.897685,-0.308944,-1.404503,...,,,,0.020289,0.734222,0.245489,-3.897685,-0.308944,-1.404503,1
128,128,,,,0.000757,0.088873,0.91037,-7.186209,-2.420545,-0.093904,...,,,,0.000757,0.088873,0.91037,-7.186209,-2.420545,-0.093904,2
2,2,,,,0.952752,0.046575,0.000674,-0.048401,-3.066699,-7.30264,...,,,,0.952752,0.046575,0.000674,-0.048401,-3.066699,-7.30264,0
132,132,,,,0.000575,0.071573,0.927852,-7.461382,-2.637037,-0.074883,...,,,,0.000575,0.071573,0.927852,-7.461382,-2.637037,-0.074883,2
45,45,,,,0.928374,0.070407,0.001219,-0.07432,-2.653463,-6.710047,...,,,,0.928374,0.070407,0.001219,-0.07432,-2.653463,-6.710047,0
84,84,,,,0.017477,0.54238,0.440143,-4.046857,-0.611789,-0.820656,...,,,,0.017477,0.54238,0.440143,-4.046857,-0.611789,-0.820656,1
22,22,,,,0.974455,0.025252,0.000293,-0.025877,-3.678866,-8.135183,...,,,,0.974455,0.025252,0.000293,-0.025877,-3.678866,-8.135183,0


In [14]:
condition = (sql_skl_join.Decision_sql != sql_skl_join.Decision_skl)
sql_skl_join[condition]


Unnamed: 0,KEY_skl,Score_0_skl,Score_1_skl,Score_2_skl,Proba_0_skl,Proba_1_skl,Proba_2_skl,LogProba_0_skl,LogProba_1_skl,LogProba_2_skl,...,Score_0_sql,Score_1_sql,Score_2_sql,Proba_0_sql,Proba_1_sql,Proba_2_sql,LogProba_0_sql,LogProba_1_sql,LogProba_2_sql,Decision_sql
