In [1]:
import numpy as np
import pandas as pd
import sqlalchemy as sa
import pickle, json, requests, base64


## Build a scikit-learn model

In [2]:
from sklearn import datasets

iris = datasets.load_iris()
X = iris.data  
Y = iris.target
# print(iris.DESCR)

In [3]:

from sklearn.neural_network import MLPClassifier
clf = MLPClassifier()
clf.fit(X, Y)



MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [4]:
#clf.__dict__

## Generate SQL Code from the Model

In [5]:

def test_ws_sql_gen(pickle_data):
    WS_URL="http://192.168.88.88:1888/model"
    b64_data = base64.b64encode(pickle_data)
    data={"Name":"model1", "PickleData":b64_data , "SQLDialect":"postgresql"}
    r = requests.post(WS_URL, json=data)
    content = r.json()
    # print(content)
    lSQL = content["model"]["SQLGenrationResult"][0]["SQL"]
    return lSQL;


In [6]:
pickle_data = pickle.dumps(clf)
lSQL = test_ws_sql_gen(pickle_data)
print(lSQL[0:2000])

WITH "IL" AS 
(SELECT "ADS"."KEY" AS "KEY", "ADS"."Feature_0" AS "Feature_0", "ADS"."Feature_1" AS "Feature_1", "ADS"."Feature_2" AS "Feature_2", "ADS"."Feature_3" AS "Feature_3" 
FROM "INPUT_DATA" AS "ADS"), 
"HL_BA_1" AS 
(SELECT "IL"."KEY" AS "KEY", -0.0417774370844 * "IL"."Feature_0" + 0.0236386835165 * "IL"."Feature_1" + -0.00235006194486 * "IL"."Feature_2" + 0.018305927406 * "IL"."Feature_3" + -0.200777566461 AS "NEUR_1_1", 0.190457831133 * "IL"."Feature_0" + -0.146869841777 * "IL"."Feature_1" + 0.310834235827 * "IL"."Feature_2" + 0.0971423740441 * "IL"."Feature_3" + -0.126988792834 AS "NEUR_1_2", -0.0707499272947 * "IL"."Feature_0" + -0.0272114820893 * "IL"."Feature_1" + -0.00863332222823 * "IL"."Feature_2" + 0.0262130721481 * "IL"."Feature_3" + -0.00510428928886 AS "NEUR_1_3", 0.0303764500919 * "IL"."Feature_0" + 0.0788111746218 * "IL"."Feature_1" + -0.214991520594 * "IL"."Feature_2" + 0.0513027708758 * "IL"."Feature_3" + -0.267760581913 AS "NEUR_1_4", 0.246026619584 * "IL"."Fe

## Execute the SQL Code

In [7]:
# save the dataset in a database table

#engine = sa.create_engine('sqlite://' , echo=False)
engine = sa.create_engine("postgresql://db:db@localhost/db?port=5432", echo=False)
conn = engine.connect()

lTable = pd.DataFrame(X);
lTable.columns = ['Feature_0', 'Feature_1', 'Feature_2', 'Feature_3']
lTable['KEY'] = range(lTable.shape[0])
lTable.to_sql("INPUT_DATA" , conn,   if_exists='replace', index=False)


In [8]:
sql_output = pd.read_sql(lSQL , conn);


In [9]:
sql_output.sample(12)

Unnamed: 0,KEY,Score_0,Score_1,Score_2,Proba_0,Proba_1,Proba_2,LogProba_0,LogProba_1,LogProba_2,Decision
20,20,,,,,,,-1.797693e+308,-1.797693e+308,-1.797693e+308,0
134,134,,,,,,,-1.797693e+308,-1.797693e+308,-1.797693e+308,2
64,64,,,,,,,-1.797693e+308,-1.797693e+308,-1.797693e+308,1
28,28,,,,,,,-1.797693e+308,-1.797693e+308,-1.797693e+308,0
14,14,,,,,,,-1.797693e+308,-1.797693e+308,-1.797693e+308,0
73,73,,,,,,,-1.797693e+308,-1.797693e+308,-1.797693e+308,1
65,65,,,,,,,-1.797693e+308,-1.797693e+308,-1.797693e+308,1
116,116,,,,,,,-1.797693e+308,-1.797693e+308,-1.797693e+308,2
30,30,,,,,,,-1.797693e+308,-1.797693e+308,-1.797693e+308,0
34,34,,,,,,,-1.797693e+308,-1.797693e+308,-1.797693e+308,0


In [10]:
sql_output.Decision.value_counts()

2    54
0    50
1    46
Name: Decision, dtype: int64

## Scikit-learn Prediction

In [11]:
skl_outputs = pd.DataFrame()
skl_output_key = pd.DataFrame(list(range(X.shape[0])), columns=['KEY']);
skl_output_score = pd.DataFrame(columns=['Score_0', 'Score_1', 'Score_2']);
skl_output_proba = pd.DataFrame(clf.predict_proba(X), columns=['Proba_0', 'Proba_1', 'Proba_2'])
skl_output_log_proba = pd.DataFrame(clf.predict_log_proba(X), columns=['LogProba_0', 'LogProba_1', 'LogProba_2'])
skl_output_decision = pd.DataFrame(clf.predict(X), columns=['Decision'])
skl_output = pd.concat([skl_output_key, skl_output_score, skl_output_proba, skl_output_log_proba, skl_output_decision] , axis=1)
skl_output.sample(12)


Unnamed: 0,KEY,Score_0,Score_1,Score_2,Proba_0,Proba_1,Proba_2,LogProba_0,LogProba_1,LogProba_2,Decision
83,83,,,,0.006045,0.327129,0.666826,-5.108454,-1.117401,-0.405227,2
61,61,,,,0.043424,0.679378,0.277199,-3.136752,-0.386578,-1.28302,1
15,15,,,,0.984183,0.015712,0.000105,-0.015944,-4.153314,-9.162667,0
107,107,,,,0.001106,0.200632,0.798262,-6.806967,-1.606282,-0.225319,2
36,36,,,,0.965745,0.034036,0.000218,-0.034855,-3.380327,-8.429333,0
74,74,,,,0.043323,0.779183,0.177494,-3.13907,-0.249509,-1.728821,1
22,22,,,,0.978133,0.021616,0.000251,-0.02211,-3.834317,-8.289716,0
137,137,,,,0.004197,0.272673,0.72313,-5.473413,-1.299481,-0.324167,2
88,88,,,,0.055806,0.680685,0.263509,-2.885874,-0.384655,-1.333669,1
34,34,,,,0.930277,0.068851,0.000873,-0.072273,-2.675813,-7.044072,0


## Comparing the SQL and Scikit-learn Predictions

In [12]:
sql_skl_join = skl_output.join(sql_output , how='left', on='KEY', lsuffix='_skl', rsuffix='_sql')

In [13]:
sql_skl_join.sample(12)

Unnamed: 0,KEY_skl,Score_0_skl,Score_1_skl,Score_2_skl,Proba_0_skl,Proba_1_skl,Proba_2_skl,LogProba_0_skl,LogProba_1_skl,LogProba_2_skl,...,Score_0_sql,Score_1_sql,Score_2_sql,Proba_0_sql,Proba_1_sql,Proba_2_sql,LogProba_0_sql,LogProba_1_sql,LogProba_2_sql,Decision_sql
125,125,,,,0.002864,0.295943,0.701194,-5.855641,-1.217589,-0.354971,...,,,,,,,-1.797693e+308,-1.797693e+308,-1.797693e+308,2
18,18,,,,0.957199,0.042471,0.00033,-0.043744,-3.158929,-8.015548,...,,,,,,,-1.797693e+308,-1.797693e+308,-1.797693e+308,0
128,128,,,,0.001514,0.162569,0.835916,-6.492768,-1.81665,-0.179227,...,,,,,,,-1.797693e+308,-1.797693e+308,-1.797693e+308,2
89,89,,,,0.037222,0.662766,0.300011,-3.290845,-0.411333,-1.203935,...,,,,,,,-1.797693e+308,-1.797693e+308,-1.797693e+308,1
15,15,,,,0.984183,0.015712,0.000105,-0.015944,-4.153314,-9.162667,...,,,,,,,-1.797693e+308,-1.797693e+308,-1.797693e+308,0
23,23,,,,0.902341,0.096039,0.00162,-0.102763,-2.343002,-6.42506,...,,,,,,,-1.797693e+308,-1.797693e+308,-1.797693e+308,0
27,27,,,,0.955573,0.043994,0.000433,-0.045445,-3.123698,-7.744318,...,,,,,,,-1.797693e+308,-1.797693e+308,-1.797693e+308,0
3,3,,,,0.923704,0.074945,0.001352,-0.079364,-2.591006,-6.606363,...,,,,,,,-1.797693e+308,-1.797693e+308,-1.797693e+308,0
120,120,,,,0.002224,0.213201,0.784575,-6.10862,-1.545518,-0.242613,...,,,,,,,-1.797693e+308,-1.797693e+308,-1.797693e+308,2
104,104,,,,0.001132,0.132371,0.866498,-6.783914,-2.02215,-0.143296,...,,,,,,,-1.797693e+308,-1.797693e+308,-1.797693e+308,2


In [14]:
condition = (sql_skl_join.Decision_sql != sql_skl_join.Decision_skl)
sql_skl_join[condition]


Unnamed: 0,KEY_skl,Score_0_skl,Score_1_skl,Score_2_skl,Proba_0_skl,Proba_1_skl,Proba_2_skl,LogProba_0_skl,LogProba_1_skl,LogProba_2_skl,...,Score_0_sql,Score_1_sql,Score_2_sql,Proba_0_sql,Proba_1_sql,Proba_2_sql,LogProba_0_sql,LogProba_1_sql,LogProba_2_sql,Decision_sql
