In [1]:
import numpy as np
import pandas as pd
import sqlalchemy as sa
import pickle, json, requests, base64


## Build a scikit-learn model

In [2]:
from sklearn import datasets

iris = datasets.load_iris()
X = iris.data  
Y = iris.target
# print(iris.DESCR)

In [3]:

from sklearn.neural_network import MLPClassifier
clf = MLPClassifier()
clf.fit(X, Y)



MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [4]:
#clf.__dict__

## Generate SQL Code from the Model

In [5]:

def test_ws_sql_gen(pickle_data):
    WS_URL="http://192.168.88.88:1888/model"
    b64_data = base64.b64encode(pickle_data)
    data={"Name":"model1", "PickleData":b64_data , "SQLDialect":"postgresql"}
    r = requests.post(WS_URL, json=data)
    content = r.json()
    # print(content)
    lSQL = content["model"]["SQLGenrationResult"][0]["SQL"]
    return lSQL;


In [6]:
pickle_data = pickle.dumps(clf)
lSQL = test_ws_sql_gen(pickle_data)
print(lSQL[0:2000])

WITH "IL" AS 
(SELECT "ADS"."KEY" AS "KEY", "ADS"."Feature_0" AS "Feature_0", "ADS"."Feature_1" AS "Feature_1", "ADS"."Feature_2" AS "Feature_2", "ADS"."Feature_3" AS "Feature_3" 
FROM "INPUT_DATA" AS "ADS"), 
"HL_BA_1" AS 
(SELECT "IL"."KEY" AS "KEY", -5.87519665584e-05 * "IL"."Feature_0" + -0.0330455086509 * "IL"."Feature_1" + -0.0549847615422 * "IL"."Feature_2" + -0.00563685128182 * "IL"."Feature_3" + 0.0810374168438 AS "NEUR_1_1", 0.122810707241 * "IL"."Feature_0" + 0.206300456365 * "IL"."Feature_1" + -0.189952458873 * "IL"."Feature_2" + -0.219113447721 * "IL"."Feature_3" + -0.25765110394 AS "NEUR_1_2", -0.0636296366282 * "IL"."Feature_0" + 0.00443184505507 * "IL"."Feature_1" + 0.000749319976322 * "IL"."Feature_2" + -4.47647266952e-06 * "IL"."Feature_3" + -0.0405774590419 AS "NEUR_1_3", 0.0482191822138 * "IL"."Feature_0" + -0.116759331852 * "IL"."Feature_1" + 0.215837868805 * "IL"."Feature_2" + 0.178018915989 * "IL"."Feature_3" + -0.183451414144 AS "NEUR_1_4", 1.96852581664e-05 * "

## Execute the SQL Code

In [7]:
# save the dataset in a database table

#engine = sa.create_engine('sqlite://' , echo=False)
engine = sa.create_engine("postgresql://db:db@localhost/db?port=5432", echo=False)
conn = engine.connect()

lTable = pd.DataFrame(X);
lTable.columns = ['Feature_0', 'Feature_1', 'Feature_2', 'Feature_3']
lTable['KEY'] = range(lTable.shape[0])
lTable.to_sql("INPUT_DATA" , conn,   if_exists='replace', index=False)


In [8]:
sql_output = pd.read_sql(lSQL , conn);


In [9]:
sql_output.sample(12, random_state=1960)

Unnamed: 0,KEY,Score_0,Score_1,Score_2,Proba_0,Proba_1,Proba_2,LogProba_0,LogProba_1,LogProba_2,Decision
114,114,,,,0.001005,0.081843,0.917152,-6.903167,-2.502953,-0.086482,2
74,74,,,,0.041736,0.804548,0.153715,-3.176385,-0.217474,-1.872653,1
9,9,,,,0.936641,0.062631,0.000727,-0.065455,-2.770491,-7.226145,0
88,88,,,,0.05478,0.702697,0.242523,-2.904422,-0.35283,-1.416659,1
25,25,,,,0.908777,0.090044,0.001178,-0.095655,-2.407455,-6.743661,0
5,5,,,,0.964083,0.03551,0.000407,-0.036578,-3.337934,-7.806831,0
48,48,,,,0.971008,0.028746,0.000247,-0.029421,-3.54927,-8.307576,0
117,117,,,,0.001113,0.186396,0.812491,-6.800784,-1.679883,-0.20765,2
83,83,,,,0.005971,0.335916,0.658113,-5.120849,-1.090895,-0.418378,2
105,105,,,,0.0005,0.148719,0.85078,-7.600008,-1.905695,-0.161601,2


In [10]:
sql_output.Decision.value_counts()

2    53
0    50
1    47
Name: Decision, dtype: int64

## Scikit-learn Prediction

In [11]:
skl_outputs = pd.DataFrame()
skl_output_key = pd.DataFrame(list(range(X.shape[0])), columns=['KEY']);
skl_output_score = pd.DataFrame(columns=['Score_0', 'Score_1', 'Score_2']);
skl_output_proba = pd.DataFrame(clf.predict_proba(X), columns=['Proba_0', 'Proba_1', 'Proba_2'])
skl_output_log_proba = pd.DataFrame(clf.predict_log_proba(X), columns=['LogProba_0', 'LogProba_1', 'LogProba_2'])
skl_output_decision = pd.DataFrame(clf.predict(X), columns=['Decision'])
skl_output = pd.concat([skl_output_key, skl_output_score, skl_output_proba, skl_output_log_proba, skl_output_decision] , axis=1)
skl_output.sample(12, random_state=1960)


Unnamed: 0,KEY,Score_0,Score_1,Score_2,Proba_0,Proba_1,Proba_2,LogProba_0,LogProba_1,LogProba_2,Decision
114,114,,,,0.001005,0.081843,0.917152,-6.903167,-2.502953,-0.086482,2
74,74,,,,0.041736,0.804548,0.153715,-3.176385,-0.217474,-1.872653,1
9,9,,,,0.936641,0.062631,0.000727,-0.065455,-2.770491,-7.226145,0
88,88,,,,0.05478,0.702697,0.242523,-2.904422,-0.35283,-1.416659,1
25,25,,,,0.908777,0.090044,0.001178,-0.095655,-2.407455,-6.743661,0
5,5,,,,0.964083,0.03551,0.000407,-0.036578,-3.337934,-7.806831,0
48,48,,,,0.971008,0.028746,0.000247,-0.029421,-3.54927,-8.307576,0
117,117,,,,0.001113,0.186396,0.812491,-6.800784,-1.679883,-0.20765,2
83,83,,,,0.005971,0.335916,0.658113,-5.120849,-1.090895,-0.418378,2
105,105,,,,0.0005,0.148719,0.85078,-7.600008,-1.905695,-0.161601,2


## Comparing the SQL and Scikit-learn Predictions

In [12]:
sql_skl_join = skl_output.join(sql_output , how='left', on='KEY', lsuffix='_skl', rsuffix='_sql')

In [13]:
sql_skl_join.sample(12)

Unnamed: 0,KEY_skl,Score_0_skl,Score_1_skl,Score_2_skl,Proba_0_skl,Proba_1_skl,Proba_2_skl,LogProba_0_skl,LogProba_1_skl,LogProba_2_skl,...,Score_0_sql,Score_1_sql,Score_2_sql,Proba_0_sql,Proba_1_sql,Proba_2_sql,LogProba_0_sql,LogProba_1_sql,LogProba_2_sql,Decision_sql
82,82,,,,0.058006,0.782234,0.15976,-2.847214,-0.245602,-1.83408,...,,,,0.058006,0.782234,0.15976,-2.847214,-0.245602,-1.83408,1
134,134,,,,0.003245,0.282695,0.714059,-5.730564,-1.263386,-0.336789,...,,,,0.003245,0.282695,0.714059,-5.730564,-1.263386,-0.336789,2
38,38,,,,0.936472,0.062423,0.001105,-0.065636,-2.773828,-6.807614,...,,,,0.936472,0.062423,0.001105,-0.065636,-2.773828,-6.807614,0
119,119,,,,0.004457,0.343226,0.652317,-5.41323,-1.069367,-0.427225,...,,,,0.004457,0.343226,0.652317,-5.41323,-1.069367,-0.427225,2
128,128,,,,0.001129,0.134706,0.864165,-6.786466,-2.004659,-0.145992,...,,,,0.001129,0.134706,0.864165,-6.786466,-2.004659,-0.145992,2
39,39,,,,0.954771,0.044756,0.000473,-0.046284,-3.10652,-7.656698,...,,,,0.954771,0.044756,0.000473,-0.046284,-3.10652,-7.656698,0
28,28,,,,0.960571,0.039067,0.000362,-0.040227,-3.242467,-7.924658,...,,,,0.960571,0.039067,0.000362,-0.040227,-3.242467,-7.924658,0
93,93,,,,0.089788,0.762036,0.148176,-2.4103,-0.271761,-1.909357,...,,,,0.089788,0.762036,0.148176,-2.4103,-0.271761,-1.909357,1
100,100,,,,0.00038,0.050433,0.949187,-7.87414,-2.987119,-0.052149,...,,,,0.00038,0.050433,0.949187,-7.87414,-2.987119,-0.052149,2
130,130,,,,0.001388,0.269923,0.728689,-6.579582,-1.309619,-0.316509,...,,,,0.001388,0.269923,0.728689,-6.579582,-1.309619,-0.316509,2


In [14]:
condition = (sql_skl_join.Decision_sql != sql_skl_join.Decision_skl)
sql_skl_join[condition]


Unnamed: 0,KEY_skl,Score_0_skl,Score_1_skl,Score_2_skl,Proba_0_skl,Proba_1_skl,Proba_2_skl,LogProba_0_skl,LogProba_1_skl,LogProba_2_skl,...,Score_0_sql,Score_1_sql,Score_2_sql,Proba_0_sql,Proba_1_sql,Proba_2_sql,LogProba_0_sql,LogProba_1_sql,LogProba_2_sql,Decision_sql
