In [1]:
import os, numpy as np
import pandas as pd

# Select the Keras backend *before* keras is imported; the import below then
# reports "Using TensorFlow backend.".
os.environ["KERAS_BACKEND"] = "tensorflow"

import keras 
from sklearn import datasets

from sklearn.model_selection import train_test_split

from keras.utils import np_utils


# Use double precision as the default Keras float type (the model's input and
# output tensors are printed with dtype=float64 further down).
keras.backend.set_floatx('float64')

# NOTE(review): duplicate of the import a few lines above; np_utils is not
# referenced anywhere in the visible cells.
from keras.utils import np_utils


def set_reproducible():
    """Seed every random number generator this notebook relies on.

    The same seed (1960) is applied to Python's hash seed environment
    variable, the ``random`` module, NumPy and TensorFlow.  A TensorFlow
    session limited to one intra-op and one inter-op thread is also created
    on the default graph.

    The session is intentionally *not* registered with Keras: the
    ``K.set_session`` call is left commented out, exactly as before.
    NOTE(review): presumably creating the session is kept for its effect on
    TensorFlow's thread configuration -- confirm before removing it.
    """
    import os
    import random as rn
    import tensorflow as tf
    from keras import backend as K

    seed = 1960

    # Seed the pure-Python and NumPy generators first, then TensorFlow.
    os.environ['PYTHONHASHSEED'] = str(seed)
    rn.seed(seed)
    np.random.seed(seed)
    tf.set_random_seed(seed)

    single_thread_conf = tf.ConfigProto(
        intra_op_parallelism_threads=1,
        inter_op_parallelism_threads=1,
    )
    sess = tf.Session(graph=tf.get_default_graph(), config=single_thread_conf)
    # K.set_session(sess)

# Seed all random number generators once, before any model is built or fitted.
set_reproducible()


Using TensorFlow backend.


# Build a Keras Model

In [2]:

def create_model():
    """Build and compile the small feed-forward classifier used in this notebook.

    Architecture: 4 input features -> Dense(5, relu) -> Dropout(0.3) ->
    Dense(3) -> softmax.  The model is compiled with the categorical
    cross-entropy loss and the Adam optimizer.

    Returns:
        The compiled, untrained ``Sequential`` model.
    """
    # Imports stay local to the function, as in the original; presumably so the
    # builder is self-contained when it is handed to KerasClassifier as
    # ``build_fn``.  Layers come from the public ``keras.layers`` namespace.
    from keras.models import Sequential
    from keras.layers import Dense, Activation, Dropout

    model = Sequential()
    model.add(Dense(5, input_shape=(4,), activation='relu'))
    model.add(Dropout(0.3))
    model.add(Dense(3))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam')
    return model



In [3]:
# Load the iris data set and hold out 20% of the rows for testing; the split
# uses a fixed random_state so it is repeatable.
iris = datasets.load_iris()
train_X, test_X, train_y, test_y = train_test_split(
    iris.data,
    iris.target,
    train_size=0.8,
    test_size=0.2,
    random_state=1960,
)


In [4]:

from keras.wrappers.scikit_learn import KerasClassifier

# Wrap the model builder in the scikit-learn style classifier and train it
# silently for 12 epochs on the training split.
clf = KerasClassifier(
    build_fn=create_model,
    epochs=12,
    verbose=0,
)

print(train_X.shape, train_y.shape)
clf.fit(train_X, train_y, verbose=0)

W0704 20:50:37.434734 139854976644928 deprecation.py:506] From /home/antoine/.local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:3445: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


(120, 4) (120,)


<keras.callbacks.History at 0x7f320fb86c50>

In [5]:
# Debug aid: dump the attribute dictionary of the Keras model held by the
# fitted wrapper.
print(clf.model.__dict__)

{'name': 'sequential_1', 'trainable': True, '_is_compiled': True, '_expects_training_arg': False, '_initial_weights': None, 'supports_masking': False, 'optimizer': <keras.optimizers.Adam object at 0x7f320fbaec50>, '_updates': [], '_losses': [], '_per_input_losses': {}, '_per_input_updates': {}, '_layers': [<keras.engine.input_layer.InputLayer object at 0x7f326d803cf8>, <keras.layers.core.Dense object at 0x7f326d803860>, <keras.layers.core.Dropout object at 0x7f326d803518>, <keras.layers.core.Dense object at 0x7f326d7dee80>, <keras.layers.core.Activation object at 0x7f320fbaee80>], '_outbound_nodes': [], '_inbound_nodes': [<keras.engine.base_layer.Node object at 0x7f326d803940>], '_is_graph_network': True, '_uses_inputs_arg': True, 'outputs': [<tf.Tensor 'activation_1/Softmax:0' shape=(?, 3) dtype=float64>], 'inputs': [<tf.Tensor 'dense_1_input:0' shape=(?, 4) dtype=float64>], '_built': True, '_build_input_shape': None, '_compute_previous_mask': True, '_input_layers': [<keras.engine.inp

In [6]:
print(test_X.shape)
# Predict for the first held-out sample only; the slice keeps the input
# two-dimensional with shape (1, 4).
preds = clf.predict(test_X[0:1, :])
print(preds)


(30, 4)
[2]


# Generate SQL Code from the Model

In [7]:
# NOTE: the name ``pickle`` in the cells below refers to dill, not to the
# standard-library pickle module.
import json, requests, base64, dill as pickle, sys



# Raise the interpreter recursion limit.
# NOTE(review): presumably needed so that serialising the fitted classifier
# does not hit RecursionError -- confirm.
sys.setrecursionlimit(200000)
# Explicitly turn off dill's 'recurse' setting.
pickle.settings['recurse'] = False

def test_ws_sql_gen(pickle_data):
    WS_URL="https://sklearn2sql.herokuapp.com/model"
    b64_data = base64.b64encode(pickle_data).decode('utf-8')
    data={"Name":"model1", "PickleData":b64_data , "SQLDialect":"postgresql"}
    r = requests.post(WS_URL, json=data)
    # print(r.__dict__)
    content = r.json()
    # print(content)
    lSQL = content["model"]["SQLGenrationResult"][0]["SQL"]
    return lSQL;



In [8]:
# Serialise the fitted classifier (``pickle`` is dill in this notebook) and ask
# the web service to translate it into SQL.
pickle_data = pickle.dumps(clf)
lSQL = test_ws_sql_gen(pickle_data)
# print(lSQL[0:2000])

In [9]:
# Show the full SQL text returned by the service.
print(lSQL)

WITH keras_input AS 
(SELECT "ADS"."KEY" AS "KEY", "ADS"."Feature_0" AS "Feature_0", "ADS"."Feature_1" AS "Feature_1", "ADS"."Feature_2" AS "Feature_2", "ADS"."Feature_3" AS "Feature_3" 
FROM "INPUT_DATA" AS "ADS"), 
keras_input_1 AS 
(SELECT keras_input."KEY" AS "KEY", keras_input."Feature_0" AS "Feature_0", keras_input."Feature_1" AS "Feature_1", keras_input."Feature_2" AS "Feature_2", keras_input."Feature_3" AS "Feature_3" 
FROM keras_input), 
layer_dense_1 AS 
(SELECT keras_input_1."KEY" AS "KEY", 0.03819891008326152 + -0.10551810947064454 * keras_input_1."Feature_0" + 0.6869137954445582 * keras_input_1."Feature_1" + 0.07362650595219875 * keras_input_1."Feature_2" + -0.12291325345623008 * keras_input_1."Feature_3" AS output_1, -0.004487723888804159 + 0.4496606551243778 * keras_input_1."Feature_0" + -0.0020958056354704584 * keras_input_1."Feature_1" + -0.1450205413490414 * keras_input_1."Feature_2" + 0.8403572775801047 * keras_input_1."Feature_3" AS output_2, -0.04482508398542805 + 

# Execute the SQL Code

In [10]:
# Save the dataset in a database table so the generated SQL can be run on it.

import sqlalchemy as sa

# The connection URL can be overridden through the environment so that real
# credentials need not be written into the notebook; the default is the
# original local database.
#engine = sa.create_engine('sqlite://' , echo=False)
db_url = os.environ.get("SKLEARN2SQL_DB_URL", "postgresql://db:db@localhost/db?port=5432")
engine = sa.create_engine(db_url, echo=False)
conn = engine.connect()

# Table and column names match the identifiers referenced by the generated
# SQL ("INPUT_DATA", "KEY", "Feature_0" ... "Feature_3").
lTable = pd.DataFrame(iris.data, columns=['Feature_0', 'Feature_1', 'Feature_2', 'Feature_3'])
lTable['TGT'] = iris.target
lTable['KEY'] = range(iris.data.shape[0])
lTable.to_sql("INPUT_DATA", conn, if_exists='replace', index=False)


In [11]:
# Run the generated SQL and order the result by KEY with a fresh 0..n-1 index,
# then release the database connection.
sql_output = (
    pd.read_sql(lSQL, conn)
    .sort_values(by='KEY')
    .reset_index(drop=True)
)
conn.close()

In [12]:
# Spot-check a seeded random sample of the SQL-side predictions; the Keras-side
# sample further down uses the same seed.
sql_output.sample(12, random_state=1960)

Unnamed: 0,KEY,Score_0,Score_1,Score_2,Proba_0,Proba_1,Proba_2,LogProba_0,LogProba_1,LogProba_2,Decision,DecisionProba
114,114,,,,0.335153,0.035072,0.629774,-1.093168,-3.350341,-0.462394,2,0.629774
74,74,,,,0.478754,0.032302,0.488944,-0.736569,-3.432626,-0.715507,2,0.488944
9,9,,,,0.548498,0.050943,0.40056,-0.600573,-2.977053,-0.914892,0,0.548498
88,88,,,,0.404438,0.046393,0.549169,-0.905257,-3.070605,-0.599349,2,0.549169
25,25,,,,0.541715,0.049148,0.409138,-0.613016,-3.012926,-0.893704,0,0.541715
5,5,,,,0.545648,0.035097,0.419255,-0.605781,-3.349647,-0.869276,0,0.545648
48,48,,,,0.562328,0.038155,0.399517,-0.57567,-3.266102,-0.917499,0,0.562328
117,117,,,,0.367966,0.0207,0.611334,-0.999765,-3.877599,-0.492112,2,0.611334
83,83,,,,0.412955,0.042532,0.544513,-0.884417,-3.157492,-0.607863,2,0.544513
105,105,,,,0.441827,0.022182,0.535991,-0.816838,-3.808478,-0.623637,2,0.535991


# Keras Prediction

In [13]:
# Assemble the Keras-side predictions in the same column layout as the SQL
# output: KEY, Score_*, Proba_*, LogProba_*, Decision.
keras_output_key = pd.DataFrame(list(range(iris.data.shape[0])), columns=['KEY'])
# The Score_* columns are created without data, so they come out empty (NaN)
# after the concat, as in the SQL output.
keras_output_score = pd.DataFrame(columns=['Score_0', 'Score_1', 'Score_2'])
keras_output_proba = pd.DataFrame(clf.predict_proba(iris.data), columns=['Proba_0', 'Proba_1', 'Proba_2'])
# Element-wise log of every probability column: Proba_i -> LogProba_i.
keras_output_logproba = np.log(keras_output_proba).add_prefix('Log')
keras_output = pd.concat(
    [keras_output_key, keras_output_score, keras_output_proba, keras_output_logproba],
    axis=1,
)
keras_output['Decision'] = clf.predict(iris.data)
keras_output.sample(12, random_state=1960)


Unnamed: 0,KEY,Score_0,Score_1,Score_2,Proba_0,Proba_1,Proba_2,LogProba_0,LogProba_1,LogProba_2,Decision
114,114,,,,0.335153,0.035072,0.629774,-1.093168,-3.350341,-0.462394,2
74,74,,,,0.478754,0.032302,0.488944,-0.736569,-3.432626,-0.715507,2
9,9,,,,0.548498,0.050943,0.40056,-0.600573,-2.977053,-0.914892,0
88,88,,,,0.404438,0.046393,0.549169,-0.905257,-3.070605,-0.599349,2
25,25,,,,0.541715,0.049148,0.409138,-0.613016,-3.012926,-0.893704,0
5,5,,,,0.545648,0.035097,0.419255,-0.605781,-3.349647,-0.869276,0
48,48,,,,0.562328,0.038155,0.399517,-0.57567,-3.266102,-0.917499,0
117,117,,,,0.367966,0.0207,0.611334,-0.999765,-3.877599,-0.492112,2
83,83,,,,0.412955,0.042532,0.544513,-0.884417,-3.157492,-0.607863,2
105,105,,,,0.441827,0.022182,0.535991,-0.816838,-3.808478,-0.623637,2


# Comparing the SQL and Keras Predictions

In [14]:
# NOTE: DataFrame.join(on='KEY') matches keras_output['KEY'] against the
# *index* of sql_output, not against its KEY column.  The rows line up only
# because sql_output was sorted by KEY and re-indexed from 0, and KEY itself
# was generated as 0..n-1 when the table was written.
sql_keras_join = keras_output.join(sql_output , how='left', on='KEY', lsuffix='_keras', rsuffix='_sql')


In [15]:
# Preview the first rows of the joined frame (Keras columns next to SQL columns).
sql_keras_join.head(12)

Unnamed: 0,KEY_keras,Score_0_keras,Score_1_keras,Score_2_keras,Proba_0_keras,Proba_1_keras,Proba_2_keras,LogProba_0_keras,LogProba_1_keras,LogProba_2_keras,...,Score_1_sql,Score_2_sql,Proba_0_sql,Proba_1_sql,Proba_2_sql,LogProba_0_sql,LogProba_1_sql,LogProba_2_sql,Decision_sql,DecisionProba
0,0,,,,0.55745,0.041788,0.400762,-0.584382,-3.175147,-0.914388,...,,,0.55745,0.041788,0.400762,-0.584382,-3.175147,-0.914388,0,0.55745
1,1,,,,0.546366,0.048863,0.40477,-0.604466,-3.01873,-0.904435,...,,,0.546366,0.048863,0.40477,-0.604466,-3.01873,-0.904435,0,0.546366
2,2,,,,0.543171,0.050627,0.406202,-0.61033,-2.98328,-0.900905,...,,,0.543171,0.050627,0.406202,-0.61033,-2.98328,-0.900905,0,0.543171
3,3,,,,0.52906,0.056298,0.414642,-0.636653,-2.877101,-0.880339,...,,,0.52906,0.056298,0.414642,-0.636653,-2.877101,-0.880339,0,0.52906
4,4,,,,0.553694,0.043079,0.403227,-0.591143,-3.144717,-0.908256,...,,,0.553694,0.043079,0.403227,-0.591143,-3.144717,-0.908256,0,0.553694
5,5,,,,0.545648,0.035097,0.419255,-0.605781,-3.349647,-0.869276,...,,,0.545648,0.035097,0.419255,-0.605781,-3.349647,-0.869276,0,0.545648
6,6,,,,0.529159,0.051147,0.419694,-0.636466,-2.973051,-0.86823,...,,,0.529159,0.051147,0.419694,-0.636466,-2.973051,-0.86823,0,0.529159
7,7,,,,0.548251,0.045395,0.406354,-0.601022,-3.092352,-0.90053,...,,,0.548251,0.045395,0.406354,-0.601022,-3.092352,-0.90053,0,0.548251
8,8,,,,0.523391,0.061531,0.415078,-0.647427,-2.788207,-0.879289,...,,,0.523391,0.061531,0.415078,-0.647427,-2.788207,-0.879289,0,0.523391
9,9,,,,0.548498,0.050943,0.40056,-0.600573,-2.977053,-0.914892,...,,,0.548498,0.050943,0.40056,-0.600573,-2.977053,-0.914892,0,0.548498


In [16]:
# Rows on which the SQL decision and the Keras decision disagree; an empty
# frame means the two agree on every row.
condition = sql_keras_join['Decision_sql'].ne(sql_keras_join['Decision_keras'])
sql_keras_join.loc[condition]

Unnamed: 0,KEY_keras,Score_0_keras,Score_1_keras,Score_2_keras,Proba_0_keras,Proba_1_keras,Proba_2_keras,LogProba_0_keras,LogProba_1_keras,LogProba_2_keras,...,Score_1_sql,Score_2_sql,Proba_0_sql,Proba_1_sql,Proba_2_sql,LogProba_0_sql,LogProba_1_sql,LogProba_2_sql,Decision_sql,DecisionProba
