In [1]:
# Standard numeric / data-frame stack.
import os, numpy as np
import pandas as pd

# Select the Keras backend; this assignment is placed ahead of the `import keras` below.
os.environ["KERAS_BACKEND"] = "tensorflow"

import keras 
from sklearn import datasets

from sklearn.model_selection import train_test_split

from keras.utils import np_utils


# Use float64 as the Keras float type.
keras.backend.set_floatx('float64')

# NOTE(review): repeats the np_utils import from a few lines above.
from keras.utils import np_utils


def set_reproducible(seed=1960):
    """Seed the random number generators used by this notebook.

    The same seed is applied to Python's ``random`` module, NumPy's global
    generator and TensorFlow's graph-level seed, and is exported as
    ``PYTHONHASHSEED``. A TensorFlow session restricted to one intra-op and
    one inter-op thread is also created on the default graph.

    Parameters
    ----------
    seed : int, optional
        Seed shared by every generator. Defaults to 1960, the value that was
        previously hard-coded.

    Returns
    -------
    None
    """
    import os
    import random as rn
    import tensorflow as tf

    # NOTE: Python reads PYTHONHASHSEED at interpreter start-up, so setting it
    # here cannot change hashing in the already-running kernel.
    os.environ['PYTHONHASHSEED'] = str(seed)
    rn.seed(seed)
    np.random.seed(seed)
    tf.set_random_seed(seed)

    session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
    sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
    # NOTE(review): the session is created but never handed to Keras; the
    # call below was already disabled. Whether the single-thread configuration
    # influences training in this state is unclear - confirm before relying on it.
    # To re-enable:
    #     from keras import backend as K
    #     K.set_session(sess)

# Seed the random number generators before any model is built or trained.
set_reproducible()


Using TensorFlow backend.


# Build a Keras Model

In [2]:

def create_model(n_features=4, n_classes=3, hidden_units=5, dropout_rate=0.3):
    """Build and compile a small feed-forward softmax classifier.

    Architecture: Dense(hidden_units, relu) -> Dropout(dropout_rate)
    -> Dense(n_classes) -> softmax, compiled with categorical cross-entropy
    loss and the Adam optimizer.

    Parameters
    ----------
    n_features : int, optional
        Number of input columns. Defaults to 4.
    n_classes : int, optional
        Number of output classes. Defaults to 3.
    hidden_units : int, optional
        Width of the hidden Dense layer. Defaults to 5.
    dropout_rate : float, optional
        Rate passed to the Dropout layer. Defaults to 0.3.

    Returns
    -------
    The compiled ``Sequential`` model. Calling the function with no arguments
    builds exactly the network that was previously hard-coded.
    """
    # Imports stay local so the function can be used as a self-contained
    # build function; the previously imported np_utils was never used.
    from keras.models import Sequential
    from keras.layers.core import Dense, Activation, Dropout

    model = Sequential()
    model.add(Dense(hidden_units, input_shape=(n_features,), activation='relu'))
    model.add(Dropout(dropout_rate))
    model.add(Dense(n_classes))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam')
    return model



In [3]:
# Load the iris dataset and hold out 20% of the rows for testing.
iris = datasets.load_iris()
train_X, test_X, train_y, test_y = train_test_split(
    iris.data,
    iris.target,
    train_size=0.8,
    test_size=0.2,
    random_state=1960,  # fixed so the split is the same on every run
)


In [4]:

from keras.wrappers.scikit_learn import KerasClassifier

# Wrap the model factory in a scikit-learn style estimator; create_model is
# passed as the wrapper's build_fn and training runs for 12 epochs.
clf = KerasClassifier(build_fn=create_model, epochs=12, verbose=0)

# Show the training set dimensions, then fit silently (verbose=0).
print(train_X.shape , train_y.shape)
clf.fit(train_X, train_y, verbose=0)

(120, 4) (120,)


<keras.callbacks.History at 0x7fc0ae37b208>

In [5]:
# Dump the attribute dictionary of the fitted Keras model for inspection.
print(vars(clf.model))

{'name': 'sequential_1', 'trainable': True, '_is_compiled': True, '_expects_training_arg': False, '_initial_weights': None, 'supports_masking': False, 'optimizer': <keras.optimizers.Adam object at 0x7fc0727f3128>, '_updates': [], '_losses': [], '_per_input_losses': {}, '_per_input_updates': {}, '_layers': [<keras.engine.input_layer.InputLayer object at 0x7fc076a93278>, <keras.layers.core.Dense object at 0x7fc076a87b38>, <keras.layers.core.Dropout object at 0x7fc076a93358>, <keras.layers.core.Dense object at 0x7fc076a931d0>, <keras.layers.core.Activation object at 0x7fc076a93320>], '_outbound_nodes': [], '_inbound_nodes': [<keras.engine.base_layer.Node object at 0x7fc0d5d03160>], '_is_graph_network': True, '_uses_inputs_arg': True, 'outputs': [<tf.Tensor 'activation_1/Softmax:0' shape=(?, 3) dtype=float64>], 'inputs': [<tf.Tensor 'dense_1_input:0' shape=(?, 4) dtype=float64>], '_built': True, '_compute_previous_mask': True, '_input_layers': [<keras.engine.input_layer.InputLayer object a

In [6]:
print(test_X.shape)
# Predict the class of the first held-out sample; slicing with [:1] keeps the
# input two-dimensional (one row, all feature columns).
preds = clf.predict(test_X[:1])
print(preds)


(30, 4)
[2]


# Generate SQL Code from the Model

In [7]:
import json, requests, base64, dill as pickle, sys



# `pickle` in this cell is dill (imported above as `dill as pickle`).
# NOTE(review): the recursion limit is raised, presumably so that deeply nested
# model objects can be serialized - confirm it is still needed.
sys.setrecursionlimit(200000)
pickle.settings['recurse'] = False

# No luck with the web service: pickling TensorFlow and/or Keras objects does not seem to be a priority.
# There are a lot of GitHub issues in the two projects when searching for the keyword "pickle".

def test_ws_sql_gen(pickle_data):
    WS_URL="http://localhost:1888/model"
    b64_data = base64.b64encode(pickle_data).decode('utf-8')
    data={"Name":"model1", "PickleData":b64_data , "SQLDialect":"postgresql"}
    r = requests.post(WS_URL, json=data)
    print(r.__dict__)
    content = r.json()
    # print(content)
    lSQL = content["model"]["SQLGenrationResult"][0]["SQL"]
    return lSQL;



def test_sql_gen(keras_regressor , metadata):
    import sklearn2sql.PyCodeGenerator as codegen
    cg1 = codegen.cAbstractCodeGenerator();
    cg1.mOptions.mActivateMaterialization = False
    lSQL = cg1.generateCodeWithMetadata(clf, metadata, dsn = None, dialect = "postgresql");
    return lSQL[1]


In [8]:
# Disabled: the web-service route needs a pickled model, which does not work (see the note in the previous cell).
# pickle_data = pickle.dumps(clf)
# lSQL = test_ws_sql_gen(pickle_data)
# print(lSQL[0:2000])

In [9]:
# Describe the table the generated SQL will read: feature columns, target
# column, primary key and table name.
lMetaData = {
    # column name adaptation: drop the " (cm)" unit suffix in favour of "_cm"
    'features': [col.replace(" (cm)" , "_cm") for col in iris.feature_names],
    'targets': ['TGT'],
    'primary_key': 'KEY',
    'table': 'iris',
}

lSQL = test_sql_gen(clf , lMetaData)


cGenerationWrapperFactory::createWrapper() <class 'keras.wrappers.scikit_learn.KerasClassifier'>
cClassifierMixin_CodeGenWrapper::setObject <class 'keras.wrappers.scikit_learn.KerasClassifier'>
BACKEND_DIALECT postgresql
CREATING_DATABASE_BACKEND_DSN_DIALECT 1.2.7 None postgresql
KERAS_GENERATE_EXPRESSION_START <class 'sqlalchemy.sql.selectable.Alias'> <class 'sklearn2sql.GenericModels.Keras.cSerializedKeras'>
{'mKerasData': <keras.wrappers.scikit_learn.KerasClassifier object at 0x7fc076a87828>, 'mFeatureNames': ['sepal length_cm', 'sepal width_cm', 'petal length_cm', 'petal width_cm'], 'mClasses': array([0, 1, 2]), 'mMode': 'Classification'}
{'build_fn': <function create_model at 0x7fc076a79f28>, 'sk_params': {'epochs': 12, 'verbose': 0}, 'classes_': array([0, 1, 2]), 'n_classes_': 3, 'model': <keras.engine.sequential.Sequential object at 0x7fc076a87ac8>}
GENERATING_LAYER 0 dense_1 <class 'keras.layers.core.Dense'> {'input_spec': InputSpec(min_ndim=2, axes={-1: 4}), 'supports_masking'

In [10]:
# Show the SQL text produced by the code generator.
print(lSQL)

WITH keras_input AS 
(SELECT "ADS"."KEY" AS "KEY", "ADS"."sepal length_cm" AS "sepal length_cm", "ADS"."sepal width_cm" AS "sepal width_cm", "ADS"."petal length_cm" AS "petal length_cm", "ADS"."petal width_cm" AS "petal width_cm" 
FROM iris AS "ADS"), 
keras_input_1 AS 
(SELECT keras_input."KEY" AS "KEY", keras_input."sepal length_cm" AS "sepal length_cm", keras_input."sepal width_cm" AS "sepal width_cm", keras_input."petal length_cm" AS "petal length_cm", keras_input."petal width_cm" AS "petal width_cm" 
FROM keras_input), 
layer_dense_1 AS 
(SELECT keras_input_1."KEY" AS "KEY", 0.038228313704767404 + -0.10549971165225797 * keras_input_1."sepal length_cm" + 0.687021141477052 * keras_input_1."sepal width_cm" + 0.0736250137994688 * keras_input_1."petal length_cm" + -0.1229013269524524 * keras_input_1."petal width_cm" AS output_1, -0.004227162764735863 + 0.4498965733786842 * keras_input_1."sepal length_cm" + -0.0016264613446735614 * keras_input_1."sepal width_cm" + -0.1449487756556122 * 

# Execute the SQL Code

In [11]:
# save the dataset in a database table


import sqlalchemy as sa

# Alternative in-memory backend, kept for reference:
#engine = sa.create_engine('sqlite://' , echo=False)
# NOTE(review): the connection URL below embeds credentials directly.
engine = sa.create_engine("postgresql://db:db@localhost/db?port=5432", echo=False)
conn = engine.connect()

# One row per iris sample: the renamed feature columns first, then the class
# label (TGT) and a running row number used as the key (KEY).
lTable = pd.DataFrame(iris.data, columns=lMetaData['features'])
lTable['TGT'] = iris.target
lTable['KEY'] = range(len(iris.data))
lTable.to_sql(name=lMetaData['table'], con=conn, if_exists='replace', index=False)

In [12]:
# Run the generated SQL against the database. The connection is released in
# `finally` so it is not leaked when the query raises.
try:
    sql_output = pd.read_sql(lSQL , conn)
finally:
    conn.close()

In [13]:
# Display a random sample of the SQL predictions; the fixed random_state keeps
# the selection identical to the Keras sample shown further down.
sql_output.sample(12, random_state=1960)

Unnamed: 0,KEY,Score_0,Score_1,Score_2,Proba_0,Proba_1,Proba_2,LogProba_0,LogProba_1,LogProba_2,Decision,DecisionProba
114,114,,,,0.33468,0.035035,0.630285,-1.094581,-3.351409,-0.461583,2,0.630285
74,74,,,,0.478222,0.032277,0.489501,-0.73768,-3.433394,-0.714369,2,0.489501
9,9,,,,0.547938,0.05093,0.401132,-0.601593,-2.977311,-0.913464,0,0.547938
88,88,,,,0.403941,0.046353,0.549706,-0.906485,-3.071476,-0.598372,2,0.549706
25,25,,,,0.541158,0.049134,0.409708,-0.614044,-3.013206,-0.892311,0,0.541158
5,5,,,,0.544959,0.035085,0.419957,-0.607046,-3.349985,-0.867604,0,0.544959
48,48,,,,0.561679,0.038144,0.400177,-0.576825,-3.266375,-0.915849,0,0.561679
117,117,,,,0.367306,0.020673,0.612021,-1.001559,-3.878929,-0.490989,2,0.612021
83,83,,,,0.41246,0.042495,0.545045,-0.885616,-3.158363,-0.606888,2,0.545045
105,105,,,,0.441219,0.02216,0.536622,-0.818214,-3.809486,-0.622462,2,0.536622


# Keras Prediction

In [14]:
# Rebuild, straight from the Keras classifier, a frame laid out like the SQL
# output: KEY, Score_*, Proba_*, LogProba_*, Decision.
keras_output_key = pd.DataFrame(list(range(iris.data.shape[0])), columns=['KEY'])
# The Score_* columns are created empty, so they come out as NaN after the concat.
keras_output_score = pd.DataFrame(columns=['Score_0', 'Score_1', 'Score_2'])
keras_output_proba = pd.DataFrame(clf.predict_proba(iris.data), columns=['Proba_0', 'Proba_1', 'Proba_2'])
keras_output = pd.concat([keras_output_key, keras_output_score, keras_output_proba] , axis=1)
# Derive one LogProba_<k> column per probability column instead of repeating
# the class labels by hand.
for proba_col in keras_output_proba.columns:
    keras_output['Log' + proba_col] = np.log(keras_output_proba[proba_col])
keras_output['Decision'] = clf.predict(iris.data)
keras_output.sample(12, random_state=1960)


Unnamed: 0,KEY,Score_0,Score_1,Score_2,Proba_0,Proba_1,Proba_2,LogProba_0,LogProba_1,LogProba_2,Decision
114,114,,,,0.33468,0.035035,0.630285,-1.094581,-3.351409,-0.461583,2
74,74,,,,0.478222,0.032277,0.489501,-0.73768,-3.433394,-0.714369,2
9,9,,,,0.547938,0.05093,0.401132,-0.601593,-2.977311,-0.913464,0
88,88,,,,0.403941,0.046353,0.549706,-0.906485,-3.071476,-0.598372,2
25,25,,,,0.541158,0.049134,0.409708,-0.614044,-3.013206,-0.892311,0
5,5,,,,0.544959,0.035085,0.419957,-0.607046,-3.349985,-0.867604,0
48,48,,,,0.561679,0.038144,0.400177,-0.576825,-3.266375,-0.915849,0
117,117,,,,0.367306,0.020673,0.612021,-1.001559,-3.878929,-0.490989,2
83,83,,,,0.41246,0.042495,0.545045,-0.885616,-3.158363,-0.606888,2
105,105,,,,0.441219,0.02216,0.536622,-0.818214,-3.809486,-0.622462,2


# Comparing the SQL and Keras Predictions

In [15]:
# DataFrame.join(on='KEY') matches the caller's KEY column against the *index*
# of the other frame, not against its KEY column. The SQL result carries a
# plain positional index and the query has no ORDER BY, so the right-hand
# frame is indexed by its own KEY values first; rows are then paired by key
# whatever order the database returned them in. The KEY column itself is kept,
# so the joined frame still has both KEY_keras and KEY_sql.
sql_keras_join = keras_output.join(
    sql_output.set_index(sql_output['KEY'].values),
    how='left', on='KEY', lsuffix='_keras', rsuffix='_sql',
)


In [16]:
# Display the first rows of the joined frame, Keras columns next to SQL columns.
sql_keras_join.head(12)

Unnamed: 0,KEY_keras,Score_0_keras,Score_1_keras,Score_2_keras,Proba_0_keras,Proba_1_keras,Proba_2_keras,LogProba_0_keras,LogProba_1_keras,LogProba_2_keras,...,Score_1_sql,Score_2_sql,Proba_0_sql,Proba_1_sql,Proba_2_sql,LogProba_0_sql,LogProba_1_sql,LogProba_2_sql,Decision_sql,DecisionProba
0,0,,,,0.556833,0.041777,0.40139,-0.585489,-3.175414,-0.912822,...,,,0.556833,0.041777,0.40139,-0.585489,-3.175414,-0.912822,0,0.556833
1,1,,,,0.545817,0.04885,0.405333,-0.605472,-3.018995,-0.903046,...,,,0.545817,0.04885,0.405333,-0.605472,-3.018995,-0.903046,0,0.545817
2,2,,,,0.542607,0.050613,0.40678,-0.611371,-2.983548,-0.899482,...,,,0.542607,0.050613,0.40678,-0.611371,-2.983548,-0.899482,0,0.542607
3,3,,,,0.528508,0.056281,0.41521,-0.637697,-2.87739,-0.87897,...,,,0.528508,0.056281,0.41521,-0.637697,-2.87739,-0.87897,0,0.528508
4,4,,,,0.553069,0.043067,0.403864,-0.592273,-3.144992,-0.906677,...,,,0.553069,0.043067,0.403864,-0.592273,-3.144992,-0.906677,0,0.553069
5,5,,,,0.544959,0.035085,0.419957,-0.607046,-3.349985,-0.867604,...,,,0.544959,0.035085,0.419957,-0.607046,-3.349985,-0.867604,0,0.544959
6,6,,,,0.528569,0.051131,0.4203,-0.637582,-2.973359,-0.866787,...,,,0.528569,0.051131,0.4203,-0.637582,-2.973359,-0.866787,0,0.528569
7,7,,,,0.547648,0.045382,0.40697,-0.602123,-3.092633,-0.899016,...,,,0.547648,0.045382,0.40697,-0.602123,-3.092633,-0.899016,0,0.547648
8,8,,,,0.522872,0.061514,0.415613,-0.648418,-2.788486,-0.878,...,,,0.522872,0.061514,0.415613,-0.648418,-2.788486,-0.878,0,0.522872
9,9,,,,0.547938,0.05093,0.401132,-0.601593,-2.977311,-0.913464,...,,,0.547938,0.05093,0.401132,-0.601593,-2.977311,-0.913464,0,0.547938


In [17]:
# Rows where the SQL decision differs from the Keras decision; an empty frame
# means the two agree on every row.
condition = sql_keras_join['Decision_sql'].ne(sql_keras_join['Decision_keras'])
sql_keras_join.loc[condition]

Unnamed: 0,KEY_keras,Score_0_keras,Score_1_keras,Score_2_keras,Proba_0_keras,Proba_1_keras,Proba_2_keras,LogProba_0_keras,LogProba_1_keras,LogProba_2_keras,...,Score_1_sql,Score_2_sql,Proba_0_sql,Proba_1_sql,Proba_2_sql,LogProba_0_sql,LogProba_1_sql,LogProba_2_sql,Decision_sql,DecisionProba
