In [1]:
import os, numpy as np
import pandas as pd

os.environ["KERAS_BACKEND"] = "tensorflow"

import keras 
from sklearn import datasets

from sklearn.model_selection import train_test_split



keras.backend.set_floatx('float64')

from keras.utils import np_utils


def set_reproducible():
    """Seed Python, NumPy and TensorFlow with the fixed value 1960.

    A single-threaded TensorFlow session configuration and a session using it
    are also created. The session is not registered with Keras, because the
    ``K.set_session`` call at the end is disabled.
    """
    import os
    import random
    import tensorflow as tf
    from keras import backend as K

    seed = 1960

    # NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so
    # assigning it here presumably only affects child processes -- confirm.
    os.environ['PYTHONHASHSEED'] = '1960'
    random.seed(seed)
    np.random.seed(seed)
    tf.set_random_seed(seed)

    # One thread for both intra-op and inter-op parallelism.
    single_thread_conf = tf.ConfigProto(
        intra_op_parallelism_threads=1,
        inter_op_parallelism_threads=1,
    )
    sess = tf.Session(graph=tf.get_default_graph(), config=single_thread_conf)
    # K.set_session(sess)

set_reproducible()


Using TensorFlow backend.


# Build a Keras Model

In [2]:


def create_model(input_dim=13):
    """Build and compile the small feed-forward regression network.

    Architecture: Dense(16, no activation) -> Dropout(0.3) ->
    Dense(12, relu) -> Dense(1), compiled with the ``rmsprop`` optimizer and
    mean-squared-error loss.

    Parameters
    ----------
    input_dim : int, optional
        Number of input features. Defaults to 13, the width of the training
        matrix used in this notebook, so calling ``create_model()`` with no
        arguments behaves exactly as before.

    Returns
    -------
    model
        The compiled (untrained) ``Sequential`` model.
    """
    # Imported lazily, as before, so merely defining the function does not
    # require Keras to be importable.
    from keras.models import Sequential
    from keras.layers import Dense, Dropout

    model = Sequential()
    model.add(Dense(16, input_dim=input_dim))
    model.add(Dropout(0.3))
    model.add(Dense(12, activation='relu'))
    model.add(Dense(1))
    model.compile(optimizer='rmsprop', loss='mse')
    return model

In [3]:
# Load the Boston housing data and hold out 20% of the rows for testing.
boston = datasets.load_boston()
train_X, test_X, train_y, test_y = train_test_split(
    boston.data,
    boston.target,
    train_size=0.8,
    test_size=0.2,
    random_state=1960,
)


In [4]:

from keras.wrappers.scikit_learn import KerasRegressor

# Wrap create_model in the scikit-learn style regressor; train for 12 epochs, silently.
clf = KerasRegressor(build_fn=create_model, epochs=12, verbose=0)

# Show the training shapes, then fit (fit's return value is the cell output).
print(train_X.shape , train_y.shape)
clf.fit(train_X, train_y, verbose=0)

W0704 22:14:35.217032 139717328930624 deprecation.py:506] From /home/antoine/.local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:3445: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


(404, 13) (404,)


<keras.callbacks.History at 0x7f12610ecb70>

In [5]:
print(test_X.shape)
# Predict for the first held-out row only. reshape(1, -1) turns it into a
# single-row 2-D batch without hard-coding the number of features.
preds = clf.predict(test_X[0, :].reshape(1, -1))
print(preds)


(102, 13)
13.80997968018234


# Generate SQL Code from the Model

In [6]:
import json, requests, base64, dill as pickle, sys



# Raise the interpreter's recursion limit.
# NOTE(review): presumably needed so that serializing the Keras model does not
# hit the default limit -- confirm.
sys.setrecursionlimit(200000)
# `pickle` is dill here (see the import above); turn off its 'recurse' setting.
pickle.settings['recurse'] = False


def test_ws_sql_gen(pickle_data):
    WS_URL="https://sklearn2sql.herokuapp.com/model"
    b64_data = base64.b64encode(pickle_data).decode('utf-8')
    data={"Name":"model1", "PickleData":b64_data , "SQLDialect":"postgresql"}
    r = requests.post(WS_URL, json=data)
    # print(r.__dict__)
    content = r.json()
    # print(content)
    lSQL = content["model"]["SQLGenrationResult"][0]["SQL"]
    return lSQL;


In [7]:

# Serialize the fitted regressor (with dill, imported as `pickle`) ...
pickle_data = pickle.dumps(clf)
# ... and have the web service translate it into SQL.
lSQL = test_ws_sql_gen(pickle_data)


In [8]:
# Show the SQL text returned by the web service.
print(lSQL)

WITH keras_input AS 
(SELECT "ADS"."KEY" AS "KEY", "ADS"."Feature_0" AS "Feature_0", "ADS"."Feature_1" AS "Feature_1", "ADS"."Feature_2" AS "Feature_2", "ADS"."Feature_3" AS "Feature_3", "ADS"."Feature_4" AS "Feature_4", "ADS"."Feature_5" AS "Feature_5", "ADS"."Feature_6" AS "Feature_6", "ADS"."Feature_7" AS "Feature_7", "ADS"."Feature_8" AS "Feature_8", "ADS"."Feature_9" AS "Feature_9", "ADS"."Feature_10" AS "Feature_10", "ADS"."Feature_11" AS "Feature_11", "ADS"."Feature_12" AS "Feature_12" 
FROM "INPUT_DATA" AS "ADS"), 
keras_input_1 AS 
(SELECT keras_input."KEY" AS "KEY", keras_input."Feature_0" AS "Feature_0", keras_input."Feature_1" AS "Feature_1", keras_input."Feature_2" AS "Feature_2", keras_input."Feature_3" AS "Feature_3", keras_input."Feature_4" AS "Feature_4", keras_input."Feature_5" AS "Feature_5", keras_input."Feature_6" AS "Feature_6", keras_input."Feature_7" AS "Feature_7", keras_input."Feature_8" AS "Feature_8", keras_input."Feature_9" AS "Feature_9", keras_input."Feat

# Execute the SQL Code

In [9]:
# Save the dataset in a database table so the generated SQL can be run on it.


import sqlalchemy as sa

# The connection URL can be overridden through the SKLEARN2SQL_DB_URL
# environment variable so that real credentials never need to be written into
# the notebook; the fallback is the original local development database.
#engine = sa.create_engine('sqlite://' , echo=False)
DB_URL = os.environ.get("SKLEARN2SQL_DB_URL", "postgresql://db:db@localhost/db?port=5432")
engine = sa.create_engine(DB_URL, echo=False)
conn = engine.connect()

# One "Feature_<i>" column per input feature, plus the target (TGT) and a
# row-position key (KEY).
feature_names = ['Feature_' + str(x) for x in range(boston.data.shape[1])]
lTable = pd.DataFrame(boston.data, columns=feature_names)
lTable['TGT'] = boston.target
lTable['KEY'] = range(boston.data.shape[0])
# Replace any previous copy of the table so re-running the cell is safe.
lTable.to_sql("INPUT_DATA", conn, if_exists='replace', index=False)


In [10]:
# Run the generated SQL, then order the rows by KEY and renumber the index.
sql_output = (
    pd.read_sql(lSQL, conn)
    .sort_values(by='KEY')
    .reset_index(drop=True)
)
conn.close()

In [11]:
# Preview a fixed-seed random sample of 12 rows of the SQL predictions.
sql_output.sample(12, random_state=1960)

Unnamed: 0,KEY,Estimator
230,230,13.80998
112,112,14.512192
125,125,15.856553
9,9,14.78883
213,213,12.614681
109,109,14.599835
127,127,13.986859
244,244,13.990173
406,406,13.340092
490,490,9.774919


# Keras Prediction

In [12]:
# Score every row with the Keras model and pair each prediction with its KEY
# (the row position), giving the same KEY/Estimator layout as the SQL output.
skl_output_key = pd.DataFrame({'KEY': range(boston.data.shape[0])})
skl_output_estimator = pd.DataFrame(clf.predict(boston.data), columns=['Estimator'])
# Build a fresh frame instead of aliasing skl_output_key and mutating it.
skl_output = pd.concat([skl_output_key, skl_output_estimator], axis=1)
skl_output.sample(12, random_state=1960)

Unnamed: 0,KEY,Estimator
230,230,13.80998
112,112,14.512192
125,125,15.856553
9,9,14.78883
213,213,12.614681
109,109,14.599835
127,127,13.986859
244,244,13.990173
406,406,13.340092
490,490,9.774919


# Comparing the SQL and Keras Predictions

In [13]:
# DataFrame.join(on='KEY') matches skl_output['KEY'] against the *index* of the
# right-hand frame. Index sql_output by its KEY values explicitly so the match
# is by key rather than by whatever row order sql_output happens to have.
sql_by_key = sql_output.set_index(sql_output['KEY'].values)
sql_skl_join = skl_output.join(sql_by_key, how='left', on='KEY', lsuffix='_skl', rsuffix='_sql')
# Difference between the SQL and Keras predictions for each row.
sql_skl_join['Error'] = sql_skl_join['Estimator_sql'] - sql_skl_join['Estimator_skl']

In [14]:
# First 12 rows of the side-by-side comparison, including the Error column.
sql_skl_join.head(12)

Unnamed: 0,KEY_skl,Estimator_skl,KEY_sql,Estimator_sql,Error
0,0,14.719985,0,14.719985,1.776357e-15
1,1,15.320518,1,15.320518,-4.440892e-14
2,2,14.391976,2,14.391976,-1.065814e-14
3,3,14.1747,3,14.1747,-4.618528e-14
4,4,14.609911,4,14.609911,-4.796163e-14
5,5,14.740791,5,14.740791,1.065814e-14
6,6,14.270077,6,14.270077,8.881784e-15
7,7,15.584041,7,15.584041,-3.907985e-14
8,8,15.315529,8,15.315529,1.776357e-14
9,9,14.78883,9,14.78883,-1.24345e-14


In [15]:
# Summary statistics of the comparison; the Error column shows how far the SQL
# and Keras predictions differ.
sql_skl_join.describe()

Unnamed: 0,KEY_skl,Estimator_skl,KEY_sql,Estimator_sql,Error
count,506.0,506.0,506.0,506.0,506.0
mean,252.5,12.611682,252.5,12.611682,5.04208e-16
std,146.213884,3.457737,146.213884,3.457737,2.769234e-14
min,0.0,0.084647,0.0,0.084647,-5.684342e-14
25%,126.25,12.275996,126.25,12.275996,-2.309264e-14
50%,252.5,13.489934,252.5,13.489934,1.776357e-15
75%,378.75,14.369461,378.75,14.369461,2.131628e-14
max,505.0,18.926894,505.0,18.926894,6.394885e-14
