In [1]:
import os, numpy as np
import pandas as pd

# Select the TensorFlow backend for Keras; this assignment sits before `import keras` below.
os.environ["KERAS_BACKEND"] = "tensorflow"

import keras 
from sklearn import datasets

from sklearn.model_selection import train_test_split

from keras.utils import np_utils

# Make float64 the default float type used by the Keras backend.
keras.backend.set_floatx('float64')

def set_reproducible():
    """Seed the random generators and force single-threaded TensorFlow execution.

    Python's ``random`` module, NumPy and TensorFlow are all seeded with 1960.
    A TensorFlow session restricted to one intra-op and one inter-op thread is
    then created on the default graph and installed as the Keras backend
    session.
    """
    import os
    import random
    import tensorflow as tf

    # NOTE(review): set at runtime; presumably this only reaches child
    # processes and not the hashing of the running interpreter - confirm.
    os.environ['PYTHONHASHSEED'] = '1960'

    seed = 1960  # same value as the hash seed exported above
    random.seed(seed)
    np.random.seed(seed)

    import keras.backend.tensorflow_backend as tf_backend
    tf.set_random_seed(seed)

    # One thread for both intra-op and inter-op parallelism.
    single_thread_config = tf.ConfigProto(
        intra_op_parallelism_threads=1,
        inter_op_parallelism_threads=1,
    )
    tf_backend.set_session(
        tf.Session(graph=tf.get_default_graph(), config=single_thread_config)
    )

# Apply the seeding / single-thread session setup before any model is built.
set_reproducible()


Using TensorFlow backend.


# Build a Keras Model

In [2]:
# Number of leading feature columns kept from the dataset; also the width of the GRU input.
NC = 13

def create_model():
    """Build and compile the small stateful GRU regression network.

    The network is a single-unit stateful GRU that reads batches of exactly
    one sample shaped ``(1, NC)``, followed by a one-unit Dense layer with no
    activation. It is compiled with the Adam optimizer and an MSE loss.

    Returns
    -------
    keras.models.Sequential
        The compiled model.
    """
    from keras.models import Sequential
    from keras.layers import Dense, Activation, GRU
    from keras.constraints import max_norm

    recurrent_layer = GRU(
        1,
        input_shape=(1, NC),
        batch_size=1,
        activation='relu',
        recurrent_activation='hard_sigmoid',
        stateful=True,
        # Both weight matrices are constrained to a max norm of 2.
        kernel_constraint=max_norm(2.),
        recurrent_constraint=max_norm(2.),
        # Small orthogonal input kernel; recurrent kernel starts at zero.
        kernel_initializer=keras.initializers.Orthogonal(gain=0.1),
        recurrent_initializer=keras.initializers.Zeros(),
    )
    output_layer = Dense(1, activation=None)

    model = Sequential([recurrent_layer, output_layer])
    model.compile(optimizer='adam', loss='mse')
    return model

In [3]:
# Load the Boston housing data and standardize each feature column and the
# target to zero mean / unit standard deviation (computed over the full dataset).
boston = datasets.load_boston()
feature_mean, feature_std = boston.data.mean(axis=0), boston.data.std(axis=0)
boston.data = (boston.data - feature_mean) / feature_std
target_mean, target_std = boston.target.mean(axis=0), boston.target.std(axis=0)
boston.target = (boston.target - target_mean) / target_std

# Keep only the first NC feature columns.
boston.data = boston.data[:, :NC]

# 80 / 20 split with a fixed random state.
train_X, test_X, train_y, test_y = train_test_split(
    boston.data,
    boston.target,
    train_size=0.8,
    test_size=0.2,
    random_state=1960,
)

# Insert a length-1 middle axis: (samples, features) -> (samples, 1, features),
# matching the GRU's input_shape=(1, NC).
train_X = np.expand_dims(train_X, axis=1)
test_X = np.expand_dims(test_X, axis=1)



In [4]:
# train_X

In [5]:
#boston.target

In [6]:

# KerasRegressor exposes create_model through a scikit-learn style fit/predict interface.
from keras.wrappers.scikit_learn import KerasRegressor

# batch_size=1 here matches the batch_size=1 fixed on the stateful GRU in create_model.
clf = KerasRegressor(build_fn=create_model, batch_size=1, epochs=12, verbose=2)

# Show the shapes and dtypes of the training arrays before fitting.
print(train_X.shape , train_y.shape, train_X.dtype , train_y.dtype)
clf.fit(train_X, train_y)

(404, 1, 13) (404,) float64 float64
Epoch 1/12
 - 1s - loss: 0.8735
Epoch 2/12
 - 1s - loss: 0.6828
Epoch 3/12
 - 1s - loss: 0.5327
Epoch 4/12
 - 1s - loss: 0.4252
Epoch 5/12
 - 1s - loss: 0.3559
Epoch 6/12
 - 1s - loss: 0.3107
Epoch 7/12
 - 1s - loss: 0.2863
Epoch 8/12
 - 1s - loss: 0.2764
Epoch 9/12
 - 1s - loss: 0.2728
Epoch 10/12
 - 1s - loss: 0.2683
Epoch 11/12
 - 1s - loss: 0.2630
Epoch 12/12
 - 1s - loss: 0.2465


<keras.callbacks.History at 0x7ff449972320>

In [7]:
# Predict on the held-out split and print the raw predictions.
print(test_X.shape)
preds = clf.predict(test_X)
# NOTE(review): the stateful GRU's states are reset only *after* this predict call,
# so these predictions start from whatever state training left behind - confirm this is intended.
clf.model.reset_states()
print(preds)


(102, 1, 13)
[-0.32129598 -0.60472545 -0.45489549 -0.64513939  0.06138    -0.53107756
 -0.64210316 -0.68225485 -0.69666026 -0.7022094  -0.70461571 -0.70547029
 -0.03634275 -0.55276541  0.9803505  -0.47387471  0.98600414 -0.03462121
 -0.63731437  0.10537841 -0.32884258  2.25749942 -0.48408869  0.16363296
  1.03405514 -0.49241889 -0.49625269 -0.69632153 -0.31073547 -0.62883759
  0.06508379 -0.59295252 -0.70600241  1.19650378  0.35998787  0.1535076
  0.43653808 -0.62972223 -0.68988051  0.23313738 -0.54192728 -0.18111312
 -0.20155707 -0.66938214 -0.69314518 -0.34651543  0.27176871  0.24868476
 -0.53721032 -0.65049717 -0.68484638 -0.69784241 -0.70450233  2.21662275
 -0.70600241 -0.70600241  0.74109577 -0.1846488  -0.56527046 -0.64564596
 -0.69205816  1.13175126 -0.53898677 -0.07488758 -0.65468292  1.10842835
 -0.38007448 -0.59976445 -0.68583448 -0.34230538  0.01682355  2.27494346
 -0.70600241 -0.3485372   0.34647207 -0.53922522  1.56831957  0.84210346
 -0.45272276  1.0236016  -0.46112093  0

In [8]:
# Summary statistics of the standardized feature matrix.
pd.DataFrame(boston.data).describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
count,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0
mean,-5.616939000000001e-17,6.319056000000001e-17,-3.145486e-15,-2.106352e-17,2.7523e-15,-1.15077e-14,-1.13743e-15,7.582867e-16,5.616939000000001e-17,5.616939000000001e-17,-1.022283e-14,8.593916e-15,-5.897786e-16
std,1.00099,1.00099,1.00099,1.00099,1.00099,1.00099,1.00099,1.00099,1.00099,1.00099,1.00099,1.00099,1.00099
min,-0.4197819,-0.4877224,-1.557842,-0.2725986,-1.465882,-3.880249,-2.335437,-1.267069,-0.9828429,-1.31399,-2.707379,-3.907193,-1.531127
25%,-0.4109696,-0.4877224,-0.8676906,-0.2725986,-0.9130288,-0.5686303,-0.837448,-0.8056878,-0.6379618,-0.767576,-0.4880391,0.2050715,-0.79942
50%,-0.3906665,-0.4877224,-0.2110985,-0.2725986,-0.1442174,-0.1084655,0.3173816,-0.2793234,-0.5230014,-0.4646726,0.274859,0.3811865,-0.1812536
75%,0.00739656,0.04877224,1.015999,-0.2725986,0.598679,0.4827678,0.9067981,0.6623709,1.661245,1.530926,0.8065758,0.433651,0.6030188
max,9.933931,3.804234,2.422565,3.668398,2.732346,3.555044,1.117494,3.960518,1.661245,1.798194,1.638828,0.4410519,3.548771


# Generate SQL Code from the Model

In [9]:
import json, requests, base64, dill as pickle, sys



# Raise the interpreter recursion limit - presumably so that serializing the
# nested model object does not exceed the default limit (TODO confirm).
sys.setrecursionlimit(200000)
# `pickle` is dill here (imported as `dill as pickle`); turn off its 'recurse' setting.
pickle.settings['recurse'] = False


def test_ws_sql_gen(pickle_data, dialect="postgresql", timeout=300):
    """Ask the sklearn2sql web service to translate a pickled model into SQL.

    Parameters
    ----------
    pickle_data : bytes
        The serialized model; it is base64-encoded before being sent.
    dialect : str, optional
        Value sent as ``SQLDialect``. Defaults to ``"postgresql"``, the value
        that was previously hard-coded.
    timeout : float or None, optional
        Seconds to wait for the service before giving up. ``None`` waits
        indefinitely, which was the previous behaviour.

    Returns
    -------
    The ``"SQL"`` entry of the first element of the service's
    ``"SQLGenrationResult"`` list.

    Raises
    ------
    requests.HTTPError
        If the service answers with an HTTP error status.
    requests.Timeout
        If no answer arrives within ``timeout`` seconds.
    KeyError, IndexError
        If the JSON answer does not have the expected layout.
    """
    # NOTE(security): the complete pickled model is uploaded to a third-party host.
    WS_URL = "https://sklearn2sql.herokuapp.com/model"
    b64_data = base64.b64encode(pickle_data).decode('utf-8')
    data = {"Name": "model1", "PickleData": b64_data, "SQLDialect": dialect}
    r = requests.post(WS_URL, json=data, timeout=timeout)
    # Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
    r.raise_for_status()
    content = r.json()
    # "SQLGenrationResult" (sic) is the key exactly as this code has always read it.
    return content["model"]["SQLGenrationResult"][0]["SQL"]


In [10]:
# Serialize the fitted regressor (with dill, imported as `pickle`) and ask the
# web service for the equivalent SQL.
pickle_data = pickle.dumps(clf)
lSQL = test_ws_sql_gen(pickle_data)


In [11]:
# Show the generated SQL text.
print(lSQL)

WITH RECURSIVE keras_input AS 
(SELECT "ADS"."KEY" AS "KEY", "ADS"."Feature_0" AS "Feature_0", "ADS"."Feature_1" AS "Feature_1", "ADS"."Feature_2" AS "Feature_2", "ADS"."Feature_3" AS "Feature_3", "ADS"."Feature_4" AS "Feature_4", "ADS"."Feature_5" AS "Feature_5", "ADS"."Feature_6" AS "Feature_6", "ADS"."Feature_7" AS "Feature_7", "ADS"."Feature_8" AS "Feature_8", "ADS"."Feature_9" AS "Feature_9", "ADS"."Feature_10" AS "Feature_10", "ADS"."Feature_11" AS "Feature_11", "ADS"."Feature_12" AS "Feature_12" 
FROM "INPUT_DATA" AS "ADS"), 
keras_input_1 AS 
(SELECT keras_input."KEY" AS "KEY", keras_input."Feature_0" AS "Feature_0", keras_input."Feature_1" AS "Feature_1", keras_input."Feature_2" AS "Feature_2", keras_input."Feature_3" AS "Feature_3", keras_input."Feature_4" AS "Feature_4", keras_input."Feature_5" AS "Feature_5", keras_input."Feature_6" AS "Feature_6", keras_input."Feature_7" AS "Feature_7", keras_input."Feature_8" AS "Feature_8", keras_input."Feature_9" AS "Feature_9", keras_i

# Execute the SQL Code

In [12]:
# Store the standardized dataset in the database as table "INPUT_DATA",
# the table name the generated SQL selects from.

import sqlalchemy as sa

# In-memory SQLite alternative, kept for reference:
#engine = sa.create_engine('sqlite://' , echo=False)
# NOTE(review): the connection URL below embeds a user name and password.
engine = sa.create_engine("postgresql://db:db@localhost/db?port=5432", echo=False)
conn = engine.connect()

n_rows, n_features = boston.data.shape
feature_names = ['Feature_' + str(i) for i in range(n_features)]

# Columns: Feature_0 .. Feature_{n-1}, then the target, then a 0-based row key.
lTable = pd.DataFrame(boston.data, columns=feature_names)
lTable['TGT'] = boston.target
lTable['KEY'] = range(n_rows)
lTable.to_sql("INPUT_DATA", conn, if_exists='replace', index=False)


  """)


In [13]:
# Execute the generated SQL, order the rows by KEY and renumber the index
# from zero, then release the database connection.
sql_output = (
    pd.read_sql(lSQL, conn)
    .sort_values(by='KEY')
    .reset_index(drop=True)
)
conn.close()


In [14]:
# Fixed-seed sample of the SQL predictions (same seed as the Keras sample further down).
sql_output.sample(12, random_state=1960)

Unnamed: 0,KEY,Estimator
230,230,-0.307049
112,112,-0.515965
125,125,-0.477801
9,9,-0.70057
213,213,0.141389
109,109,-0.563168
127,127,-0.681141
244,244,-0.438898
406,406,-0.706002
490,490,-0.695098


# Keras Prediction

In [15]:
# Clear the stateful GRU's states so prediction over the full dataset starts fresh.
clf.model.reset_states()

# Reshape (samples, features) -> (samples, 1, features) and predict every row.
n_rows, n_features = boston.data.shape
data = boston.data.reshape(n_rows, 1, n_features)
skl_out = clf.predict(data)
print(skl_out.shape)

# One row per sample: 0-based KEY next to the Keras prediction.
skl_output = pd.DataFrame(
    {'KEY': list(range(n_rows)), 'Estimator': skl_out},
    columns=['KEY', 'Estimator'],
)
skl_output.sample(12, random_state=1960)

(506,)


Unnamed: 0,KEY,Estimator
230,230,-0.307049
112,112,-0.515965
125,125,-0.477801
9,9,-0.70057
213,213,0.141389
109,109,-0.563168
127,127,-0.681141
244,244,-0.438898
406,406,-0.706002
490,490,-0.695098


# Comparing the SQL and Keras Predictions

In [16]:
# DataFrame.join with on='KEY' matches skl_output's KEY column against sql_output's *index*;
# this works because sql_output was sorted by KEY and re-indexed from zero after the query.
sql_skl_join = skl_output.join(sql_output , how='left', on='KEY', lsuffix='_skl', rsuffix='_sql')
# Per-row difference between the SQL prediction and the Keras prediction.
sql_skl_join['Error'] = sql_skl_join.Estimator_sql - sql_skl_join.Estimator_skl

In [17]:
# First rows of the side-by-side comparison.
sql_skl_join.head(12)

Unnamed: 0,KEY_skl,Estimator_skl,KEY_sql,Estimator_sql,Error
0,0,0.527276,0,0.527276,0.0
1,1,0.141909,1,0.141909,-5.551115e-16
2,2,1.140778,2,1.140778,-4.662937e-15
3,3,1.055835,3,1.055835,-3.774758e-15
4,4,0.952849,4,0.952849,-1.110223e-16
5,5,0.351297,5,0.351297,6.106227e-16
6,6,-0.31172,6,-0.31172,3.330669e-16
7,7,-0.627199,7,-0.627199,0.0
8,8,-0.685863,8,-0.685863,-1.110223e-16
9,9,-0.70057,9,-0.70057,2.220446e-16


In [18]:
# Summary statistics of the comparison, including the Error column.
sql_skl_join.describe()

Unnamed: 0,KEY_skl,Estimator_skl,KEY_sql,Estimator_sql,Error
count,506.0,506.0,506.0,506.0,506.0
mean,252.5,-0.013917,252.5,-0.013917,-4.503082e-18
std,146.213884,0.830761,146.213884,0.830761,1.115357e-15
min,0.0,-0.706002,0.0,-0.706002,-5.107026e-15
25%,126.25,-0.676127,126.25,-0.676127,-2.220446e-16
50%,252.5,-0.341314,252.5,-0.341314,0.0
75%,378.75,0.376642,378.75,0.376642,2.220446e-16
max,505.0,2.985721,505.0,2.985721,4.662937e-15
