In [1]:
import os, numpy as np
import pandas as pd

os.environ["KERAS_BACKEND"] = "tensorflow"

import keras 
from sklearn import datasets

from sklearn.model_selection import train_test_split

from keras.utils import np_utils

keras.backend.set_floatx('float64')

def set_reproducible():
    import tensorflow as tf
    import random as rn
    import os
    os.environ['PYTHONHASHSEED'] = '1960'
    rn.seed(1960)
    np.random.seed(1960)
    session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)

    from keras import backend as K
    tf.set_random_seed(1960)

    sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
    K.set_session(sess)

set_reproducible()


Using TensorFlow backend.


# Build a Keras Model

In [2]:
NC = 13

def create_model():
    from keras.models import Sequential
    from keras.layers import Dense, Activation, SimpleRNN
    from keras.constraints import max_norm

    model = Sequential()
    model.add(SimpleRNN(4, input_shape=(1, NC), batch_size=1 , 
                   activation='relu',
                   stateful=True,
                   kernel_constraint=max_norm(2.),
                   recurrent_constraint=max_norm(2.),
                   kernel_initializer = keras.initializers.Orthogonal(gain=0.1),
                   recurrent_initializer = keras.initializers.Zeros()))
    model.add(Dense(1, activation=None))
    adam_optimizer = keras.optimizers.Adam(lr=0.01, clipnorm=1.)
    model.compile(loss='mse', optimizer=adam_optimizer)
    return model



In [3]:
boston  = datasets.load_boston()
boston.data = (boston.data - boston.data.mean(axis=0)) / boston.data.std(axis=0)
boston.target = (boston.target - boston.target.mean(axis=0)) / boston.target.std(axis=0)

boston.data = boston.data[:,0:NC]

train_X, test_X, train_y, test_y = train_test_split(boston.data, boston.target, train_size=0.6, test_size=0.4, random_state=1960)
train_X = train_X.reshape(train_X.shape[0] , 1, train_X.shape[1])
test_X = test_X.reshape(test_X.shape[0] , 1, test_X.shape[1])



In [4]:
#boston.data

In [5]:
#boston.target

In [6]:

from keras.wrappers.scikit_learn import KerasRegressor

clf = KerasRegressor(build_fn=create_model, batch_size=1, epochs=6, verbose=2)

print(train_X.shape , train_y.shape)
clf.fit(train_X, train_y)

(303, 1, 13) (303,)
Epoch 1/6
 - 1s - loss: 0.3736
Epoch 2/6
 - 0s - loss: 0.2822
Epoch 3/6
 - 0s - loss: 0.2880
Epoch 4/6
 - 0s - loss: 0.2765
Epoch 5/6
 - 0s - loss: 0.2956
Epoch 6/6
 - 0s - loss: 0.3257


<keras.callbacks.History at 0x7f25fd1e3a90>

In [7]:
print(test_X.shape)
preds = clf.predict(test_X)
print(preds)


(203, 1, 13)
[-0.15400091 -0.31443807 -0.20076562 -0.15400091  0.43109361 -0.42913229
 -0.38996782 -0.15400091 -0.91000277 -0.45662413 -0.15400091 -0.51796945
 -0.06204796 -0.87303262  1.01182806 -0.60236015  1.05630171 -0.15400091
 -1.27771481  0.49412818 -0.31432003  3.23656087 -1.06049006  0.19974604
  1.30235407 -0.31813756 -0.15400091 -1.06477565 -0.15400091 -0.9142552
  0.24939548 -0.75862913 -1.27374863  0.98656451 -0.15400091  0.10889413
  0.35331937 -0.81250808 -0.26648952  0.56739454 -0.56415009 -0.15400091
 -0.36127076 -0.75369759 -0.15400091 -0.1317169   0.40734553 -0.10111318
 -1.01631935 -0.15400091 -0.15400091 -0.5953882  -0.71249294  1.98814505
 -0.72417342 -0.33617534  1.26391165 -0.24938593 -0.15400091 -0.21132226
 -0.24943465  1.12295919 -0.15400091  0.0682532  -1.22693992  1.10429868
 -0.15400091 -0.43157739 -0.63814887  0.10879875  0.01592168  3.45731391
 -0.46116141 -0.15400091  0.3804442  -0.84175394  1.7419177  -0.15400091
 -0.74250253  0.5319715  -0.47755424  0

# Generate SQL Code from the Model

In [8]:
import json, requests, base64, dill as pickle, sys



sys.setrecursionlimit(200000)
pickle.settings['recurse'] = False


def test_ws_sql_gen(pickle_data):
    WS_URL="https://sklearn2sql.herokuapp.com/model"
    b64_data = base64.b64encode(pickle_data).decode('utf-8')
    data={"Name":"model1", "PickleData":b64_data , "SQLDialect":"postgresql"}
    r = requests.post(WS_URL, json=data)
    # print(r.__dict__)
    content = r.json()
    # print(content)
    lSQL = content["model"]["SQLGenrationResult"][0]["SQL"]
    return lSQL;



In [9]:
pickle_data = pickle.dumps(clf)
lSQL = test_ws_sql_gen(pickle_data)


In [10]:
print(lSQL)

WITH RECURSIVE keras_input AS 
(SELECT "ADS"."KEY" AS "KEY", "ADS"."Feature_0" AS "Feature_0", "ADS"."Feature_1" AS "Feature_1", "ADS"."Feature_2" AS "Feature_2", "ADS"."Feature_3" AS "Feature_3", "ADS"."Feature_4" AS "Feature_4", "ADS"."Feature_5" AS "Feature_5", "ADS"."Feature_6" AS "Feature_6", "ADS"."Feature_7" AS "Feature_7", "ADS"."Feature_8" AS "Feature_8", "ADS"."Feature_9" AS "Feature_9", "ADS"."Feature_10" AS "Feature_10", "ADS"."Feature_11" AS "Feature_11", "ADS"."Feature_12" AS "Feature_12" 
FROM "INPUT_DATA" AS "ADS"), 
keras_input_1 AS 
(SELECT keras_input."KEY" AS "KEY", keras_input."Feature_0" AS "Feature_0", keras_input."Feature_1" AS "Feature_1", keras_input."Feature_2" AS "Feature_2", keras_input."Feature_3" AS "Feature_3", keras_input."Feature_4" AS "Feature_4", keras_input."Feature_5" AS "Feature_5", keras_input."Feature_6" AS "Feature_6", keras_input."Feature_7" AS "Feature_7", keras_input."Feature_8" AS "Feature_8", keras_input."Feature_9" AS "Feature_9", keras_i

# Execute the SQL Code

In [11]:
# save the dataset in a database table

import sqlalchemy as sa

#engine = sa.create_engine('sqlite://' , echo=False)
engine = sa.create_engine("postgresql://db:db@localhost/db?port=5432", echo=False)
conn = engine.connect()

lTable = pd.DataFrame(boston.data);
lTable.columns = ['Feature_' + str(x) for x in range(boston.data.shape[1]) ]
lTable['TGT'] = boston.target
lTable['KEY'] = range(boston.data.shape[0])
lTable.to_sql("INPUT_DATA" , conn,   if_exists='replace', index=False)


  """)


In [12]:
sql_output = pd.read_sql(lSQL , conn);
sql_output = sql_output.sort_values(by='KEY').reset_index(drop=True)
conn.close()


In [13]:
sql_output.sample(12, random_state=1960)

Unnamed: 0,KEY,Estimator
230,230,-0.146201
112,112,-0.375191
125,125,-0.282834
9,9,-0.154001
213,213,0.431094
109,109,-0.302007
127,127,-0.275022
244,244,-0.154001
406,406,-0.472338
490,490,-0.710604


# Keras Prediction

In [14]:
skl_output = pd.DataFrame()
skl_output_key = pd.DataFrame(list(range(boston.data.shape[0])), columns=['KEY']);
data = boston.data.reshape(boston.data.shape[0] , 1, boston.data.shape[1])
skl_out = clf.predict(data)
print(skl_out.shape)
skl_output_estimator = pd.DataFrame(skl_out, columns=['Estimator'])
skl_output['KEY'] = skl_output_key['KEY']
skl_output['Estimator'] = skl_output_estimator
skl_output.sample(12, random_state=1960)

(506,)


Unnamed: 0,KEY,Estimator
230,230,-0.146201
112,112,-0.375191
125,125,-0.282834
9,9,-0.154001
213,213,0.431094
109,109,-0.302007
127,127,-0.275022
244,244,-0.154001
406,406,-0.472338
490,490,-0.710604


# Comparing the SQL and Keras Predictions

In [15]:
sql_skl_join = skl_output.join(sql_output , how='left', on='KEY', lsuffix='_skl', rsuffix='_sql')
sql_skl_join['Error'] = sql_skl_join.Estimator_sql - sql_skl_join.Estimator_skl

In [16]:
sql_skl_join.head(12)

Unnamed: 0,KEY_skl,Estimator_skl,KEY_sql,Estimator_sql,Error
0,0,0.623391,0,0.98676,0.3633691
1,1,-0.154001,1,-0.154001,4.163336e-16
2,2,1.911148,2,1.911148,1.332268e-15
3,3,-0.001304,3,-0.001304,2.248635e-16
4,4,1.366995,4,1.366995,-2.88658e-15
5,5,-0.154001,5,-0.154001,4.163336e-16
6,6,-0.154001,6,-0.154001,4.163336e-16
7,7,-0.270652,7,-0.270652,1.665335e-16
8,8,-0.505317,8,-0.505317,2.220446e-16
9,9,-0.154001,9,-0.154001,4.163336e-16


In [17]:
sql_skl_join.describe()

Unnamed: 0,KEY_skl,Estimator_skl,KEY_sql,Estimator_sql,Error
count,506.0,506.0,506.0,506.0,506.0
mean,252.5,0.045914,252.5,0.046632,0.0007181207
std,146.213884,0.78935,146.213884,0.790041,0.01615372
min,0.0,-1.663316,0.0,-1.663316,-5.329071e-15
25%,126.25,-0.357708,126.25,-0.357708,-2.220446e-16
50%,252.5,-0.154001,252.5,-0.154001,6.938894000000001e-17
75%,378.75,0.234974,378.75,0.234974,4.163336e-16
max,505.0,3.979998,505.0,3.979998,0.3633691
