In [1]:
import os, numpy as np
import pandas as pd

os.environ["KERAS_BACKEND"] = "tensorflow"

import keras 
from sklearn import datasets

from sklearn.model_selection import train_test_split

from keras.utils import np_utils


def set_reproducible(seed=1960):
    """Seed every random source the notebook uses and install a single-threaded TF session.

    Parameters
    ----------
    seed : int, optional
        Value written to ``PYTHONHASHSEED`` and used to seed Python's ``random``
        module, NumPy's global generator and TensorFlow's graph-level seed.
        Defaults to 1960, the value this notebook has always used.

    Side effects
    ------------
    Creates a new ``tf.Session`` on the current default graph and registers it
    as the Keras backend session.
    """
    import tensorflow as tf
    import random as rn

    # NOTE: Python reads PYTHONHASHSEED at interpreter start-up, so setting it
    # here only reaches child processes, not the already-running kernel.
    os.environ['PYTHONHASHSEED'] = str(seed)
    rn.seed(seed)
    np.random.seed(seed)

    # Restrict TensorFlow to one thread in both the intra-op and inter-op pools.
    session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)

    from keras import backend as K
    tf.set_random_seed(seed)

    sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
    K.set_session(sess)

# Seed the random generators and install the single-threaded session before any model is built.
set_reproducible()


Using TensorFlow backend.


# Build a Keras Model

In [2]:
# Number of feature columns fed to the network at its single time step.
NC = 13


def create_model():
    """Build and compile the regression network used by the scikit-learn wrapper.

    The network is a stateful one-unit ``SimpleRNN`` (ReLU, batch size 1,
    input shape ``(1, NC)``) followed by a linear one-unit ``Dense`` layer,
    compiled with mean-squared-error loss and a norm-clipped Adam optimizer.

    Returns
    -------
    The compiled ``Sequential`` model.
    """
    from keras.constraints import max_norm
    from keras.layers import Dense, SimpleRNN
    from keras.models import Sequential

    network = Sequential()

    recurrent_layer = SimpleRNN(
        1,
        input_shape=(1, NC),
        batch_size=1,
        activation='relu',
        stateful=True,
        kernel_constraint=max_norm(2.),
        recurrent_constraint=max_norm(2.),
        kernel_initializer=keras.initializers.Orthogonal(gain=0.1),
        recurrent_initializer=keras.initializers.Zeros(),
    )
    network.add(recurrent_layer)

    # Linear read-out: no activation on the output unit.
    network.add(Dense(1, activation=None))

    optimizer = keras.optimizers.Adam(lr=0.01, clipnorm=1.)
    network.compile(loss='mse', optimizer=optimizer)
    return network



In [3]:
# Load the Boston housing data and z-score the features (per column) and the target.
boston = datasets.load_boston()
feature_mean, feature_std = boston.data.mean(axis=0), boston.data.std(axis=0)
boston.data = (boston.data - feature_mean) / feature_std
target_mean, target_std = boston.target.mean(axis=0), boston.target.std(axis=0)
boston.target = (boston.target - target_mean) / target_std

# Keep the first NC feature columns.
boston.data = boston.data[:, :NC]

train_X, test_X, train_y, test_y = train_test_split(
    boston.data, boston.target, train_size=0.6, test_size=0.4, random_state=1960)

# Insert a time axis of length one: (samples, features) -> (samples, 1, features).
train_X = np.expand_dims(train_X, axis=1)
test_X = np.expand_dims(test_X, axis=1)



In [4]:
#boston.data

In [5]:
#boston.target

In [6]:

from keras.wrappers.scikit_learn import KerasRegressor

# Training options handed to the scikit-learn style wrapper along with the model factory.
fit_options = dict(batch_size=1, epochs=12, verbose=2)
clf = KerasRegressor(build_fn=create_model, **fit_options)

# Show the training array shapes, then train; the fit result is the cell's displayed value.
print(train_X.shape, train_y.shape)
clf.fit(train_X, train_y)

(303, 1, 13) (303,)
Epoch 1/12
 - 1s - loss: 0.5270
Epoch 2/12
 - 1s - loss: 0.3467
Epoch 3/12
 - 1s - loss: 0.3287
Epoch 4/12
 - 1s - loss: 0.3141
Epoch 5/12
 - 1s - loss: 0.3071
Epoch 6/12
 - 1s - loss: 0.2918
Epoch 7/12
 - 1s - loss: 0.3009
Epoch 8/12
 - 1s - loss: 0.2988
Epoch 9/12
 - 1s - loss: 0.2864
Epoch 10/12
 - 1s - loss: 0.3066
Epoch 11/12
 - 1s - loss: 0.3146
Epoch 12/12
 - 1s - loss: 0.3220


<keras.callbacks.History at 0x7f7939340e80>

In [7]:
# Score the held-out split with the fitted wrapper and print the raw predictions
# (they are on the standardized target scale, since the target was z-scored earlier).
print(test_X.shape)
preds = clf.predict(test_X)
print(preds)


(203, 1, 13)
[ 1.57322466e-01 -3.55138779e-01  3.30862403e-02 -3.06719005e-01
  5.45046866e-01 -4.09224093e-01 -6.38198018e-01 -5.64940333e-01
 -8.75415742e-01 -8.75415742e-01 -6.94310725e-01 -4.53889102e-01
  2.68777192e-01 -8.75415742e-01  1.29493308e+00 -7.32994914e-01
  1.41234589e+00 -1.15855753e-01 -8.75415742e-01  7.73220837e-01
 -1.23791695e-01  2.02671361e+00 -8.75415742e-01  7.82700598e-01
  9.02883112e-01 -3.65100026e-01 -1.14661038e-01 -8.75415742e-01
  2.77620733e-01 -8.75415742e-01  6.09988272e-01 -8.26449454e-01
 -8.75415742e-01  1.59117198e+00  2.15869725e-01  2.27343261e-01
  5.73326886e-01 -8.75415742e-01 -1.69420123e-01  7.23636568e-01
 -7.19867229e-01  2.39722669e-01  7.43675232e-03 -8.75415742e-01
 -2.60159492e-01  7.24387765e-02  5.60655177e-01  2.41226494e-01
 -8.75415742e-01 -7.42264688e-01 -5.97059131e-01 -7.29745567e-01
 -8.75415742e-01  2.10223198e+00 -5.30124545e-01 -8.75415742e-01
  1.24317360e+00 -1.80245280e-01 -1.91910744e-01 -6.32881939e-01
 -5.36063552

# Generate SQL Code from the Model

In [8]:
import json, requests, base64, dill as pickle, sys



# Raise Python's recursion limit to 200000; presumably needed so that pickling
# deeply nested model objects does not hit the limit — TODO confirm.
sys.setrecursionlimit(200000)
# `pickle` is dill in this notebook (see the import above); turn its 'recurse' setting off.
pickle.settings['recurse'] = False

# No luck with the web service: pickling TensorFlow and/or Keras objects does not seem to be a priority for either project.
# There are a lot of GitHub issues in the two projects when searching for the keyword "pickle".

def test_ws_sql_gen(pickle_data, ws_url="http://localhost:1888/model", timeout=60):
    """Ask the SQL-generation web service to translate a pickled model into SQL.

    Parameters
    ----------
    pickle_data : bytes
        Serialized model; it is base64-encoded before being sent.
    ws_url : str, optional
        Endpoint that receives the JSON request. Defaults to the local
        service address this notebook has always used.
    timeout : float, optional
        Seconds to wait for the service before ``requests`` gives up.

    Returns
    -------
    The ``"SQL"`` entry of the first generation result in the JSON response.

    Raises
    ------
    requests.HTTPError
        If the service answers with an HTTP error status.
    """
    b64_data = base64.b64encode(pickle_data).decode('utf-8')
    payload = {"Name": "model1", "PickleData": b64_data, "SQLDialect": "postgresql"}
    # Without a timeout, requests would wait indefinitely on an unresponsive service.
    response = requests.post(ws_url, json=payload, timeout=timeout)
    # Surface HTTP failures here instead of as a JSON decoding or KeyError below.
    response.raise_for_status()
    content = response.json()
    # The key is spelled "SQLGenrationResult" (sic) in the request/response contract used here.
    return content["model"]["SQLGenrationResult"][0]["SQL"]



def test_sql_gen(keras_regressor , metadata):
    import sklearn2sql.PyCodeGenerator as codegen
    cg1 = codegen.cAbstractCodeGenerator();
    cg1.mOptions.mActivateMaterialization = False
    lSQL = cg1.generateCodeWithMetadata(clf, metadata, dsn = None, dialect = "postgresql");
    return lSQL[1]


In [9]:
# commented .. see above
# pickle_data = pickle.dumps(clf)
# lSQL = test_ws_sql_gen(pickle_data)
# print(lSQL[0:2000])

In [10]:
# Description of the database table the generated SQL will read from.
lMetaData = {
    'features': list(boston.feature_names),
    "targets": ['TGT'],
    'primary_key': 'KEY',
    'table': 'boston',
}

lSQL = test_sql_gen(clf, lMetaData)


cGenerationWrapperFactory::createWrapper() <class 'keras.wrappers.scikit_learn.KerasRegressor'>
BACKEND_DIALECT postgresql
CREATING_DATABASE_BACKEND_DSN_DIALECT 1.2.7 None postgresql
KERAS_REG_GENERATE_EXPRESSION_START <class 'sqlalchemy.sql.selectable.Alias'> <class 'sklearn2sql.GenericModels.Keras.cSerializedKeras'>
{'mKerasData': <keras.wrappers.scikit_learn.KerasRegressor object at 0x7f793f6ee630>, 'mFeatureNames': ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT'], 'mClasses': None, 'mMode': 'Regression'}
{'build_fn': <function create_model at 0x7f793f6d4950>, 'sk_params': {'batch_size': 1, 'epochs': 12, 'verbose': 2}, 'model': <keras.engine.sequential.Sequential object at 0x7f793f6ee6a0>}
GENERATING_LAYER 0 simple_rnn_1 <class 'keras.layers.recurrent.SimpleRNN'> {'input_spec': [InputSpec(shape=(1, None, 13), ndim=3)], 'supports_masking': True, 'stateful': True, '_trainable_weights': [], '_non_trainable_weights': [], '_losses': [], '_

In [11]:
# Show the generated SQL text.
print(lSQL)

WITH RECURSIVE keras_input AS 
(SELECT "ADS"."KEY" AS "KEY", "ADS"."CRIM" AS "CRIM", "ADS"."ZN" AS "ZN", "ADS"."INDUS" AS "INDUS", "ADS"."CHAS" AS "CHAS", "ADS"."NOX" AS "NOX", "ADS"."RM" AS "RM", "ADS"."AGE" AS "AGE", "ADS"."DIS" AS "DIS", "ADS"."RAD" AS "RAD", "ADS"."TAX" AS "TAX", "ADS"."PTRATIO" AS "PTRATIO", "ADS"."B" AS "B", "ADS"."LSTAT" AS "LSTAT" 
FROM boston AS "ADS"), 
keras_input_1 AS 
(SELECT keras_input."KEY" AS "KEY", keras_input."CRIM" AS "CRIM", keras_input."ZN" AS "ZN", keras_input."INDUS" AS "INDUS", keras_input."CHAS" AS "CHAS", keras_input."NOX" AS "NOX", keras_input."RM" AS "RM", keras_input."AGE" AS "AGE", keras_input."DIS" AS "DIS", keras_input."RAD" AS "RAD", keras_input."TAX" AS "TAX", keras_input."PTRATIO" AS "PTRATIO", keras_input."B" AS "B", keras_input."LSTAT" AS "LSTAT" 
FROM keras_input), 
keras_input_1_rn AS 
(SELECT row_number() OVER (ORDER BY keras_input_1."KEY" ASC) AS rn, keras_input_1."KEY" AS "KEY", keras_input_1."CRIM" AS "CRIM", keras_input_1."Z

# Execute the SQL Code

In [12]:
# save the dataset in a database table


import sqlalchemy as sa

# Connection URL: read from the SKLEARN2SQL_DSN environment variable when it is set,
# otherwise fall back to the local PostgreSQL instance this notebook was written against.
# This keeps real credentials out of the notebook source.
#engine = sa.create_engine('sqlite://' , echo=False)
DB_URL = os.environ.get("SKLEARN2SQL_DSN", "postgresql://db:db@localhost/db?port=5432")
engine = sa.create_engine(DB_URL, echo=False)
conn = engine.connect()

# One row per sample: the standardized features, the standardized target and a 0-based key.
lTable = pd.DataFrame(boston.data, columns=lMetaData['features'])
lTable['TGT'] = boston.target
lTable['KEY'] = range(boston.data.shape[0])
# Replace any existing table of the same name.
lTable.to_sql(lMetaData['table'], conn, if_exists='replace', index=False)

In [13]:
# Run the generated SQL against the uploaded table and load the result into a DataFrame.
sql_output = pd.read_sql(lSQL , conn);

In [14]:
# Fixed random_state so the same rows are shown as in the Keras-side sample further down.
sql_output.sample(12, random_state=1960)

Unnamed: 0,KEY,Estimator
230,230,-0.088796
112,112,-0.428158
125,125,-0.00888
9,9,-0.121876
213,213,0.459976
109,109,-0.306147
127,127,-0.543347
244,244,-0.828347
406,406,-0.875416
490,490,-0.875416


# Keras Prediction

In [15]:
# Predict every row of the dataset with the Keras model, in dataset order.
n_rows, n_features = boston.data.shape
data = boston.data.reshape(n_rows, 1, n_features)
skl_out = clf.predict(data)
print(skl_out.shape)

# Assemble a (KEY, Estimator) frame where KEY is the 0-based row position.
skl_output_key = pd.DataFrame({'KEY': list(range(n_rows))})
skl_output_estimator = pd.DataFrame({'Estimator': skl_out})
skl_output = pd.DataFrame()
skl_output['KEY'] = skl_output_key['KEY']
skl_output['Estimator'] = skl_output_estimator['Estimator']
skl_output.sample(12, random_state=1960)

(506,)


Unnamed: 0,KEY,Estimator
230,230,-0.088796
112,112,-0.428158
125,125,-0.00888
9,9,-0.121876
213,213,0.459976
109,109,-0.306147
127,127,-0.543347
244,244,-0.828347
406,406,-0.875416
490,490,-0.875416


# Comparing the SQL and Keras Predictions

In [16]:
# DataFrame.join(on='KEY') matches the left frame's 'KEY' column against the *index* of the
# right frame. Index the SQL result by its own KEY values so the comparison stays correct
# even if the database returns the rows in a different order.
sql_output_by_key = sql_output.copy()
sql_output_by_key.index = sql_output_by_key['KEY'].values
sql_skl_join = skl_output.join(sql_output_by_key, how='left', on='KEY', lsuffix='_skl', rsuffix='_sql')
# Signed difference between the SQL estimate and the Keras estimate for each key.
sql_skl_join['Error'] = sql_skl_join.Estimator_sql - sql_skl_join.Estimator_skl

In [17]:
# First rows of the side-by-side comparison, including the per-row Error column.
sql_skl_join.head(12)

Unnamed: 0,KEY_skl,Estimator_skl,KEY_sql,Estimator_sql,Error
0,0,0.929029,0,0.929029,-5.378071e-08
1,1,0.113459,1,0.113459,1.334226e-08
2,2,1.091642,2,1.091641,-2.715767e-07
3,3,0.755365,3,0.755365,1.021069e-07
4,4,0.791832,4,0.791832,3.515764e-08
5,5,0.270771,5,0.270771,1.105575e-09
6,6,0.023396,6,0.023396,1.382971e-07
7,7,-0.25359,7,-0.25359,-2.313976e-08
8,8,-0.875416,8,-0.875416,3.330669e-16
9,9,-0.121876,9,-0.121876,-1.845071e-08


In [18]:
# Summary statistics; the Error column shows how closely the SQL and Keras estimates agree.
sql_skl_join.describe()

Unnamed: 0,KEY_skl,Estimator_skl,KEY_sql,Estimator_sql,Error
count,506.0,506.0,506.0,506.0,506.0
mean,252.5,0.029686,252.5,0.029686,1.385797e-08
std,146.213884,0.728562,146.213884,0.728562,7.817812e-08
min,0.0,-0.875416,0.0,-0.875416,-2.715767e-07
25%,126.25,-0.591632,126.25,-0.591632,-1.805192e-08
50%,252.5,-0.036797,252.5,-0.036797,3.330669e-16
75%,378.75,0.521485,378.75,0.521485,4.65641e-08
max,505.0,2.213554,505.0,2.213554,6.02415e-07
