In [1]:
import os, numpy as np
import pandas as pd

# Select the TensorFlow backend; this is set before the `import keras` below.
os.environ["KERAS_BACKEND"] = "tensorflow"

import keras 
from sklearn import datasets

from sklearn.model_selection import train_test_split

from keras.utils import np_utils


def set_reproducible():
    """Seed the random number generators and install a single-threaded TF session.

    Seeds Python's ``random`` module, NumPy and TensorFlow with 1960, sets
    ``PYTHONHASHSEED`` in the process environment, and registers a TensorFlow
    session limited to one intra-op and one inter-op thread as the Keras
    backend session.
    """
    import os
    import random as rn
    import tensorflow as tf
    from keras import backend as K

    seed = 1960

    os.environ['PYTHONHASHSEED'] = '1960'
    rn.seed(seed)
    np.random.seed(seed)
    tf.set_random_seed(seed)

    # Restrict both TensorFlow thread pools to a single thread.
    single_thread_conf = tf.ConfigProto(
        inter_op_parallelism_threads=1,
        intra_op_parallelism_threads=1,
    )
    K.set_session(tf.Session(graph=tf.get_default_graph(), config=single_thread_conf))


set_reproducible()


Using TensorFlow backend.


# Build a Keras Model

In [2]:
NC = 13  # number of feature columns fed to the network (see input_shape below)


def create_model():
    """Build and compile the recurrent regression network.

    The network is a stateful ``SimpleRNN`` with 4 units and ReLU activation
    that takes inputs of shape ``(1, NC)`` with a fixed batch size of 1,
    followed by a single linear ``Dense`` output unit. It is compiled with
    mean-squared-error loss and an Adam optimizer (lr=0.01, clipnorm=1).

    Returns:
        The compiled ``Sequential`` model.
    """
    from keras.constraints import max_norm
    from keras.layers import Activation, Dense, SimpleRNN
    from keras.models import Sequential

    recurrent_layer = SimpleRNN(
        4,
        input_shape=(1, NC),
        batch_size=1,
        activation='relu',
        stateful=True,
        kernel_constraint=max_norm(2.),
        recurrent_constraint=max_norm(2.),
        kernel_initializer=keras.initializers.Orthogonal(gain=0.1),
        recurrent_initializer=keras.initializers.Zeros(),
    )
    output_layer = Dense(1, activation=None)

    model = Sequential([recurrent_layer, output_layer])
    model.compile(
        loss='mse',
        optimizer=keras.optimizers.Adam(lr=0.01, clipnorm=1.),
    )
    return model



In [3]:
# Load the Boston housing data and standardise features and target to zero
# mean / unit variance, using statistics computed over the full dataset.
boston = datasets.load_boston()

feature_mean, feature_std = boston.data.mean(axis=0), boston.data.std(axis=0)
target_mean, target_std = boston.target.mean(axis=0), boston.target.std(axis=0)

# Keep only the first NC (standardised) feature columns.
boston.data = ((boston.data - feature_mean) / feature_std)[:, 0:NC]
boston.target = (boston.target - target_mean) / target_std

# 60/40 train/test split with a fixed seed.
train_X, test_X, train_y, test_y = train_test_split(
    boston.data,
    boston.target,
    train_size=0.6,
    test_size=0.4,
    random_state=1960,
)

# Insert a length-1 middle axis: (samples, features) -> (samples, 1, features).
train_X = train_X[:, np.newaxis, :]
test_X = test_X[:, np.newaxis, :]



In [4]:
#boston.data

In [5]:
#boston.target

In [6]:

from keras.wrappers.scikit_learn import KerasRegressor

# scikit-learn style wrapper; create_model is passed as the model factory.
clf = KerasRegressor(
    build_fn=create_model,
    batch_size=1,
    epochs=12,
    verbose=2,
)

# Print the shapes going into the fit, then train.
print(train_X.shape, train_y.shape)
clf.fit(train_X, train_y)

(303, 1, 13) (303,)
Epoch 1/12
 - 1s - loss: 0.4711
Epoch 2/12
 - 1s - loss: 0.3619
Epoch 3/12
 - 1s - loss: 0.3652
Epoch 4/12
 - 1s - loss: 0.3253
Epoch 5/12
 - 1s - loss: 0.3464
Epoch 6/12
 - 1s - loss: 0.2967
Epoch 7/12
 - 1s - loss: 0.3313
Epoch 8/12
 - 1s - loss: 0.2875
Epoch 9/12
 - 1s - loss: 0.3359
Epoch 10/12
 - 1s - loss: 0.3890
Epoch 11/12
 - 1s - loss: 0.3626
Epoch 12/12
 - 1s - loss: 0.3147


<keras.callbacks.History at 0x7ff1e0f80e80>

In [7]:
# Print the test tensor's shape, predict on the held-out rows, and print the raw predictions.
print(test_X.shape)
preds = clf.predict(test_X)
print(preds)


(203, 1, 13)
[-1.06120348e-01 -4.68807876e-01 -4.19324756e-01 -4.59029377e-01
  2.38383055e-01 -4.70850110e-01 -6.76228285e-01 -4.40464437e-01
 -1.31780505e+00 -5.03729522e-01 -5.03729522e-01 -6.33671165e-01
 -1.28899515e-02 -1.23498559e+00  1.25148702e+00 -2.08488584e-01
  1.36989331e+00  6.90436542e-01 -5.98816991e-01  5.51429927e-01
 -2.62875915e-01  2.03246975e+00 -5.30728638e-01  1.37380576e+00
  1.46294975e+00 -3.68447244e-01 -2.26225793e-01 -1.64742470e+00
  4.29284513e-01 -1.16125429e+00  9.77003872e-01 -7.53580868e-01
 -5.03729522e-01  1.42702079e+00  4.21265841e-01  1.23010516e-01
  4.62387621e-01 -8.37476552e-01 -9.24494386e-01  7.37173975e-01
 -4.90710199e-01 -1.69981688e-01 -6.16247833e-01 -7.21296251e-01
 -3.55155647e-01 -4.75190878e-02  3.66675198e-01 -1.81154400e-01
 -1.55375600e+00 -3.06081057e-01 -3.17018002e-01 -6.35816753e-01
 -1.70518827e+00  1.57954049e+00  3.34363043e-01 -5.03729522e-01
  1.16390085e+00  1.02061152e-01  8.46289396e-02 -1.88797712e-02
 -1.32567495

# Generate SQL Code from the Model

In [8]:
import json, requests, base64, dill as pickle, sys



# Raise Python's recursion limit and turn off dill's `recurse` setting
# (`pickle` is dill here, per the import above).
sys.setrecursionlimit(200000)
pickle.settings['recurse'] = False

# No luck with the web service: pickling TensorFlow and/or Keras objects does not seem to be a priority upstream.
# There are a lot of GitHub issues in both projects that match the keyword "pickle".

def test_ws_sql_gen(pickle_data):
    WS_URL="http://localhost:1888/model"
    b64_data = base64.b64encode(pickle_data).decode('utf-8')
    data={"Name":"model1", "PickleData":b64_data , "SQLDialect":"postgresql"}
    r = requests.post(WS_URL, json=data)
    print(r.__dict__)
    content = r.json()
    # print(content)
    lSQL = content["model"]["SQLGenrationResult"][0]["SQL"]
    return lSQL;



def test_sql_gen(keras_regressor , metadata):
    import sklearn2sql.PyCodeGenerator as codegen
    cg1 = codegen.cAbstractCodeGenerator();
    cg1.mOptions.mActivateMaterialization = False
    lSQL = cg1.generateCodeWithMetadata(clf, metadata, dsn = None, dialect = "postgresql");
    return lSQL[1]


In [9]:
# Disabled: the web-service route needs a pickled model, which does not work (see the note above).
# pickle_data = pickle.dumps(clf)
# lSQL = test_ws_sql_gen(pickle_data)
# print(lSQL[0:2000])

In [10]:
# Feature, target, key and table names handed to the SQL generator.
lMetaData = {
    'features': list(boston.feature_names),
    "targets": ['TGT'],
    'primary_key': 'KEY',
    'table': 'boston',
}

lSQL = test_sql_gen(clf, lMetaData)


cGenerationWrapperFactory::createWrapper() <class 'keras.wrappers.scikit_learn.KerasRegressor'>
BACKEND_DIALECT postgresql
CREATING_DATABASE_BACKEND_DSN_DIALECT 1.2.7 None postgresql
KERAS_REG_GENERATE_EXPRESSION_START <class 'sqlalchemy.sql.selectable.Alias'> <class 'sklearn2sql.GenericModels.Keras.cSerializedKeras'>
{'mKerasData': <keras.wrappers.scikit_learn.KerasRegressor object at 0x7ff1e73286a0>, 'mFeatureNames': ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT'], 'mClasses': None, 'mMode': 'Regression'}
{'build_fn': <function create_model at 0x7ff1e730f9d8>, 'sk_params': {'batch_size': 1, 'epochs': 12, 'verbose': 2}, 'model': <keras.engine.sequential.Sequential object at 0x7ff1e7328710>}
GENERATING_LAYER 0 simple_rnn_1 <class 'keras.layers.recurrent.SimpleRNN'> {'input_spec': [InputSpec(shape=(1, None, 13), ndim=3)], 'supports_masking': True, 'stateful': True, '_trainable_weights': [], '_non_trainable_weights': [], '_losses': [], '_

In [11]:
# Print the generated SQL text.
print(lSQL)

WITH RECURSIVE keras_input AS 
(SELECT "ADS"."KEY" AS "KEY", "ADS"."CRIM" AS "CRIM", "ADS"."ZN" AS "ZN", "ADS"."INDUS" AS "INDUS", "ADS"."CHAS" AS "CHAS", "ADS"."NOX" AS "NOX", "ADS"."RM" AS "RM", "ADS"."AGE" AS "AGE", "ADS"."DIS" AS "DIS", "ADS"."RAD" AS "RAD", "ADS"."TAX" AS "TAX", "ADS"."PTRATIO" AS "PTRATIO", "ADS"."B" AS "B", "ADS"."LSTAT" AS "LSTAT" 
FROM boston AS "ADS"), 
keras_input_1 AS 
(SELECT keras_input."KEY" AS "KEY", keras_input."CRIM" AS "CRIM", keras_input."ZN" AS "ZN", keras_input."INDUS" AS "INDUS", keras_input."CHAS" AS "CHAS", keras_input."NOX" AS "NOX", keras_input."RM" AS "RM", keras_input."AGE" AS "AGE", keras_input."DIS" AS "DIS", keras_input."RAD" AS "RAD", keras_input."TAX" AS "TAX", keras_input."PTRATIO" AS "PTRATIO", keras_input."B" AS "B", keras_input."LSTAT" AS "LSTAT" 
FROM keras_input), 
keras_input_1_rn AS 
(SELECT row_number() OVER (ORDER BY keras_input_1."KEY" ASC) AS rn, keras_input_1."KEY" AS "KEY", keras_input_1."CRIM" AS "CRIM", keras_input_1."Z

# Execute the SQL Code

In [12]:
# save the dataset in a database table


import sqlalchemy as sa

# NOTE(review): the connection URL, including user and password, is hard-coded here;
# consider reading it from configuration or the environment.
#engine = sa.create_engine('sqlite://' , echo=False)
engine = sa.create_engine("postgresql://db:db@localhost/db?port=5432", echo=False)
conn = engine.connect()

# Feature columns first, then the target as 'TGT' and a 0-based row number as 'KEY'.
lTable = pd.DataFrame(boston.data, columns=lMetaData['features'])
lTable['TGT'] = boston.target
lTable['KEY'] = range(len(lTable))

# Write the frame to the table named in the metadata, replacing any existing table.
lTable.to_sql(lMetaData['table'], conn, if_exists='replace', index=False)

In [13]:
# Run the generated SQL over the open connection and load the result into a DataFrame.
sql_output = pd.read_sql(lSQL , conn);

In [14]:
# Fixed-seed sample of 12 rows; the Keras prediction cell samples with the same seed.
sql_output.sample(12, random_state=1960)

Unnamed: 0,KEY,Estimator
230,230,-0.21893
112,112,-0.444235
125,125,-0.44498
9,9,-0.266716
213,213,0.286313
109,109,-0.50373
127,127,-0.494083
244,244,-0.50373
406,406,-0.50373
490,490,-1.36276


# Keras Prediction

In [15]:
# Run every row of the dataset through the fitted regressor.
n_rows, n_features = boston.data.shape
data = boston.data.reshape(n_rows, 1, n_features)
skl_out = clf.predict(data)
print(skl_out.shape)

# Assemble a (KEY, Estimator) frame, with KEY being the 0-based row number.
skl_output_key = pd.DataFrame({'KEY': list(range(n_rows))})
skl_output_estimator = pd.DataFrame(skl_out, columns=['Estimator'])
skl_output = pd.DataFrame(
    {
        'KEY': skl_output_key['KEY'],
        'Estimator': skl_output_estimator['Estimator'],
    },
    columns=['KEY', 'Estimator'],
)
skl_output.sample(12, random_state=1960)

(506,)


Unnamed: 0,KEY,Estimator
230,230,-0.21893
112,112,-0.444235
125,125,-0.44498
9,9,-0.266716
213,213,0.286313
109,109,-0.50373
127,127,-0.494083
244,244,-0.50373
406,406,-0.50373
490,490,-1.362759


# Comparing the SQL and Keras Predictions

In [16]:
# DataFrame.join(on='KEY') matches the caller's KEY column against the *index*
# of the other frame. sql_output carries a positional index, so joining it
# directly is only right when the SQL rows happen to come back in KEY order.
# Index it by its own KEY values first (the KEY column itself is kept, so the
# result still has KEY_skl / KEY_sql columns).
sql_by_key = sql_output.set_index(sql_output['KEY'].values)
sql_skl_join = skl_output.join(sql_by_key, how='left', on='KEY', lsuffix='_skl', rsuffix='_sql')
# Per-row difference between the SQL and the Keras estimates.
sql_skl_join['Error'] = sql_skl_join.Estimator_sql - sql_skl_join.Estimator_skl

In [17]:
# First 12 joined rows: both key columns, both estimates and their difference.
sql_skl_join.head(12)

Unnamed: 0,KEY_skl,Estimator_skl,KEY_sql,Estimator_sql,Error
0,0,0.62485,0,0.62485,-1.025512e-07
1,1,0.084729,1,0.084729,3.295648e-08
2,2,0.74835,2,0.74835,-5.327277e-08
3,3,1.26423,3,1.26423,-4.284777e-08
4,4,0.392285,4,0.392285,-4.750618e-08
5,5,0.349176,5,0.349176,-6.075341e-08
6,6,-0.190891,6,-0.190891,1.029333e-08
7,7,-0.664848,7,-0.664848,1.62148e-08
8,8,-0.817493,8,-0.817493,-4.275059e-08
9,9,-0.266716,9,-0.266716,-7.976741e-09


In [18]:
# Summary statistics of the joined frame, including the Error column.
sql_skl_join.describe()

Unnamed: 0,KEY_skl,Estimator_skl,KEY_sql,Estimator_sql,Error
count,506.0,506.0,506.0,506.0,506.0
mean,252.5,-0.134431,252.5,-0.134431,1.847776e-09
std,146.213884,0.800508,146.213884,0.800508,1.981533e-07
min,0.0,-2.341866,0.0,-2.341865,-2.092728e-06
25%,126.25,-0.50373,126.25,-0.50373,-3.220922e-08
50%,252.5,-0.261804,252.5,-0.261804,0.0
75%,378.75,0.239133,378.75,0.239133,3.512225e-08
max,505.0,3.058708,505.0,3.058708,8.854821e-07
