In [1]:
import os, numpy as np
import pandas as pd

os.environ["KERAS_BACKEND"] = "tensorflow"

import keras 
from sklearn import datasets

from sklearn.model_selection import train_test_split

from keras.utils import np_utils


def set_reproducible(seed=1960):
    """Seed the random number generators and install a single-threaded TF session.

    Seeds Python's ``random`` module, NumPy's global generator and TensorFlow's
    graph-level seed with the same value, then registers a TensorFlow session
    limited to one intra-op and one inter-op thread as the Keras backend session.

    Parameters
    ----------
    seed : int, optional
        Value used for every seed. Defaults to 1960, the value the notebook
        has always used.

    Notes
    -----
    ``PYTHONHASHSEED`` is exported as well, but Python reads that variable at
    interpreter start-up, so setting it here cannot change string hashing in the
    already-running kernel.
    """
    import random as rn

    import tensorflow as tf
    from keras import backend as K

    os.environ['PYTHONHASHSEED'] = str(seed)
    rn.seed(seed)
    np.random.seed(seed)
    tf.set_random_seed(seed)

    # One thread per op pool: the session is configured with both thread counts set to 1.
    session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
    sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
    K.set_session(sess)

# Apply the seeding and the single-threaded TensorFlow session before any model is built below.
set_reproducible()


Using TensorFlow backend.
  return f(*args, **kwds)


# Build a Keras Model

In [2]:
NC = 13  # size of the last input axis, i.e. features fed to the network per time step


def create_model():
    """Build and compile the regression network.

    The network is a stateful ``SimpleRNN`` with 3 units reading inputs of shape
    ``(1, NC)`` with a fixed batch size of 1, followed by a single-unit ``Dense``
    output layer. It is compiled with the ``rmsprop`` optimizer and ``mse`` loss.

    Returns
    -------
    keras.models.Sequential
        The compiled, untrained model.
    """
    from keras.layers import Dense, Activation, SimpleRNN
    from keras.models import Sequential

    recurrent = SimpleRNN(3, input_shape=(1, NC), batch_size=1, stateful=True)
    output = Dense(1)

    model = Sequential([recurrent, output])
    model.compile(optimizer='rmsprop', loss='mse')
    return model

In [3]:
# Load the Boston housing data and split it 60% / 40% into train and test rows.
boston = datasets.load_boston()
train_X, test_X, train_y, test_y = train_test_split(
    boston.data,
    boston.target,
    train_size=0.6,
    test_size=0.4,
    random_state=1960,
)
# Insert a length-1 middle axis: (samples, features) -> (samples, 1, features),
# the 3-D layout the recurrent layer is declared with.
train_X = train_X[:, np.newaxis, :]
test_X = test_X[:, np.newaxis, :]



In [4]:
#boston.data

In [5]:
#boston.target

In [6]:

from keras.wrappers.scikit_learn import KerasRegressor

# Wrap the model factory in the scikit-learn style estimator and train it one sample at a time.
fit_options = dict(batch_size=1, epochs=12, verbose=2)
clf = KerasRegressor(build_fn=create_model, **fit_options)

print(train_X.shape, train_y.shape)
clf.fit(train_X, train_y)

(303, 1, 13) (303,)
SIMPLERNNCELL_CALL_IN Tensor("simple_rnn_1/strided_slice_1:0", shape=(1, 13), dtype=float32) [<tf.Variable 'simple_rnn_1/Variable:0' shape=(1, 3) dtype=float32_ref>]
SIMPLERNNCELL_CALL_OUT Tensor("simple_rnn_1/Tanh:0", shape=(1, 3), dtype=float32) [<tf.Tensor 'simple_rnn_1/Tanh:0' shape=(1, 3) dtype=float32>]
SIMPLERNNCELL_CALL_IN Tensor("simple_rnn_1/while/TensorArrayReadV3:0", shape=(1, 13), dtype=float32) (<tf.Tensor 'simple_rnn_1/while/Identity_2:0' shape=(1, 3) dtype=float32>,)
SIMPLERNNCELL_CALL_OUT Tensor("simple_rnn_1/while/Tanh:0", shape=(1, 3), dtype=float32) [<tf.Tensor 'simple_rnn_1/while/Tanh:0' shape=(1, 3) dtype=float32>]
RNN_LAYER_CALL_IN Tensor("simple_rnn_1_input:0", shape=(1, 1, 13), dtype=float32) [<tf.Variable 'simple_rnn_1/Variable:0' shape=(1, 3) dtype=float32_ref>]
RNN_LAYER_CALL_OUT Tensor("simple_rnn_1/TensorArrayReadV3:0", shape=(1, 3), dtype=float32) (<tf.Tensor 'simple_rnn_1/while/Exit_2:0' shape=(1, 3) dtype=float32>,)
Epoch 1/12
 - 1s 

<keras.callbacks.History at 0x7f2a733e4780>

In [7]:
# Show the shape of the held-out inputs, then predict them with the fitted regressor.
print(test_X.shape)
# NOTE(review): the network is declared stateful, so this call presumably leaves
# recurrent state behind for later predict calls — confirm.
preds = clf.predict(test_X)
print(preds)


(203, 1, 13)
[11.2611065 11.2611065 11.2611065 11.2611065 11.2611065 11.2611065
 11.2611065 11.2611065 11.2611065 11.2611065 11.2611065 11.2611065
 11.2611065 11.2611065 11.2611065 11.2611065 11.2611065 11.2611065
 11.2611065 11.2611065 11.2611065 11.2611065  6.3539305 11.2611065
 11.2611065 11.2611065 11.2611065  6.3539305 11.2611065 11.2611065
 11.2611065 11.2611065  6.3539305 11.2611065 11.2611065 11.2611065
 11.2611065 11.2611065 11.2611065 11.2611065 11.2611065 11.2611065
 11.2611065  6.3539305 11.2611065 11.2611065 11.2611065 11.2611065
 11.2611065 11.2611065 11.2611065 11.2611065  6.3539305 11.2611065
 11.2611065 11.2611065 11.2611065 11.2611065 11.2611065 11.2611065
  6.3539305 11.2611065 11.2611065 11.2611065 11.2611065 11.2611065
 11.2611065 11.2611065  6.3539305 11.2611065 11.2611065 11.2611065
 11.2611065 11.2611065 11.2611065 11.2611065 11.2611065 11.2611065
 11.2611065 11.2611065 11.2611065 11.2611065 11.2611065 11.2611065
 11.2611065 11.2611065 11.2611065 11.2611065 11.2

# Generate SQL Code from the Model

In [8]:
import json, requests, base64, dill as pickle, sys



# Raise the interpreter recursion limit to 200000; presumably needed for pickling
# the deeply nested Keras/TensorFlow objects — TODO confirm.
sys.setrecursionlimit(200000)
# `pickle` is dill here (imported as `dill as pickle`); turn its 'recurse' setting off.
pickle.settings['recurse'] = False

# The web-service route does not work here: pickling TensorFlow and/or Keras objects does not seem to be a priority for those projects.
# Searching the GitHub issues of either project for the keyword "pickle" turns up many reports.

def test_ws_sql_gen(pickle_data):
    WS_URL="http://localhost:1888/model"
    b64_data = base64.b64encode(pickle_data).decode('utf-8')
    data={"Name":"model1", "PickleData":b64_data , "SQLDialect":"postgresql"}
    r = requests.post(WS_URL, json=data)
    print(r.__dict__)
    content = r.json()
    # print(content)
    lSQL = content["model"]["SQLGenrationResult"][0]["SQL"]
    return lSQL;



def test_sql_gen(keras_regressor , metadata):
    import sklearn2sql.PyCodeGenerator as codegen
    cg1 = codegen.cAbstractCodeGenerator();
    cg1.mOptions.mActivateMaterialization = False
    lSQL = cg1.generateCodeWithMetadata(clf, metadata, dsn = None, dialect = "postgresql");
    return lSQL[1]


In [9]:
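# Disabled: the web-service route needs the fitted model to be pickled, which does not work (see the note in the previous cell).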
# pickle_data = pickle.dumps(clf)
# lSQL = test_ws_sql_gen(pickle_data)
# print(lSQL[0:2000])

In [10]:
# Describe the input table for the generator: feature columns, target column,
# key column and table name.
lMetaData = {
    'features': list(boston.feature_names),
    "targets": ['TGT'],
    'primary_key': 'KEY',
    'table': 'boston',
}

lSQL = test_sql_gen(clf, lMetaData)


cGenerationWrapperFactory::createWrapper() <class 'keras.wrappers.scikit_learn.KerasRegressor'>
BACKEND_DIALECT postgresql
CREATING_DATABASE_BACKEND_DSN_DIALECT 1.2.7 None postgresql
KERAS_REG_GENERATE_EXPRESSION_START <class 'sqlalchemy.sql.selectable.Alias'> <class 'sklearn2sql.GenericModels.Keras.cSerializedKeras'>
{'mKerasData': <keras.wrappers.scikit_learn.KerasRegressor object at 0x7f2a7b4df128>, 'mFeatureNames': ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT'], 'mClasses': None, 'mMode': 'Regression'}
{'build_fn': <function create_model at 0x7f2a7b4d41e0>, 'sk_params': {'batch_size': 1, 'epochs': 12, 'verbose': 2}, 'model': <keras.models.Sequential object at 0x7f2a7b4df0f0>}
ABSTRACT_GENERATE_CTE keras_input keras_input 14 False False
ABSTRACT_GENERATE_SIMPLE_CTE keras_input 14
GENERATING_LAYER 0 simple_rnn_1 <class 'keras.layers.recurrent.SimpleRNN'> {'input_spec': [InputSpec(shape=(1, None, 13), ndim=3)], 'supports_masking': Tru

In [11]:
print(lSQL)

WITH RECURSIVE keras_input AS 
(SELECT "ADS"."KEY" AS "KEY", "ADS"."CRIM" AS "CRIM", "ADS"."ZN" AS "ZN", "ADS"."INDUS" AS "INDUS", "ADS"."CHAS" AS "CHAS", "ADS"."NOX" AS "NOX", "ADS"."RM" AS "RM", "ADS"."AGE" AS "AGE", "ADS"."DIS" AS "DIS", "ADS"."RAD" AS "RAD", "ADS"."TAX" AS "TAX", "ADS"."PTRATIO" AS "PTRATIO", "ADS"."B" AS "B", "ADS"."LSTAT" AS "LSTAT" 
FROM boston AS "ADS"), 
keras_input_1 AS 
(SELECT keras_input."KEY" AS "KEY", keras_input."CRIM" AS "CRIM", keras_input."ZN" AS "ZN", keras_input."INDUS" AS "INDUS", keras_input."CHAS" AS "CHAS", keras_input."NOX" AS "NOX", keras_input."RM" AS "RM", keras_input."AGE" AS "AGE", keras_input."DIS" AS "DIS", keras_input."RAD" AS "RAD", keras_input."TAX" AS "TAX", keras_input."PTRATIO" AS "PTRATIO", keras_input."B" AS "B", keras_input."LSTAT" AS "LSTAT" 
FROM keras_input), 
keras_input_1_rn AS 
(SELECT row_number() OVER (ORDER BY keras_input_1."KEY" ASC) AS rn, keras_input_1."KEY" AS "KEY", keras_input_1."CRIM" AS "CRIM", keras_input_1."Z

# Execute the SQL Code

In [12]:
# Save the dataset in a database table so the generated SQL can be run against it.


import sqlalchemy as sa

# The connection URL can be overridden with the SKLEARN2SQL_DSN environment
# variable, so real credentials never have to be written into the notebook.
# The fallback is the local test database this notebook has always used.
#engine = sa.create_engine('sqlite://' , echo=False)
engine = sa.create_engine(
    os.environ.get("SKLEARN2SQL_DSN", "postgresql://db:db@localhost/db?port=5432"),
    echo=False,
)
conn = engine.connect()

# One column per feature, plus the target ('TGT') and a 0-based row key ('KEY').
lTable = pd.DataFrame(boston.data, columns=lMetaData['features'])
lTable['TGT'] = boston.target
lTable['KEY'] = range(boston.data.shape[0])
# Replace the table if it already exists, so the cell can be re-run.
lTable.to_sql(lMetaData['table'], conn, if_exists='replace', index=False)

In [13]:
# Execute the generated SQL on the open connection and load the result into a DataFrame.
sql_output = pd.read_sql(sql=lSQL, con=conn)

In [14]:
# Fixed random_state so the same 12 rows are shown on every run.
sql_output.sample(n=12, random_state=1960)

Unnamed: 0,KEY,Estimator
230,230,11.261106
112,112,11.261106
125,125,11.261106
9,9,11.261106
213,213,11.261106
109,109,11.261106
127,127,11.261106
244,244,11.261106
406,406,11.261106
490,490,11.261106


# Keras Prediction

In [15]:
# The network is stateful, so recurrent state left behind by the earlier fit and
# predict calls would otherwise feed into the first row here, and re-running this
# cell would not be repeatable. Start from a freshly reset state instead.
# NOTE(review): presumably the generated SQL also starts from the initial state — confirm.
clf.model.reset_states()

n_rows, n_features = boston.data.shape
# Same (samples, 1, features) layout the model was trained on.
data = boston.data.reshape(n_rows, 1, n_features)
skl_out = clf.predict(data)
print(skl_out.shape)

# KEY is the 0-based row number, matching the KEY column written to the database table.
skl_output = pd.DataFrame(
    {'KEY': np.arange(n_rows), 'Estimator': skl_out},
    columns=['KEY', 'Estimator'],
)
skl_output.sample(12, random_state=1960)

(506,)


Unnamed: 0,KEY,Estimator
230,230,11.261106
112,112,11.261106
125,125,11.261106
9,9,11.261106
213,213,11.261106
109,109,11.261106
127,127,11.261106
244,244,11.261106
406,406,11.261106
490,490,11.261106


# Comparing the SQL and Keras Predictions

In [16]:
# DataFrame.join(on='KEY') matches skl_output['KEY'] against the *index* of the
# other frame, not against its 'KEY' column. Index the SQL result by KEY first;
# otherwise rows are paired by the positional index of sql_output, which is only
# right when the query happens to return its rows in KEY order.
sql_by_key = sql_output.set_index('KEY', drop=False)
sql_by_key.index.name = None  # keep 'KEY' a plain column, not also the index name
sql_skl_join = skl_output.join(sql_by_key, how='left', on='KEY', lsuffix='_skl', rsuffix='_sql')
# Signed difference between the SQL and the Keras prediction for each row.
sql_skl_join['Error'] = sql_skl_join.Estimator_sql - sql_skl_join.Estimator_skl

In [17]:
# First 12 rows of the side-by-side comparison.
sql_skl_join.head(n=12)

Unnamed: 0,KEY_skl,Estimator_skl,KEY_sql,Estimator_sql,Error
0,0,11.261106,0,11.261106,-4.768372e-07
1,1,11.261106,1,11.261106,-4.768372e-07
2,2,11.261106,2,11.261106,-4.768372e-07
3,3,11.261106,3,11.261106,-4.768372e-07
4,4,11.261106,4,11.261106,-4.768372e-07
5,5,11.261106,5,11.261106,-4.768372e-07
6,6,11.261106,6,11.261106,-4.768372e-07
7,7,11.261106,7,11.261106,-4.768372e-07
8,8,11.261106,8,11.261106,-4.768372e-07
9,9,11.261106,9,11.261106,-4.768372e-07


In [18]:
# Summary statistics for every column, including the SQL-minus-Keras 'Error' column.
sql_skl_join.describe()

Unnamed: 0,KEY_skl,Estimator_skl,KEY_sql,Estimator_sql,Error
count,506.0,506.0,506.0,506.0,506.0
mean,252.5,10.890812,252.5,10.89081,-4.366978e-07
std,146.213884,1.294657,146.213884,1.294657,1.628615e-07
min,0.0,6.35393,0.0,6.35393,-6.318834e-07
25%,126.25,11.261106,126.25,11.261106,-4.768372e-07
50%,252.5,11.261106,252.5,11.261106,-4.768372e-07
75%,378.75,11.261106,378.75,11.261106,-4.768372e-07
max,505.0,11.261106,505.0,11.261106,1.887935e-06
