In [1]:
import os, numpy as np
import pandas as pd

os.environ["KERAS_BACKEND"] = "cntk"

import keras 
from sklearn import datasets

from sklearn.model_selection import train_test_split

from keras.utils import np_utils


def set_reproducible(seed=1960):
    """Seed the random number generators and pin TensorFlow to one thread.

    Seeds Python's ``random`` module, NumPy's global generator and
    TensorFlow's graph-level generator with the same value, then installs a
    TensorFlow session limited to one intra-op and one inter-op thread as
    the Keras session.

    Parameters
    ----------
    seed : int, optional
        Seed shared by every generator. Defaults to 1960, the value that
        used to be hard-coded here.
    """
    import tensorflow as tf
    import random as rn
    import os

    # NOTE(review): PYTHONHASHSEED is normally read when the interpreter
    # starts, so setting it here presumably only affects child processes.
    os.environ['PYTHONHASHSEED'] = str(seed)
    rn.seed(seed)
    np.random.seed(seed)
    session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)

    # NOTE(review): this always targets the TensorFlow backend module, even
    # when KERAS_BACKEND selects another backend - confirm that is intended.
    import keras.backend.tensorflow_backend as K
    tf.set_random_seed(seed)

    sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
    K.set_session(sess)

# NOTE(review): KERAS_BACKEND is set to "cntk" earlier in this cell, while
# set_reproducible() seeds and configures TensorFlow - confirm that this has
# the intended effect on the backend actually in use.
set_reproducible()


Using CNTK backend


# Build a Keras Model

In [2]:
# Number of feature columns: used as the GRU input width and to slice the
# dataset columns and feature names.
NC = 13

def create_model():
    """Build and compile the stateful single-unit GRU regressor.

    The network is a ``Sequential`` stack of one ``GRU`` layer (one unit,
    one time step of ``NC`` features, fixed batch size of 1) followed by a
    ``Dense`` layer with one output and no activation. It is compiled with
    the Adam optimizer and mean-squared-error loss.

    Returns
    -------
    The compiled ``Sequential`` model.
    """
    from keras.constraints import max_norm
    from keras.layers import Dense, Activation, GRU
    from keras.models import Sequential

    recurrent_layer = GRU(
        1,
        input_shape=(1, NC),
        batch_size=1,
        activation='relu',
        recurrent_activation='hard_sigmoid',
        stateful=True,
        kernel_constraint=max_norm(2.),
        recurrent_constraint=max_norm(2.),
        kernel_initializer=keras.initializers.Orthogonal(gain=0.1),
        recurrent_initializer=keras.initializers.Zeros(),
    )
    output_layer = Dense(1, activation=None)

    model = Sequential()
    # Layers are added in the same order as before: GRU first, then Dense.
    for layer in (recurrent_layer, output_layer):
        model.add(layer)
    model.compile(optimizer='adam', loss='mse')
    return model

In [3]:
# Load the Boston housing data and z-score every feature column and the target.
boston = datasets.load_boston()

feature_mean = boston.data.mean(axis=0)
feature_std = boston.data.std(axis=0)
boston.data = (boston.data - feature_mean) / feature_std

target_mean = boston.target.mean(axis=0)
target_std = boston.target.std(axis=0)
boston.target = (boston.target - target_mean) / target_std

# Keep only the first NC feature columns.
boston.data = boston.data[:, 0:NC]

train_X, test_X, train_y, test_y = train_test_split(
    boston.data,
    boston.target,
    train_size=0.8,
    test_size=0.2,
    random_state=1960,
)

# Insert a length-1 middle axis: (samples, features) -> (samples, 1, features),
# matching the GRU's input_shape of (1, NC).
train_X = train_X[:, np.newaxis, :]
test_X = test_X[:, np.newaxis, :]



In [4]:
# train_X

In [5]:
#boston.target

In [6]:

from keras.wrappers.scikit_learn import KerasRegressor

# Wrap the model factory in a scikit-learn style regressor. batch_size=1
# matches the fixed batch size declared on the GRU layer in create_model().
clf = KerasRegressor(
    build_fn=create_model,
    batch_size=1,
    epochs=12,
    verbose=2,
)

# Sanity-check shapes and dtypes before training.
print(train_X.shape, train_y.shape, train_X.dtype, train_y.dtype)
clf.fit(train_X, train_y)

(404, 1, 13) (404,) float64 float64
Epoch 1/12


  (sample.dtype, var.uid, str(var.dtype)))
  (sample.dtype, var.uid, str(var.dtype)))


 - 2s - loss: 0.8971
Epoch 2/12
 - 2s - loss: 0.7874
Epoch 3/12
 - 2s - loss: 0.6992
Epoch 4/12
 - 2s - loss: 0.6571
Epoch 5/12
 - 2s - loss: 0.6204
Epoch 6/12
 - 2s - loss: 0.5866
Epoch 7/12
 - 2s - loss: 0.5656
Epoch 8/12
 - 2s - loss: 0.5528
Epoch 9/12
 - 2s - loss: 0.5248
Epoch 10/12
 - 2s - loss: 0.5221
Epoch 11/12
 - 2s - loss: 0.4999
Epoch 12/12
 - 2s - loss: 0.4856


<keras.callbacks.History at 0x7f1433475390>

In [7]:
# Predict on the held-out rows, then clear the hidden state of the stateful GRU.
# NOTE(review): the state is reset only *after* predict(), so these
# predictions presumably start from whatever state training left behind -
# confirm this is intended.
print(test_X.shape)
preds = clf.predict(test_X)
clf.model.reset_states()
print(preds)


(102, 1, 13)


  (sample.dtype, var.uid, str(var.dtype)))


[ 0.11034012 -0.3050962  -0.4815966  -0.12706494  0.23027366 -0.3766204
 -0.8694943  -0.27727652 -1.2002664  -1.73589    -0.60121584 -0.8080609
  0.12290171 -1.0458778   0.60166776 -0.5488049   0.52800196  0.31957176
 -1.5993172   0.48667982 -0.10700881  0.60166776 -0.28586042  0.31040034
  0.5804151   0.10597652 -0.09639454 -0.86521995  0.18639478 -1.10033
  0.19038743 -0.78571916 -1.6309439   0.60166776  0.60166776  0.26244918
  0.47280633 -0.8459916  -0.5825294   0.41974387 -0.47908854 -0.10416585
  0.1030978  -0.78593004 -0.33967787 -0.10166299  0.37457943  0.03951997
 -1.4986042  -1.1289542  -0.60561955 -0.98712313 -1.2020268   0.60166776
 -0.40189183 -0.9748821   0.48966542  0.14338806  0.05383992 -0.06214041
 -0.4063512   0.60166776  0.54024017  0.4209994  -1.3869505   0.60166776
  0.60166776 -0.22811753 -0.7419423   0.26162884  0.12705696  0.60166776
  0.20584458  0.15212286  0.11298868 -0.96350396  0.60166776  0.60166776
 -0.70939934  0.60166776 -0.2968825   0.41964313  0.2929

In [8]:
# Summary statistics of the standardized feature columns.
pd.DataFrame(boston.data).describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
count,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0
mean,0.0,6.319056000000001e-17,-3.145486e-15,-2.106352e-17,2.7523e-15,-1.15077e-14,-1.13743e-15,7.582867e-16,5.616939000000001e-17,5.616939000000001e-17,-1.022283e-14,8.593916e-15,-5.897786e-16
std,1.00099,1.00099,1.00099,1.00099,1.00099,1.00099,1.00099,1.00099,1.00099,1.00099,1.00099,1.00099,1.00099
min,-0.417713,-0.4877224,-1.557842,-0.2725986,-1.465882,-3.880249,-2.335437,-1.267069,-0.9828429,-1.31399,-2.707379,-3.907193,-1.531127
25%,-0.408896,-0.4877224,-0.8676906,-0.2725986,-0.9130288,-0.5686303,-0.837448,-0.8056878,-0.6379618,-0.767576,-0.4880391,0.2050715,-0.79942
50%,-0.388582,-0.4877224,-0.2110985,-0.2725986,-0.1442174,-0.1084655,0.3173816,-0.2793234,-0.5230014,-0.4646726,0.274859,0.3811865,-0.1812536
75%,0.006248,0.04877224,1.015999,-0.2725986,0.598679,0.4827678,0.9067981,0.6623709,1.661245,1.530926,0.8065758,0.433651,0.6030188
max,9.941735,3.804234,2.422565,3.668398,2.732346,3.555044,1.117494,3.960518,1.661245,1.798194,1.638828,0.4410519,3.548771


# Generate SQL Code from the Model

In [9]:
import json, requests, base64, dill as pickle, sys



# Raise the interpreter recursion limit and switch off dill's 'recurse'
# setting (dill is imported under the name `pickle` in this cell).
sys.setrecursionlimit(200000)
pickle.settings['recurse'] = False

# The web-service route does not work here: pickling TensorFlow and/or Keras objects does not seem to be a priority for either project.
# Searching for the keyword "pickle" turns up many open GitHub issues in both projects.

def test_ws_sql_gen(pickle_data):
    WS_URL="http://localhost:1888/model"
    b64_data = base64.b64encode(pickle_data).decode('utf-8')
    data={"Name":"model1", "PickleData":b64_data , "SQLDialect":"postgresql"}
    r = requests.post(WS_URL, json=data)
    print(r.__dict__)
    content = r.json()
    # print(content)
    lSQL = content["model"]["SQLGenrationResult"][0]["SQL"]
    return lSQL;



def test_sql_gen(keras_regressor , metadata):
    import sklearn2sql.PyCodeGenerator as codegen
    cg1 = codegen.cAbstractCodeGenerator();
    cg1.mOptions.mActivateMaterialization = False
    lSQL = cg1.generateCodeWithMetadata(clf, metadata, dsn = None, dialect = "postgresql");
    return lSQL[1]


In [10]:
# commented .. see above
# pickle_data = pickle.dumps(clf)
# lSQL = test_ws_sql_gen(pickle_data)
# print(lSQL[0:2000])

In [11]:
# Metadata handed to the SQL generator: feature columns, target column,
# key column and table name.
lMetaData = {
    'features': list(boston.feature_names)[0:NC],
    'targets': ['TGT'],
    'primary_key': 'KEY',
    'table': 'boston',
}

lSQL = test_sql_gen(clf, lMetaData)


cGenerationWrapperFactory::createWrapper() <class 'keras.wrappers.scikit_learn.KerasRegressor'>
BACKEND_DIALECT postgresql
CREATING_DATABASE_BACKEND_DSN_DIALECT 1.2.7 None postgresql
KERAS_REG_GENERATE_EXPRESSION_START <class 'sqlalchemy.sql.selectable.Alias'> <class 'sklearn2sql.GenericModels.Keras.cSerializedKeras'>
{'mKerasData': <keras.wrappers.scikit_learn.KerasRegressor object at 0x7f14374d6160>, 'mFeatureNames': ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT'], 'mClasses': None, 'mMode': 'Regression'}
{'build_fn': <function create_model at 0x7f14374c6598>, 'sk_params': {'batch_size': 1, 'epochs': 12, 'verbose': 2}, 'model': <keras.engine.sequential.Sequential object at 0x7f14374d60f0>}
GENERATING_LAYER 0 gru_1 <class 'keras.layers.recurrent.GRU'> {'input_spec': [InputSpec(shape=(1, None, 13), ndim=3)], 'supports_masking': True, 'stateful': True, '_trainable_weights': [], '_non_trainable_weights': [], '_losses': [], '_updates': [(P

In [12]:
# Show the generated SQL text.
print(lSQL)

WITH RECURSIVE keras_input AS 
(SELECT "ADS"."KEY" AS "KEY", "ADS"."CRIM" AS "CRIM", "ADS"."ZN" AS "ZN", "ADS"."INDUS" AS "INDUS", "ADS"."CHAS" AS "CHAS", "ADS"."NOX" AS "NOX", "ADS"."RM" AS "RM", "ADS"."AGE" AS "AGE", "ADS"."DIS" AS "DIS", "ADS"."RAD" AS "RAD", "ADS"."TAX" AS "TAX", "ADS"."PTRATIO" AS "PTRATIO", "ADS"."B" AS "B", "ADS"."LSTAT" AS "LSTAT" 
FROM boston AS "ADS"), 
keras_input_1 AS 
(SELECT keras_input."KEY" AS "KEY", keras_input."CRIM" AS "CRIM", keras_input."ZN" AS "ZN", keras_input."INDUS" AS "INDUS", keras_input."CHAS" AS "CHAS", keras_input."NOX" AS "NOX", keras_input."RM" AS "RM", keras_input."AGE" AS "AGE", keras_input."DIS" AS "DIS", keras_input."RAD" AS "RAD", keras_input."TAX" AS "TAX", keras_input."PTRATIO" AS "PTRATIO", keras_input."B" AS "B", keras_input."LSTAT" AS "LSTAT" 
FROM keras_input), 
keras_input_1_rn AS 
(SELECT row_number() OVER (ORDER BY keras_input_1."KEY" ASC) AS rn, keras_input_1."KEY" AS "KEY", keras_input_1."CRIM" AS "CRIM", keras_input_1."Z

# Execute the SQL Code

In [13]:
# Persist the dataset to a database table so that the generated SQL can be run against it.

import sqlalchemy as sa

# SQLite alternative: engine = sa.create_engine('sqlite://' , echo=False)
# NOTE(review): the connection URL embeds a user name and password - acceptable
# only for a local throw-away test database.
engine = sa.create_engine("postgresql://db:db@localhost/db?port=5432", echo=False)
conn = engine.connect()

# Feature columns first, then the target and an integer row key.
lTable = pd.DataFrame(boston.data, columns=lMetaData['features'])
lTable['TGT'] = boston.target
lTable['KEY'] = range(boston.data.shape[0])
lTable.to_sql(name=lMetaData['table'], con=conn, if_exists='replace', index=False)

In [14]:
# Run the generated SQL and always release the connection, even when the
# query raises, so a failed run does not leave the connection open.
try:
    sql_output = pd.read_sql(lSQL, conn)
finally:
    conn.close()

In [15]:
# Show 12 randomly chosen rows of the SQL result (fixed random_state for a stable sample).
sql_output.sample(12, random_state=1960)

Unnamed: 0,KEY,Estimator
230,230,0.269232
112,112,-0.287135
125,125,-0.564821
9,9,-0.142387
213,213,0.279976
109,109,-0.563204
127,127,-1.021365
244,244,0.381128
406,406,-1.608217
490,490,-1.757649


# Keras Prediction

In [16]:
# Start from a clean hidden state, then score every row with the Keras model.
clf.model.reset_states()

data = boston.data.reshape(boston.data.shape[0], 1, boston.data.shape[1])
skl_out = clf.predict(data)
print(skl_out.shape)

# Assemble KEY and Estimator side by side; both frames share the default 0..n-1 index.
skl_output_key = pd.DataFrame({'KEY': list(range(boston.data.shape[0]))})
skl_output_estimator = pd.DataFrame(skl_out, columns=['Estimator'])
skl_output = pd.concat([skl_output_key, skl_output_estimator], axis=1)
skl_output.sample(12, random_state=1960)

  (sample.dtype, var.uid, str(var.dtype)))


(506,)


Unnamed: 0,KEY,Estimator
230,230,0.269232
112,112,-0.287135
125,125,-0.564821
9,9,-0.142387
213,213,0.279976
109,109,-0.563204
127,127,-1.021365
244,244,0.381128
406,406,-1.608217
490,490,-1.757649


# Comparing the SQL and Keras Predictions

In [17]:
# Pair each Keras prediction with the SQL prediction for the same KEY.
# DataFrame.join(on='KEY') matches the caller's KEY column against the *index*
# of the other frame, so index the SQL result by its KEY values first instead
# of relying on the query returning rows in key order.
sql_output_by_key = sql_output.copy()
sql_output_by_key.index = sql_output_by_key['KEY'].values
sql_skl_join = skl_output.join(sql_output_by_key, how='left', on='KEY', lsuffix='_skl', rsuffix='_sql')
# Signed difference between the two estimators for every row.
sql_skl_join['Error'] = sql_skl_join.Estimator_sql - sql_skl_join.Estimator_skl

In [18]:
# First rows of the side-by-side comparison.
sql_skl_join.head(12)

Unnamed: 0,KEY_skl,Estimator_skl,KEY_sql,Estimator_sql,Error
0,0,0.601668,0,0.601668,-3.330669e-16
1,1,0.348602,1,0.348602,-9.423209e-09
2,2,0.551641,2,0.551641,-1.28222e-08
3,3,0.585413,3,0.585413,-7.159937e-09
4,4,0.596864,4,0.596864,-4.082609e-09
5,5,0.52471,5,0.52471,5.76802e-08
6,6,0.439505,6,0.439505,7.633757e-09
7,7,0.04241,7,0.04241,-1.810101e-08
8,8,-0.584652,8,-0.584652,4.280853e-08
9,9,-0.142387,9,-0.142387,-6.028175e-08


In [19]:
# Summary statistics of the comparison, including the Error column.
sql_skl_join.describe()

Unnamed: 0,KEY_skl,Estimator_skl,KEY_sql,Estimator_sql,Error
count,506.0,506.0,506.0,506.0,506.0
mean,252.5,-0.104061,252.5,-0.104061,2.848508e-09
std,146.213884,0.708558,146.213884,0.708558,6.393688e-08
min,0.0,-2.712357,0.0,-2.712356,-2.398577e-07
25%,126.25,-0.655868,126.25,-0.655868,-2.306943e-08
50%,252.5,0.096676,252.5,0.096676,-3.330669e-16
75%,378.75,0.579547,378.75,0.579547,2.44603e-08
max,505.0,0.601668,505.0,0.601668,4.615268e-07
