In [1]:
import os, numpy as np
import pandas as pd


from sklearn import datasets

from sklearn.model_selection import train_test_split

import torch
from torch import nn
import torch.nn.functional as F

import skorch
from skorch import NeuralNetClassifier

# Seed PyTorch's global RNG so that anything drawn from it is repeatable
# across Restart & Run All.
torch.manual_seed(1960)

# Make float64 the default floating-point dtype for newly created tensors and
# module parameters. torch.set_default_dtype replaces the deprecated
# torch.set_default_tensor_type('torch.DoubleTensor') call.
# NOTE(review): presumably float64 is wanted so the model matches the dtype of
# the NumPy feature arrays fed to it -- confirm.
torch.set_default_dtype(torch.float64)


# Build a PyTorch Model

In [2]:
# For the moment, we are only experimenting with PyTorch sequential models.
# This is a limitation, and we will try to see what can be done to make
# recurrent layers and convolutions usable inside sequential models.
# Functional models (with custom forward methods) are not supported, and will not be.


# This is a toy classification model with one hidden layer, a ReLU, a dropout and a softmax output.
def create_model():
    """Build the toy 3-class classifier as a plain ``nn.Sequential``.

    Layers, in order: Linear(4 -> 15), ReLU, Dropout (default p),
    Linear(15 -> 3), Softmax over the class axis.

    Returns:
        nn.Sequential: a freshly initialised, untrained module whose output
        is one row of class probabilities per input row.
    """
    hidden_units = 15
    num_classes = 3
    num_inputs = 4
    model = nn.Sequential(
        nn.Linear(num_inputs, hidden_units),
        nn.ReLU(),
        nn.Dropout(),
        nn.Linear(hidden_units, num_classes),
        # Normalise over the last (class) axis. Giving `dim` explicitly avoids
        # PyTorch's "implicit dimension choice for softmax" deprecation
        # warning; for 1-D and 2-D inputs the last axis is the same axis the
        # implicit choice would have picked.
        nn.Softmax(dim=-1))

    return model



In [3]:
# Load the Iris dataset and split it 80/20 into train and test parts with a fixed random_state.
iris = datasets.load_iris()
split_options = dict(train_size=0.8, test_size=0.2, random_state=1960)
train_X, test_X, train_y, test_y = train_test_split(iris.data, iris.target, **split_options)


In [4]:

# Wrap the Sequential module in skorch's scikit-learn-style classifier.
# Only the optimizer and the epoch count are overridden here.
classifier_options = dict(optimizer=torch.optim.Adam, max_epochs=10)
net = NeuralNetClassifier(create_model(), **classifier_options)

# Report the training-set dimensions, then train. fit() is the last
# expression of the cell, so its return value is displayed as well.
print(train_X.shape, train_y.shape)
net.fit(train_X, train_y)

(120, 4) (120,)
  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m1.5288[0m       [32m0.4400[0m        [35m1.1225[0m  0.0088
      2        1.6762       [32m0.6800[0m        [35m1.0600[0m  0.0036
      3        [36m1.3265[0m       0.6800        [35m1.0128[0m  0.0037
      4        [36m1.2542[0m       0.6800        [35m0.9807[0m  0.0036
      5        [36m1.1924[0m       0.6800        [35m0.9628[0m  0.0034
      6        [36m1.1623[0m       0.6800        [35m0.9529[0m  0.0033
      7        [36m1.1261[0m       0.6800        [35m0.9481[0m  0.0032
      8        [36m0.9808[0m       0.4400        [35m0.9450[0m  0.0034
      9        1.0291       0.6000        [35m0.9421[0m  0.0033
     10        [36m0.9788[0m       0.6400        [35m0.9381[0m  0.0034


  input = module(input)


<class 'skorch.classifier.NeuralNetClassifier'>[initialized](
  module_=Sequential(
    (0): Linear(in_features=4, out_features=15, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.5)
    (3): Linear(in_features=15, out_features=3, bias=True)
    (4): Softmax()
  ),
)

In [5]:
# Dump every attribute stored on the fitted estimator (hyper-parameters, history, ...).
print(vars(net))

{'module': Sequential(
  (0): Linear(in_features=4, out_features=15, bias=True)
  (1): ReLU()
  (2): Dropout(p=0.5)
  (3): Linear(in_features=15, out_features=3, bias=True)
  (4): Softmax()
), 'criterion': <class 'torch.nn.modules.loss.NLLLoss'>, 'optimizer': <class 'torch.optim.adam.Adam'>, 'lr': 0.01, 'max_epochs': 10, 'batch_size': 128, 'iterator_train': <class 'torch.utils.data.dataloader.DataLoader'>, 'iterator_valid': <class 'torch.utils.data.dataloader.DataLoader'>, 'dataset': <class 'skorch.dataset.Dataset'>, 'train_split': <skorch.dataset.CVSplit object at 0x7fb3985b9630>, 'callbacks': None, 'warm_start': False, 'verbose': 1, 'device': 'cpu', 'history': [{'batches': [{'train_loss': 1.528769080349241, 'train_batch_size': 95}, {'valid_loss': 1.1224694415289365, 'valid_batch_size': 25}], 'epoch': 1, 'dur': 0.008803844451904297, 'train_loss': 1.528769080349241, 'train_loss_best': True, 'valid_loss': 1.1224694415289365, 'valid_loss_best': True, 'valid_acc': 0.44, 'valid_acc_best': 

In [6]:
# Dimensions of the held-out feature matrix.
print(test_X.shape)

# Class probabilities for the first held-out row, passed as a 1 x 4 batch.
first_sample = test_X[0, :].reshape(1, 4)
preds = net.predict_proba(first_sample)
print(preds)


(30, 4)
[[0.18185966 0.45338441 0.36475593]]


# Generate SQL Code from the Model

In [7]:
import json, requests, base64, pickle, sys

# Raise the interpreter's recursion limit to 200000 before the model is pickled
# in a later cell.
# NOTE(review): presumably needed because pickling the fitted net recurses
# deeply -- confirm, and lower the value if it is not required.
sys.setrecursionlimit(200000)

# Pickle the model and send it to the SQL generation web service
# Get back the  SQL code.
def test_ws_sql_gen(pickle_data, ws_url="http://localhost:1888/model",
                    dialect="postgresql", timeout=300):
    """Send a pickled model to the SQL-generation web service and return the SQL.

    Args:
        pickle_data (bytes): the pickled model, as returned by ``pickle.dumps``.
        ws_url (str): endpoint the request is POSTed to.
        dialect (str): value sent as ``SQLDialect`` in the request.
        timeout (float or None): seconds to wait for the service; ``None``
            waits indefinitely, as the previous version of this helper did.

    Returns:
        str: the SQL text of the first generation result in the response.

    Raises:
        requests.HTTPError: if the service answers with an error status.
        requests.Timeout: if the service does not answer within ``timeout``.
    """
    # JSON cannot carry raw bytes, so the pickle travels as base64 text.
    b64_data = base64.b64encode(pickle_data).decode('utf-8')
    payload = {"Name": "model1", "PickleData": b64_data, "SQLDialect": dialect}
    response = requests.post(ws_url, json=payload, timeout=timeout)
    # Fail here with the HTTP status instead of a confusing JSON/KeyError below.
    response.raise_for_status()
    content = response.json()
    # The key spelling "SQLGenrationResult" is kept exactly as it was;
    # presumably it mirrors the service's response schema -- do not "fix" it here.
    return content["model"]["SQLGenrationResult"][0]["SQL"]


In [8]:
# Serialise the fitted skorch estimator and ask the web service for its SQL equivalent.
pickle_data = pickle.dumps(net)
lSQL = test_ws_sql_gen(pickle_data)


In [9]:
# Show the SQL text returned by the web service.
print(lSQL)


WITH pytorch_input AS 
(SELECT "ADS"."KEY" AS "KEY", "ADS"."Feature_0" AS "Feature_0", "ADS"."Feature_1" AS "Feature_1", "ADS"."Feature_2" AS "Feature_2", "ADS"."Feature_3" AS "Feature_3" 
FROM "INPUT_DATA" AS "ADS"), 
pytorch_input_1 AS 
(SELECT pytorch_input."KEY" AS "KEY", pytorch_input."Feature_0" AS "Feature_0", pytorch_input."Feature_1" AS "Feature_1", pytorch_input."Feature_2" AS "Feature_2", pytorch_input."Feature_3" AS "Feature_3" 
FROM pytorch_input), 
layer_0 AS 
(SELECT pytorch_input_1."KEY" AS "KEY", 0.26364428222521186 + -0.3508236420356722 * pytorch_input_1."Feature_0" + -0.17123968666518297 * pytorch_input_1."Feature_1" + -0.13823569361080223 * pytorch_input_1."Feature_2" + 0.47668015286731624 * pytorch_input_1."Feature_3" AS output_1, -0.3482905109501817 + 0.12509916834672533 * pytorch_input_1."Feature_0" + -0.21371471020834162 * pytorch_input_1."Feature_1" + 0.42256442335314226 * pytorch_input_1."Feature_2" + -0.39076505831131364 * pytorch_input_1."Feature_3" AS outpu

# Execute the SQL Code

In [10]:
# save the dataset in a database table


import sqlalchemy as sa

# Alternative in-memory backend: sa.create_engine('sqlite://', echo=False)
engine = sa.create_engine("postgresql://db:db@localhost/db?port=5432", echo=False)
conn = engine.connect()

# One 'Feature_<i>' column per input feature, then the target and a 0-based row key.
NC = iris.data.shape[1]
lFeatures = ['Feature_{}'.format(x) for x in range(NC)]
lTable = pd.DataFrame(iris.data, columns=lFeatures)
lTable['TGT'] = iris.target
lTable['KEY'] = range(iris.data.shape[0])

# Write the frame to the INPUT_DATA table, replacing any existing table of that name.
lTable.to_sql("INPUT_DATA", conn, if_exists='replace', index=False)

In [11]:
# Execute the generated SQL on the connection opened in the previous cell.
# The connection is closed even if the query fails, so a broken statement
# does not leave a database connection open.
try:
    sql_output = pd.read_sql(lSQL, conn)
finally:
    conn.close()

In [12]:
# Reproducible 12-row sample of the SQL predictions.
sql_output.sample(n=12, random_state=1960)

Unnamed: 0,KEY,Score_0,Score_1,Score_2,Proba_0,Proba_1,Proba_2,LogProba_0,LogProba_1,LogProba_2,Decision,DecisionProba
114,114,,,,0.18186,0.453384,0.364756,-1.70452,-0.791015,-1.008527,1,0.453384
74,74,,,,0.202082,0.416369,0.381548,-1.599079,-0.876183,-0.963518,1,0.416369
9,9,,,,0.367924,0.330176,0.301901,-0.999879,-1.108131,-1.197658,0,0.367924
88,88,,,,0.225944,0.403559,0.370497,-1.487469,-0.907432,-0.99291,1,0.403559
25,25,,,,0.356184,0.337863,0.305953,-1.032307,-1.085114,-1.184325,0,0.356184
5,5,,,,0.364784,0.348123,0.287093,-1.00845,-1.055199,-1.247949,0,0.364784
48,48,,,,0.37526,0.336848,0.287891,-0.980135,-1.088122,-1.245172,0,0.37526
117,117,,,,0.137084,0.456257,0.406659,-1.987164,-0.784699,-0.89978,1,0.456257
83,83,,,,0.182875,0.416904,0.400221,-1.698953,-0.874899,-0.915739,1,0.416904
105,105,,,,0.128363,0.448796,0.422841,-2.052891,-0.801188,-0.860759,1,0.448796


# PyTorch Prediction

In [13]:
# Column names are derived from a single list of class labels.
class_labels = [0, 1, 2]
score_columns = ['Score_' + str(label) for label in class_labels]
proba_columns = ['Proba_' + str(label) for label in class_labels]

# KEY: one 0-based id per Iris row.
pytorch_output_key = pd.DataFrame({'KEY': list(range(iris.data.shape[0]))})
# Score_*: an empty frame, so these columns come out all-missing after the concat.
pytorch_output_score = pd.DataFrame(columns=score_columns)
# Proba_*: class probabilities predicted by the fitted net.
pytorch_output_proba = pd.DataFrame(net.predict_proba(iris.data), columns=proba_columns)

pytorch_output = pd.concat([pytorch_output_key, pytorch_output_score, pytorch_output_proba], axis=1)

# LogProba_*: natural log of each probability column.
for proba_name in proba_columns:
    pytorch_output['Log' + proba_name] = np.log(pytorch_output_proba[proba_name])

# Decision: the class predicted by the net.
pytorch_output['Decision'] = net.predict(iris.data)
pytorch_output.sample(12, random_state=1960)


  input = module(input)


Unnamed: 0,KEY,Score_0,Score_1,Score_2,Proba_0,Proba_1,Proba_2,LogProba_0,LogProba_1,LogProba_2,Decision
114,114,,,,0.18186,0.453384,0.364756,-1.70452,-0.791015,-1.008527,1
74,74,,,,0.202082,0.416369,0.381548,-1.599079,-0.876183,-0.963518,1
9,9,,,,0.367924,0.330176,0.301901,-0.999879,-1.108131,-1.197658,0
88,88,,,,0.225944,0.403559,0.370497,-1.487469,-0.907432,-0.99291,1
25,25,,,,0.356184,0.337863,0.305953,-1.032307,-1.085114,-1.184325,0
5,5,,,,0.364784,0.348123,0.287093,-1.00845,-1.055199,-1.247949,0
48,48,,,,0.37526,0.336848,0.287891,-0.980135,-1.088122,-1.245172,0
117,117,,,,0.137084,0.456257,0.406659,-1.987164,-0.784699,-0.89978,1
83,83,,,,0.182875,0.416904,0.400221,-1.698953,-0.874899,-0.915739,1
105,105,,,,0.128363,0.448796,0.422841,-2.052891,-0.801188,-0.860759,1


# Comparing the SQL and PyTorch Predictions

In [14]:
# DataFrame.join(on='KEY') matches the caller's KEY column against the *index*
# of the other frame, not against its KEY column. sql_output carries whatever
# positional index read_sql produced, so the join was only correct if the
# database returned rows in KEY order. Index the SQL result by its KEY values
# (the KEY column itself is kept) so rows are matched on the key itself.
sql_output_by_key = sql_output.set_index(sql_output['KEY'].values)
sql_pytorch_join = pytorch_output.join(sql_output_by_key, how='left', on='KEY', lsuffix='_pytorch', rsuffix='_sql')


In [15]:
# First 12 rows of the side-by-side PyTorch / SQL predictions.
sql_pytorch_join.head(n=12)

Unnamed: 0,KEY_pytorch,Score_0_pytorch,Score_1_pytorch,Score_2_pytorch,Proba_0_pytorch,Proba_1_pytorch,Proba_2_pytorch,LogProba_0_pytorch,LogProba_1_pytorch,LogProba_2_pytorch,...,Score_1_sql,Score_2_sql,Proba_0_sql,Proba_1_sql,Proba_2_sql,LogProba_0_sql,LogProba_1_sql,LogProba_2_sql,Decision_sql,DecisionProba
0,0,,,,0.378428,0.334165,0.287407,-0.97173,-1.096119,-1.246857,...,,,0.378428,0.334165,0.287407,-0.97173,-1.096119,-1.246857,0,0.378428
1,1,,,,0.36799,0.334633,0.297376,-0.999698,-1.09472,-1.212758,...,,,0.36799,0.334633,0.297376,-0.999698,-1.09472,-1.212758,0,0.36799
2,2,,,,0.382634,0.329002,0.288365,-0.960677,-1.111692,-1.24353,...,,,0.382634,0.329002,0.288365,-0.960677,-1.111692,-1.24353,0,0.382634
3,3,,,,0.371644,0.32994,0.298416,-0.989819,-1.108844,-1.209266,...,,,0.371644,0.32994,0.298416,-0.989819,-1.108844,-1.209266,0,0.371644
4,4,,,,0.38327,0.331753,0.284976,-0.959015,-1.103363,-1.255349,...,,,0.38327,0.331753,0.284976,-0.959015,-1.103363,-1.255349,0,0.38327
5,5,,,,0.364784,0.348123,0.287093,-1.00845,-1.055199,-1.247949,...,,,0.364784,0.348123,0.287093,-1.00845,-1.055199,-1.247949,0,0.364784
6,6,,,,0.382392,0.331352,0.286255,-0.961308,-1.104574,-1.250871,...,,,0.382392,0.331352,0.286255,-0.961308,-1.104574,-1.250871,0,0.382392
7,7,,,,0.372504,0.334211,0.293285,-0.987508,-1.095983,-1.226609,...,,,0.372504,0.334211,0.293285,-0.987508,-1.095983,-1.226609,0,0.372504
8,8,,,,0.374778,0.32731,0.297912,-0.981421,-1.116848,-1.210958,...,,,0.374778,0.32731,0.297912,-0.981421,-1.116848,-1.210958,0,0.374778
9,9,,,,0.367924,0.330176,0.301901,-0.999879,-1.108131,-1.197658,...,,,0.367924,0.330176,0.301901,-0.999879,-1.108131,-1.197658,0,0.367924


In [16]:
# Rows on which the SQL decision differs from the PyTorch decision.
decision_sql = sql_pytorch_join['Decision_sql']
decision_pytorch = sql_pytorch_join['Decision_pytorch']
condition = (decision_sql != decision_pytorch)
sql_pytorch_join.loc[condition]

Unnamed: 0,KEY_pytorch,Score_0_pytorch,Score_1_pytorch,Score_2_pytorch,Proba_0_pytorch,Proba_1_pytorch,Proba_2_pytorch,LogProba_0_pytorch,LogProba_1_pytorch,LogProba_2_pytorch,...,Score_1_sql,Score_2_sql,Proba_0_sql,Proba_1_sql,Proba_2_sql,LogProba_0_sql,LogProba_1_sql,LogProba_2_sql,Decision_sql,DecisionProba


In [17]:
# The notebook passes only if no row has diverging SQL / PyTorch decisions.
mismatched_rows = sql_pytorch_join.loc[condition]
assert len(mismatched_rows) == 0