In [15]:
# Session setup: obtain the Spark session that every later cell builds on.
from pyspark.sql import SparkSession
# `math` is needed by the later comparison cells (math.isclose).
import math
# Reuses an already-running session if there is one, otherwise creates it.
spark = SparkSession.builder.getOrCreate()

In [2]:
# Wrap the Spark session in a PySpliceContext. `splice` is the handle the cells
# below use to drop tables, execute SQL and read query results as DataFrames.
from splicemachine.spark import PySpliceContext
splice = PySpliceContext(spark)

In [3]:
# NOTE(review): `mlflow` is never imported by name in this notebook, so it is
# presumably brought into scope by this star import -- confirm before replacing
# it with explicit imports.
from splicemachine.mlflow_support import *
from splicemachine.mlflow_support.utilities import get_user
# Hand the Splice context to mlflow before any logging/deployment call is made.
mlflow.register_splice_context(splice)
# The value returned by get_user() is used as the schema for every model table below.
schema = get_user()

## One output node - Regression

In [7]:
# Regression check: train a single-output Keras model, deploy it to a database
# table, then confirm the table's PREDICTION column agrees with model.predict().
# `model`, `run_id` and `df` stay in the kernel namespace because the
# binary-classification cell further down reuses them.
from numpy import loadtxt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import pandas as pd
import numpy as np
from random import random

with mlflow.start_run() as run:
    # load the dataset
    dataset = loadtxt('data.csv', delimiter=',')
    # split into input (X) and output (y) variables
    X = dataset[:,0:8]
    y = dataset[:,8]
    # Make it a regression problem: swap the labels for random floats in [0, 1)
    y = np.array([random() for _ in y])
    # define the keras model
    model = Sequential()
    model.add(Dense(12, input_dim=8, activation='relu'))
    model.add(Dense(8, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    # compile the keras model
    # NOTE(review): 'accuracy' is not a meaningful metric for a continuous target;
    # it is kept because this cell only verifies the deployed predictions.
    model.compile(loss='mean_squared_error', optimizer='adam', metrics=['accuracy'])
    # fit the keras model on the dataset
    model.fit(X, y, epochs=50, batch_size=10)

    mlflow.log_model(model, 'keras_model', model_lib='keras')

    # Capture the run id once; it is printed here and reused by later cells.
    run_id = mlflow.current_run_id()
    print(run_id)

    # Start from a clean slate so deploy_db can create the model table itself.
    splice.dropTableIfExists(f'{schema}.keras_regression')

    # The frame passed as `df` carries one column per feature, named C0..C7.
    df = pd.DataFrame(X, columns=[f'C{i}' for i in range(X.shape[1])])
    jid = mlflow.deploy_db(schema, 'keras_regression', run_id, primary_key={'MOMENT': 'INT'}, df=df, create_model_table=True)
    mlflow.watch_job(jid)

print(f'Loading model {run.info.run_uuid}')
mlflow.load_model(run.info.run_uuid)

# Insert two feature rows, then read the table back together with its predictions.
splice.execute('insert into keras_regression (c0,c1,c2,c3,c4,c5,c6,c7,moment) values(2.4,2.2,1.5,2.6,3.6,7.5,77.8,2.4,34512)')
splice.execute('insert into keras_regression (c0,c1,c2,c3,c4,c5,c6,c7,moment) values(66.2,3.78,33.5,-23.4,22.3,0.1,19.3,-339.2,345)')
data = splice.df('select * from keras_regression').toPandas()

features = data[['C0','C1','C2','C3','C4','C5','C6','C7']]
db_preds = data[['PREDICTION']]

# Score the very same rows with the in-memory model for comparison.
raw_preds = model.predict(features)

print("Comparing results")

for (_, db_row), raw_row in zip(db_preds.iterrows(), raw_preds):
    for db, raw in zip(db_row, raw_row):
        # Compare with an absolute tolerance (same one the multiclass check uses).
        # Rounding both sides to a string-length-derived number of places and
        # demanding exact equality is brittle: a string length is not a precision.
        assert math.isclose(float(db), float(raw), abs_tol=1e-06), f'Something is wrong. Model Table gives {float(db)} but raw model gives {float(raw)}'

print('test passed!')

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
INFO:tensorflow:Assets written to: /tmp/tmpuqn7jl53/model/data/model/assets
Uploading file... Done.
0d484fd30211
Deploying model to database...
Your Job has been submitted. The returned value of this function is the job id, which you can use to monitor the your task in real-time. Run mlflow.watch_job(<job id>) tostream them to stdout, or mlflow.fetch_logs(<job id>) to read them one time to a list

---Job L

## Use pred_threshold to make it a binary classification

In [25]:
# Binary-classification check: redeploy the single-output model from the
# regression run with a prediction threshold, then compare the table's OUT1 and
# PREDICTION columns with the in-memory model.
# Relies on `run_id`, `df` and `model` left in the kernel by the regression cell.

# Single source of truth for the threshold: passed to deploy_db and reused below.
threshold = 0.5

splice.dropTableIfExists(f'{schema}.keras_binary')
jid = mlflow.deploy_db(schema, 'keras_binary', run_id, primary_key={'MOMENT': 'INT'}, df=df, create_model_table=True, classes=['Out1'], library_specific={'pred_threshold': threshold})
mlflow.watch_job(jid)

splice.execute('truncate table keras_binary')
splice.execute('insert into keras_binary (c0,c1,c2,c3,c4,c5,c6,c7,moment) values(2.4,2.2,1.5,2.6,3.6,7.5,77.8,2.4,34512)')
splice.execute('insert into keras_binary (c0,c1,c2,c3,c4,c5,c6,c7,moment) values(66.2,3.78,33.5,-23.4,22.3,0.1,19.3,-339.2,345)')

data = splice.df('select * from keras_binary').toPandas()

features = data[['C0','C1','C2','C3','C4','C5','C6','C7']]
db_preds = data[['OUT1']]
db_classpred = data[['PREDICTION']]

raw_preds = model.predict(features)

print('Comparing results from database to model')

for db_c, db_pred, raw_pred in zip(db_classpred.iterrows(), db_preds.iterrows(), raw_preds):
    for c, db, raw in zip(db_c[1], db_pred[1], raw_pred):
        # The OUT1 column is checked against the complement of the raw model output.
        expected = 1 - raw
        # Absolute tolerance matches the multiclass check; the message reports the
        # value that is actually being compared (1 - raw), not just raw.
        assert math.isclose(db, expected, abs_tol=1e-06), f'Something is wrong. Model Table gives {float(db)} but raw model gives {float(expected)} (1 - {float(raw)})'

        raw_classpred = 1.0 if raw > threshold else 0.0
        classpred = 0.0 if c=='OUT1' else 1.0
        # `c` is a class label (it is compared with 'OUT1'), so it is reported as-is:
        # calling float() on it would raise ValueError and mask the real failure.
        assert raw_classpred==classpred, f'Something is wrong. Model Table gives {c!r} ({classpred}) but raw model gives {float(raw_classpred)}'

print('test passed!')

Table exists. Dropping table
Deploying model to database...
Your Job has been submitted. The returned value of this function is the job id, which you can use to monitor the your task in real-time. Run mlflow.watch_job(<job id>) tostream them to stdout, or mlflow.fetch_logs(<job id>) to read them one time to a list

---Job Logs---
INFO     2021-06-04 23:36:27.736 - A service worker has found your request
INFO     2021-06-04 23:36:27.815 - Checking whether handler DEPLOY_DATABASE is enabled
INFO     2021-06-04 23:36:27.899 - Handler is available
INFO     2021-06-04 23:36:27.912 - Retrieving Run from MLFlow Tracking Server...
INFO     2021-06-04 23:36:28.016 - Retrieved MLFlow Run
INFO     2021-06-04 23:36:28.036 - Updating MLFlow Run for the UI
INFO     2021-06-04 23:36:28.126 - Reading Model Artifact Stream from Splice Machine
INFO     2021-06-04 23:36:28.142 - Extracting Model from DB with Name: keras_model
INFO     2021-06-04 23:36:28.168 - Decoding Model Artifact Binary Stream for De

## Multiclass prediction

In [50]:
# Multiclass check: train a three-output Keras model, deploy it with three class
# labels, then confirm the table's PREDICTION / CLASS1..CLASS3 columns agree with
# model.predict().
from numpy import loadtxt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import pandas as pd
import numpy as np
from random import random

with mlflow.start_run() as run:
    # load the dataset
    dataset = loadtxt('data.csv', delimiter=',')
    # split into input (X) and output (y) variables
    X = dataset[:,0:8]
    y = dataset[:,8]
    # Swap the labels for random floats in [0, 1); this cell only checks that the
    # deployed table reproduces the model's outputs.
    y = np.array([random() for _ in y])
    # define the keras model
    model = Sequential()
    model.add(Dense(12, input_dim=8, activation='relu'))
    model.add(Dense(8, activation='relu'))
    model.add(Dense(3, activation='sigmoid'))
    # compile the keras model
    model.compile(loss='mean_absolute_error', optimizer='adam', metrics=['accuracy'])
    # fit the keras model on the dataset
    model.fit(X, y, epochs=50, batch_size=10)

    mlflow.keras.log_model(model, 'keras_model')

    # Capture the run id once; it is printed and passed to deploy_db.
    run_id = mlflow.current_run_id()
    print(run_id)

    splice.dropTableIfExists(f'{schema}.keras_multiclass')

    # The frame passed as `df` carries one column per feature, named C0..C7.
    df = pd.DataFrame(X, columns=[f'C{i}' for i in range(X.shape[1])])
    jid = mlflow.deploy_db(schema, 'keras_multiclass', run_id, primary_key={'MOMENT': 'INT'}, classes=['class1','class2','class3'], df=df, create_model_table=True)
    mlflow.watch_job(jid)


print(f'Loading model {run.info.run_uuid}')
mlflow.load_model(run.info.run_uuid)

splice.execute('truncate table keras_multiclass')
splice.execute('insert into keras_multiclass (c0,c1,c2,c3,c4,c5,c6,c7,moment) values(2.4,2.2,1.5,2.6,3.6,7.5,77.8,2.4,34512)')
splice.execute('insert into keras_multiclass (c0,c1,c2,c3,c4,c5,c6,c7,moment) values(66.2,3.78,33.5,-23.4,22.3,0.1,19.3,-339.2,345)')
data = splice.df('select * from keras_multiclass').toPandas()

print('Comparing results from database to model')

features = data[['C0','C1','C2','C3','C4','C5','C6','C7']]
db_preds = data[['PREDICTION', 'CLASS1', 'CLASS2', 'CLASS3']]

raw_preds = list(model.predict(features))

# Position of each class label in the model's output vector.
label_to_index = {'CLASS1': 0, 'CLASS2':1,'CLASS3':2}

for (_, db_row), raw_row in zip(db_preds.iterrows(), raw_preds):
    # Prepend the index of the largest output so the expected values line up with
    # the selected columns: PREDICTION first, then the three per-class values.
    expected = [int(np.argmax(raw_row))] + list(raw_row)
    for db, raw in zip(db_row, expected):
        if isinstance(db, str):
            # String cells are class labels; compare by output index.
            db_p = label_to_index[db]
            assert db_p==raw, f'Something is wrong. Model Table gives {db_p} ({db}) but raw model gives {raw}'
        else:
            assert math.isclose(float(db), float(raw), abs_tol=1e-06), f'Something is wrong. Model Table gives {db} but raw model gives {raw}'

print('test passed!')

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
INFO:tensorflow:Assets written to: /tmp/tmpyq_902xx/model/data/model/assets
Uploading file... Done.
61f7e8a7372c
Table exists. Dropping table
Deploying model to database...
Your Job has been submitted. The returned value of this function is the job id, which you can use to monitor the your task in real-time. Run mlflow.watch_job(<job id>) tostream them to stdout, or mlflow.fetch_logs(<job id>) to read them

In [51]:
# Final cell: stop the Spark session now that all deployment checks have run.
spark.stop()