In [8]:
%load_ext sql
%matplotlib notebook

The sql extension is already loaded. To reload it, use:
  %reload_ext sql


In [9]:
%sql postgresql://localhost:6000/mnist

u'Connected: @mnist'

In [10]:
%sql select madlib.version();

 * postgresql://localhost:6000/mnist
1 rows affected.


version
"MADlib version: 1.18.0-dev, git revision: rc/1.17.0-rc2-100-g4987e8fe, cmake configuration time: Fri Mar 12 20:51:57 UTC 2021, build type: debug, build system: Darwin-20.3.0, C compiler: Clang, C++ compiler: Clang"


In [11]:
from tensorflow import keras
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Flatten, Dense, Reshape

# Multi Output Mnist Example

## Define Model

In [None]:
# creating model
inputs = Input(shape = (28,28))
flatten1 = Flatten()(inputs)
dense1 = Dense(512, activation = 'relu')(flatten1)
dense2 = Dense(128, activation = 'relu')(dense1)
dense3 = Dense(32, activation = 'relu')(dense2)

# create classification output
classification_output = Dense(10, activation = 'softmax')(dense3)

# use output from dense layer 3 to create autoencder output
up_dense1 = Dense(128, activation = 'relu')(dense3)
up_dense2 = Dense(512, activation = 'relu')(up_dense1)
decoded_flat = Dense(784)(up_dense2)
decoded_outputs = Reshape((28,28))(decoded_flat)

In [None]:
model = Model(inputs, [classification_output,decoded_outputs])
model.summary()

In [None]:
## From https://theailearner.com/2019/01/25/multi-input-and-multi-output-models-in-keras/
# m = 256
# n_epoch = 25
# model.compile(optimizer='adam', loss=['categorical_crossentropy', 'mse'], loss_weights = [1.0, 0.5], metrics = ['accuracy'])
# model.fit(output_X_train,[Y_train, output_X_train], epochs=n_epoch, batch_size=m, shuffle=True)

## Load Model

In [None]:
json_model = model.to_json()

In [None]:
%%sql
DROP TABLE IF EXISTS model_arch_library;
SELECT madlib.load_keras_model('model_arch_library', :json_model ::json,
                               NULL,                  -- Weights
                               'Multi Output MNIST',              -- Name
                               'Output 1: classification, Output 2: autoencoder' -- Descr
);

SELECT * FROM model_arch_library;

In [None]:
%sql \dt

In [None]:
%sql SELECT * FROM model_arch_library

## Run Preprocessor

In [None]:
%%sql
DROP TABLE IF EXISTS mnist_train_data_packed, mnist_train_data_packed_summary;
SELECT madlib.training_preprocessor_dl('mnist_train_data',        -- Source table
                                       'mnist_train_data_packed', -- Output table
                                       'y',                          -- Dependent variable
                                       'x',                          -- Independent variable
                                        1000,                        -- Buffer size
                                        255                          -- Normalizing constant
                                        );

SELECT * FROM mnist_train_data_packed_summary;

In [None]:
%%sql
DROP TABLE IF EXISTS mnist_test_data_packed, mnist_test_data_packed_summary;

SELECT madlib.training_preprocessor_dl('mnist_test_data',        -- Source table
                                       'mnist_test_data_packed', -- Output table
                                       'y',                          -- Dependent variable
                                       'x',                          -- Independent variable
                                        1000,                        -- Buffer size
                                        255                          -- Normalizing constant
                                        );

SELECT * FROM mnist_train_data_packed_summary;

In [None]:
%sql SET optimizer=off

In [None]:
%sql SHOW optimizer

## Fit Model

In [None]:
%%sql
DROP TABLE IF EXISTS mnist_multout_model, mnist_multout_model_summary;

SELECT madlib.madlib_keras_fit('mnist_train_data_packed',    -- source table
                               'mnist_multout_model',                -- model output table
                               'model_arch_library',            -- model arch table
                                1,                              -- model arch id
                                $$ loss='mse', loss_weights = [1.0, 0.5], optimizer='adam', metrics=['accuracy']$$,  -- compile_params
                                $$ batch_size=256, epochs=1, shuffle=True $$,  -- fit_params
                                3,                              -- num_iterations
                                FALSE,                          -- use GPUs
                                'mnist_test_data_packed',    -- validation dataset
                                2                               -- metrics compute frequency
                              );

## Evaluate

In [None]:
%%sql
DROP TABLE IF EXISTS mnist_multout_eval, mnist_multout_eval_summary;
SELECT madlib.madlib_keras_evaluate('mnist_model',               -- model
                                    'mnist_test_data_packed',   -- test table
                                    'mnist_multout_eval'             -- output table
                                    );

SELECT * FROM cifar10_validate;

## Predict

In [None]:
%%sql
DROP TABLE IF EXISTS mnist_multout_predict, mnist_multout_predict;
SELECT madlib.madlib_keras_predict()

## Plots

In [None]:
# get accuracy and iteration number
iters_proxy = %sql SELECT metrics_iters FROM cifar_10_model_summary;
train_accuracy_proxy = %sql SELECT training_metrics FROM cifar_10_model_summary;
test_accuracy_proxy = %sql SELECT validation_metrics FROM cifar_10_model_summary;

# get number of points
num_points_proxy = %sql SELECT array_length(metrics_iters,1) FROM cifar_10_model_summary;
num_points = num_points_proxy[0]

# reshape to np arrays
iters = np.array(iters_proxy).reshape(num_points)
train_accuracy = np.array(train_accuracy_proxy).reshape(num_points)
test_accuracy = np.array(test_accuracy_proxy).reshape(num_points)

#plot
plt.title('CIFAR-10 accuracy by iteration')
plt.xlabel('Iteration number')
plt.ylabel('Accuracy')
plt.grid(True)
plt.plot(iters, train_accuracy, 'g.-', label='Train')
plt.plot(iters, test_accuracy, 'r.-', label='Test')
plt.legend()

Loss by iteration

# get loss
train_loss_proxy = %sql SELECT training_loss FROM cifar_10_model_summary;
test_loss_proxy = %sql SELECT validation_loss FROM cifar_10_model_summary;

# reshape to np arrays
train_loss = np.array(train_loss_proxy).reshape(num_points)
test_loss = np.array(test_loss_proxy).reshape(num_points)

#plot
plt.title('CIFAR-10 loss by iteration')
plt.xlabel('Iteration number')
plt.ylabel('Loss')
plt.grid(True)
plt.plot(iters, train_loss, 'g.-', label='Train')
plt.plot(iters, test_loss, 'r.-', label='Test')
plt.legend()

Accuracy by time

# get time
time_proxy = %sql SELECT metrics_elapsed_time FROM cifar_10_model_summary;

# reshape to np arrays
time = np.array(time_proxy).reshape(num_points)/60.0

#plot
plt.title('CIFAR-10 accuracy by time')
plt.xlabel('Time (min)')
plt.ylabel('Accuracy')
plt.grid(True)
plt.plot(time, train_accuracy, 'g.-', label='Train')
plt.plot(time, test_accuracy, 'r.-', label='Test')
plt.legend()

Time to achieve a given accuracy

#plot
plt.title('CIFAR-10 time by accuracy')
plt.xlabel('Accuracy')
plt.ylabel('Time (min)')
plt.grid(True)
plt.plot(train_accuracy, time, 'g.-', label='Train')
plt.plot(test_accuracy, time, 'r.-', label='Test')
plt.legend()

# Multi Input

In [None]:
from tensorflow.keras.layers import Conv2D, MaxPooling2D, concatenate

In [None]:
# feature extraction from gray scale image
inputs = Input(shape = (28,28,1))
 
conv1 = Conv2D(16, (3,3), activation = 'relu', padding = "SAME")(inputs)
pool1 = MaxPooling2D(pool_size = (2,2), strides = 2)(conv1)
conv2 = Conv2D(32, (3,3), activation = 'relu', padding = "SAME")(pool1)
pool2 = MaxPooling2D(pool_size = (2,2), strides = 2)(conv2)
flat_1 = Flatten()(pool2)
 
# feature extraction from RGB image
inputs_2 = Input(shape = (28,28,3))
 
conv1_2 = Conv2D(16, (3,3), activation = 'relu', padding = "SAME")(inputs_2)
pool1_2 = MaxPooling2D(pool_size = (2,2), strides = 2)(conv1_2)
conv2_2 = Conv2D(32, (3,3), activation = 'relu', padding = "SAME")(pool1_2)
pool2_2 = MaxPooling2D(pool_size = (2,2), strides = 2)(conv2_2)
flat_2 = Flatten()(pool2_2)
 
# concatenate both feature layers and define output layer after some dense layers
concat = concatenate([flat_1,flat_2])
dense1 = Dense(512, activation = 'relu')(concat)
dense2 = Dense(128, activation = 'relu')(dense1)
dense3 = Dense(32, activation = 'relu')(dense2)
output = Dense(10, activation = 'softmax')(dense3)
 
# create model with two inputs
model = Model([inputs,inputs_2], dense1)


In [None]:
model.summary()

In [None]:
json_model = model.to_json()

In [None]:
%%sql
DROP TABLE IF EXISTS model_arch_library;
SELECT madlib.load_keras_model('model_arch_library', :json_model ::json,
                               NULL,                  -- Weights
                               'Multi Output MNIST',              -- Name
                               'Input 1: greyscale, Input 2: color' -- Descr
);

SELECT * FROM model_arch_library;

## Create color mnist input from greyscale input

In [None]:
from tensorflow.keras.datasets import mnist
import numpy as np
import PIL

In [None]:
%%sql
DROP TABLE IF EXISTS random_colors_train;
CREATE TABLE random_colors_train AS
    SELECT id,
        floor(r/(r+b+g)*256)::INT AS r,
        floor(g/(r+b+g)*256)::INT AS g,
        floor(b/(r+b+g)*256)::INT AS b FROM
            (SELECT s.id,random() as r, random() as b, random() as g FROM
                (SELECT generate_series(1,60000) as id) s) x;

In [None]:
%%sql
DROP TABLE IF EXISTS random_colors_test;
CREATE TABLE random_colors_test AS
    SELECT id,
        floor(r/(r+b+g)*256)::INT AS r,
        floor(g/(r+b+g)*256)::INT AS g,
        floor(b/(r+b+g)*256)::INT AS b FROM
            (SELECT s.id,random() as r, random() as b, random() as g FROM
                (SELECT generate_series(1,10000) as id) s) x;

In [None]:
%sql SET search_path=madlib,public

In [None]:
%%sql
DROP TABLE IF EXISTS mnist_color_train;
CREATE TABLE mnist_color_train AS
    SELECT id,
        ARRAY[
            array_scalar_mult(x,(r/256.0)::REAL),
            array_scalar_mult(x,(g/256.0)::REAL),
            array_scalar_mult(x,(b/256.0)::REAL)
        ] AS x,
        y
    FROM mnist_train_data JOIN random_colors_train USING (id);

In [None]:
%%sql
DROP TABLE IF EXISTS mnist_color_test;
CREATE TABLE mnist_color_test AS
    SELECT id,
        ARRAY[
            array_scalar_mult(x,(r/256.0)::REAL),
            array_scalar_mult(x,(g/256.0)::REAL),
            array_scalar_mult(x,(b/256.0)::REAL)
        ] AS x,
        y
    FROM mnist_test_data JOIN random_colors_test USING (id);

In [None]:
%%sql
DROP TABLE IF EXISTS mnist_grey_color_train;
CREATE TABLE mnist_grey_color_train AS
    SELECT id, ARRAY[g.x] AS x_grey, c.x AS x_color, y
FROM mnist_train_data g JOIN mnist_color_train c USING (id,y);

In [None]:
%%sql
DROP TABLE IF EXISTS mnist_grey_color_test;
CREATE TABLE mnist_grey_color_test AS
    SELECT id, ARRAY[g.x] AS x_grey, c.x AS x_color, y
FROM mnist_test_data g JOIN mnist_color_test c USING (id,y);

## Convert data format from channels first to channels last

In [43]:
import pandas as pd
import numpy as np
import PIL

In [13]:
gcd_test = %sql SELECT * FROM mnist_grey_color_test ORDER BY id

 * postgresql://localhost:6000/mnist
10000 rows affected.


In [14]:
gcd_train = %sql SELECT * FROM mnist_grey_color_train ORDER BY id

 * postgresql://localhost:6000/mnist
60000 rows affected.


In [15]:
gcd_test = gcd_test.dict()
gcd_train = gcd_train.dict()

In [None]:
x_grey = [ np.asarray(x) for x in gcd_test['x_grey']]
x_color = [ np.asarray(x) for x in gcd_test['x_color'] ]

In [None]:
grey_color_test_dataset = { 'id' : gcd_test['id'], 'x_grey' : x_grey, 'x_color' : x_color, 'y' : gcd_test['y'] }

In [18]:
x_grey = [ np.asarray(x) for x in gcd_train['x_grey'] ]
x_color = [ np.asarray(x) for x in gcd_train['x_color'] ]

In [19]:
grey_color_train_dataset = { 'id' : gcd_train['id'], 'x_grey' : x_grey, 'x_color' : x_color, 'y' : gcd_train['y'] }

In [None]:
df_test = pd.DataFrame(grey_color_test_dataset)
df_test.set_index('id')

In [None]:
df_train = pd.DataFrame(grey_color_train_dataset)
df_train.set_index('id')

In [23]:
df_train.loc[1]['x_color'].shape, df_train.loc[1]['x_grey'].shape

((3, 28, 28), (1, 28, 28))

In [24]:
def channels_first_to_last(row):
    row['x_grey'] = np.transpose(row['x_grey'], (1,2,0))
    row['x_color'] = np.transpose(row['x_color'], (1,2,0))
    return row

In [None]:
df_test = df_test.apply( channels_first_to_last, axis=1 )

In [25]:
df_train = df_train.apply( channels_first_to_last, axis=1 )

In [26]:
from sqlalchemy import create_engine, INTEGER, ARRAY, REAL, SMALLINT

In [27]:
engine = create_engine('postgres://localhost:6000/mnist')

In [28]:
res = engine.execute("SELECT version()")

In [29]:
res.fetchall()

[(u'PostgreSQL 9.4.24 (Greenplum Database 6.12.0+dev.22.g826699c7b9 build dev) on x86_64-apple-darwin18.7.0, compiled by Apple LLVM version 9.0.0 (clang-900.0.39.2), 64-bit compiled on Nov 10 2020 00:36:49 (with assert checking)',)]

In [None]:
df_train.loc[1]['x_color'].shape, df_train.loc[1]['x_grey'].shape

In [None]:
df = df_test

In [None]:
df['x_color'] = df['x_color'].apply(lambda x : x.tolist())
df['x_grey'] = df['x_grey'].apply(lambda x : x.tolist())

In [None]:
df_test = df

In [30]:
df = df_train

In [31]:
df['x_color'] = df['x_color'].apply(lambda x : x.tolist())
df['x_grey'] = df['x_grey'].apply(lambda x : x.tolist())

In [32]:
df_train = df

In [34]:
dtype = { 'id' : INTEGER, 'x_grey' : ARRAY(REAL), 'x_color' : ARRAY(REAL), 'y' : SMALLINT }

In [None]:
df_test.to_sql('channels_last_test', con=engine, if_exists='replace', dtype=dtype, chunksize=500, method=multi, schema='public', index=False)

In [None]:
df_train.to_sql('channels_last_train', con=engine, if_exists='replace', dtype=dtype, chunksize=1000, method='multi', schema='public', index=False)

In [93]:
grey1 = np.array(df_train['x_grey'][12], dtype='uint8')

In [94]:
color1 = np.array(df_train['x_color'][12], dtype='uint8')

In [95]:
grey1.shape, color1.shape

((28, 28, 1), (28, 28, 3))

In [96]:
grey1slim = grey1.reshape((28,28))

In [97]:
grey1slim.shape

(28, 28)

In [98]:
img1g = PIL.Image.fromarray(grey1slim)

In [99]:
img1c = PIL.Image.fromarray(color1)

In [100]:
img1g.show() # Doesn't work for me

In [101]:
img1g.save('img1g.jpg')
img1c.save('img1c.jpg')

In [102]:
!display img1g.jpg

In [103]:
!display img1c.jpg

In [113]:
%%sql
begin -- Don't drop unless channels last train exists, otherwise we lose the table and will have to re-create
drop table if exists mnist_grey_color_train;
alter table channels_last_train rename to mnist_grey_color_train;
commit

 * postgresql://localhost:6000/mnist


Exception: ipython_sql does not support transactions

In [108]:
%sql SELECT id,array_dims(x_grey), array_dims(x_color),y FROM mnist_grey_color_test ORDER BY id LIMIT 10;

 * postgresql://localhost:6000/mnist
10 rows affected.


id,array_dims,array_dims_1,y
1,[1:28][1:28][1:1],[1:28][1:28][1:3],9
2,[1:28][1:28][1:1],[1:28][1:28][1:3],0
3,[1:28][1:28][1:1],[1:28][1:28][1:3],2
4,[1:28][1:28][1:1],[1:28][1:28][1:3],5
5,[1:28][1:28][1:1],[1:28][1:28][1:3],1
6,[1:28][1:28][1:1],[1:28][1:28][1:3],9
7,[1:28][1:28][1:1],[1:28][1:28][1:3],7
8,[1:28][1:28][1:1],[1:28][1:28][1:3],8
9,[1:28][1:28][1:1],[1:28][1:28][1:3],1
10,[1:28][1:28][1:1],[1:28][1:28][1:3],0


In [111]:
%%sql
DROP TABLE IF EXISTS mnist_color_train_packed, mnist_color_train_packed_summary;
SELECT madlib.training_preprocessor_dl('mnist_grey_color_train',        -- Source table
                                       'mnist_grey_color_train_packed', -- Output table
                                       'y',                          -- Dependent variable
                                       'x_grey, x_color',            -- Independent variables
                                        1000,                        -- Buffer size
                                        255                          -- Normalizing constant
                                        );

SELECT * FROM mnist_train_data_packed_summary;

 * postgresql://localhost:6000/mnist
Done.


InternalError: (psycopg2.errors.InternalError_) spiexceptions.InternalError: no pre-assigned OID for pg_namespace tuple "pg_temp_81" (namespace:0 keyOid1:0 keyOid2:0) (oid_dispatch.c:644)  (seg0 127.0.0.1:6002 pid=27456) (plpy_elog.c:121)
CONTEXT:  Traceback (most recent call last):
  PL/Python function "training_preprocessor_dl", line 24, in <module>
    training_preprocessor_obj.training_preprocessor_dl()
  PL/Python function "training_preprocessor_dl", line 898, in training_preprocessor_dl
  PL/Python function "training_preprocessor_dl", line 448, in input_preprocessor_dl
PL/Python function "training_preprocessor_dl"
 [SQL: "SELECT madlib.training_preprocessor_dl('mnist_grey_color_train',        -- Source table\n                                       'mnist_grey_color_train_packed', -- Output table\n                                       'y',                          -- Dependent variable\n                                       'x_grey, x_color',            -- Independent variables\n                                        1000,                        -- Buffer size\n                                        255                          -- Normalizing constant\n                                        );"] (Background on this error at: http://sqlalche.me/e/2j85)

In [109]:
%%sql
DROP TABLE IF EXISTS mnist_test_data_packed, mnist_test_data_packed_summary;
SELECT madlib.training_preprocessor_dl('mnist_grey_color_test',        -- Source table
                                       'mnist_grey_color_test_packed', -- Output table
                                       'y',                          -- Dependent variable
                                       'x_grey,x_color',             -- Independent variables
                                        1000,                        -- Buffer size
                                        255                          -- Normalizing constant
                                        );

SELECT * FROM mnist_train_data_packed_summary;

 * postgresql://localhost:6000/mnist
Done.
1 rows affected.
1 rows affected.


source_table,output_table,dependent_varname,independent_varname,dependent_vartype,y_class_values,buffer_size,normalizing_const,num_classes,distribution_rules,__internal_gpu_config__
mnist_train_data,mnist_train_data_packed,[u'y'],[u'x'],[u'text'],"[u'0', u'1', u'2', u'3', u'4', u'5', u'6', u'7', u'8', u'9']",1000,255.0,[10],all_segments,all_segments


In [110]:
%%sql
DROP TABLE IF EXISTS mnist_multout_model, mnist_multout_model_summary;
SELECT madlib.madlib_keras_fit('mnist_grey_color_train_packed',    -- source table
                               'mnist_multiin_model',           -- model output table
                               'model_arch_library',            -- model arch table
                                1,                              -- model arch id
                                $$ loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']$$,  -- compile_params
                                $$ batch_size=256, epochs=1, shuffle=True $$,  -- fit_params
                                3,                              -- num_iterations
                                FALSE,                          -- use GPUs
                                'mnist_grey_color_test_packed',    -- validation dataset
                                2                               -- metrics compute frequency
                              );

 * postgresql://localhost:6000/mnist
Done.


InternalError: (psycopg2.errors.InternalError_) plpy.Error: model_keras error: Input shape 28 in the model architecture does not match the input shape 1 of column ['x_grey', 'x_color'] in table mnist_grey_color_train_packed. (plpy_elog.c:121)
CONTEXT:  Traceback (most recent call last):
  PL/Python function "madlib_keras_fit", line 23, in <module>
    madlib_keras.fit(**globals())
  PL/Python function "madlib_keras_fit", line 42, in wrapper
  PL/Python function "madlib_keras_fit", line 147, in fit
  PL/Python function "madlib_keras_fit", line 418, in validate_input_shapes
  PL/Python function "madlib_keras_fit", line 171, in validate_input_shape
PL/Python function "madlib_keras_fit"
 [SQL: "SELECT madlib.madlib_keras_fit('mnist_grey_color_train_packed',    -- source table\n                               'mnist_multiin_model',                -- model output table\n                               'model_arch_library',            -- model arch table\n                                1,                              -- model arch id\n                                $$ loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']$$,  -- compile_params\n                                $$ batch_size=256, epochs=1, shuffle=True $$,  -- fit_params\n                                3,                              -- num_iterations\n                                FALSE,                          -- use GPUs\n                                'mnist_grey_color_test_packed',    -- validation dataset\n                                2                               -- metrics compute frequency\n                              );"] (Background on this error at: http://sqlalche.me/e/2j85)