# Environment Setup

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive/.
# Later cells read the dataset through relative paths that start with
# `drive/MyDrive/...`, so they assume the working directory is /content.
from google.colab import drive
drive.mount('/content/drive/')

In [None]:
# unzip files to main content directory:
! unzip -q drive/MyDrive/cisc873-dm-f20-a4/img_test.zip
! unzip -q drive/MyDrive/cisc873-dm-f20-a4/img_train.zip

In [None]:
import collections
from __future__ import absolute_import, division, print_function, unicode_literals
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
from PIL import Image
from pprint import pprint
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Conv2D, Flatten, Dense, MaxPool2D, Bidirectional, GRU, Attention, Concatenate, Embedding, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Data Setup

In [None]:
# Load in train and test data to dataframes
# Training table: later cells read its `image`, `summary`, `price` and `type` columns.
xy_train_df = pd.read_csv('drive/MyDrive/cisc873-dm-f20-a4/train_xy.csv')
# Test table: later cells read its `image`, `summary` and `id` columns.
x_test_df = pd.read_csv('drive/MyDrive/cisc873-dm-f20-a4/test_x.csv')

In [None]:
# preprocess image data
def load_image(file):
    """Load one image as a fixed-size greyscale + alpha array.

    Args:
        file: path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        ``np.ndarray`` of shape (64, 64, 2) holding the luminance and alpha
        channels of the image resized to 64x64. If the image cannot be opened
        or decoded, an all-zero uint8 array of the same shape is returned so
        the caller can still stack every sample into a single array.
    """
    try:
        # The context manager closes the underlying file handle as soon as the
        # converted, resized copy has been produced.
        with Image.open(file) as image:
            resized = image.convert('LA').resize((64, 64))
        # make image into numpy array
        arr = np.array(resized)
    except Exception:
        # Best-effort fallback for missing or corrupt files. Catching
        # ``Exception`` instead of using a bare ``except`` lets
        # KeyboardInterrupt / SystemExit propagate, so a long loading loop can
        # still be interrupted.
        # uint8 matches the dtype of successfully decoded images, so a single
        # bad file does not upcast the whole stacked dataset to float64.
        arr = np.zeros((64, 64, 2), dtype=np.uint8)
    return arr

# Decode every training image listed in the `image` column of xy_train_df and
# stack the per-image arrays into one array (tqdm shows a progress bar).
x_image = np.array(list(map(load_image, tqdm(xy_train_df['image']))))

# Listing summaries; every cell is cast to `str` so non-string cells
# become text as well.
x_text = xy_train_df['summary'].astype('str')

In [None]:
# Sanity check of the image loading: show channel 0 of the first loaded image.
first_image = x_image[0]
plt.imshow(first_image[:, :, 0])

In [None]:
# labels:
# Price label, taken directly from the `price` column
y_price = xy_train_df.price
# Convert types to categories for listing type (stored as integer category codes)
y_type = xy_train_df.type.astype('category').cat.codes

# Display number of price and type categories
# (len_price / len_type are reused later as the sizes of the model output layers)
len_price = len(y_price.unique())
len_type = len(y_type.unique())
print('unique values for price category', len_price, y_price.unique())
print('unique values for type category', len_type, y_type.unique())

# splitting: hold out 20% of the rows for validation. All four inputs are split
# with the same shuffled row selection, so images, text and labels stay aligned.
# A fixed random_state makes the split identical on every run; without it each
# kernel restart produces a different train/validation partition and results
# are not comparable between runs.
x_tr_image, x_vl_image, x_tr_text, x_vl_text, y_tr_price, y_vl_price, y_tr_type, y_vl_type = train_test_split(
    x_image,
    x_text,
    y_price,
    y_type,
    test_size=0.2,
    random_state=42)

# Shapes of the resulting splits
print(np.shape(x_tr_image))
print(np.shape(x_vl_image))
print(np.shape(y_tr_price))
print(np.shape(y_vl_price))
print(np.shape(y_tr_type))
print(np.shape(y_vl_type))

unique values for price category 3 [1 0 2]
unique values for type category 24 [ 1 17 22 10 18 20  5  2  8  4 23 13 15 16 14 11 19  0 21  3  6 12  7  9]
(6101, 64, 64, 2)
(1526, 64, 64, 2)
(6101,)
(1526,)
(6101,)
(1526,)


# Preprocessing

## Template Tokenizer

In [None]:
# preprocess text data
# maximum number of common words to keep in tokenizer
# (also used below as the cap passed to Tokenizer(num_words=...))
vocab_size = 40000
# maximum sequence length for padding/truncating
max_len = 100


# build vocabulary from training set
# (only the training split is used for fitting; the validation text is merely
# transformed with the fitted tokenizer further down)
tokenizer = Tokenizer(num_words=vocab_size)
tokenizer.fit_on_texts(x_tr_text)


def _preprocess(list_of_text):
    """Turn raw texts into fixed-length id sequences with the fitted Keras tokenizer.

    Each text is mapped to a sequence of word ids by the notebook-level
    `tokenizer`, then truncated or padded to `max_len`; padding uses 0s placed
    at the end of the sequence.
    """
    token_ids = tokenizer.texts_to_sequences(list_of_text)
    return pad_sequences(token_ids, maxlen=max_len, padding='post')
    

# Encode the training and validation summaries (padding happens inside _preprocess)
x_tr_text_id, x_vl_text_id = (_preprocess(texts) for texts in (x_tr_text, x_vl_text))

# Shapes of the two id matrices, one per line
print(x_tr_text_id.shape, x_vl_text_id.shape, sep='\n')

(6101, 100)
(1526, 100)


In [None]:
# Visual check of the tokenizer: map the first five padded id sequences of the
# training split back to text and pretty-print them.
first_five_ids = x_tr_text_id[:5]
pprint(tokenizer.sequences_to_texts(first_five_ids))

In [None]:
# `tokenizer.num_words` is only the cap configured above (vocab_size), not a
# count of learned words; the words actually seen while fitting are the entries
# of `tokenizer.word_index`. Report both so the two are not confused.
print('total words in the dictionary:', len(tokenizer.word_index))
print('words kept by the tokenizer (num_words):', tokenizer.num_words)

total words in the dictionary: 40000


## Advanced Tokenizer

Swap the original tokenizer with the sentencepiece tokenizer.

In [None]:
!pip install sentencepiece

In [None]:
import sentencepiece as spm
import io

# Upper bound on the number of sentence pieces in the vocabulary.
# How many pieces can actually be learned depends on the training text, so the
# trainer is run with hard_vocab_limit=False below: sp_vocab_size is then a soft
# limit and the value no longer has to be tuned by hand from the trainer's
# error message whenever the training split changes.
sp_vocab_size = 13180
# maximum sequence length for padding/truncating
max_len = 100

# binary stream using an in-memory bytes buffer to write model
token_model = io.BytesIO()

# Train SentencePiece Model
# Training with an iterable version of the train text, writing model to
# token_model with a maximum of sp_vocab_size pieces
spm.SentencePieceTrainer.train(
    sentence_iterator=iter(x_tr_text),
    model_writer=token_model,
    vocab_size=sp_vocab_size,
    hard_vocab_limit=False,
)

# Write trained model to out.model so it can be reloaded without retraining
with open('out.model', 'wb') as f:
    f.write(token_model.getvalue())

# Make processor for encoding, loaded directly from the in-memory model bytes
sp = spm.SentencePieceProcessor(model_proto=token_model.getvalue())

def sentence_preprocess(list_of_text):
    """Turn raw texts into fixed-length id sequences with the SentencePiece processor.

    The texts are encoded into sentence-piece ids by the notebook-level `sp`
    processor, then truncated or padded to `max_len` with the padding placed
    at the end of each sequence.
    """
    piece_ids = sp.encode(list_of_text)
    return pad_sequences(piece_ids, maxlen=max_len, padding='post')

# Encode the training and validation summaries with SentencePiece.
# The pandas Series are converted to plain lists before being passed to
# sp.encode; padding happens inside sentence_preprocess.
x_tr_text_sent_id, x_vl_text_sent_id = (
    sentence_preprocess(texts.tolist()) for texts in (x_tr_text, x_vl_text)
)

# Shapes of the two id matrices, one per line
print(x_tr_text_sent_id.shape, x_vl_text_sent_id.shape, sep='\n')

(6101, 100)
(1526, 100)


In [None]:
# Confirm encoding worked: decode the first five padded id sequences of the
# training split back to text. As the last expression of the cell, the decoded
# result is displayed as the cell output.
sp.decode(x_tr_text_sent_id[:5].tolist())

In [None]:
# Report the size of the vocabulary held by the SentencePiece processor.
# (`vocab_size` is the cap of the Keras tokenizer defined earlier and says
# nothing about the SentencePiece model.)
print('total words in the dictionary:', sp.get_piece_size())

total words in the dictionary: 13110


# Model Setup

## Template Model

In [None]:
# Sample Model - model provided in template from Professor Ding
# Baseline two-input / two-output network:
#   * text branch:  token ids -> Embedding -> mean over the sequence axis
#   * image branch: 64x64x2 image -> Conv2D -> MaxPool2D -> Flatten
#   * fusion:       concatenation of both feature vectors
#   * heads:        one softmax classifier each for price and type
# NOTE: this cell rebinds the notebook-level `model`; the training cells further
# down operate on whichever model cell was executed last.
# Input text layer
in_text = keras.Input(batch_shape=(None, max_len)) # [None, max_len]
# Input image layer
in_image = keras.Input(batch_shape=(None, 64, 64, 2)) # [None, 64, 64, 2]

# text part
# Embedding layer for text data (input dimension = tokenizer.num_words, 100-d vectors)
embedded = keras.layers.Embedding(tokenizer.num_words, 100)(in_text) # [None, max_len, 100]
# Reducing to 2D for fusion by averaging over the sequence axis
averaged = tf.reduce_mean(embedded, axis=1) # [None, 100]

# image part
cov = Conv2D(32, (16, 16))(in_image) # [None, 49, 49, 32]
pl = MaxPool2D((16, 16))(cov) # [None, 3, 3, 32]
# Reducing to 2d for fusion
flattened = Flatten()(pl) # [None, 288]

# fusion: by concatenating the image and text sections
fused = tf.concat([averaged, flattened], axis=-1) # [None, 388]

# multi-objectives (each is a multi-class classification)
p_price = Dense(len_price, activation='softmax', name='price')(fused) # [None, 3]
p_type = Dense(len_type, activation='softmax', name='type')(fused) # [None, 24]

# Build the model
# Inputs and outputs are dicts; the keys ('summary', 'image', 'price', 'type')
# are the same keys used by the fit/predict cells later in the notebook.
model = keras.Model(
    inputs={
        'summary': in_text,
        'image': in_image
    },
    outputs={
        'price': p_price,
        'type': p_type,
    },
    name="template_model"
)

# Compile the model
model.compile(
    optimizer=Adam(),
    # Sparse categorical cross entropy: multi-class loss for targets supplied
    # as integer class ids rather than one-hot vectors
    loss={
        'price': 'sparse_categorical_crossentropy',
        'type': 'sparse_categorical_crossentropy',
    },
    # Loss weights are coefficients to weight the loss contributions of different model outputs.
    # The loss value that will be minimized by the model will then be the weighted
    # sum of all individual losses, weighted by the loss_weights coefficients.
    # Both heads contribute equally here.
    loss_weights={
        'price': 0.5,
        'type': 0.5,
    },
    # Metrics to be evaluated by the model during training and testing
    metrics={
        'price': ['SparseCategoricalAccuracy'],
        'type': ['SparseCategoricalAccuracy'],
    },
)

# Display info about model
model.summary()

## Text Layers

Swap the original reduce_mean layer with a BiDirectional GRU layer, and use Attention to aggregate the time dimension (see lecture notes for API & Examples).

Following the example in the Keras Attention API documentation.

In [None]:
# Variant of the template model in which the text branch averages nothing:
# two Bidirectional GRU layers produce a query and a value sequence, and an
# Attention layer aggregates the time dimension. The image branch, fusion and
# output heads are the same as in the template model.
# NOTE: this cell rebinds the notebook-level `model`.
# Input text layer
in_text = keras.Input(batch_shape=(None, max_len)) # [None, max_len]
# Input image layer
in_image = keras.Input(batch_shape=(None, 64, 64, 2)) # [None, 64, 64, 2]

### TEXT PART ###
embedding = Embedding(tokenizer.num_words, 100)(in_text) # [None, max_len, 100]

# Replace reduce_mean layer with BiDirectional GRU
# (two independent recurrent layers over the same embedding: one yields the
# query sequence, the other the value sequence)
query_x = Bidirectional(GRU(32, return_sequences=True))(embedding) # [None, max_len, 64]
value_x = Bidirectional(GRU(32, return_sequences=True))(embedding) # [None, max_len, 64]

# Use Attention to aggregate time dimension
# Only the last time step of the query sequence is used as the (single) query.
# NOTE(review): the preprocessing functions pad with padding='post' and the
# Embedding above is created without masking, so for short summaries this last
# position presumably corresponds to padding - confirm this is intended.
fixed_query_x = tf.expand_dims(query_x[:, -1, :], [1]) # [None, 1, 64]
# Not passing in key since it would be equivalent to value and Attention makes key equivalent by default
query_value_attention_seq = Attention()([fixed_query_x, value_x]) # [None, 1, 64]
# Reducing to 2D for fusion layer
text_final = tf.squeeze(query_value_attention_seq, [1]) # [None, 64]

### IMAGE PART ###
cov = Conv2D(32, (16, 16))(in_image) # [None, 49, 49, 32]
pl = MaxPool2D((16, 16))(cov) # [None, 3, 3, 32]
flattened = Flatten()(pl) # [None, 288]

# fusion: concatenate text and image features
fused = Concatenate()([text_final, flattened]) # [None, 352]

# multi-objectives (each is a multi-class classification)
p_price = Dense(len_price, activation='softmax', name='price')(fused) # [None, 3]
p_type = Dense(len_type, activation='softmax', name='type')(fused) # [None, 24]

# Build the model
# The dict keys are the same keys used by the fit/predict cells later on.
model = keras.Model(
    inputs={
        'summary': in_text,
        'image': in_image,
    },
    outputs={
        'price': p_price,
        'type': p_type,
    },
    name="text_layers_model"
)

# Compile the model
model.compile(
    optimizer=Adam(),
    # Sparse categorical cross entropy: multi-class loss for targets supplied
    # as integer class ids rather than one-hot vectors
    loss={
        'price': 'sparse_categorical_crossentropy',
        'type': 'sparse_categorical_crossentropy',
    },
    # coefficients to weight the loss contributions of different model outputs.
    # The loss value that will be minimized by the model will then be the weighted
    # sum of all individual losses, weighted by the loss_weights coefficients.
    loss_weights={
        'price': 0.5,
        'type': 0.5,
    },
    # metrics to be evaluated by the model during training and testing
    metrics={
        'price': ['SparseCategoricalAccuracy'],
        'type': ['SparseCategoricalAccuracy'],
    },
)
# Display info about model
model.summary()

## Image Layers

The original image branch contains a single convolution layer. Here those layers are customized, and a dropout layer is added for regularization.

In [None]:
# Variant of the template model with a deeper image branch: two stacked Conv2D
# layers before the pooling step. The text branch (embedding + mean), the
# concatenation fusion and the output heads are unchanged.
# NOTE: this cell rebinds the notebook-level `model`.
# Input text layer
in_text = keras.Input(batch_shape=(None, max_len)) # [None, max_len]
# Input image layer
in_image = keras.Input(batch_shape=(None, 64, 64, 2)) # [None, 64, 64, 2]

### TEXT PART ###
embedded = keras.layers.Embedding(tokenizer.num_words, 100)(in_text) # [None, max_len, 100]
# Average over the sequence axis to obtain one vector per summary
averaged = tf.reduce_mean(embedded, axis=1) # [None, 100]

### IMAGE PART ###
# VERSION 1
# Two 16x16 convolutions; each shrinks the spatial size by 15 (64 -> 49 -> 34)
x = Conv2D(32, (16, 16))(in_image) # [None, 49, 49, 32]
x = Conv2D(32, (16, 16))(x) # [None, 34, 34, 32]
pl = MaxPool2D((16, 16))(x) # [None, 2, 2, 32]
img_final = Flatten()(pl) # [None, 128]

# fusion: concatenate text and image features
fused = tf.concat([averaged, img_final], axis=-1) # [None, 228]

# multi-objectives (each is a multi-class classification)
p_price = Dense(len_price, activation='softmax', name='price')(fused) # [None, 3]
p_type = Dense(len_type, activation='softmax', name='type')(fused) # [None, 24]

# Build the model
# The dict keys are the same keys used by the fit/predict cells later on.
model = keras.Model(
    inputs={
        'summary': in_text,
        'image': in_image
    },
    outputs={
        'price': p_price,
        'type': p_type,
    },
    name="image_layers_model_v1"
)

# Compile the model
model.compile(
    optimizer=Adam(),
    # Sparse categorical cross entropy: multi-class loss for targets supplied
    # as integer class ids rather than one-hot vectors
    loss={
        'price': 'sparse_categorical_crossentropy',
        'type': 'sparse_categorical_crossentropy',
    },
    # coefficients to weight the loss contributions of different model outputs.
    # The loss value that will be minimized by the model will then be the weighted
    # sum of all individual losses, weighted by the loss_weights coefficients.
    loss_weights={
        'price': 0.5,
        'type': 0.5,
    },
    # metrics to be evaluated by the model during training and testing
    metrics={
        'price': ['SparseCategoricalAccuracy'],
        'type': ['SparseCategoricalAccuracy'],
    },
)

# Display info about model
model.summary()

## Fusion Layer

Replacing concatenation fusion with different approaches. First reduce mean and then reduce sum.


### Reduce Mean

In [None]:
# Sample model with fusion layer as reduce mean
# Both branches are first projected to 64 features by a Dense layer so that
# they have the same shape; the fusion is then their element-wise mean.
# NOTE: this cell rebinds the notebook-level `model`.
# Input text layer
in_text = keras.Input(batch_shape=(None, max_len)) # [None, max_len]
# Input image layer
in_image = keras.Input(batch_shape=(None, 64, 64, 2)) # [None, 64, 64, 2]

### TEXT PART ###
embedded = keras.layers.Embedding(tokenizer.num_words, 100)(in_text) # [None, max_len, 100]
averaged = tf.reduce_mean(embedded, axis=1) # [None, 100]
# Project to the common fusion width
text_final = Dense(64)(averaged) # [None, 64]


### IMAGE PART ###
cov = Conv2D(32, (16, 16))(in_image) # [None, 49, 49, 32]
pl = MaxPool2D((16, 16))(cov) # [None, 3, 3, 32]
flattened = Flatten()(pl) # [None, 288]
# Project to the common fusion width
img_final = Dense(64)(flattened) # [None, 64]

### FUSION ###
# The two [None, 64] tensors are stacked along a new leading axis and averaged
# over it, i.e. an element-wise mean of text and image features.
fused = tf.reduce_mean([text_final, img_final], axis=0) # [None, 64]

# multi-objectives (each is a multi-class classification)
p_price = Dense(len_price, activation='softmax', name='price')(fused) # [None, 3]
p_type = Dense(len_type, activation='softmax', name='type')(fused) # [None, 24]

# Build the model
# The dict keys are the same keys used by the fit/predict cells later on.
model = keras.Model(
    inputs={
        'summary': in_text,
        'image': in_image
    },
    outputs={
        'price': p_price,
        'type': p_type,
    },
    name="reducemean_fusion_model"
)

# Compile the model
model.compile(
    optimizer=Adam(),
    # Sparse categorical cross entropy: multi-class loss for targets supplied
    # as integer class ids rather than one-hot vectors
    loss={
        'price': 'sparse_categorical_crossentropy',
        'type': 'sparse_categorical_crossentropy',
    },
    # coefficients to weight the loss contributions of different model outputs.
    # The loss value that will be minimized by the model will then be the weighted
    # sum of all individual losses, weighted by the loss_weights coefficients.
    loss_weights={
        'price': 0.5,
        'type': 0.5,
    },
    # metrics to be evaluated by the model during training and testing
    metrics={
        'price': ['SparseCategoricalAccuracy'],
        'type': ['SparseCategoricalAccuracy'],
    },
)

# Display info about model
model.summary()

### Reduce Sum 

In [None]:
# Sample model with fusion layer changed to reduce sum
# Both branches are first projected to 64 features by a Dense layer so that
# they have the same shape; the fusion is then their element-wise sum.
# NOTE: this cell rebinds the notebook-level `model`.
# Input text layer
in_text = keras.Input(batch_shape=(None, max_len)) # [None, max_len]
# Input image layer
in_image = keras.Input(batch_shape=(None, 64, 64, 2)) # [None, 64, 64, 2]

### TEXT PART ###
embedded = keras.layers.Embedding(tokenizer.num_words, 100)(in_text) # [None, max_len, 100]
averaged = tf.reduce_mean(embedded, axis=1) # [None, 100]
# Project to the common fusion width
txt_final = Dense(64)(averaged) # [None, 64]

### IMAGE PART ###
cov = Conv2D(32, (16, 16))(in_image) # [None, 49, 49, 32]
pl = MaxPool2D((16, 16))(cov) # [None, 3, 3, 32]
flattened = Flatten()(pl) # [None, 288]
# Project to the common fusion width
img_final = Dense(64)(flattened) # [None, 64]

### FUSION ###
# The two [None, 64] tensors are stacked along a new leading axis and summed
# over it, i.e. an element-wise sum of text and image features.
fused = tf.reduce_sum([txt_final, img_final], axis=0) # [None, 64]

# multi-objectives (each is a multi-class classification)
p_price = Dense(len_price, activation='softmax', name='price')(fused) # [None, 3]
p_type = Dense(len_type, activation='softmax', name='type')(fused) # [None, 24]

# Build the model
# The dict keys are the same keys used by the fit/predict cells later on.
model = keras.Model(
    inputs={
        'summary': in_text,
        'image': in_image
    },
    outputs={
        'price': p_price,
        'type': p_type,
    },
    name="reducesum_fusion_model"
)

# Compile the model
model.compile(
    optimizer=Adam(),
    # Sparse categorical cross entropy: multi-class loss for targets supplied
    # as integer class ids rather than one-hot vectors
    loss={
        'price': 'sparse_categorical_crossentropy',
        'type': 'sparse_categorical_crossentropy',
    },
    # coefficients to weight the loss contributions of different model outputs.
    # The loss value that will be minimized by the model will then be the weighted
    # sum of all individual losses, weighted by the loss_weights coefficients.
    loss_weights={
        'price': 0.5,
        'type': 0.5,
    },
    # metrics to be evaluated by the model during training and testing
    metrics={
        'price': ['SparseCategoricalAccuracy'],
        'type': ['SparseCategoricalAccuracy'],
    },
)

# Display info about model
model.summary()

## Advanced Tokenizer

Swap the original tokenizer with the sentencepiece tokenizer.

In [None]:
# Template model (as provided by Professor Ding) adapted to the SentencePiece
# tokenizer: same architecture, but the embedding table is sized for the
# SentencePiece vocabulary, because this model is meant to be fed the
# sentence-piece ids (x_*_text_sent_id) rather than the Keras tokenizer ids.
# NOTE: this cell rebinds the notebook-level `model`.
# Input text layer
in_text = keras.Input(batch_shape=(None, max_len)) # [None, max_len]
# Input image layer
in_image = keras.Input(batch_shape=(None, 64, 64, 2)) # [None, 64, 64, 2]

# text part
# Use sp_vocab_size (the SentencePiece vocabulary bound), not vocab_size, which
# is the cap of the Keras tokenizer and would allocate an embedding table far
# larger than the ids this model can ever receive.
embedded = keras.layers.Embedding(sp_vocab_size, 100)(in_text) # [None, max_len, 100]
# Average over the sequence axis to obtain one vector per summary
averaged = tf.reduce_mean(embedded, axis=1) # [None, 100]

# image part
cov = Conv2D(32, (16, 16))(in_image) # [None, 49, 49, 32]
pl = MaxPool2D((16, 16))(cov) # [None, 3, 3, 32]
flattened = Flatten()(pl) # [None, 288]

# fusion: concatenate text and image features
fused = tf.concat([averaged, flattened], axis=-1) # [None, 388]

# multi-objectives (each is a multi-class classification)
p_price = Dense(len_price, activation='softmax', name='price')(fused) # [None, 3]
p_type = Dense(len_type, activation='softmax', name='type')(fused) # [None, 24]

# Build the model
# The dict keys are the same keys used by the fit/predict cells later on.
model = keras.Model(
    inputs={
        'summary': in_text,
        'image': in_image
    },
    outputs={
        'price': p_price,
        'type': p_type,
    },
    name="advancedtokenizer_model"
)

# Compile the model
model.compile(
    optimizer=Adam(),
    # Sparse categorical cross entropy: multi-class loss for targets supplied
    # as integer class ids rather than one-hot vectors
    loss={
        'price': 'sparse_categorical_crossentropy',
        'type': 'sparse_categorical_crossentropy',
    },
    # coefficients to weight the loss contributions of different model outputs.
    # The loss value that will be minimized by the model will then be the weighted
    # sum of all individual losses, weighted by the loss_weights coefficients.
    loss_weights={
        'price': 0.5,
        'type': 0.5,
    },
    # metrics to be evaluated by the model during training and testing
    metrics={
        'price': ['SparseCategoricalAccuracy'],
        'type': ['SparseCategoricalAccuracy'],
    },
)

# Display info about model
model.summary()

## Putting it all together

Combining the model updates from the above sections into one model to tune.

In [None]:
# VERSION 1
# Combined model: Bidirectional GRU + Attention text branch, two-convolution
# image branch, both projected to 64 features and fused by an element-wise mean.
# NOTE: this cell rebinds the notebook-level `model`.
# Input text layer
in_text = keras.Input(batch_shape=(None, max_len)) # [None, max_len]
# Input image layer
in_image = keras.Input(batch_shape=(None, 64, 64, 2)) # [None, 64, 64, 2]

### TEXT PART ###
# Keras tokenizer
embedding = Embedding(tokenizer.num_words, 100)(in_text) # [None, max_len, 100]

# Sentencepiece tokenizer
# (alternative to the line above: swap the two when training on sentence-piece ids)
# embedding = Embedding(sp_vocab_size, 100)(in_text) # [None, max_len, 100]

# Replace reduce_mean layer with BiDirectional GRU
# (two independent recurrent layers over the same embedding: query and value)
query_x = Bidirectional(GRU(32, return_sequences=True))(embedding) # [None, max_len, 64]
value_x = Bidirectional(GRU(32, return_sequences=True))(embedding) # [None, max_len, 64]

# Use Attention to aggregate time dimension
# Only the last time step of the query sequence is used as the (single) query.
# NOTE(review): the preprocessing functions pad with padding='post' and the
# Embedding above is created without masking, so for short summaries this last
# position presumably corresponds to padding - confirm this is intended.
fixed_query_x = tf.expand_dims(query_x[:, -1, :], [1]) # [None, 1, 64]
# Not passing in key since it would be equivalent to value and Attention makes key equivalent by default
query_value_attention_seq = Attention()([fixed_query_x, value_x]) # [None, 1, 64]
# Reducing to 2D for fusion layer
squeezed = tf.squeeze(query_value_attention_seq, [1]) # [None, 64]
# Project to the common fusion width
text_final = Dense(64)(squeezed) # [None, 64]

### IMAGE PART ###
x = Conv2D(32, (16, 16))(in_image) # [None, 49, 49, 32]
x = Conv2D(32, (16, 16))(x) # [None, 34, 34, 32]
pl = MaxPool2D((16, 16))(x) # [None, 2, 2, 32]
flattened = Flatten()(pl) # [None, 128]
# Project to the common fusion width
img_final = Dense(64)(flattened) # [None, 64]

### FUSION ###
# Reduce Mean fusion: element-wise mean of the text and image feature vectors
fused = tf.reduce_mean([text_final, img_final], axis=0) # [None, 64]

# multi-objectives (each is a multi-class classification)
p_price = Dense(len_price, activation='softmax', name='price')(fused) # [None, 3]
p_type = Dense(len_type, activation='softmax', name='type')(fused) # [None, 24]

# Build the model
# The dict keys are the same keys used by the fit/predict cells later on.
model = keras.Model(
    inputs={
        'summary': in_text,
        'image': in_image
    },
    outputs={
        'price': p_price,
        'type': p_type,
    },
    name="keras_alltogether_model1"
    # name="sp_alltogether_model1"
)

# Compile the model
model.compile(
    optimizer=Adam(),
    # Sparse categorical cross entropy: multi-class loss for targets supplied
    # as integer class ids rather than one-hot vectors
    loss={
        'price': 'sparse_categorical_crossentropy',
        'type': 'sparse_categorical_crossentropy',
    },
    # coefficients to weight the loss contributions of different model outputs.
    # The loss value that will be minimized by the model will then be the weighted
    # sum of all individual losses, weighted by the loss_weights coefficients.
    loss_weights={
        'price': 0.5,
        'type': 0.5,
    },
    # metrics to be evaluated by the model during training and testing
    metrics={
        'price': ['SparseCategoricalAccuracy'],
        'type': ['SparseCategoricalAccuracy'],
    },
)

# Display info about model
model.summary()

Model: "keras_alltogether_model1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 100)]        0                                            
__________________________________________________________________________________________________
embedding (Embedding)           (None, 100, 100)     4000000     input_1[0][0]                    
__________________________________________________________________________________________________
bidirectional (Bidirectional)   (None, 100, 64)      25728       embedding[0][0]                  
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 64, 64, 2)]  0                                            
___________________________________________________________________________

In [None]:
# VERSION 2
# Smaller combined model: a shared Bidirectional GRU feeds two narrower
# Bidirectional GRUs (query / value) before Attention; the image branch is a
# single 3x3 convolution with ReLU and 2x2 pooling. Both branches are projected
# to 16 features and fused by an element-wise mean.
# NOTE: this cell rebinds the notebook-level `model`.
# Input text layer
in_text = keras.Input(batch_shape=(None, max_len)) # [None, max_len]
# Input image layer
in_image = keras.Input(batch_shape=(None, 64, 64, 2)) # [None, 64, 64, 2]

### TEXT PART ###
# Keras tokenizer
embedding = Embedding(tokenizer.num_words, 100)(in_text) # [None, max_len, 100]

# Sentencepiece tokenizer
# (alternative to the line above: swap the two when training on sentence-piece ids)
# embedding = Embedding(sp_vocab_size, 100)(in_text) # [None, max_len, 100]

# Replace reduce_mean layer with BiDirectional GRU
# First a shared recurrent layer, then separate query and value layers on top of it
gru = Bidirectional(GRU(16, return_sequences=True))(embedding) # [None, max_len, 32]
query_x = Bidirectional(GRU(8, return_sequences=True))(gru) # [None, max_len, 16]
value_x = Bidirectional(GRU(8, return_sequences=True))(gru) # [None, max_len, 16]

# Use Attention to aggregate time dimension
# Only the last time step of the query sequence is used as the (single) query.
# NOTE(review): the preprocessing functions pad with padding='post' and the
# Embedding above is created without masking, so for short summaries this last
# position presumably corresponds to padding - confirm this is intended.
fixed_query_x = tf.expand_dims(query_x[:, -1, :], [1]) # [None, 1, 16]
# Not passing in key since it would be equivalent to value and Attention makes key equivalent by default
query_value_attention_seq = Attention()([fixed_query_x, value_x]) # [None, 1, 16]
# Reducing to 2D for fusion layer
squeezed = tf.squeeze(query_value_attention_seq, [1]) # [None, 16]
# Project to the common fusion width
text_final = Dense(16)(squeezed) # [None, 16]

### IMAGE PART ###
# 'same' padding keeps the 64x64 spatial size through the convolution
x = Conv2D(8, (3, 3), activation='relu', padding='same')(in_image) # [None, 64, 64, 8]
pl = MaxPool2D((2, 2))(x) # [None, 32, 32, 8]
flattened = Flatten()(pl) # [None, 8192]
# Project to the common fusion width
img_final = Dense(16)(flattened) # [None, 16]

### FUSION ###
# Reduce Mean fusion: element-wise mean of the text and image feature vectors
fused = tf.reduce_mean([text_final, img_final], axis=0) # [None, 16]

# multi-objectives (each is a multi-class classification)
p_price = Dense(len_price, activation='softmax', name='price')(fused) # [None, 3]
p_type = Dense(len_type, activation='softmax', name='type')(fused) # [None, 24]

# Build the model
# The dict keys are the same keys used by the fit/predict cells later on.
model = keras.Model(
    inputs={
        'summary': in_text,
        'image': in_image
    },
    outputs={
        'price': p_price,
        'type': p_type,
    },
    name="keras_alltogether_model2"
    # name="sp_alltogether_model2"
)

# Compile the model
model.compile(
    optimizer=Adam(),
    # Sparse categorical cross entropy: multi-class loss for targets supplied
    # as integer class ids rather than one-hot vectors
    loss={
        'price': 'sparse_categorical_crossentropy',
        'type': 'sparse_categorical_crossentropy',
    },
    # coefficients to weight the loss contributions of different model outputs.
    # The loss value that will be minimized by the model will then be the weighted
    # sum of all individual losses, weighted by the loss_weights coefficients.
    loss_weights={
        'price': 0.5,
        'type': 0.5,
    },
    # metrics to be evaluated by the model during training and testing
    metrics={
        'price': ['SparseCategoricalAccuracy'],
        'type': ['SparseCategoricalAccuracy'],
    },
)

# Display info about model
model.summary()

Model: "keras_alltogether_model2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            [(None, 100)]        0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 100, 100)     4000000     input_3[0][0]                    
__________________________________________________________________________________________________
bidirectional_2 (Bidirectional) (None, 100, 32)      11328       embedding_1[0][0]                
__________________________________________________________________________________________________
bidirectional_3 (Bidirectional) (None, 100, 16)      2016        bidirectional_2[0][0]            
___________________________________________________________________________

In [None]:
# VERSION 3
# Same text branch as VERSION 2; the image branch gains a second 3x3
# convolution and a Dropout(0.3) layer after each convolution for
# regularization. Both branches are projected to 16 features and fused by an
# element-wise mean.
# NOTE: this cell rebinds the notebook-level `model`.
# Input text layer
in_text = keras.Input(batch_shape=(None, max_len)) # [None, max_len]
# Input image layer
in_image = keras.Input(batch_shape=(None, 64, 64, 2)) # [None, 64, 64, 2]

### TEXT PART ###
# Keras tokenizer
embedding = Embedding(tokenizer.num_words, 100)(in_text) # [None, max_len, 100]

# Sentencepiece tokenizer
# (alternative to the line above: swap the two when training on sentence-piece ids)
# embedding = Embedding(sp_vocab_size, 100)(in_text) # [None, max_len, 100]

# Replace reduce_mean layer with BiDirectional GRU
# First a shared recurrent layer, then separate query and value layers on top of it
gru = Bidirectional(GRU(16, return_sequences=True))(embedding) # [None, max_len, 32]
query_x = Bidirectional(GRU(8, return_sequences=True))(gru) # [None, max_len, 16]
value_x = Bidirectional(GRU(8, return_sequences=True))(gru) # [None, max_len, 16]

# Use Attention to aggregate time dimension
# Only the last time step of the query sequence is used as the (single) query.
# NOTE(review): the preprocessing functions pad with padding='post' and the
# Embedding above is created without masking, so for short summaries this last
# position presumably corresponds to padding - confirm this is intended.
fixed_query_x = tf.expand_dims(query_x[:, -1, :], [1]) # [None, 1, 16]
# Not passing in key since it would be equivalent to value and Attention makes key equivalent by default
query_value_attention_seq = Attention()([fixed_query_x, value_x]) # [None, 1, 16]
# Reducing to 2D for fusion layer
squeezed = tf.squeeze(query_value_attention_seq, [1]) # [None, 16]
# Project to the common fusion width
text_final = Dense(16)(squeezed) # [None, 16]

### IMAGE PART ###
# First convolution keeps the spatial size ('same'); the second one uses
# 'valid' padding and therefore shrinks 64 -> 62
x = Conv2D(8, (3, 3), activation='relu', padding='same')(in_image) # [None, 64, 64, 8]
x = Dropout(0.3)(x) # [None, 64, 64, 8]
x = Conv2D(8, (3, 3), activation='relu', padding='valid')(x) # [None, 62, 62, 8]
x = Dropout(0.3)(x) # [None, 62, 62, 8]
pl = MaxPool2D((2, 2))(x) # [None, 31, 31, 8]
flattened = Flatten()(pl) # [None, 7688]
# Project to the common fusion width
img_final = Dense(16)(flattened) # [None, 16]

### FUSION ###
# Reduce Mean fusion: element-wise mean of the text and image feature vectors
fused = tf.reduce_mean([text_final, img_final], axis=0) # [None, 16]

# multi-objectives (each is a multi-class classification)
p_price = Dense(len_price, activation='softmax', name='price')(fused) # [None, 3]
p_type = Dense(len_type, activation='softmax', name='type')(fused) # [None, 24]

# Build the model
# The dict keys are the same keys used by the fit/predict cells later on.
model = keras.Model(
    inputs={
        'summary': in_text,
        'image': in_image
    },
    outputs={
        'price': p_price,
        'type': p_type,
    },
    name="keras_alltogether_model3"
    # name="sp_alltogether_model3"
)

# Compile the model
model.compile(
    optimizer=Adam(),
    # Sparse categorical cross entropy: multi-class loss for targets supplied
    # as integer class ids rather than one-hot vectors
    loss={
        'price': 'sparse_categorical_crossentropy',
        'type': 'sparse_categorical_crossentropy',
    },
    # coefficients to weight the loss contributions of different model outputs.
    # The loss value that will be minimized by the model will then be the weighted
    # sum of all individual losses, weighted by the loss_weights coefficients.
    loss_weights={
        'price': 0.5,
        'type': 0.5,
    },
    # metrics to be evaluated by the model during training and testing
    metrics={
        'price': ['SparseCategoricalAccuracy'],
        'type': ['SparseCategoricalAccuracy'],
    },
)

# Display info about model
model.summary()

Model: "keras_alltogether_model3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_5 (InputLayer)            [(None, 100)]        0                                            
__________________________________________________________________________________________________
embedding_2 (Embedding)         (None, 100, 100)     4000000     input_5[0][0]                    
__________________________________________________________________________________________________
input_6 (InputLayer)            [(None, 64, 64, 2)]  0                                            
__________________________________________________________________________________________________
bidirectional_5 (Bidirectional) (None, 100, 32)      11328       embedding_2[0][0]                
___________________________________________________________________________

# Model Training

## With template tokenizer:

In [None]:
# Fit the current `model` on the training split encoded with the Keras tokenizer.
print("Training: " + model.name)
epochs = 20             # Upper bound on the number of training epochs
batch_size = 32         # Number of training examples to use in one iteration

# Early stopping is used to try to prevent overfitting: training ends after
# 5 epochs without improvement of the validation loss of the price output, and
# restore_best_weights puts the weights back to the epoch with the best
# val_price_loss.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_price_loss',
    patience=5,
    restore_best_weights=True,
)

# Inputs and targets are keyed by the names used when the model was built.
train_inputs = {'summary': x_tr_text_id, 'image': x_tr_image}
train_targets = {'price': y_tr_price, 'type': y_tr_type}
val_inputs = {'summary': x_vl_text_id, 'image': x_vl_image}
val_targets = {'price': y_vl_price, 'type': y_vl_type}

history = model.fit(
    x=train_inputs,
    y=train_targets,
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(val_inputs, val_targets),
    callbacks=[early_stopping],
    verbose=1,
)

In [None]:
# Plot training vs. validation accuracy of the price output per epoch
curves = history.history
plt.plot(curves['price_sparse_categorical_accuracy'], label='train')
plt.plot(curves['val_price_sparse_categorical_accuracy'], label='validation')
plt.title('model accuracy - ' + model.name)
plt.xlabel('epoch')
plt.ylabel('Price Accuracy')
plt.legend(loc='upper left')
plt.show()

## With advanced tokenizer:

In [None]:
# Fit the current `model` on the training split encoded with the SentencePiece tokenizer.
print("Training: " + model.name)
epochs = 20             # Upper bound on the number of training epochs
batch_size = 32         # Number of training examples to use in one iteration

# Early stopping is used to try to prevent overfitting: training ends after
# 5 epochs without improvement of the validation loss of the price output, and
# restore_best_weights puts the weights back to the epoch with the best
# val_price_loss.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_price_loss',
    patience=5,
    restore_best_weights=True,
)

# Inputs and targets are keyed by the names used when the model was built.
train_inputs = {'summary': x_tr_text_sent_id, 'image': x_tr_image}
train_targets = {'price': y_tr_price, 'type': y_tr_type}
val_inputs = {'summary': x_vl_text_sent_id, 'image': x_vl_image}
val_targets = {'price': y_vl_price, 'type': y_vl_type}

history = model.fit(
    x=train_inputs,
    y=train_targets,
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(val_inputs, val_targets),
    callbacks=[early_stopping],
    verbose=1,
)

In [None]:
# Plot training vs. validation accuracy of the price output per epoch
curves = history.history
plt.plot(curves['price_sparse_categorical_accuracy'], label='train')
plt.plot(curves['val_price_sparse_categorical_accuracy'], label='validation')
plt.title('model accuracy - ' + model.name)
plt.xlabel('epoch')
plt.ylabel('Price Accuracy')
plt.legend(loc='upper left')
plt.show()

# Model Testing and Output

In [None]:
# Decode every test image listed in the `image` column of x_test_df with the
# same loader used for the training images (tqdm shows a progress bar).
x_test_image = np.array(list(map(load_image, tqdm(x_test_df['image']))))

HBox(children=(FloatProgress(value=0.0, max=7360.0), HTML(value='')))




## With template tokenizer:

In [None]:
# Preprocess test text data using template tokenizer
# loading summary: (force convert some of the non-string cell to string)
x_test_summary = _preprocess(x_test_df.summary.astype(str))

# Run test data through trained network; the result is a dict keyed by the
# output names of the model ('price', 'type')
y_predict = model.predict(
    {
        'summary': x_test_summary,
        'image': x_test_image
    }
)

# Class probabilities of the price output and the most likely class per row
price_predicted = y_predict['price']
print(price_predicted)
price_category_predicted = np.argmax(price_predicted, axis=1)
print(price_category_predicted)

# Setup file name as "modelname_epochcount_batchsize.csv" (ex "model1_10_16.csv") in my Results folder on Google Drive
# `history.epoch` holds 0-based epoch indices, so its last entry is one less
# than the number of epochs run; its length is the epoch count the file name
# is documented to contain.
results_dir = "drive/MyDrive/cisc873-dm-f20-a4/Results/"
# Create the folder if it does not exist yet so that writing the CSV cannot
# fail after the (expensive) prediction step.
os.makedirs(results_dir, exist_ok=True)
output_name = results_dir + model.name + "_" + str(len(history.epoch)) + "_" + str(batch_size) + ".csv"

# One row per test listing: its id and the predicted price category
pd.DataFrame(
    {'id': x_test_df.id,
     'price': price_category_predicted}).to_csv(output_name, index=False)

## With advanced tokenizer:

In [None]:
# Using sentencepiece tokenizer
# loading summary: (force convert some of the non-string cell to string)
# Convert test text from Series to list for sp encoding
x_test_sent_summary = sentence_preprocess(x_test_df.summary.astype(str).tolist())

# Run test data through trained network; the result is a dict keyed by the
# output names of the model ('price', 'type')
y_predict = model.predict(
    {
        'summary': x_test_sent_summary,
        'image': x_test_image
    }
)

# Class probabilities of the price output and the most likely class per row
price_predicted = y_predict['price']
print(price_predicted)
price_category_predicted = np.argmax(price_predicted, axis=1)
print(price_category_predicted)

# File name: "modelname_sent_token_epochcount_batchsize.csv" in the Results folder.
# `history.epoch` holds 0-based epoch indices, so its last entry is one less
# than the number of epochs run; its length is the actual epoch count.
results_dir = "drive/MyDrive/cisc873-dm-f20-a4/Results/"
# Create the folder if it does not exist yet so that writing the CSV cannot
# fail after the (expensive) prediction step.
os.makedirs(results_dir, exist_ok=True)
output_name = results_dir + model.name + "_sent_token_" + str(len(history.epoch)) + "_" + str(batch_size) + ".csv"

# One row per test listing: its id and the predicted price category
pd.DataFrame(
    {'id': x_test_df.id,
     'price': price_category_predicted}).to_csv(output_name, index=False)