In [1]:
import os
import glob
import pandas as pd
import numpy as np
from sklearn.utils import shuffle
import json
from keras.preprocessing import sequence, image
from keras.preprocessing.image import array_to_img, save_img, img_to_array
from sklearn.preprocessing import MultiLabelBinarizer

from keras.layers import (
    Flatten,
    Dense,
    Input,
    Activation,
    BatchNormalization,
    Conv2D,
    MaxPool2D,
    Dropout,
    UpSampling2D,
    Lambda,
)

from keras.layers import ReLU, Reshape, Conv2DTranspose, Concatenate, Multiply
from keras.models import Model

from keras.optimizers import Adam
from keras.losses import binary_crossentropy, categorical_crossentropy
from keras import backend as K

from keras.callbacks import ModelCheckpoint, EarlyStopping
from collections import Counter

from evaluation_metrics.multilabel.example_based import (
    hamming_loss,
    example_based_accuracy,
    example_based_precision,
    example_based_recall,
)

from evaluation_metrics.multilabel.label_based import (
    accuracy_macro,
    precision_macro,
    recall_macro,
    accuracy_micro,
    precision_micro,
    recall_micro,
)

from evaluation_metrics.multilabel.alpha_score import alpha_score
from data_loading.load_data import get_tile_data

%load_ext nb_black

<IPython.core.display.Javascript object>

In [4]:
##loading train and testing data

data_directory = "../data/context_data/"
json_directory = "../data/json_files_trimmed_features/"
data = get_tile_data(data_directory, json_directory)
print("\nThe size of total data is", data.shape)
data = shuffle(data)

# split into train-test
from sklearn.model_selection import train_test_split

train_data, test_data = train_test_split(data, test_size=0.10, random_state=42)

print("\nThe size of the train data is ", train_data.shape)
print("The size of the test data is ", test_data.shape)

Games detected in the parent folder ['lode_runner', 'kid_icarus', 'megaman', 'smb', 'loz']
Current Game lode_runner
Reading mappings
Json File Loaded
Reading Sprite Data From ../data/context_data/lode_runner
Current Game kid_icarus
Reading mappings
Json File Loaded
Reading Sprite Data From ../data/context_data/kid_icarus
Current Game megaman
Reading mappings
Json File Loaded
Reading Sprite Data From ../data/context_data/megaman
Current Game smb
Reading mappings
Json File Loaded
Reading Sprite Data From ../data/context_data/smb
Current Game loz
Reading mappings
Json File Loaded
Reading Sprite Data From ../data/context_data/loz

The size of total data is (25394, 5)

The size of the train data is  (22854, 5)
The size of the test data is  (2540, 5)


<IPython.core.display.Javascript object>

In [3]:
# Feature Dictionary
print("Building feature Dictionary..")
mlb = MultiLabelBinarizer()
combined_features = np.concatenate(
    [train_data["features"], test_data["features"]], axis=0
)
mlb_model = mlb.fit(combined_features)
total_features = len(mlb_model.classes_)
print("The feature dictionary has size", total_features)
print("Printing Feature classes")
display(mlb_model.classes_)

Building feature Dictionary..
The feature dictionary has size 13
Printing Feature classes


array(['block', 'breakable', 'climbable', 'collectable', 'element',
       'empty', 'hazard', 'moving', 'openable', 'passable', 'pipe',
       'solid', 'wall'], dtype=object)

<IPython.core.display.Javascript object>

In [4]:
# Build Input Output Training Batches
print("Building Training Batches")

"""Note : Add Generators"""
train_image_batch = []
for train_path in train_data["image_path"]:
    tile = image.load_img(train_path, target_size=(48, 48))
    tile_sprite = image.img_to_array(tile)
    train_image_batch.append(tile_sprite)
train_image_batch = np.array(train_image_batch)
train_text_batch = []
for i in range(len(train_data["features"])):
    text_ = mlb.transform(train_data["features"][i : i + 1])
    train_text_batch.append(text_)
train_text_batch = np.array(train_text_batch).reshape(
    train_data.shape[0], total_features
)

output_image_batch = []
for i in range(len(train_image_batch)):
    current_image = train_image_batch[i]
    current_image_centre = train_image_batch[i][16 : 16 + 16, 16 : 16 + 16, :]
    output_image_batch.append(current_image_centre)
output_image_batch = np.array(output_image_batch)
output_text_batch = []
for i in range(len(train_text_batch)):
    current_text = train_text_batch[i]
    output_text_batch.append(current_text)
output_text_batch = np.array(output_text_batch)
print("Training Data Ready")
print("Train Image batch shape", train_image_batch.shape)
print("Train Text batch shape", train_text_batch.shape)
print("Output Image batch shape", output_image_batch.shape)
print("Output Text batch shape", output_text_batch.shape)


# Build Input Output Test Batches
print("Building Testing Batches")
"""Note : Add Generators"""
test_image_batch = []
for test_path in test_data["image_path"]:
    tile = image.load_img(test_path, target_size=(48, 48))
    tile_sprite = image.img_to_array(tile)
    test_image_batch.append(tile_sprite)
test_image_batch = np.array(test_image_batch)
test_text_batch = []
for i in range(len(test_data["features"])):
    text_ = mlb.transform(test_data["features"][i : i + 1])
    test_text_batch.append(text_)
test_text_batch = np.array(test_text_batch).reshape(test_data.shape[0], total_features)
print("\n\nTesting Data Ready")
print("Train Image batch shape", test_image_batch.shape)
print("Train Text batch shape", test_text_batch.shape)

Building Training Batches
Training Data Ready
Train Image batch shape (22854, 48, 48, 3)
Train Text batch shape (22854, 13)


Output Image batch shape (22854, 16, 16, 3)
Output Text batch shape (22854, 13)
Building Testing Batches


Testing Data Ready
Train Image batch shape (2540, 48, 48, 3)
Train Text batch shape (2540, 13)


<IPython.core.display.Javascript object>

In [12]:
# model definition

latent_dim = 128
batch_size = 1

# image encoder
image_encoder_input = Input(shape=(48, 48, 3), name="image_input")

image_encoder_conv_layer1 = Conv2D(
    32, strides=3, kernel_size=(3, 3), name="iencode_conv1"
)(image_encoder_input)
image_encoder_norm_layer1 = BatchNormalization()(image_encoder_conv_layer1)
image_encoder_actv_layer1 = ReLU()(image_encoder_norm_layer1)

image_encoder_conv_layer2 = Conv2D(32, (3, 3), padding="same", name="iencode_conv2")(
    image_encoder_actv_layer1
)
image_encoder_norm_layer2 = BatchNormalization()(image_encoder_conv_layer2)
image_encoder_actv_layer2 = ReLU()(image_encoder_norm_layer2)

image_encoder_conv_layer3 = Conv2D(16, (3, 3), padding="same", name="iencode_conv3")(
    image_encoder_actv_layer2
)
image_encoder_norm_layer3 = BatchNormalization()(image_encoder_conv_layer3)
image_encoder_actv_layer3 = ReLU()(image_encoder_norm_layer3)

image_shape_before_flatten = K.int_shape(image_encoder_actv_layer3)[1:]
image_flatten = Flatten(name="image_flatten_layer")(image_encoder_actv_layer3)


# text encoder
text_encoder_input = Input(shape=(13,))

text_encoder_dense_layer1 = Dense(32, activation="tanh", name="tencode_dense1")(
    text_encoder_input
)
text_encoder_dense_layer2 = Dense(16, activation="tanh", name="tencode_dense2")(
    text_encoder_dense_layer1
)
text_shape_before_concat = K.int_shape(text_encoder_dense_layer2)[1:]

# image-text concatenation
image_text_concat = Concatenate(name="image_text_concatenation")(
    [image_flatten, text_encoder_dense_layer2]
)

image_text_concat = Dense(256, activation="tanh", name="embedding_dense_1")(
    image_text_concat
)


##
encoding_model = Model(
    inputs=[image_encoder_input, text_encoder_input], outputs=image_text_concat
)

# decoder for image

# decoder_input=Input(shape=(512,))

image_y = Dense(units=np.prod(image_shape_before_flatten), name="image_dense")(
    image_text_concat
)
image_y = Reshape(target_shape=image_shape_before_flatten, name="image_reshape")(
    image_y
)

image_decoder_convt_layer1 = Conv2DTranspose(
    16, (3, 3), padding="same", name="idecode_conv1"
)(image_y)
image_decoder_norm_layer1 = BatchNormalization(name="idecode_norm1")(
    image_decoder_convt_layer1
)
image_decoder_actv_layer1 = ReLU(name="idecode_relu1")(image_decoder_norm_layer1)


image_decoder_convt_layer2 = Conv2DTranspose(
    32, (3, 3), padding="same", name="idecode_conv2"
)(image_decoder_actv_layer1)
image_decoder_norm_layer2 = BatchNormalization(name="idecode_norm2")(
    image_decoder_convt_layer2
)
image_decoder_actv_layer2 = ReLU(name="idecode_relu2")(image_decoder_norm_layer2)

image_decoder_output = Conv2DTranspose(
    3, (3, 3), padding="same", name="image_output_layer"
)(image_decoder_actv_layer2)


# decoder for text

text_decoder_dense_layer1 = Dense(16, activation="tanh", name="tdecode_dense1")(
    image_text_concat
)
text_reshape = Reshape(target_shape=text_shape_before_concat, name="text_reshape")(
    text_decoder_dense_layer1
)
text_decoder_dense_layer2 = Dense(32, activation="tanh", name="tdecode_dense2")(
    text_reshape
)

text_decoder_output = Dense(13, activation="sigmoid", name="text_output_layer")(
    text_decoder_dense_layer2
)


# decoding_model=Model(inputs=[decoder_input],outputs=[image_decoder_output,text_decoder_output])


ae_sep_output = Model(
    [image_encoder_input, text_encoder_input],
    [image_decoder_output, text_decoder_output],
)

<IPython.core.display.Javascript object>

In [13]:
ae_sep_output.get_layer("tdecode_dense1")
print(ae_sep_output.summary())

Model: "model_5"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
image_input (InputLayer)        [(None, 48, 48, 3)]  0                                            
__________________________________________________________________________________________________
iencode_conv1 (Conv2D)          (None, 16, 16, 32)   896         image_input[0][0]                
__________________________________________________________________________________________________
batch_normalization_6 (BatchNor (None, 16, 16, 32)   128         iencode_conv1[0][0]              
__________________________________________________________________________________________________
re_lu_6 (ReLU)                  (None, 16, 16, 32)   0           batch_normalization_6[0][0]      
____________________________________________________________________________________________

<IPython.core.display.Javascript object>

In [14]:
from sklearn.feature_extraction.text import TfidfVectorizer
from ast import literal_eval
import tensorflow as tf

vectorizer = TfidfVectorizer(stop_words=None)
train_data_copy = train_data
train_data_copy["features"] = train_data_copy.features.apply(lambda x: str(x))
vectors = vectorizer.fit_transform(train_data_copy["features"])

idf = vectorizer.idf_

# build the weight dictionary
new_dict = {}
for c in mlb.classes_:
    if c in vectorizer.vocabulary_.keys():
        new_dict[c] = idf[vectorizer.vocabulary_[c]]
    else:
        new_dict[c] = np.max(idf)
print("\n Printing the TF-IDF for the labels\n\n", new_dict)


weight_freq = {k: v / sum(new_dict.values()) for k, v in new_dict.items()}

print("\nPrinting the weight normalised\n\n")
print(weight_freq)

weight_vector = [v * 1000 for v in new_dict.values()]

tensor_from_list = tf.convert_to_tensor(weight_vector)
tensor_from_list = K.cast(tensor_from_list, "float32")

print("Weight Vector")
print(weight_vector)


 Printing the TF-IDF for the labels

 {'block': 5.102030995146257, 'breakable': 2.6009420547751505, 'climbable': 2.770503717781291, 'collectable': 4.106430424814218, 'element': 5.581604075408143, 'empty': 2.0879495829240695, 'hazard': 4.798600565726337, 'moving': 6.177112786404172, 'openable': 6.382964840608321, 'passable': 1.6016029582018123, 'pipe': 6.441805340631255, 'solid': 1.830392057132616, 'wall': 5.243911582381701}

Printing the weight normalised


{'block': 0.0932289019467309, 'breakable': 0.04752675395826563, 'climbable': 0.05062513726275023, 'collectable': 0.07503639232891463, 'element': 0.10199209286394827, 'empty': 0.038152893841948456, 'hazard': 0.08768434806633386, 'moving': 0.11287376396290665, 'openable': 0.11663527795511663, 'passable': 0.029265930624459206, 'pipe': 0.11771046452539688, 'solid': 0.033446570940245716, 'wall': 0.09582147172298304}
Weight Vector
[5102.030995146257, 2600.9420547751506, 2770.503717781291, 4106.430424814218, 5581.604075408143, 2087.949582

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys


<IPython.core.display.Javascript object>

In [15]:
def loss_func1(y_true, y_pred):
    # tile sprite loss
    r_loss=K.mean(K.square(y_true - y_pred), axis=[1,2,3])
    loss  =  r_loss
    return loss

    
def loss_func4(y_true,y_pred):
    # multilabel text weighted bce
    y_true = K.cast(y_true, 'float32')
    y_pred = K.cast(y_pred, 'float32')
    bce_array=-(y_true*K.log(y_pred)+(1-y_true)*K.log(1-y_pred))
    weighted_array=bce_array*tensor_from_list
    bce_sum=K.sum(weighted_array,axis=1)
    loss=bce_sum/13.0
    return loss

    
losses ={'image_output_layer':loss_func1,
          'text_output_layer':loss_func4,
}


#tweak loss weights
lossWeights={'image_output_layer':0.1,
          'text_output_layer':0.9  
        }


def check_nonzero(y_true,y_pred):
    """
    Custom metric
    Returns sum of all embeddings
    """
    return(K.sum(K.cast(y_pred > 0.4, 'int32')))

accuracy={
    'image_output_layer':loss_func1,
    'text_output_layer': check_nonzero
}


<IPython.core.display.Javascript object>

In [16]:
from keras import metrics

ae_sep_output.compile(
    optimizer="adam", loss=losses, loss_weights=lossWeights, metrics=accuracy
)

<IPython.core.display.Javascript object>

In [17]:
# with loss func 2 that is by using in built cross-entropy loss

es = EarlyStopping(
    monitor="val_text_output_layer_loss", mode="min", verbose=1, patience=2
)

ae_history = ae_sep_output.fit(
    [train_image_batch, train_text_batch],
    [output_image_batch, output_text_batch],
    epochs=10,
    batch_size=25,
    shuffle=True,
    validation_split=0.2,
    callbacks=[es],
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<IPython.core.display.Javascript object>

In [18]:
# build the inference model

decoder_input = Input(shape=(256,))

d_dense = ae_sep_output.get_layer("image_dense")(decoder_input)
d_reshape = ae_sep_output.get_layer("image_reshape")(d_dense)
d_conv1 = ae_sep_output.get_layer("idecode_conv1")(d_reshape)
d_norm1 = ae_sep_output.get_layer("idecode_norm1")(d_conv1)
d_relu1 = ae_sep_output.get_layer("idecode_relu1")(d_norm1)
d_conv2 = ae_sep_output.get_layer("idecode_conv2")(d_relu1)
d_norm2 = ae_sep_output.get_layer("idecode_norm2")(d_conv2)
d_relu2 = ae_sep_output.get_layer("idecode_relu2")(d_norm2)
d_image_output = ae_sep_output.get_layer("image_output_layer")(d_relu2)

t_dense = ae_sep_output.get_layer("tdecode_dense1")(decoder_input)
t_reshape = ae_sep_output.get_layer("text_reshape")(t_dense)
t_dense2 = ae_sep_output.get_layer("tdecode_dense2")(t_reshape)
d_text_output = ae_sep_output.get_layer("text_output_layer")(t_dense2)

decoder_model = Model(inputs=[decoder_input], outputs=[d_image_output, d_text_output])

decoder_model.summary()

Model: "model_6"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            [(None, 256)]        0                                            
__________________________________________________________________________________________________
image_dense (Dense)             (None, 4096)         1052672     input_4[0][0]                    
__________________________________________________________________________________________________
image_reshape (Reshape)         (None, 16, 16, 16)   0           image_dense[1][0]                
__________________________________________________________________________________________________
idecode_conv1 (Conv2DTranspose) (None, 16, 16, 16)   2320        image_reshape[1][0]              
____________________________________________________________________________________________

<IPython.core.display.Javascript object>

In [34]:
## save model weights, multilabel binarizer

import pickle

# saving
with open("model_tokenizer.pickle", "wb") as handle:
    pickle.dump(mlb, handle, protocol=pickle.HIGHEST_PROTOCOL)

## saving the entire architecture model
model_json = ae_sep_output.to_json()
with open("autoencoder_model.json", "w") as json_file:
    json_file.write(model_json)
# serialize weights to HDF5
ae_sep_output.save_weights("autoencoder_model.h5")
print("Saved Entire Model to disk")

## saving the encoder part
model_json = encoding_model.to_json()
with open("encoder_model.json", "w") as json_file:
    json_file.write(model_json)
# serialize weights to HDF5
encoding_model.save_weights("encoder_model.h5")
print("Saved Encoder Model to disk")
## saving the encoder part

model_json = decoder_model.to_json()
with open("decoder_model.json", "w") as json_file:
    json_file.write(model_json)
# serialize weights to HDF5
decoder_model.save_weights("decoder_model.h5")
print("Saved Decoder Model to disk")

Saved Entire Model to disk
Saved Encoder Model to disk
Saved Decoder Model to disk


<IPython.core.display.Javascript object>

In [24]:
predicted_image, predicted_text = ae_sep_output.predict(
    [test_image_batch, test_text_batch]
)
y_pred = [np.where(text > 0.5, 1, 0) for text in predicted_text]
y_pred = np.array(y_pred)
print("Predicted Y is Ready. Shape : ", y_pred.shape)

y_true = test_text_batch
y_true = np.array(y_true)
print("True Y is Ready. Shape :", y_true.shape)

Predicted Y is Ready. Shape :  (2540, 13)
True Y is Ready. Shape : (2540, 13)


<IPython.core.display.Javascript object>

In [25]:
true_image = []
for i in range(len(test_image_batch)):
    current_image = test_image_batch[i]
    current_image_centre = test_image_batch[i][16 : 16 + 16, 16 : 16 + 16, :]
    true_image.append(current_image_centre)
true_image = np.array(true_image)
print("Predicted Array shape ", predicted_image.shape)
print("True Array shape ", true_image.shape)

mse_dist = []
for idx in range(len(true_image)):
    y_true_image = true_image[idx]
    y_true_image = y_true_image.reshape(16, 16, 3)

    y_pred_image = predicted_image[idx]
    y_pred_image = y_pred_image.reshape(16, 16, 3)

    mse_dist.append(np.mean(np.subtract(y_true_image, y_pred_image) ** 2))

print("Mean MSE", np.mean(mse_dist))
print("Median MSE", np.median(mse_dist))

Predicted Array shape  (2540, 16, 16, 3)
True Array shape  (2540, 16, 16, 3)
Mean MSE 249.02893
Median MSE 35.89482


<IPython.core.display.Javascript object>

In [26]:
def valid_divide(num, den):
    count = 0
    result = {}
    for idx in range(len(num)):
        if num[idx] == den[idx] == 0:
            continue
        elif num[idx] != 0 and den[idx] != 0:
            result[idx] = num[idx] / den[idx]
            count += 1
        elif num[idx] != 0 and den[idx] == 0 or num[idx] == 0 and den[idx] != 0:
            count += 1
            result[idx] = 0.0
    return result, count

<IPython.core.display.Javascript object>

In [33]:
print("\nMacro Label Based Precision", precision_macro(y_true, y_pred))
print("Macro Label Based Recall", recall_macro(y_true, y_pred))
print("Macro Label Based Accuracy", accuracy_macro(y_true, y_pred))

print("\nMicro Label Based Precision", precision_micro(y_true, y_pred))
print("Micro Label Based Recall", recall_micro(y_true, y_pred))
print("Micro Label Based Accuracy", accuracy_micro(y_true, y_pred))

print("\nExample Based Precision", example_based_precision(y_true, y_pred))
print("Example Based Recall", example_based_recall(y_true, y_pred))
print("Example Based Accuracy", example_based_accuracy(y_true, y_pred))


Macro Label Based Precision 0.999739654499619
Macro Label Based Recall 0.993341686330762
Macro Label Based Accuracy 0.9930819063263079

Micro Label Based Precision 0.9991300565463245
Micro Label Based Recall 0.9978279756733276
Micro Label Based Accuracy 0.9969618055555556

Example Based Precision 0.9988188976377953
Example Based Recall 0.9983595800524934
Example Based Accuracy 0.9980971128608923


<IPython.core.display.Javascript object>