# Installing and Importing Dependencies

In [None]:
!pip install easy-vqa

In [None]:
import numpy as np
import pandas as pd
import os
import tensorflow as tf
from tensorflow import keras
from easy_vqa import get_train_questions, get_test_questions
from easy_vqa import get_train_image_paths, get_test_image_paths
from matplotlib import pyplot as plt

# Data Loading and Preprocessing

In [None]:
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.utils import image_dataset_from_directory

In [None]:
# Load the questions, answers and image ids of each split.
# Each call returns three sequences that are unpacked positionally here and
# indexed in parallel further down (sample i uses element i of all three).
train_questions, train_answers, train_image_ids = get_train_questions()
test_questions, test_answers, test_image_ids = get_test_questions()

In [None]:
# Peek at the first training sample: its question, answer and image id.
print(train_questions[0])
print(train_answers[0])
print(train_image_ids[0])

In [None]:
# Image file paths for each split. They are consumed below as mappings keyed
# by image id (read_images iterates them with .items()).
train_image_paths = get_train_image_paths()
test_image_paths = get_test_image_paths()

print(train_image_paths[1])

In [None]:
# Load one training image from disk and show it as the cell output.
img = load_img(train_image_paths[1])
img

## Normalizing image

In [None]:
def normalize_img(image_path):
    """Load the image at *image_path* and return its pixel array divided by 255.

    The file is read with ``load_img`` and converted with ``img_to_array``;
    the resulting array is then scaled by ``1 / 255`` element-wise.
    """
    loaded = load_img(image_path)
    pixels = img_to_array(loaded)
    return pixels / 255.0

In [None]:
def read_images(paths):
  """Load and normalize every image referenced by *paths*.

  paths maps an image ID to the path of that image on disk. The result maps
  the same IDs to the array produced by ``normalize_img`` for that path.
  """
  return {
    image_id: normalize_img(image_path)
    for image_id, image_path in paths.items()
  }


In [None]:
# Eagerly load and normalize every image of both splits, keyed by image id.
train_imgs = read_images(train_image_paths)
test_imgs = read_images(test_image_paths)

In [None]:
train_imgs[0]

In [None]:
train_imgs[0].shape  # 64 x 64 x 3 image

In [None]:
# Compare the number of question samples with the number of loaded images.
print(len(train_image_ids))
print(len(train_imgs))

In [None]:
# Build the image inputs in question order: for every question, look up the
# already-normalized image that belongs to its image id.

xtrain = np.asarray([train_imgs[image_id] for image_id in train_image_ids])
xtest = np.asarray([test_imgs[image_id] for image_id in test_image_ids])

In [None]:
xtrain.shape

In [None]:
type(xtrain)

## Converting answers into one-hot encoded vectors

In [None]:
from easy_vqa import get_answers
from tensorflow.keras.utils import to_categorical

In [None]:
# All possible answers. An answer's position in this list is used below as
# its class index.
answers_list = get_answers()
answers_list

In [None]:
# Replace every answer string by its class index (its position in answers_list).
trainA = [answers_list.index(answer) for answer in train_answers]
testA = [answers_list.index(answer) for answer in test_answers]

In [None]:
# Sanity check: a class index next to the answer string it encodes.
print(trainA[100])
print(train_answers[100])

In [None]:
# One-hot encode the class indices.
# num_classes is passed explicitly: without it to_categorical infers the width
# from the largest label it is given, so a split that never contains the
# highest-indexed answer would produce a narrower matrix than the other split
# (and than the classifier output).
ytrain = to_categorical(trainA, num_classes=len(answers_list))
ytest = to_categorical(testA, num_classes=len(answers_list))

print(ytrain[100])

# Building a Model

In [None]:
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, BatchNormalization, Dense, Dropout

## Image model

In [None]:
# Image branch: a small CNN that turns a 64 x 64 x 3 input into a 32-unit
# feature vector (img_op).
ip = Input(shape=(64, 64, 3))

# Two convolutional stages that differ only in their filter count:
# conv (3x3, same padding, ReLU) -> batch norm -> 2x2 max pool -> dropout 0.3.
x = ip
for n_filters in (32, 64):
    x = Conv2D(n_filters, kernel_size=(3, 3), padding='same', activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Dropout(0.3)(x)

x = Flatten()(x)

img_op = Dense(32, activation='relu')(x)

## Tokenizing texts

In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [None]:
# Collect the distinct whitespace-separated words of the training questions.
train_unique_words = set()
for question in train_questions:
    train_unique_words.update(question.split())

print(len(train_unique_words))

In [None]:
train_unique_words

In [None]:
# Collect the distinct whitespace-separated words of the test questions.
# The words must come from test_questions; reading train_questions here would
# only re-count a prefix of the training questions and say nothing about the
# test vocabulary.
test_unique_words = set()
for question in test_questions:
    test_unique_words.update(question.split())

print(len(test_unique_words))

In [None]:
test_unique_words  # inspect and compare with train_unique_words to check whether both splits share a vocabulary

In [None]:
# Build the word -> integer index vocabulary from the training questions only;
# '<OOV>' is registered as the out-of-vocabulary token.
# NOTE(review): num_words = 35 limits how many word indices the tokenizer will
# emit — confirm it is larger than the actual vocabulary size.
tokenizer = Tokenizer(num_words = 35, oov_token = '<OOV>')
tokenizer.fit_on_texts(train_questions)

In [None]:
# The fitted vocabulary; its size is used below to derive vocab_size.
word_index = tokenizer.word_index
word_index

In [None]:
# Encode every question as a sequence of integer word indices using the
# tokenizer fitted on the training questions.
train_seq = tokenizer.texts_to_sequences(train_questions)
test_seq = tokenizer.texts_to_sequences(test_questions)

In [None]:
print(train_seq[0])
print(train_questions[0])

In [None]:
# Pad at the end ('post') so that every question has the same length.
# The test split is padded to the training length explicitly: without maxlen,
# pad_sequences uses the longest sequence of whatever it receives, so the two
# arrays could end up with different widths and the test array would not fit
# the model's text input. Over-long test questions are cut at the end so their
# leading words are kept.
padded_train_seq = pad_sequences(train_seq, padding = 'post')
padded_test_seq = pad_sequences(
    test_seq,
    maxlen = padded_train_seq.shape[1],
    padding = 'post',
    truncating = 'post',
)

In [None]:
len(padded_train_seq[69]) # length of the input sequence

In [None]:
padded_train_seq.shape

In [None]:
type(padded_train_seq)

In [None]:
# Size of the embedding table: one entry per known word plus one extra.
# NOTE(review): presumably the extra entry is index 0, the value pad_sequences
# fills with — confirm.
vocab_size = len(word_index) + 1
vocab_size

## Text model

In [None]:
from tensorflow.keras.layers import Embedding, LSTM, Dense, Input
from tensorflow.keras.layers import Multiply

In [None]:
# Text branch: word indices -> 64-dim embeddings -> LSTM(64) -> 32-unit
# feature vector (t_op).
# The input length is read from the padded training sequences instead of being
# hard-coded, so it always matches the arrays passed to model.fit.
t_input = Input(shape = (padded_train_seq.shape[1], ))
y = Embedding(vocab_size, 64)(t_input)
y = LSTM(64)(y)
t_op = Dense(32, activation = 'relu')(y)

In [None]:
# Fuse the image and text features by element-wise multiplication, then
# classify. The softmax width follows the one-hot label width instead of a
# hard-coded 13, so it always matches the targets passed to model.fit.
output = Multiply()([img_op, t_op])
output = Dense(32, activation = 'relu')(output)
output = Dense(ytrain.shape[1], activation = 'softmax')(output)

## Combining the image and text model

In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

In [None]:
# Two-input model: the image tensor and the padded question sequence go in,
# the softmax over answers comes out.
model = Model(inputs = [ip, t_input], outputs = output)

In [None]:
# Categorical cross-entropy matches the one-hot encoded targets (ytrain / ytest).
model.compile(optimizer = Adam(learning_rate = 5e-4), loss = 'categorical_crossentropy', metrics = ['accuracy'])

In [None]:
model.summary()

## Training the Model

In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

# Save the model to 'model.h5', keeping only the best version seen so far.
checkpoint = ModelCheckpoint(filepath='model.h5', save_best_only=True)

# Scale the learning rate by 0.2 once val_loss has not improved for 3 epochs,
# never going below 1e-5.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-5)

callbacks = [checkpoint, reduce_lr]

In [None]:
# Train for up to 30 epochs on (image, question) pairs.
# NOTE(review): the test split is passed as validation data, so the val_loss
# driven callbacks (LR reduction, best-model checkpoint) are tuned on the test
# set — the reported test metrics are therefore not from held-out data.
history = model.fit(
    [xtrain, padded_train_seq],
    ytrain,
    validation_data=([xtest, padded_test_seq], ytest),
    batch_size = 64,
    shuffle = True,
    epochs=30,
    callbacks = callbacks,
)

In [None]:
# Show which metrics were recorded during training.
print(history.history.keys())

# One figure per metric: the training curve and the validation curve
# (recorded under the 'val_' prefix) against the epoch number.
for metric in ('accuracy', 'loss'):
    plt.plot(history.history[metric])
    plt.plot(history.history['val_' + metric])
    plt.title('model ' + metric)
    plt.ylabel(metric)
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()