<a href="https://colab.research.google.com/github/rllevy/MMAI-Bae/blob/main/model_lstm_vgg19.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install torch torchvision transformers
!pip install tqdm



In [None]:
# Basic data handling libraries
import pandas as pd
import numpy as np
import os
from os import path
import json
import re
import random
import datetime
import time
import math
import pickle
import joblib
from tqdm import tqdm

# Image processing libraries
from PIL import Image
import cv2
import imgaug.augmenters as iaa

# Visualization libraries
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

# Machine Learning and Deep Learning libraries
from sklearn import preprocessing
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

# TensorFlow and Keras libraries
import tensorflow as tf
from tensorflow import keras
from keras.layers import (Input, Dense, LSTM, Flatten, Dropout, concatenate,
                          Conv1D, MaxPooling2D, Activation, BatchNormalization, Embedding)
from tensorflow.keras import initializers, regularizers
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing import text, sequence
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.callbacks import ModelCheckpoint
import tensorflow_hub as hub


from tensorflow.keras.preprocessing.text import Tokenizer


In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive so that the
# Drive paths used throughout the notebook resolve.
from google.colab import drive
drive.mount('/content/drive')
# Folder on the mounted Drive, kept as a base path for later cells.
data_path = '/content/drive/My Drive/Colab Notebooks/'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Load data
def load_data(data_file, feature):
    """Load one top-level section of a JSON file into a flat DataFrame.

    Args:
        data_file: Path of the JSON file to read.
        feature: Top-level key in the JSON document whose value (a list of
            records) is flattened with ``pd.json_normalize``.

    Returns:
        A ``pandas.DataFrame`` with one row per record under ``feature``.

    Raises:
        FileNotFoundError: If ``data_file`` does not exist. Raised explicitly
            so a missing file is reported as such instead of surfacing as an
            unbound-variable error at the ``return``.
        KeyError: If ``feature`` is not a key of the loaded document.
    """
    if not os.path.exists(data_file):
        raise FileNotFoundError(f"File not found: {data_file}")

    print("File found:", data_file)
    # JSON is UTF-8 by specification; do not rely on the platform default.
    with open(data_file, 'r', encoding='utf-8') as f:
        payload = json.load(f)
    data = pd.json_normalize(payload[feature])
    print("Data loaded successfully")
    return data
# Every VQA abstract-scene JSON file lives in this Drive folder. The paths are
# joined against it directly: os.path.join() discards all earlier components
# when a later one is absolute, so joining an absolute file path onto
# `data_path` never actually used `data_path`.
vqa_data_dir = '/content/drive/MyDrive/VQA assignment'

# Multiple-choice questions (records under the 'questions' key).
train_questions_file = os.path.join(vqa_data_dir, 'MultipleChoice_abstract_v002_train2015_questions.json')
train_questions_feature = 'questions'
train_questions = load_data(train_questions_file, train_questions_feature)

# Answer annotations (records under the 'annotations' key).
train_annotations_file = os.path.join(vqa_data_dir, 'abstract_v002_train2015_annotations.json')
train_annotations_feature = 'annotations'
train_annotations = load_data(train_annotations_file, train_annotations_feature)

# Image metadata stored in the captions file (records under the 'images' key).
train_captions_file = os.path.join(vqa_data_dir, 'captions_abstract_v002_train2015.json')
train_captions_feature = 'images'
train_captions = load_data(train_captions_file, train_captions_feature)

# Open-ended questions (records under the 'questions' key).
train_OpenEnded_file = os.path.join(vqa_data_dir, 'OpenEnded_abstract_v002_train2015_questions.json')
train_OpenEnded_feature = 'questions'
train_OpenEnded = load_data(train_OpenEnded_file, train_OpenEnded_feature)

File found: /content/drive/MyDrive/VQA assignment/MultipleChoice_abstract_v002_train2015_questions.json
Data loaded successfully
File found: /content/drive/MyDrive/VQA assignment/abstract_v002_train2015_annotations.json
Data loaded successfully
File found: /content/drive/MyDrive/VQA assignment/captions_abstract_v002_train2015.json
Data loaded successfully
File found: /content/drive/MyDrive/VQA assignment/OpenEnded_abstract_v002_train2015_questions.json
Data loaded successfully


In [None]:
# Attach each annotation to its question; rows are matched on both the image
# and the question identifier (inner join, pandas' default).
df_train = pd.merge(
    left=train_questions,
    right=train_annotations,
    on=["image_id", "question_id"],
)

df_train

Unnamed: 0,image_id,question,multiple_choices,question_id,question_type,multiple_choice_answer,answers,answer_type
0,11779,Who looks happier?,"[alive, 1, woman, purple, 2, yes, white, boy, ...",117792,who,man,"[{'answer': 'old person', 'answer_confidence':...",other
1,11779,Where is the woman sitting?,"[3, no, blue, red, 1, slide, monkey bars, jump...",117790,where is the,blanket,"[{'answer': 'on blanket', 'answer_confidence':...",other
2,11779,Where is the man sitting?,"[away, yes, blue, 1, 2, mouse, couch, no, yell...",117791,where is the,bench,"[{'answer': 'on bench', 'answer_confidence': '...",other
3,5536,Is this man hungry?,"[water, yellow, 4, running, blue, pouring, out...",55360,is this,yes,"[{'answer': 'yes', 'answer_confidence': 'yes',...",yes/no
4,5536,What kind of drink is that?,"[wine, girl would fall, soda, white, yes, coke...",55361,what kind of,soda,"[{'answer': 'water', 'answer_confidence': 'no'...",other
...,...,...,...,...,...,...,...,...
59995,11695,What color is the log the little girl sitting on?,"[tan, brown, white, 1, running, red, 4, pink, ...",116950,what color is the,brown,"[{'answer': 'brown', 'answer_confidence': 'yes...",other
59996,11695,Does the boy want to go on the seesaw?,"[red, no table, fetch, kicking soccer ball, ye...",116951,does the,yes,"[{'answer': 'yes', 'answer_confidence': 'yes',...",yes/no
59997,13790,What animal is between the two men?,"[dog, 4, yellow, 1, collectors, no, red, yes, ...",137900,what animal is,cat,"[{'answer': 'cat', 'answer_confidence': 'yes',...",other
59998,13790,What food is by the picnic basket?,"[4, she is happy, grapes, pizza, white, becaus...",137901,what,watermelon,"[{'answer': 'watermelon, sandwich, burger', 'a...",other


In [None]:
# Folder holding the training scene images.
mscoco_path = "/content/drive/MyDrive/VQA assignment/scene_img_abstract_v002_train2015"

# Plain Python lists of the three columns of interest. Despite its name,
# `image_path` holds the values of the `image_id` column.
image_path, question, answer = (
    list(df_train[column_name])
    for column_name in ('image_id', 'question', 'multiple_choice_answer')
)

In [None]:
def decontractions(phrase):
    """Expand common English contractions in ``phrase``.

    The substitutions are applied one after another, in the order listed, to
    the whole string. Each rule exists twice: once for the ASCII apostrophe
    and once for the typographic one. Matching is case-sensitive and not
    anchored to word boundaries, so e.g. the "he's" rule also rewrites the
    tail of "she's" (which still yields "she is").

    Args:
        phrase: Text to expand.

    Returns:
        The text with every matching contraction replaced.
    """
    rules = (
        # Irregular negations must run before the generic "n't" rule.
        (r"won\'t", "will not"),
        (r"can\'t", "can not"),
        (r"won\’t", "will not"),
        (r"can\’t", "can not"),
        # Third-person "'s" read as "is".
        (r"he\'s", "he is"),
        (r"she\'s", "she is"),
        (r"it\'s", "it is"),
        (r"he\’s", "he is"),
        (r"she\’s", "she is"),
        (r"it\’s", "it is"),
        # Generic suffixes, ASCII apostrophe.
        (r"n\'t", " not"),
        (r"\'re", " are"),
        (r"\'d", " would"),
        (r"\'ll", " will"),
        (r"\'t", " not"),
        (r"\'ve", " have"),
        (r"\'m", " am"),
        # Generic suffixes, typographic apostrophe.
        (r"n\’t", " not"),
        (r"\’re", " are"),
        (r"\’d", " would"),
        (r"\’ll", " will"),
        (r"\’t", " not"),
        (r"\’ve", " have"),
        (r"\’m", " am"),
    )
    for pattern, expansion in rules:
        phrase = re.sub(pattern, expansion, phrase)
    return phrase


def text_preprocess(text):
    """Normalize a question string before tokenization.

    Steps, in order: lower-case; expand contractions with ``decontractions``;
    turn ``-``, ``,`` and ``:`` into spaces; delete every ``.`` that is not
    immediately followed by a digit; delete every character that is not an
    ASCII letter, digit, ``.`` or space; collapse runs of spaces into one.
    Leading and trailing spaces are not stripped.

    Args:
        text: Raw question text.

    Returns:
        The normalized text.
    """
    text = text.lower()
    text = decontractions(text)  # replace contractions with their expanded form
    text = re.sub(r'[-,:]', ' ', text)  # "-", "," and ":" become spaces
    # Drop "." unless a digit follows, so "3.5" keeps its dot while a
    # sentence-final "3." does not. The former pattern "(?!<=\d)(\.)(?!\d)"
    # matched exactly the same positions: its "(?!<=\d)" was a mistyped
    # lookbehind that could never fail in front of a ".", and "\d" sat in a
    # non-raw string (invalid escape sequence).
    text = re.sub(r'\.(?!\d)', '', text)
    text = re.sub(r'[^A-Za-z0-9. ]+', '', text)  # keep letters, digits, "." and space
    text = re.sub(r' +', ' ', text)  # collapse repeated spaces
    return text

def text_preprocess_ans(text):
    """Normalize an answer string before label encoding.

    Same steps as the question normalizer, except that ``!`` is also kept:
    lower-case; expand contractions with ``decontractions``; turn ``-``, ``,``
    and ``:`` into spaces; delete every ``.`` that is not immediately followed
    by a digit; delete every character that is not an ASCII letter, digit,
    ``.``, ``!`` or space; collapse runs of spaces into one. Leading and
    trailing spaces are not stripped.

    Args:
        text: Raw answer text.

    Returns:
        The normalized text.
    """
    text = text.lower()
    text = decontractions(text)  # replace contractions with their expanded form
    text = re.sub(r'[-,:]', ' ', text)  # "-", "," and ":" become spaces
    # Drop "." unless a digit follows. Equivalent to the former
    # "(?!<=\d)(\.)(?!\d)", whose "(?!<=\d)" was a mistyped lookbehind with no
    # effect and whose "\d" sat in a non-raw string (invalid escape sequence).
    text = re.sub(r'\.(?!\d)', '', text)
    text = re.sub(r'[^A-Za-z0-9.! ]+', '', text)  # keep letters, digits, ".", "!" and space
    text = re.sub(r' +', ' ', text)  # collapse repeated spaces
    return text

In [None]:
# Normalize the question text and the ground-truth answer text into two new
# columns; the raw columns are left in place.
df_train["question_preprocessed"] = df_train["question"].map(text_preprocess)
df_train["answer_preprocessed"] = df_train["multiple_choice_answer"].map(text_preprocess_ans)

In [None]:
df_train

Unnamed: 0,image_id,question,multiple_choices,question_id,question_type,multiple_choice_answer,answers,answer_type,question_preprocessed,answer_preprocessed
0,11779,Who looks happier?,"[alive, 1, woman, purple, 2, yes, white, boy, ...",117792,who,man,"[{'answer': 'old person', 'answer_confidence':...",other,who looks happier,man
1,11779,Where is the woman sitting?,"[3, no, blue, red, 1, slide, monkey bars, jump...",117790,where is the,blanket,"[{'answer': 'on blanket', 'answer_confidence':...",other,where is the woman sitting,blanket
2,11779,Where is the man sitting?,"[away, yes, blue, 1, 2, mouse, couch, no, yell...",117791,where is the,bench,"[{'answer': 'on bench', 'answer_confidence': '...",other,where is the man sitting,bench
3,5536,Is this man hungry?,"[water, yellow, 4, running, blue, pouring, out...",55360,is this,yes,"[{'answer': 'yes', 'answer_confidence': 'yes',...",yes/no,is this man hungry,yes
4,5536,What kind of drink is that?,"[wine, girl would fall, soda, white, yes, coke...",55361,what kind of,soda,"[{'answer': 'water', 'answer_confidence': 'no'...",other,what kind of drink is that,soda
...,...,...,...,...,...,...,...,...,...,...
59995,11695,What color is the log the little girl sitting on?,"[tan, brown, white, 1, running, red, 4, pink, ...",116950,what color is the,brown,"[{'answer': 'brown', 'answer_confidence': 'yes...",other,what color is the log the little girl sitting on,brown
59996,11695,Does the boy want to go on the seesaw?,"[red, no table, fetch, kicking soccer ball, ye...",116951,does the,yes,"[{'answer': 'yes', 'answer_confidence': 'yes',...",yes/no,does the boy want to go on the seesaw,yes
59997,13790,What animal is between the two men?,"[dog, 4, yellow, 1, collectors, no, red, yes, ...",137900,what animal is,cat,"[{'answer': 'cat', 'answer_confidence': 'yes',...",other,what animal is between the two men,cat
59998,13790,What food is by the picnic basket?,"[4, she is happy, grapes, pizza, white, becaus...",137901,what,watermelon,"[{'answer': 'watermelon, sandwich, burger', 'a...",other,what food is by the picnic basket,watermelon


In [None]:
# Keep only the columns needed downstream, in this order. Selecting by name
# already leaves out the raw 'question' and 'multiple_choice_answer' columns,
# so no separate drop() is needed - and, unlike a drop(), this selection does
# not raise KeyError when the cell is run a second time.
cols = ['image_id','question_id','question_preprocessed','question_type','answer_preprocessed','answers','answer_type','multiple_choices']
df_train = df_train[cols]

print('Number of Question & Answer in MSCOCO Train Dataset:',len(df_train))
df_train.head(2)

Number of Question & Answer in MSCOCO Train Dataset: 60000


Unnamed: 0,image_id,question_id,question_preprocessed,question_type,answer_preprocessed,answers,answer_type,multiple_choices
0,11779,117792,who looks happier,who,man,"[{'answer': 'old person', 'answer_confidence':...",other,"[alive, 1, woman, purple, 2, yes, white, boy, ..."
1,11779,117790,where is the woman sitting,where is the,blanket,"[{'answer': 'on blanket', 'answer_confidence':...",other,"[3, no, blue, red, 1, slide, monkey bars, jump..."


In [None]:
# Left join: every question row is kept and gains the image metadata columns
# of the row in `train_captions` that has the same image_id.
new_df_train = pd.merge(
    left=df_train,
    right=train_captions,
    on="image_id",
    how="left",
)
new_df_train

Unnamed: 0,image_id,question_id,question_preprocessed,question_type,answer_preprocessed,answers,answer_type,multiple_choices,url,file_name,width,height
0,11779,117792,who looks happier,who,man,"[{'answer': 'old person', 'answer_confidence':...",other,"[alive, 1, woman, purple, 2, yes, white, boy, ...",http://visualqa.org/data/abstract_v002/scene_i...,abstract_v002_train2015_000000011779.png,700,400
1,11779,117790,where is the woman sitting,where is the,blanket,"[{'answer': 'on blanket', 'answer_confidence':...",other,"[3, no, blue, red, 1, slide, monkey bars, jump...",http://visualqa.org/data/abstract_v002/scene_i...,abstract_v002_train2015_000000011779.png,700,400
2,11779,117791,where is the man sitting,where is the,bench,"[{'answer': 'on bench', 'answer_confidence': '...",other,"[away, yes, blue, 1, 2, mouse, couch, no, yell...",http://visualqa.org/data/abstract_v002/scene_i...,abstract_v002_train2015_000000011779.png,700,400
3,5536,55360,is this man hungry,is this,yes,"[{'answer': 'yes', 'answer_confidence': 'yes',...",yes/no,"[water, yellow, 4, running, blue, pouring, out...",http://visualqa.org/data/abstract_v002/scene_i...,abstract_v002_train2015_000000005536.png,700,400
4,5536,55361,what kind of drink is that,what kind of,soda,"[{'answer': 'water', 'answer_confidence': 'no'...",other,"[wine, girl would fall, soda, white, yes, coke...",http://visualqa.org/data/abstract_v002/scene_i...,abstract_v002_train2015_000000005536.png,700,400
...,...,...,...,...,...,...,...,...,...,...,...,...
59995,11695,116950,what color is the log the little girl sitting on,what color is the,brown,"[{'answer': 'brown', 'answer_confidence': 'yes...",other,"[tan, brown, white, 1, running, red, 4, pink, ...",http://visualqa.org/data/abstract_v002/scene_i...,abstract_v002_train2015_000000011695.png,700,400
59996,11695,116951,does the boy want to go on the seesaw,does the,yes,"[{'answer': 'yes', 'answer_confidence': 'yes',...",yes/no,"[red, no table, fetch, kicking soccer ball, ye...",http://visualqa.org/data/abstract_v002/scene_i...,abstract_v002_train2015_000000011695.png,700,400
59997,13790,137900,what animal is between the two men,what animal is,cat,"[{'answer': 'cat', 'answer_confidence': 'yes',...",other,"[dog, 4, yellow, 1, collectors, no, red, yes, ...",http://visualqa.org/data/abstract_v002/scene_i...,abstract_v002_train2015_000000013790.png,700,400
59998,13790,137901,what food is by the picnic basket,what,watermelon,"[{'answer': 'watermelon, sandwich, burger', 'a...",other,"[4, she is happy, grapes, pizza, white, becaus...",http://visualqa.org/data/abstract_v002/scene_i...,abstract_v002_train2015_000000013790.png,700,400


In [None]:


# Keep the image file name in place of the numeric image id, plus the
# question/answer columns.
cols = ['file_name','question_id','question_preprocessed','question_type','answer_preprocessed','answers','answer_type','multiple_choices']
new_df_train = new_df_train[cols]

# Report the size of the frame built here (the merged one), so that any change
# in row count caused by the merge would be visible.
print('Number of Question & Answer in MSCOCO Train Dataset:',len(new_df_train))
new_df_train.head(2)

Number of Question & Answer in MSCOCO Train Dataset: 60000


Unnamed: 0,file_name,question_id,question_preprocessed,question_type,answer_preprocessed,answers,answer_type,multiple_choices
0,abstract_v002_train2015_000000011779.png,117792,who looks happier,who,man,"[{'answer': 'old person', 'answer_confidence':...",other,"[alive, 1, woman, purple, 2, yes, white, boy, ..."
1,abstract_v002_train2015_000000011779.png,117790,where is the woman sitting,where is the,blanket,"[{'answer': 'on blanket', 'answer_confidence':...",other,"[3, no, blue, red, 1, slide, monkey bars, jump..."


In [None]:

# From here on the `image_id` column holds the image file name.
new_df_train = new_df_train.rename(columns={"file_name": "image_id"})
new_df_train

Unnamed: 0,image_id,question_id,question_preprocessed,question_type,answer_preprocessed,answers,answer_type,multiple_choices
0,abstract_v002_train2015_000000011779.png,117792,who looks happier,who,man,"[{'answer': 'old person', 'answer_confidence':...",other,"[alive, 1, woman, purple, 2, yes, white, boy, ..."
1,abstract_v002_train2015_000000011779.png,117790,where is the woman sitting,where is the,blanket,"[{'answer': 'on blanket', 'answer_confidence':...",other,"[3, no, blue, red, 1, slide, monkey bars, jump..."
2,abstract_v002_train2015_000000011779.png,117791,where is the man sitting,where is the,bench,"[{'answer': 'on bench', 'answer_confidence': '...",other,"[away, yes, blue, 1, 2, mouse, couch, no, yell..."
3,abstract_v002_train2015_000000005536.png,55360,is this man hungry,is this,yes,"[{'answer': 'yes', 'answer_confidence': 'yes',...",yes/no,"[water, yellow, 4, running, blue, pouring, out..."
4,abstract_v002_train2015_000000005536.png,55361,what kind of drink is that,what kind of,soda,"[{'answer': 'water', 'answer_confidence': 'no'...",other,"[wine, girl would fall, soda, white, yes, coke..."
...,...,...,...,...,...,...,...,...
59995,abstract_v002_train2015_000000011695.png,116950,what color is the log the little girl sitting on,what color is the,brown,"[{'answer': 'brown', 'answer_confidence': 'yes...",other,"[tan, brown, white, 1, running, red, 4, pink, ..."
59996,abstract_v002_train2015_000000011695.png,116951,does the boy want to go on the seesaw,does the,yes,"[{'answer': 'yes', 'answer_confidence': 'yes',...",yes/no,"[red, no table, fetch, kicking soccer ball, ye..."
59997,abstract_v002_train2015_000000013790.png,137900,what animal is between the two men,what animal is,cat,"[{'answer': 'cat', 'answer_confidence': 'yes',...",other,"[dog, 4, yellow, 1, collectors, no, red, yes, ..."
59998,abstract_v002_train2015_000000013790.png,137901,what food is by the picnic basket,what,watermelon,"[{'answer': 'watermelon, sandwich, burger', 'a...",other,"[4, she is happy, grapes, pizza, white, becaus..."


In [None]:
# Independent copy of the merged frame; the answer statistics and the
# top-answer filtering below read from this copy.
preprocessed_data_df = new_df_train.copy()

In [None]:
# Frequency table of the preprocessed answers, most frequent first.
answer_preprocessed = list(preprocessed_data_df['answer_preprocessed'])

# Tally occurrences; the dict keeps answers in order of first appearance.
count = {}
for answer_text in answer_preprocessed:
    count[answer_text] = count.get(answer_text, 0) + 1

answer_df = pd.DataFrame(list(count.items()), columns=["answer", "answer_count"])
# Share of all datapoints covered by each answer, in percent.
total_answers = len(answer_preprocessed)
answer_df["answer%"] = answer_df["answer_count"] / total_answers * 100
answer_df = answer_df.sort_values(by='answer_count', ascending=False)
answer_df.head(5)

Unnamed: 0,answer,answer_count,answer%
3,yes,14314,23.856667
7,no,10143,16.905
15,2,3496,5.826667
16,1,1934,3.223333
32,red,1506,2.51


In [None]:
new_df_train

Unnamed: 0,image_id,question_id,question_preprocessed,question_type,answer_preprocessed,answers,answer_type,multiple_choices
0,abstract_v002_train2015_000000011779.png,117792,who looks happier,who,man,"[{'answer': 'old person', 'answer_confidence':...",other,"[alive, 1, woman, purple, 2, yes, white, boy, ..."
1,abstract_v002_train2015_000000011779.png,117790,where is the woman sitting,where is the,blanket,"[{'answer': 'on blanket', 'answer_confidence':...",other,"[3, no, blue, red, 1, slide, monkey bars, jump..."
2,abstract_v002_train2015_000000011779.png,117791,where is the man sitting,where is the,bench,"[{'answer': 'on bench', 'answer_confidence': '...",other,"[away, yes, blue, 1, 2, mouse, couch, no, yell..."
3,abstract_v002_train2015_000000005536.png,55360,is this man hungry,is this,yes,"[{'answer': 'yes', 'answer_confidence': 'yes',...",yes/no,"[water, yellow, 4, running, blue, pouring, out..."
4,abstract_v002_train2015_000000005536.png,55361,what kind of drink is that,what kind of,soda,"[{'answer': 'water', 'answer_confidence': 'no'...",other,"[wine, girl would fall, soda, white, yes, coke..."
...,...,...,...,...,...,...,...,...
59995,abstract_v002_train2015_000000011695.png,116950,what color is the log the little girl sitting on,what color is the,brown,"[{'answer': 'brown', 'answer_confidence': 'yes...",other,"[tan, brown, white, 1, running, red, 4, pink, ..."
59996,abstract_v002_train2015_000000011695.png,116951,does the boy want to go on the seesaw,does the,yes,"[{'answer': 'yes', 'answer_confidence': 'yes',...",yes/no,"[red, no table, fetch, kicking soccer ball, ye..."
59997,abstract_v002_train2015_000000013790.png,137900,what animal is between the two men,what animal is,cat,"[{'answer': 'cat', 'answer_confidence': 'yes',...",other,"[dog, 4, yellow, 1, collectors, no, red, yes, ..."
59998,abstract_v002_train2015_000000013790.png,137901,what food is by the picnic basket,what,watermelon,"[{'answer': 'watermelon, sandwich, burger', 'a...",other,"[4, she is happy, grapes, pizza, white, becaus..."


In [None]:
# The 300 most frequent answers (answer_df is sorted by descending count).
top_300_answers = list(answer_df['answer'])[:300]

# Collect the rows of each top answer, in answer-rank order, and concatenate
# them once. Growing a DataFrame with pd.concat inside the loop re-copies all
# previously collected rows on every iteration and starts from an empty,
# column-less frame.
rows_per_answer = [
    preprocessed_data_df[preprocessed_data_df.answer_preprocessed == top_answer]
    for top_answer in top_300_answers
]
if rows_per_answer:
    new_data_df_300 = pd.concat(rows_per_answer)
else:
    # No answers at all: keep an empty frame with the expected columns.
    new_data_df_300 = preprocessed_data_df.iloc[0:0]

print(f"Top 300 answers coverd {round(len(new_data_df_300)/len(preprocessed_data_df)*100,2)}% of datapoints")


Top 300 answers coverd 93.0% of datapoints


In [None]:
new_data_df_300

Unnamed: 0,image_id,question_id,question_preprocessed,question_type,answer_preprocessed,answers,answer_type,multiple_choices
3,abstract_v002_train2015_000000005536.png,55360,is this man hungry,is this,yes,"[{'answer': 'yes', 'answer_confidence': 'yes',...",yes/no,"[water, yellow, 4, running, blue, pouring, out..."
14,abstract_v002_train2015_000000006502.png,65022,is the man eating a hotdog,is the man,yes,"[{'answer': 'yes', 'answer_confidence': 'yes',...",yes/no,"[2, 3, 1, ended, white, yellow, yo-yo, maybe, ..."
20,abstract_v002_train2015_000000008788.png,87882,is the mouse under the chair,is the,yes,"[{'answer': 'yes', 'answer_confidence': 'yes',...",yes/no,"[chairs, boy chasing girl, 2, casually, almost..."
26,abstract_v002_train2015_000000001527.png,15271,is the man falling,is the man,yes,"[{'answer': 'yes', 'answer_confidence': 'maybe...",yes/no,"[wheat, being held, 3, fell, white, yes, sunfl..."
27,abstract_v002_train2015_000000002592.png,25920,is little baby trying to walk,is,yes,"[{'answer': 'yes', 'answer_confidence': 'yes',...",yes/no,"[white, blue, mo, they're on upper shelves, es..."
...,...,...,...,...,...,...,...,...
26820,abstract_v002_train2015_000000013104.png,131040,what is the young man touching,what is the,game console,"[{'answer': 'game console', 'answer_confidence...",other,"[1, yes, to pick him up, yellow, jump on table..."
31225,abstract_v002_train2015_000000013156.png,131561,what is next to the tv,what is,game console,"[{'answer': 'dvd player', 'answer_confidence':...",other,"[she fell, table, 4, 2, game console, cement, ..."
54247,abstract_v002_train2015_000000008881.png,88811,what can be used for electronic entertainment,what,game console,"[{'answer': 'game consul', 'answer_confidence'...",other,"[basket, lots, white, one sitting, no, yellow,..."
54387,abstract_v002_train2015_000000017305.png,173050,what is the woman on the right holding in her ...,what is the woman,game console,"[{'answer': 'game console', 'answer_confidence...",other,"[3, under man's arm, from bottle, yellow, 1, w..."


In [None]:
# Map every retained answer string to an integer class id.
labelencoder = preprocessing.LabelEncoder()
labelencoder.fit(top_300_answers)

retained_answers = list(new_data_df_300['answer_preprocessed'])
new_data_df_300['class_label'] = labelencoder.transform(retained_answers)
print("Number of Class Labels:",len(labelencoder.classes_))

# Drop the bookkeeping columns, then persist the final table to Drive.
unused_columns = ['question_id', 'question_type', 'answer_type']
new_data_df_300 = new_data_df_300.drop(unused_columns, axis=1)
print("Number of datapoints of final dataset:",len(new_data_df_300))
new_data_df_300.to_csv("/content/drive/MyDrive/VQA assignment/preprocessed_k300.csv",index=False)
new_data_df_300.head(3)

Number of Class Labels: 300
Number of datapoints of final dataset: 55799


Unnamed: 0,image_id,question_preprocessed,answer_preprocessed,answers,multiple_choices,class_label
3,abstract_v002_train2015_000000005536.png,is this man hungry,yes,"[{'answer': 'yes', 'answer_confidence': 'yes',...","[water, yellow, 4, running, blue, pouring, out...",299
14,abstract_v002_train2015_000000006502.png,is the man eating a hotdog,yes,"[{'answer': 'yes', 'answer_confidence': 'yes',...","[2, 3, 1, ended, white, yellow, yo-yo, maybe, ...",299
20,abstract_v002_train2015_000000008788.png,is the mouse under the chair,yes,"[{'answer': 'yes', 'answer_confidence': 'yes',...","[chairs, boy chasing girl, 2, casually, almost...",299


In [None]:

# Answer strings known to the fitted encoder; position i is class id i.
class_labels = labelencoder.classes_
print("Class labels:", class_labels)

# One line per class id with its answer string.
for index in range(len(class_labels)):
    label = class_labels[index]
    print(f"Class label {index}: {label}")

# Class ids that actually occur in the filtered dataset.
unique_class_labels = new_data_df_300['class_label'].unique()
print("Unique class labels in the dataset:", unique_class_labels)


Class labels: ['0' '1' '10' '11' '12' '16' '18' '2' '3' '4' '5' '6' '7' '8' '9' 'abcd'
 'afternoon' 'apple' 'apples' 'baby' 'ball' 'baseball' 'basket' 'bat'
 'bear' 'bed' 'beehive' 'bees' 'begging' 'behind cloud' 'behind tree'
 'beige' 'bench' 'bicycle' 'bike' 'bird' 'birds' 'black' 'black and white'
 'blanket' 'blocks' 'blonde' 'blue' 'bone' 'book' 'books' 'bookshelf'
 'bottle' 'boy' 'bread' 'brown' 'bucket' 'burger' 'bush' 'bushes'
 'butterflies' 'butterfly' 'by pond' 'by tree' 'cake' 'camera' 'car' 'cat'
 'cats' 'chair' 'checkered' 'cheese' 'chihuahua' 'closed' 'cloud' 'clouds'
 'cloudy' 'coat rack' 'cold' 'cooking' 'corn' 'couch' 'cup' 'curtains'
 'daisies' 'dancing' 'day' 'daytime' 'deer' 'dessert' 'dog' 'dogs' 'doll'
 'dollhouse' 'door' 'drinking' 'duck' 'ducks' 'eagle' 'eating' 'eggs'
 'evening' 'fall' 'female' 'fire' 'fireplace' 'fish' 'floor' 'floral'
 'flower' 'flowers' 'flying' 'food' 'football' 'friends' 'frisbee' 'frog'
 'game console' 'girl' 'glass' 'gold' 'gone' 'grass' 

In [None]:
# Model inputs (image file name, question, answer text) and integer target.
input_columns = ['image_id','question_preprocessed','answer_preprocessed']
X = new_data_df_300[input_columns]
y = new_data_df_300['class_label']
print('X.shape:',X.shape)
print('y.shape:',y.shape)

# Number of rows available per class id.
new_data_df_300.groupby(by='class_label').count()

X.shape: (55799, 3)
y.shape: (55799,)


Unnamed: 0_level_0,image_id,question_preprocessed,answer_preprocessed,answers,multiple_choices
class_label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,467,467,467,467,467
1,1934,1934,1934,1934,1934
2,38,38,38,38,38
3,32,32,32,32,32
4,17,17,17,17,17
...,...,...,...,...,...
295,38,38,38,38,38
296,123,123,123,123,123
297,603,603,603,603,603
298,12,12,12,12,12


In [None]:
X

Unnamed: 0,image_id,question_preprocessed,answer_preprocessed
3,abstract_v002_train2015_000000005536.png,is this man hungry,yes
14,abstract_v002_train2015_000000006502.png,is the man eating a hotdog,yes
20,abstract_v002_train2015_000000008788.png,is the mouse under the chair,yes
26,abstract_v002_train2015_000000001527.png,is the man falling,yes
27,abstract_v002_train2015_000000002592.png,is little baby trying to walk,yes
...,...,...,...
26820,abstract_v002_train2015_000000013104.png,what is the young man touching,game console
31225,abstract_v002_train2015_000000013156.png,what is next to the tv,game console
54247,abstract_v002_train2015_000000008881.png,what can be used for electronic entertainment,game console
54387,abstract_v002_train2015_000000017305.png,what is the woman on the right holding in her ...,game console


In [None]:
# Two stratified splits with a fixed seed: first hold out 10% of all rows for
# validation, then hold out 10% of the remainder for testing.
X_remaining, X_val, y_remaining, y_val = train_test_split(
    X, y, test_size=0.10, stratify=y, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(
    X_remaining, y_remaining, test_size=0.10, stratify=y_remaining, random_state=42)

In [None]:
print(X_train.shape, y_train.shape)
print(X_val.shape, y_val.shape)
print(X_test.shape, y_test.shape)

# One-hot encode the integer class ids. The width is taken from the fitted
# label encoder rather than a hard-coded 1000, so that these matrices have one
# column per answer class - the same width the batch generators produce with
# num_classes=300.
# NOTE(review): confirm no later cell relies on the previous 1000-column shape.
n_answer_classes = len(labelencoder.classes_)
Y_train = to_categorical(y_train, n_answer_classes)
Y_val = to_categorical(y_val, n_answer_classes)
Y_test = to_categorical(y_test, n_answer_classes)

(45197, 3) (45197,)
(5580, 3) (5580,)
(5022, 3) (5022,)


In [None]:
X_train

Unnamed: 0,image_id,question_preprocessed,answer_preprocessed
28698,abstract_v002_train2015_000000016721.png,what color is the bird,yellow
8522,abstract_v002_train2015_000000012888.png,what color is the cat,white
8017,abstract_v002_train2015_000000014923.png,is the girl jumping,no
43876,abstract_v002_train2015_000000014320.png,what game the people are playing,frisbee
30907,abstract_v002_train2015_000000014085.png,what is the yellow object on the tree,beehive
...,...,...,...
35378,abstract_v002_train2015_000000007355.png,how many people are in this picture,1
56392,abstract_v002_train2015_000000010309.png,what kind of wine is the man drinking,red
32087,abstract_v002_train2015_000000013969.png,is the woman preparing to catch the cat,no
37397,abstract_v002_train2015_000000017516.png,does everyone have a red cup,yes


In [None]:

# imgaug augmenters used by the augmenting batch generator (at most one is
# applied to any given image).
# Horizontal flip, applied with probability 0.5.
# NOTE(review): mirroring can invalidate answers to left/right questions - confirm this is intended.
aug1 = iaa.Fliplr(0.5)
# Brightness offset sampled from the range (-30, -20), i.e. a darker image.
aug2 = iaa.AddToBrightness((-30,-20))
# Contrast factor sampled from the range (0.6, 0.75), i.e. lower contrast.
aug3 = iaa.LinearContrast((0.6, 0.75))

In [None]:
class CustomDataGen_aug(tf.keras.utils.Sequence):
    """Batch generator with random image augmentation.

    Each item is ``((question_batch, image_batch), label_batch)``:

    * questions are converted to id sequences with the notebook-level
      tokenizer ``t`` and post-padded to length 22;
    * images are read with OpenCV from the folder named by the notebook-level
      ``colab_path``, optionally passed through one of ``aug1``/``aug2``/
      ``aug3``, and divided by 255;
    * labels are one-hot encoded over 300 classes.

    ``t``, ``colab_path`` and the augmenters are looked up as globals when a
    batch is built, so they only have to exist before the first batch is
    requested. Images are not resized here.
    NOTE(review): stacking a batch presumably relies on all images sharing one
    shape - confirm.

    Args:
        X_que: Sequence of question strings.
        X_img: Sequence of image file names, aligned with ``X_que``.
        y: Sequence of integer class ids, aligned with ``X_que``.
        batch_size: Number of samples per batch.
        shuffle: Whether to reshuffle the sample order after every epoch.
    """

    def __init__(self, X_que, X_img, y,
                 batch_size,
                 shuffle=True):
        super().__init__()

        self.X_que = X_que
        self.X_img = X_img
        self.y = y
        self.batch_size = batch_size
        self.shuffle = shuffle
        # Sample order used by __getitem__; identity until the first reshuffle.
        self.indexes = np.arange(len(y))

    def on_epoch_end(self):
        """Reshuffle the sample order if shuffling is enabled."""
        if self.shuffle:
            self.indexes = np.random.permutation(self.indexes)

    def __get_input1(self, que):
        """Return the padded id sequence (length 22) for one question."""
        que_arr = (pad_sequences(t.texts_to_sequences([que]), maxlen=22, padding='post'))[0]
        return que_arr

    def __get_input2(self, path):
        """Read one image, maybe augment it, and divide its values by 255."""
        img = cv2.imread(path)
        # cv2.imread returns None instead of raising when the file is missing
        # or unreadable; fail here with the offending path.
        if img is None:
            raise FileNotFoundError(f"Could not read image: {path}")

        # One uniform draw selects flip / darken / lower contrast / unchanged,
        # each with probability 0.25.
        a = np.random.uniform()
        if a<0.25:
            img = aug1.augment_image(img)
        elif a<0.5:
            img = aug2.augment_image(img)
        elif a<0.75:
            img = aug3.augment_image(img)

        img = np.array(img)/255.0

        return img

    def __get_output(self, label):
        """One-hot encode a class id over 300 classes."""
        return tf.keras.utils.to_categorical(label, num_classes=300)

    def __getitem__(self, index):
        """Build batch number ``index`` following the current sample order."""
        # Go through self.indexes so that the permutation made in
        # on_epoch_end actually changes which samples form each batch.
        batch_ids = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]

        X0_batch = np.asarray([self.__get_input1(self.X_que[i]) for i in batch_ids])
        # os.path.join inserts the separator between folder and file name.
        X1_batch = np.asarray([self.__get_input2(os.path.join(colab_path, self.X_img[i])) for i in batch_ids])
        y_batch = np.asarray([self.__get_output(self.y[i]) for i in batch_ids])

        return (X0_batch, X1_batch), y_batch

    def __len__(self):
        """Number of full batches per epoch; a trailing partial batch is dropped."""
        return len(self.indexes) // self.batch_size


class CustomDataGen(tf.keras.utils.Sequence):
    """Batch generator without image augmentation.

    Each item is ``((question_batch, image_batch), label_batch)``:

    * questions are converted to id sequences with the notebook-level
      tokenizer ``t`` and post-padded to length 22;
    * images are read with OpenCV from the folder named by the notebook-level
      ``colab_path`` and divided by 255;
    * labels are one-hot encoded over 300 classes.

    ``t`` and ``colab_path`` are looked up as globals when a batch is built,
    so they only have to exist before the first batch is requested. Images
    are not resized here.
    NOTE(review): stacking a batch presumably relies on all images sharing one
    shape - confirm.

    Args:
        X_que: Sequence of question strings.
        X_img: Sequence of image file names, aligned with ``X_que``.
        y: Sequence of integer class ids, aligned with ``X_que``.
        batch_size: Number of samples per batch.
        shuffle: Whether to reshuffle the sample order after every epoch.
    """

    def __init__(self, X_que, X_img, y,
                 batch_size,
                 shuffle=True):
        super().__init__()

        self.X_que = X_que
        self.X_img = X_img
        self.y = y
        self.batch_size = batch_size
        self.shuffle = shuffle
        # Sample order used by __getitem__; identity until the first reshuffle.
        self.indexes = np.arange(len(y))

    def on_epoch_end(self):
        """Reshuffle the sample order if shuffling is enabled."""
        if self.shuffle:
            self.indexes = np.random.permutation(self.indexes)

    def __get_input1(self, que):
        """Return the padded id sequence (length 22) for one question."""
        que_arr = (pad_sequences(t.texts_to_sequences([que]), maxlen=22, padding='post'))[0]
        return que_arr

    def __get_input2(self, path):
        """Read one image and divide its values by 255."""
        img = cv2.imread(path)
        # cv2.imread returns None instead of raising when the file is missing
        # or unreadable; fail here with the offending path.
        if img is None:
            raise FileNotFoundError(f"Could not read image: {path}")
        img = np.array(img)/255.0

        return img

    def __get_output(self, label):
        """One-hot encode a class id over 300 classes."""
        return tf.keras.utils.to_categorical(label, num_classes=300)

    def __getitem__(self, index):
        """Build batch number ``index`` following the current sample order."""
        # Go through self.indexes so that the permutation made in
        # on_epoch_end actually changes which samples form each batch.
        batch_ids = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]

        X0_batch = np.asarray([self.__get_input1(self.X_que[i]) for i in batch_ids])
        # os.path.join inserts the separator between folder and file name.
        X1_batch = np.asarray([self.__get_input2(os.path.join(colab_path, self.X_img[i])) for i in batch_ids])
        y_batch = np.asarray([self.__get_output(self.y[i]) for i in batch_ids])

        return (X0_batch, X1_batch), y_batch

    def __len__(self):
        """Number of full batches per epoch; a trailing partial batch is dropped."""
        return len(self.indexes) // self.batch_size

# Image folder read by the generators, and the batch size shared by both.
colab_path = "/content/drive/MyDrive/VQA assignment/scene_img_abstract_v002_train2015"
batch_siz = 128

# Training batches are augmented; validation batches are not.
traingen = CustomDataGen_aug(
    X_que=list(X_train['question_preprocessed']),
    X_img=list(X_train['image_id']),
    y=list(y_train),
    batch_size=batch_siz,
)
valgen = CustomDataGen(
    X_que=list(X_val['question_preprocessed']),
    X_img=list(X_val['image_id']),
    y=list(y_val),
    batch_size=batch_siz,
)

In [None]:
# Question vectorization for the train, validation and test splits.
# The vocabulary is learned from the training questions only.
t = Tokenizer(filters='')
t.fit_on_texts(list(X_train['question_preprocessed']))
vocab_size = len(t.word_index) + 1


def _encode_questions(split_frame):
    """Return (id sequences, sequences post-padded to length 22) for one split."""
    id_sequences = t.texts_to_sequences(list(split_frame['question_preprocessed']))
    return id_sequences, pad_sequences(id_sequences, maxlen=22, padding='post')


train_sequences, train_padded_docs = _encode_questions(X_train)
val_sequences, val_padded_docs = _encode_questions(X_val)
test_sequences, test_padded_docs = _encode_questions(X_test)

In [None]:
import numpy as np


glove_file = '/content/drive/MyDrive/VQA assignment/glove.6B.300d.txt'

# Word -> vector lookup; every line of the file is "<word> <v1> <v2> ...".
embedding_index = {}
with open(glove_file, encoding="utf-8") as f:
    for line in f:
        values = line.split()
        word, coefs = values[0], np.asarray(values[1:], dtype='float32')
        embedding_index[word] = coefs

print('total loaded %s word vectors.' % len(embedding_index))


total loaded 400000 word vectors.


In [None]:
def embedding_for_vocab(filepath, word_index,embedding_dim):
    """Build an embedding matrix for a vocabulary from a word-vector text file.

    Args:
        filepath: Path to a UTF-8 text file whose lines are ``word v1 v2 ...``.
        word_index: Mapping from word to its integer row in the returned matrix.
        embedding_dim: Number of leading vector components to keep per word.

    Returns:
        ``numpy.ndarray`` of shape ``(len(word_index) + 1, embedding_dim)``.
        Rows for words that do not occur in the file stay all-zero.
    """
    # +1 because index 0 is reserved and therefore needs its own row.
    vocab_size = len(word_index) + 1
    embedding_matrix_vocab = np.zeros((vocab_size, embedding_dim))

    with open(filepath, encoding="utf8") as f:
        for line in f:
            fields = line.split()
            # Blank lines (e.g. a stray newline at the end of the file) carry
            # no word; skip them instead of failing while unpacking.
            if not fields:
                continue
            word, vector = fields[0], fields[1:]
            if word in word_index:
                idx = word_index[word]
                embedding_matrix_vocab[idx] = np.array(
                    vector, dtype=np.float32)[:embedding_dim]

    return embedding_matrix_vocab


# matrix for vocab: word_index
embedding_dim = 300
embedding_matrix_vocab = embedding_for_vocab(
    filepath='/content/drive/MyDrive/VQA assignment/glove.6B.300d.txt',
    word_index=t.word_index,
    embedding_dim=embedding_dim,
)

# Row 1 is shown because row 0 belongs to the reserved index.
print("Dense vector for first word is => ",
      embedding_matrix_vocab[1])

Dense vector for first word is =>  [ 4.65600006e-02  2.13180006e-01 -7.43639981e-03 -4.58539993e-01
 -3.56389992e-02  2.36430004e-01 -2.88360000e-01  2.15210006e-01
 -1.34859994e-01 -1.64129996e+00 -2.60910004e-01  3.24340016e-02
  5.66210002e-02 -4.32960019e-02 -2.16719992e-02  2.24759996e-01
 -7.51290023e-02 -6.70180023e-02 -1.42470002e-01  3.88250016e-02
 -1.89510003e-01  2.99769998e-01  3.93049985e-01  1.78870007e-01
 -1.73429996e-01 -2.11779997e-01  2.36169994e-01 -6.36809990e-02
 -4.23180014e-01 -1.16609998e-01  9.37540010e-02  1.72959998e-01
 -3.30729991e-01  4.91120011e-01 -6.89949989e-01 -9.24620032e-02
  2.47419998e-01 -1.79910004e-01  9.79079977e-02  8.31179991e-02
  1.52989998e-01 -2.72760004e-01 -3.89339998e-02  5.44529974e-01
  5.37370026e-01  2.91049987e-01 -7.35139987e-03  4.78800014e-02
 -4.07599986e-01 -2.67590005e-02  1.79189995e-01  1.09770000e-02
 -1.09630004e-01 -2.63949990e-01  7.39900023e-02  2.62360007e-01
 -1.50800005e-01  3.46230000e-01  2.57580012e-01  1.197

In [None]:

# Peek at the image file names; the data generators combine each of them with
# `colab_path` to locate the image on disk.
print(X_train['image_id'])


28698    abstract_v002_train2015_000000016721.png
8522     abstract_v002_train2015_000000012888.png
8017     abstract_v002_train2015_000000014923.png
43876    abstract_v002_train2015_000000014320.png
30907    abstract_v002_train2015_000000014085.png
                           ...                   
35378    abstract_v002_train2015_000000007355.png
56392    abstract_v002_train2015_000000010309.png
32087    abstract_v002_train2015_000000013969.png
37397    abstract_v002_train2015_000000017516.png
6377     abstract_v002_train2015_000000010479.png
Name: image_id, Length: 45197, dtype: object


In [None]:
from google.colab import drive
# Mount Google Drive (a mount call also appears at the top of the notebook)
# and rebind `data_path`, replacing the value assigned there.
# NOTE(review): `data_path` is not referenced by the neighbouring cells --
# confirm whether this cell is still needed.
drive.mount('/content/drive')
data_path = '/content/drive/MyDrive/VQA Notebooks/'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
def Dataset(colab_path, que, image_id, y, shape):
  """Turn one (question, image, label) record into model-ready arrays.

  NOTE: a later cell defines `Dataset` again with the same signature; when
  both cells are run, that later definition is the one in effect.

  Args:
    colab_path: Directory that contains the image files.
    que: Question text, tokenized with the module-level tokenizer ``t``.
    image_id: Image file name inside ``colab_path``.
    y: Class index of the answer (converted with ``int``).
    shape: Side length, in pixels, of the square the image is resized to.

  Returns:
    ``(que_vector, image_vector, y)`` as float32 arrays, or
    ``(None, None, None)`` when the image cannot be read.
  """
  img = cv2.imread(os.path.join(colab_path,image_id))
  # cv2.imread returns None for a missing/unreadable file instead of raising.
  if img is None:
      print(f"Error: Image {image_id} not found in path {colab_path}")
      return None, None, None

  # Honour the requested size rather than a hard-coded 224.
  img = cv2.resize(img, (shape, shape), interpolation=cv2.INTER_NEAREST)
  img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
  image_vector = (img/255.0).astype(np.float32)

  que_vector = (pad_sequences(t.texts_to_sequences([que]), maxlen=22, padding='post'))[0]
  que_vector = np.asarray(que_vector).astype(np.float32)

  # int32 rather than uint8: with 300 classes a uint8 index cannot represent
  # labels >= 256, so those labels would be one-hot encoded as a wrong class.
  y = tf.one_hot(tf.cast(int(y), tf.int32), 300)
  y = np.asarray(y).astype(np.float32)

  return que_vector,image_vector,y



In [None]:
def Dataset(colab_path, que, image_id, y, shape):
  """Turn one (question, image, label) record into model-ready arrays.

  Args:
    colab_path: Directory that contains the image files.
    que: Question text, tokenized with the module-level tokenizer ``t``.
    image_id: Image file name inside ``colab_path``.
    y: Class index of the answer (converted with ``int``).
    shape: Side length, in pixels, of the square the image is resized to.

  Returns:
    ``(que_vector, image_vector, y)`` as float32 arrays, or
    ``(None, None, None)`` when the image cannot be read.
  """
  img = cv2.imread(os.path.join(colab_path, image_id))
  # cv2.imread returns None for a missing/unreadable file instead of raising.
  if img is None:
      print(f"Error: Image {image_id} not found in path {colab_path}")
      return None, None, None

  img = cv2.resize(img, (shape, shape), interpolation=cv2.INTER_NEAREST)

  # OpenCV loads images as BGR; convert to RGB channel order.
  img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

  image_vector = (img / 255.0).astype(np.float32)

  que_vector = (pad_sequences(t.texts_to_sequences([que]), maxlen=22, padding='post'))[0]
  que_vector = np.asarray(que_vector).astype(np.float32)

  # int32 rather than uint8: with 300 classes a uint8 index cannot represent
  # labels >= 256, so those labels would be one-hot encoded as a wrong class.
  y = tf.one_hot(tf.cast(int(y), tf.int32), 300)
  y = np.asarray(y).astype(np.float32)

  return que_vector, image_vector, y
# Materialise the whole test split in memory as three parallel arrays.
test_image = []
test_que = []
Y_test = []

# Convert each column to a list once. Doing this inside the loop rebuilt all
# three lists on every iteration, which is quadratic in the number of rows.
test_questions = list(X_test['question_preprocessed'])
test_image_ids = list(X_test['image_id'])
test_labels = list(y_test)

for i in tqdm(range(len(X_test))):
    que, image, y = Dataset(colab_path, test_questions[i], test_image_ids[i], test_labels[i], 224)
    # Dataset returns None for an unreadable image; such records are dropped.
    if image is not None:
        test_image.append(image)
        test_que.append(que)
        Y_test.append(y)

test_image = np.asarray(test_image).astype(np.float32)
test_que = np.asarray(test_que).astype(np.float32)
Y_test = np.asarray(Y_test).astype(np.float32)

print(f"test_image shape: {test_image.shape}")


  9%|▉         | 462/5022 [00:07<01:18, 57.87it/s]


KeyboardInterrupt: 

In [None]:
# Destination of the cached test arrays on Google Drive.
save_path = "/content/drive/MyDrive/VQA assignment/vqa_test_data_new.npz"

# Save the arrays in a single .npz file
# Each keyword becomes the key under which that array is stored; the same
# keys are used when the archive is read back.
np.savez(save_path, test_image=test_image, test_que=test_que, Y_test=Y_test)

print("Data saved successfully at", save_path)

Data saved successfully at /content/drive/MyDrive/VQA assignment/vqa_test_data_new.npz


In [None]:
import numpy as np

load_path = "/content/drive/MyDrive/VQA assignment/vqa_test_data_new.npz"

# Reopen the cached archive and pull out the three arrays by their saved keys.
data = np.load(load_path)
test_image, test_que, Y_test = (
    data[key] for key in ('test_image', 'test_que', 'Y_test')
)


In [None]:
# Inspect the label array read back from the .npz archive.
Y_test

In [None]:
# Inspect the encoded-question array read back from the .npz archive.
test_que

In [None]:
# Inspect the image array read back from the .npz archive.
test_image

In [None]:
# Shape check on the loaded images -- presumably (n_samples, 224, 224, 3),
# given the resize to 224 when the arrays were built; confirm.
test_image.shape

In [None]:
# importing necessary libraries
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Dropout, Embedding, LSTM, Multiply
from tensorflow.keras.models import Model

# Load pre-trained VGG19 model
# include_top=True keeps the fully connected head, so the 'fc2' layer used
# below exists.
# NOTE(review): `pooling='avg'` presumably has no effect together with
# include_top=True -- confirm against the Keras applications docs.
pre_trained_model = tf.keras.applications.VGG19(input_shape=(224, 224, 3), include_top=True, weights="imagenet", pooling='avg')
# Freeze every VGG19 layer; only the layers added below are trained.
for layer in pre_trained_model.layers:
    layer.trainable = False

# Add regularizer to layers if applicable
# NOTE(review): this only sets an attribute on layers that are already built
# (and frozen above); it likely does not add any regularization loss to the
# in-memory model -- confirm whether this loop is needed at all.
regularizer = tf.keras.regularizers.l2(0.01)
for layer in pre_trained_model.layers:
    for attr in ['kernel_regularizer']:
        if hasattr(layer, attr):
            setattr(layer, attr, regularizer)

# Image feature extraction branch
# The output of VGG19's 'fc2' layer is projected to 1024 units.
vgg19_fc2_output = (pre_trained_model.get_layer('fc2')).output
img = Dense(units=1024, activation='relu', kernel_initializer='he_normal')(vgg19_fc2_output)
img = Dropout(0.2)(img)

# Question feature extraction branch
# Input: 22 token ids per question, the same length the questions are padded to.
input_layer_que = Input(shape=(22,))
# Frozen embedding initialised from the precomputed `embedding_matrix_vocab`.
embedding = Embedding(vocab_size, 300, weights=[embedding_matrix_vocab], input_length=22, trainable=False)(input_layer_que)
# Two stacked LSTMs; the first returns the full sequence so the second can consume it.
lstm1 = LSTM(64, return_sequences=True)(embedding)
dropout1 = Dropout(0.5)(lstm1)
lstm2 = LSTM(64)(dropout1)
dropout2 = Dropout(0.5)(lstm2)
# Projected to 1024 units so it matches the image branch for the multiply below.
que = Dense(units=1024, activation='relu', kernel_initializer='he_normal')(dropout2)

# Element-wise multiplication of image and question features
pointwise_mul = Multiply()([que, img])

# Output layer
# Softmax over 300 units, matching the 300-class one-hot labels.
output = Dense(units=300, activation='softmax', kernel_initializer="glorot_uniform")(pointwise_mul)

# Define the complete model
# Input order is [question, image], the same order in which the data
# generator's __getitem__ packs its (X0_batch, X1_batch) tuple.
model_lstm_vgg19 = Model(inputs=[input_layer_que, pre_trained_model.input], outputs=output)

# Compile the model
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
model_lstm_vgg19.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# Print model summary
model_lstm_vgg19.summary()

In [None]:
# Save the model to the specified path
# In notebook order this runs before the training cell, so the file holds the
# model as built and compiled, not a trained one.
# NOTE(review): `save_path` was previously bound to the .npz test-data path;
# this assignment replaces that value.
save_path = '/content/drive/MyDrive/VQA assignment/my_vgg_model.h5'
model_lstm_vgg19.save(save_path)

print(f'Model saved at: {save_path}')


In [None]:
from tensorflow.keras.models import load_model

# Same .h5 file that the save cell writes.
model_path = '/content/drive/MyDrive/VQA assignment/my_vgg_model.h5'

# Rebinds `model_lstm_vgg19` to the copy read back from disk.
model_lstm_vgg19 = load_model(model_path)

print(f'Model loaded from: {model_path}')


In [None]:
# fit() returns a History object. Keep it under its own name so that
# `model_lstm_vgg19` still refers to the model afterwards (for evaluate,
# predict, save or further training).
# `batch_size` is not passed: the generators already yield whole batches
# (sized by `batch_siz` where they are constructed), and the Keras docs say
# not to specify it for generator / Sequence inputs.
history_lstm_vgg19 = model_lstm_vgg19.fit(
    traingen,
    epochs=4,
    verbose=1,
    validation_data=valgen
)


TypeError: %d format: a real number is required, not str