In [None]:
# One-time environment setup: run this cell only if the packages below are missing.
# On Colab, restart the runtime after installing and before running the rest of
# the notebook, so the freshly installed packages are importable.
# en_core_web_md is the spaCy pipeline loaded later via spacy.load();
# keras-self-attention provides the SeqSelfAttention layer used in the language model.
!python -m spacy download en_core_web_md
!pip install keras-self-attention

### Importing libraries

In [None]:
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

import spacy
import scipy.io
import gc
from sklearn.preprocessing import LabelEncoder
import pickle

import keras
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Reshape
from keras import Input
from keras.layers.recurrent import LSTM
from keras.layers import concatenate
from keras.layers.merge import Concatenate
from keras.models import model_from_json, Model
from keras.utils import plot_model
from collections import defaultdict
import operator
from keras.utils import np_utils, generic_utils
from progressbar import Bar, ETA, Percentage, ProgressBar
from itertools import zip_longest
from keras.models import load_model
import tensorflow as tf
from keras_self_attention import SeqSelfAttention

### Reading Preprocessed Files


In [None]:
def _read_lines(path):
    """Return the UTF-8 decoded lines of `path` without line terminators.

    The file is opened inside a `with` block so the handle is closed
    deterministically instead of being left to the garbage collector.
    """
    with open(path, 'rb') as handle:
        return handle.read().decode('utf-8').splitlines()


# Directory holding the preprocessed training text files (one record per line).
_TRAIN_DIR = '/content/drive/Processed Text Files/Train Files'

# Four parallel lists: entry i of each list describes the same training sample.
questions = _read_lines(f'{_TRAIN_DIR}/ques.txt')
questions_len = _read_lines(f'{_TRAIN_DIR}/ques_len.txt')
answers = _read_lines(f'{_TRAIN_DIR}/answer.txt')
image_id = _read_lines(f'{_TRAIN_DIR}/images_id.txt')

In [None]:
# Quick look at the first question/answer pair and the number of training samples.
print(questions[0], answers[0], len(image_id), sep="\n")

What is the overall condition of the given image?
flooded
4511


In [None]:
# Load the spaCy pipeline installed in the setup cell. Its per-token `.vector`
# values are what get_questions_tensor_timeseries copies into the question tensor.
# NOTE(review): the LSTM input is declared with word2vec_dim = 300 features, so the
# vectors of this pipeline are assumed to be 300-dimensional — confirm.
nlp = spacy.load("en_core_web_md")

In [None]:
# Load the pre-extracted VGG image features for the train and test splits.
# Each file is opened in a `with` block so the handle is closed as soon as the
# pickle has been read (the original left both handles open).
# NOTE(security): pickle.load can execute arbitrary code; only load feature files
# that you produced yourself or otherwise trust.
with open("/content/drive/Image Feature Files/VGG/Train Features.txt", "rb") as train_features_file:
    features_train = pickle.load(train_features_file)
with open("/content/drive/Image Feature Files/VGG/Test Features.txt", "rb") as test_features_file:
    features_test = pickle.load(test_features_file)

In [None]:
# Show the shape of each feature matrix (train first, then test), one per line.
print(features_train.shape, features_test.shape, sep="\n")

(4096, 1448)
(4096, 450)


In [None]:
# Re-order the four parallel lists together so samples are sorted by question length.
# `questions_len` holds the lengths as *strings* (they are read from a text file), so
# a plain tuple sort compares them lexicographically ("10" < "9"). The key converts
# the length to int for a true numeric order; the remaining fields keep the original
# tie-breaking (question, answer, image id).
# NOTE(review): assumes every line of ques_len.txt is an integer — int() raises
# ValueError otherwise.
_rows_by_length = sorted(
    zip(questions_len, questions, answers, image_id),
    key=lambda row: (int(row[0]),) + row[1:],
)
questions_len, questions, answers, image_id = (list(column) for column in zip(*_rows_by_length))
print(len(questions), len(answers), len(image_id))

4511 4511 4511


In [None]:
# Fit the answer-string -> class-index mapping on the training answers and persist
# it so the prediction section can decode class indices back to answer strings.
le = LabelEncoder()
le.fit(answers)
# Write inside a `with` block so the pickle is flushed and the handle closed before
# the file is read back later in the notebook.
with open('/content/drive/label_encoder_lstm.pkl', 'wb') as encoder_file:
    pickle.dump(le, encoder_file)

### Defining Model Parameters

In [None]:
# Hyper-parameters for the VQA model.
# NOTE(review): of these, only word2vec_dim, num_hidden_nodes_lstm and num_epochs are
# referenced by the cells shown in this notebook; the network cells hard-code their own
# layer widths and dropout rate, so the remaining values appear to be unused — confirm.

# Overwritten later: the training cell sets batch_size = 4511 and the prediction
# cell sets batch_size = 1429.
batch_size               =      512
# Length of one image feature vector (features_train.shape[0] prints as 4096).
img_dim                  =     4096
# Size of one word vector; used as the feature dimension of the first LSTM's input.
word2vec_dim             =      300
num_hidden_nodes_mlp     =     1024
# Number of units in each LSTM layer of the language model.
num_hidden_nodes_lstm    =      512
num_layers_lstm          =        3
dropout                  =       0.5
activation_mlp           =     'relu'
# Number of passes over the training data in the training loop.
num_epochs = 10

In [None]:
# Re-read the raw training image ids (one id per question) to build the id -> column map.
# Read inside a `with` block so the file handle is closed right away.
with open('/content/drive/Processed Text Files/Train Files/images_id.txt', 'rb') as img_ids_file:
    img_ids = img_ids_file.read().decode('utf-8').splitlines()

In [None]:
# Map every distinct training image id to a column index. np.unique returns the ids
# sorted as strings, so indices follow lexicographic id order ('10165' before '6279').
# NOTE(review): get_images_matrix uses these indices as columns of features_train, so
# the feature file is presumably stored in this same order — confirm against the
# script that produced it.
id_map = {img_id: column for column, img_id in enumerate(np.unique(img_ids))}

# Print a short summary rather than the whole mapping, which floods the cell output.
print(len(id_map), "unique training image ids; first entries:", list(id_map.items())[:5])

{'10165': 0, '10166': 1, '10168': 2, '10170': 3, '10171': 4, '10172': 5, '10175': 6, '10176': 7, '10179': 8, '10180': 9, '10181': 10, '10182': 11, '10184': 12, '10300': 13, '10566': 14, '10687': 15, '10806': 16, '10810': 17, '10811': 18, '10817': 19, '10818': 20, '10819': 21, '10820': 22, '10821': 23, '10825': 24, '10826': 25, '10827': 26, '10828': 27, '10834': 28, '10835': 29, '10836': 30, '10837': 31, '10840': 32, '10841': 33, '11723': 34, '6279': 35, '6287': 36, '6332': 37, '6334': 38, '6335': 39, '6338': 40, '6340': 41, '6341': 42, '6344': 43, '6346': 44, '6347': 45, '6348': 46, '6350': 47, '6351': 48, '6352': 49, '6354': 50, '6358': 51, '6359': 52, '6360': 53, '6361': 54, '6363': 55, '6364': 56, '6365': 57, '6367': 58, '6368': 59, '6369': 60, '6370': 61, '6372': 62, '6373': 63, '6374': 64, '6375': 65, '6376': 66, '6379': 67, '6381': 68, '6384': 69, '6385': 70, '6386': 71, '6387': 72, '6388': 73, '6393': 74, '6395': 75, '6397': 76, '6398': 77, '6399': 78, '6402': 79, '6403': 80, '6

### Defining network architecture

In [None]:
# Image branch: an identity Reshape whose only job is to give the functional model an
# input tensor for the pre-extracted image feature vectors. It has no trainable weights.
# The size comes from `img_dim` (4096) instead of a repeated literal, so the branch
# stays in sync with the parameter cell.
# The auto-generated input name 'reshape_input' is used as a feed key in the
# training and prediction loops, so this must remain the first Reshape layer created.
image_model = Sequential()
image_model.add(Reshape(input_shape=(img_dim,), target_shape=(img_dim,)))
model1 = Model(inputs=image_model.input, outputs=image_model.output)
model1.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape_input (InputLayer)   [(None, 4096)]            0         
_________________________________________________________________
reshape (Reshape)            (None, 4096)              0         
Total params: 0
Trainable params: 0
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Question branch: encodes a sequence of word vectors into one fixed-size vector.
# Layout: LSTM -> LayerNorm -> self-attention -> LayerNorm, twice, then a final LSTM.
# The auto-generated input name 'lstm_input' is used as a feed key in the training
# and prediction loops, so the first layer must stay an LSTM.
language_model = keras.Sequential()
# input_shape=(None, word2vec_dim): variable number of timesteps, one word vector each.
# return_sequences=True keeps the per-timestep outputs for the attention layer.
language_model.add(LSTM(units=num_hidden_nodes_lstm, 
                        return_sequences=True, input_shape=(None, word2vec_dim)))
language_model.add(tf.keras.layers.LayerNormalization())
language_model.add(SeqSelfAttention(attention_activation='sigmoid'))
language_model.add(tf.keras.layers.LayerNormalization())
language_model.add(LSTM(units=num_hidden_nodes_lstm, return_sequences=True))
language_model.add(tf.keras.layers.LayerNormalization())
language_model.add(SeqSelfAttention(attention_activation='sigmoid'))
language_model.add(tf.keras.layers.LayerNormalization())
# return_sequences=False: only the last timestep's output is emitted, i.e. one
# num_hidden_nodes_lstm-sized vector per question.
language_model.add(LSTM(units=num_hidden_nodes_lstm, return_sequences=False))
# Wrapped in a functional Model only so its summary can be printed.
model2 = tf.keras.Model(language_model.input, language_model.output)
model2.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_input (InputLayer)      [(None, None, 300)]       0         
_________________________________________________________________
lstm (LSTM)                  (None, None, 512)         1665024   
_________________________________________________________________
layer_normalization (LayerNo (None, None, 512)         1024      
_________________________________________________________________
seq_self_attention (SeqSelfA (None, None, 512)         32833     
_________________________________________________________________
layer_normalization_1 (Layer (None, None, 512)         1024      
_________________________________________________________________
lstm_1 (LSTM)                (None, None, 512)         2099200   
_________________________________________________________________
layer_normalization_2 (Layer (None, None, 512)         1024

In [None]:
# Fuse the two branches: join the image feature vector and the question vector
# (final LSTM output) into a single feature vector per sample.
combined = concatenate([image_model.output, language_model.output])

In [None]:
# Classifier head on top of the fused image+question features: six identical
# Dense(relu) -> LayerNormalization -> Dropout stages whose width halves each time,
# followed by a softmax over the answer classes.
# NOTE(review): Dropout(0.8) zeroes 80% of the activations at every stage, which is
# unusually aggressive and differs from the `dropout = 0.5` value in the parameter
# cell. It is kept as-is because changing it alters training — confirm it is intended.
hidden = combined
for units in (2304, 1152, 576, 288, 144, 72):
    hidden = Dense(units, activation='relu')(hidden)
    hidden = tf.keras.layers.LayerNormalization()(hidden)
    hidden = tf.keras.layers.Dropout(0.8)(hidden)

# One output unit per answer class known to the label encoder. This is the same count
# get_answers_sum uses for the one-hot targets, so the two cannot drift apart
# (the original hard-coded 41).
num_classes = len(le.classes_)
class_scores = Dense(num_classes)(hidden)
class_probabilities = Activation("softmax")(class_scores)

# Final two-input model; the inputs are fed by name ('reshape_input', 'lstm_input').
model = tf.keras.Model(inputs=[image_model.input, language_model.input], outputs=class_probabilities)

In [None]:
# Categorical cross-entropy matches the one-hot targets built by get_answers_sum.
# No metrics are requested, so train_on_batch reports the loss only.
model.compile(loss='categorical_crossentropy', optimizer='Adam')
model.summary()

Model: "model_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
lstm_input (InputLayer)         [(None, None, 300)]  0                                            
__________________________________________________________________________________________________
lstm (LSTM)                     (None, None, 512)    1665024     lstm_input[0][0]                 
__________________________________________________________________________________________________
layer_normalization (LayerNorma (None, None, 512)    1024        lstm[0][0]                       
__________________________________________________________________________________________________
seq_self_attention (SeqSelfAtte (None, None, 512)    32833       layer_normalization[0][0]        
____________________________________________________________________________________________

In [None]:
import numpy as np

def get_questions_tensor_timeseries(questions, nlp, timesteps):
    """Turn a batch of question strings into a zero-padded word-vector tensor.

    Args:
        questions: non-list sequence (e.g. a tuple) of question strings; passing a
            list trips the assertion below.
        nlp: callable mapping a string to a sequence of tokens exposing `.vector`.
        timesteps: number of token slots per question. Longer questions are
            truncated, shorter ones are left zero-padded at the end.

    Returns:
        Array of shape (len(questions), timesteps, vector_dim), where vector_dim is
        the vector length of the first token of the first question.
    """
    assert not isinstance(questions, list)
    # Probe the first token of the first question for the embedding size.
    vector_dim = nlp(questions[0])[0].vector.shape[0]
    tensor = np.zeros((len(questions), timesteps, vector_dim))
    for row, question in enumerate(questions):
        # zip with range(timesteps) stops after `timesteps` tokens, i.e. truncates.
        for slot, token in zip(range(timesteps), nlp(question)):
            tensor[row, slot, :] = token.vector
    return tensor

def get_images_matrix(img_coco_ids, img_map, VGGfeatures):
    """Gather the feature vector of every image id in a batch.

    Args:
        img_coco_ids: non-list sequence (e.g. a tuple) of image ids; passing a list
            trips the assertion below.
        img_map: mapping from image id to a column index of `VGGfeatures`.
        VGGfeatures: 2-D feature store indexed as [feature, image column].

    Returns:
        Array of shape (len(img_coco_ids), VGGfeatures.shape[0]); row k holds the
        feature column of the k-th id.
    """
    assert not isinstance(img_coco_ids, list)
    feature_dim = VGGfeatures.shape[0]
    image_matrix = np.zeros((len(img_coco_ids), feature_dim))
    for row, coco_id in enumerate(img_coco_ids):
        image_matrix[row, :] = VGGfeatures[:, img_map[coco_id]]
    return image_matrix

def get_answers_sum(answers, encoder):
    """One-hot encode a batch of answer strings.

    Args:
        answers: non-list sequence (e.g. a tuple) of answer strings; passing a list
            trips the assertion below.
        encoder: fitted encoder exposing `transform` and `classes_`.

    Returns:
        The result of `np_utils.to_categorical` for the encoded answers, with one
        column per class in `encoder.classes_`.
    """
    assert not isinstance(answers, list)
    return np_utils.to_categorical(encoder.transform(answers), encoder.classes_.shape[0])

def grouped(iterable, n, fillvalue=None):
    """Yield consecutive n-tuples from `iterable`, padding the last with `fillvalue`.

    Example: grouped('abcde', 2, 'x') -> ('a', 'b'), ('c', 'd'), ('e', 'x').
    """
    # Handing the *same* iterator to zip_longest n times makes each output tuple
    # consume n consecutive items.
    shared = iter(iterable)
    return zip_longest(*(shared for _ in range(n)), fillvalue=fillvalue)

### Training


In [None]:
# Full-batch training: 4511 is the number of training samples, so every epoch
# performs a single train_on_batch call over the whole training set.
batch_size = 4511
for k in range(num_epochs):
    print("Epoch Number: ",k+1)
    # The original referenced train_questions / train_answers / train_image_id, which
    # are never defined in this notebook (leftover kernel state), so it fails with
    # NameError on a fresh run. The lists loaded and sorted above are used instead.
    # NOTE(review): assumes those names referred to these same lists — confirm.
    progbar = generic_utils.Progbar(len(questions))
    # Walk the three parallel lists in lock-step; a short final batch is padded with
    # the last sample of each list.
    batches = zip(grouped(questions, batch_size, fillvalue=questions[-1]),
                  grouped(answers, batch_size, fillvalue=answers[-1]),
                  grouped(image_id, batch_size, fillvalue=image_id[-1]))
    for question_batch, ans_batch, im_batch in batches:
        # Pad to the longest question in the batch. Taking the length of the last
        # question alone truncates longer ones whenever the batch is not strictly
        # length-sorted. make_doc runs only the tokenizer, so this pre-pass is cheap.
        timestep = max(len(nlp.make_doc(question)) for question in question_batch)
        X_ques_batch = get_questions_tensor_timeseries(question_batch, nlp, timestep)
        X_img_batch = get_images_matrix(im_batch, id_map, features_train)
        Y_batch = get_answers_sum(ans_batch, le)
        # Inputs are fed by the auto-generated input-layer names of the two branches.
        loss = model.train_on_batch({'lstm_input': X_ques_batch, 'reshape_input': X_img_batch}, Y_batch)
        progbar.add(batch_size, values=[('train loss', loss)])

Epoch Number:  1
Epoch Number:  2
Epoch Number:  3
Epoch Number:  4
Epoch Number:  5
Epoch Number:  6
Epoch Number:  7
Epoch Number:  8
Epoch Number:  9
Epoch Number:  10


### Prediction


In [None]:
# Reload the label encoder saved after fitting, to decode predicted class indices.
# NOTE(security): pickle.load can execute arbitrary code; only load files you trust.
with open('/content/drive/label_encoder_lstm.pkl', 'rb') as encoder_file:
    label_encoder = pickle.load(encoder_file)

In [None]:
# Accumulator intended to hold the decoded predictions for the test set.
y_pred = []
# 1429 matches the number of test questions printed further down, so the whole
# test set is predicted in one batch.
batch_size = 1429 
# Console progress bar wrapped around the prediction loop.
widgets = ['Evaluating ', Percentage(), ' ', Bar(marker='#',left='[',right=']'), ' ', ETA()]
pbar = ProgressBar(widgets=widgets)

In [None]:
# Read the test image ids (one per test question); the `with` block closes the handle.
with open('/content/drive/Processed Text Files/Test Files/images_id.txt', 'rb') as img_ids_file:
    img_ids1 = img_ids_file.read().decode('utf-8').splitlines()

# Map every distinct test image id to a column index. np.unique returns the ids
# sorted as strings, so indices follow lexicographic id order.
# NOTE(review): get_images_matrix uses these indices as columns of features_test, so
# the feature file is presumably stored in this same order — confirm.
id_map1 = {img_id: column for column, img_id in enumerate(np.unique(img_ids1))}

# Print a short summary rather than the whole mapping, which floods the cell output.
print(len(id_map1), "unique test image ids; first entries:", list(id_map1.items())[:5])

{'10163': 0, '10164': 1, '10167': 2, '10174': 3, '10183': 4, '10808': 5, '10812': 6, '10813': 7, '10814': 8, '10823': 9, '10829': 10, '10838': 11, '10839': 12, '10843': 13, '11483': 14, '6336': 15, '6342': 16, '6353': 17, '6362': 18, '6371': 19, '6377': 20, '6383': 21, '6389': 22, '6391': 23, '6394': 24, '6405': 25, '6412': 26, '6417': 27, '6419': 28, '6420': 29, '6445': 30, '6449': 31, '6452': 32, '6467': 33, '6468': 34, '6476': 35, '6488': 36, '6514': 37, '6517': 38, '6536': 39, '6545': 40, '6550': 41, '6553': 42, '6557': 43, '6558': 44, '6561': 45, '6562': 46, '6567': 47, '6568': 48, '6583': 49, '6590': 50, '6593': 51, '6594': 52, '6602': 53, '6619': 54, '6624': 55, '6632': 56, '6638': 57, '6659': 58, '6663': 59, '6671': 60, '6672': 61, '6677': 62, '6679': 63, '6680': 64, '6688': 65, '6691': 66, '6695': 67, '6700': 68, '6717': 69, '6718': 70, '6733': 71, '6744': 72, '6751': 73, '6761': 74, '6765': 75, '6768': 76, '6771': 77, '6774': 78, '6775': 79, '6778': 80, '6780': 81, '6785': 82

In [None]:
# Load the test questions and their image ids (parallel lists, one entry per line).
# Each file is read inside a `with` block so its handle is closed immediately.
with open('/content/drive/Processed Text Files/Test Files/ques.txt', 'rb') as test_questions_file:
    test_questions = test_questions_file.read().decode('utf-8').splitlines()
with open('/content/drive/Processed Text Files/Test Files/images_id.txt', 'rb') as test_image_id_file:
    test_image_id = test_image_id_file.read().decode('utf-8').splitlines()

In [None]:
# Predict an answer for every test question, batch by batch.
# Reset the accumulator here so re-running this cell never appends duplicates.
y_pred = []
for qu_batch, im_batch in pbar(zip(grouped(test_questions, batch_size,
                                           fillvalue=test_questions[0]),
                                   grouped(test_image_id, batch_size,
                                           fillvalue=test_image_id[0]))):
    # The test questions are not sorted by length, so pad to the longest question in
    # the batch; using only the last question's length truncated every longer one.
    # make_doc runs only the tokenizer, so this pre-pass is cheap.
    timesteps = max(len(nlp.make_doc(question)) for question in qu_batch)
    X_ques_batch_test = get_questions_tensor_timeseries(qu_batch, nlp, timesteps)
    X_img_batch_test = get_images_matrix(im_batch, id_map1, features_test)
    # The original called `model_`, which is never defined in this notebook; the
    # model trained above is used instead.
    # NOTE(review): if `model_` was meant to be a checkpoint restored from disk,
    # load it explicitly before this cell.
    y_predict = model.predict({'lstm_input': X_ques_batch_test, 'reshape_input': X_img_batch_test})
    y_predict = np.argmax(y_predict, axis=1)
    y_predict = label_encoder.inverse_transform(y_predict)
    # Collect this batch's decoded answers (the original never filled y_pred).
    y_pred.extend(y_predict)

# The last batch may have been padded with copies of the first sample; drop those.
y_pred = y_pred[:len(test_questions)]

Evaluating N/A% [#                                             ] Time:  0:15:14

In [None]:
# Sanity check: the number of predictions should equal the number of test
# questions and test image ids. Each value is printed on its own line.
for checked_value in (y_pred,
                      len(y_predict),
                      len(test_questions),
                      len(test_image_id),
                      features_test.shape):
    print(checked_value)

[]
1429
1429
1429
(4096, 450)


In [None]:
# Spot-check one decoded prediction (index 100 is an arbitrary sample).
print(y_predict[100])

non flooded


In [None]:
# Write one predicted answer per line, in test-question order.
# The `with` block closes (and flushes) the file even if a write fails. Write-only
# mode is enough because the file is never read back here, and UTF-8 matches the
# encoding used to read the input files.
with open("answer.txt", "w", encoding="utf-8") as answer_file:
    answer_file.writelines(f"{answer}\n" for answer in y_predict)