<a href="https://colab.research.google.com/github/nicologhielmetti/AN2DL-challenges/blob/master/challenge3/chall3.ipynb" target="_parent">
    <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

In [1]:
import json, os
from functools import partial
import tensorflow as tf
import random
import keras.layers as layers
import keras.models as models
from keras.initializers import orthogonal
from keras.optimizers import Adam
import shutil
from PIL import Image
import numpy as np
from datetime import datetime

In [2]:
# Mount Google Drive at /content/drive; the dataset archive and the saved
# models used later in this notebook live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
#!pip install gdown
#!gdown https://drive.google.com/uc?id=1tglwr5cbQbzrSLmJlHmz33htFUw0yzc4
!unzip -qq /content/drive/MyDrive/anndl-2020-vqa.zip -d "/content/drive/MyDrive/"

In [None]:
#!unzip -qq VQA_Dataset.zip -d VQA_Dataset

In [3]:
# Seed Python's RNG (used by the train/validation split below).
random.seed(96)

cwd = os.getcwd()

# Dataset root and image folder, resolved against the current working directory.
datasetName = os.path.join(cwd, 'drive/MyDrive/VQA_Dataset')
imagesPath = os.path.join(datasetName, 'Images')

# File names of the annotation files inside the dataset root.
trainJsonName, validJsonName = 'train.json', 'valid.json'
testJsonName, testJsonNameCorrect = 'test_questions.json', 'test.json'

# Full paths of the annotation files, in the same order as the names above.
trainJsonPath, validJsonPath, testJsonPath, testJsonPathCorrect = (
    os.path.join(datasetName, json_name)
    for json_name in (trainJsonName, validJsonName, testJsonName, testJsonNameCorrect)
)

In [27]:
def create_split_files(file_path, train_val_split, output_dir="drive/MyDrive/VQA_Dataset"):
    """Split an annotation file into ``train.json`` and ``valid.json``.

    Args:
        file_path: path of a JSON file holding a dict; only its values are kept.
        train_val_split: fraction of the entries assigned to the validation set.
        output_dir: directory that receives ``train.json`` and ``valid.json``
            (defaults to the location used by the rest of the notebook).

    The split is drawn with the global ``random`` module, so seed it beforehand
    for a repeatable result. Both files contain a JSON list of entries.
    """
    with open(file_path, 'r') as json_dataset:
        data = json.load(json_dataset)

    tot_list = list(data.values())

    # Sample *positions* rather than entries: membership tests on a set of ints
    # are O(1), and two entries that compare equal (duplicates) are still
    # treated as distinct samples, so every entry lands in exactly one split.
    n_train = int(len(tot_list) * (1 - train_val_split))
    train_indexes = random.sample(range(len(tot_list)), n_train)
    train_index_set = set(train_indexes)

    train_list = [tot_list[ix] for ix in train_indexes]
    validation_list = [entry for ix, entry in enumerate(tot_list) if ix not in train_index_set]

    # The two splits must partition the full list.
    assert len(train_list) + len(validation_list) == len(tot_list)

    with open(os.path.join(output_dir, "train.json"), "w") as train:
        json.dump(train_list, train)
    with open(os.path.join(output_dir, "valid.json"), "w") as validation:
        json.dump(validation_list, validation)

def generate_correct_test_file(testJsonPath, output_path="drive/MyDrive/VQA_Dataset/test.json"):
    """Convert the test questions from a dict keyed by question id into a list.

    Args:
        testJsonPath: path of a JSON file holding ``{question_id: entry_dict}``.
        output_path: where the converted list is written (defaults to the
            location used by the rest of the notebook).

    Each output entry is a copy of the input entry with an extra
    ``"question_id"`` field holding its former key.
    """
    with open(testJsonPath, 'r') as json_dataset:
        data = json.load(json_dataset)

    # Build new dicts so the loaded data is not modified in place.
    test_list = [dict(entry, question_id=question_id) for question_id, entry in data.items()]

    with open(output_path, "w") as test:
        json.dump(test_list, test)

def create_train_test_dirs(json_definition, dataset_path, split_name):
    """Copy the images of one split into ``<dataset_path>/<split_name>/<split_name>/``.

    Args:
        json_definition: mapping whose keys are image names (without ``.png``).
            NOTE(review): the split files written by this notebook are lists,
            not mappings -- confirm what callers are expected to pass.
        dataset_path: dataset root; source images are read from its ``Images`` folder.
        split_name: name of the split, used for both directory levels.

    Missing source images are reported on stdout and skipped.
    """
    target_dir = os.path.join(dataset_path, split_name, split_name)
    # Create both levels unconditionally: previously the inner folder was only
    # created together with the outer one, so an already existing outer folder
    # made every copy fail.
    os.makedirs(target_dir, exist_ok=True)

    for image_name in json_definition.keys():
        try:
            shutil.copy(
                os.path.join(dataset_path, "Images", image_name + '.png'),
                os.path.join(target_dir, image_name + '.png')
            )
        except FileNotFoundError as e:
            print("Split name: " + split_name + ". File not found: " + str(e))
            continue


In [None]:

# Split the full annotation file, passing 0.3 as the validation fraction.
create_split_files(os.path.join(datasetName, 'train_questions_annotations.json'), 0.3)


In [28]:
# Rewrite the test questions as a list in which every entry carries its question_id.
generate_correct_test_file(testJsonPath)

In [4]:


# Load the train / validation / test annotation files; each `with` block
# closes its file on exit.
with open(trainJsonPath, 'r') as json_file_train:
    data_train = json.load(json_file_train)

with open(validJsonPath, 'r') as json_file_valid:
    data_valid = json.load(json_file_valid)

with open(testJsonPathCorrect, 'r') as json_file_test:
    data_test = json.load(json_file_test)

# Make sure the working directory is the one the paths were resolved against.
os.chdir(cwd)
#create_train_test_dirs(data_train, datasetName, 'train')
#create_train_test_dirs(data_valid, datasetName, 'validation')
#create_train_test_dirs(data_test, datasetName, 'test')

In [None]:
# Run only when training the autoencoder.
# flow_from_directory expects one sub-folder per class, so the image folder is
# moved under a wrapper directory ("ImagesExt/Images").
dest_dir = os.path.join(datasetName, "ImagesExt")
os.makedirs(dest_dir, exist_ok=True)

moved_images_path = os.path.join(dest_dir, "Images")
# Only move when it has not been done yet, so re-running the cell (or the
# whole notebook) no longer crashes in shutil.move.
if not os.path.isdir(moved_images_path):
    shutil.move(imagesPath, dest_dir)
imagesPath = moved_images_path

In [7]:
# Run only when training the autoencoder.
img_size = (256, 256)
batch_size = 32
preproc_fun_fixed = partial(tf.keras.preprocessing.image.smart_resize, size=img_size)

# One generator definition, with 30% of the images reserved for validation.
datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    data_format='channels_last',
    preprocessing_function=preproc_fun_fixed,
    validation_split=0.3,
)

# Training iterator first, then validation iterator, both reading the same folder.
train_data, valid_data = (
    datagen.flow_from_directory(datasetName+'/ImagesExt', img_size, class_mode='input',
                                batch_size=batch_size, subset=subset_name)
    for subset_name in ("training", "validation")
)




Found 20534 images belonging to 1 classes.
Found 8799 images belonging to 1 classes.


In [8]:
# Run only when training the autoencoder.
def Conv2DLayer(x, filters, kernel, strides, padding, block_id, kernel_init=None):
    """Apply a Conv2D -> LeakyReLU -> Dropout(0.2) -> BatchNormalization block to ``x``.

    Args:
        x: input tensor.
        filters, kernel, strides, padding: forwarded to ``layers.Conv2D``.
        block_id: used to build the layer names (``block_<id>_conv`` etc.).
        kernel_init: kernel initializer; when ``None`` a fresh ``orthogonal()``
            instance is created for this block.

    Returns:
        The output tensor of the batch-normalization layer.
    """
    # Create the default initializer per call: a default argument is evaluated
    # once at definition time, so every block used to share one instance.
    if kernel_init is None:
        kernel_init = orthogonal()
    prefix = f'block_{block_id}_'
    x = layers.Conv2D(filters, kernel_size=kernel, strides=strides, padding=padding,
                      kernel_initializer=kernel_init, name=prefix+'conv')(x)
    x = layers.LeakyReLU(name=prefix+'lrelu')(x)
    x = layers.Dropout(0.2, name=prefix+'drop')(x)
    x = layers.BatchNormalization(name=prefix+'conv_bn')(x)
    return x

def Transpose_Conv2D(x, filters, kernel, strides, padding, block_id, kernel_init=None):
    """Apply a Conv2DTranspose -> LeakyReLU -> Dropout(0.2) -> BatchNormalization block to ``x``.

    Args:
        x: input tensor.
        filters, kernel, strides, padding: forwarded to ``layers.Conv2DTranspose``.
        block_id: used to build the layer names (``block_<id>_de-conv`` etc.).
        kernel_init: kernel initializer; when ``None`` a fresh ``orthogonal()``
            instance is created for this block.

    Returns:
        The output tensor of the batch-normalization layer.
    """
    # Create the default initializer per call: a default argument is evaluated
    # once at definition time, so every block used to share one instance.
    if kernel_init is None:
        kernel_init = orthogonal()
    prefix = f'block_{block_id}_'
    x = layers.Conv2DTranspose(filters, kernel_size=kernel, strides=strides, padding=padding,
                               kernel_initializer=kernel_init, name=prefix+'de-conv')(x)
    x = layers.LeakyReLU(name=prefix+'lrelu')(x)
    x = layers.Dropout(0.2, name=prefix+'drop')(x)
    x = layers.BatchNormalization(name=prefix+'conv_bn')(x)
    return x



def AutoEncoder(input_shape):
    """Build the convolutional autoencoder with encoder-to-decoder skip connections.

    Four stride-2 convolutions downsample the input by 16 and four stride-2
    transposed convolutions bring it back up; each decoder stage is
    concatenated with the encoder output of the same resolution. The
    resolution comments below assume a 256 x 256 input.

    Args:
        input_shape: shape of one input image, e.g. ``(256, 256, 3)``.

    Returns:
        A ``models.Model`` mapping the input to a 3-channel sigmoid output.
    """
    net_input = layers.Input(shape=input_shape)

    # ---- encoder ----
    enc_full = Conv2DLayer(net_input, 64, 3, strides=1, padding='same', block_id=1)          # 256
    enc_half = Conv2DLayer(enc_full, 64, 3, strides=2, padding='same', block_id=2)           # 128
    enc_quarter = Conv2DLayer(enc_half, 128, 5, strides=2, padding='same', block_id=3)       # 64
    enc_quarter_b = Conv2DLayer(enc_quarter, 128, 3, strides=1, padding='same', block_id=4)  # 64
    enc_eighth = Conv2DLayer(enc_quarter_b, 256, 5, strides=2, padding='same', block_id=5)   # 32
    bottleneck = Conv2DLayer(enc_eighth, 512, 3, strides=2, padding='same', block_id=6)      # 16

    # ---- decoder, 16 -> 32 ----
    up_eighth = Transpose_Conv2D(bottleneck, 512, 3, strides=2, padding='same', block_id=7)
    merged_eighth = layers.concatenate([up_eighth, enc_eighth], name='skip1')
    dec_eighth = Conv2DLayer(merged_eighth, 256, 3, strides=1, padding='same', block_id=8)

    # ---- decoder, 32 -> 64 ----
    up_quarter = Transpose_Conv2D(dec_eighth, 128, 3, strides=2, padding='same', block_id=9)
    merged_quarter = layers.concatenate([up_quarter, enc_quarter], name='skip2')
    dec_quarter = Conv2DLayer(merged_quarter, 128, 5, strides=1, padding='same', block_id=10)

    # ---- decoder, 64 -> 128 ----
    up_half = Transpose_Conv2D(dec_quarter, 64, 3, strides=2, padding='same', block_id=11)
    merged_half = layers.concatenate([up_half, enc_half], name='skip3')
    dec_half = Conv2DLayer(merged_half, 64, 5, strides=1, padding='same', block_id=12)

    # ---- decoder, 128 -> 256 ----
    up_full = Transpose_Conv2D(dec_half, 64, 3, strides=2, padding='same', block_id=13)
    merged_full = layers.concatenate([up_full, enc_full])

    # Final 3-channel reconstruction in [0, 1].
    final_layer = layers.Conv2D(3, 3, strides=1, padding='same', activation='sigmoid',
                                kernel_initializer=orthogonal(), name='final_conv')
    reconstruction = final_layer(merged_full)

    return models.Model(inputs=net_input, outputs=reconstruction)

In [9]:
# Run only when training the autoencoder.
autoencoder = AutoEncoder((*img_size, 3))
# `learning_rate` is the supported keyword; `lr` is a deprecated alias.
model_opt = Adam(learning_rate=0.002)

# Reconstruction is trained with mean squared error.
autoencoder.compile(optimizer=model_opt, loss='mse', metrics=['accuracy'])
#autoencoder.summary()

In [10]:
# Run only when training the autoencoder.
checkpoint_dir = os.path.join('/content/drive/MyDrive', 'saved_models_chall3')
log_dir = 'logs'
# Create the output folders up front so the callbacks never depend on another
# callback (or a previous run) having created them.
os.makedirs(checkpoint_dir, exist_ok=True)
os.makedirs(log_dir, exist_ok=True)

# Save the full model whenever the validation loss improves.
saved_weight = os.path.join(checkpoint_dir, 'dataweights.{epoch:02d}.hdf5')
modelchk = tf.keras.callbacks.ModelCheckpoint(saved_weight,
                                              monitor='val_loss',
                                              verbose=1,
                                              save_best_only=True,
                                              save_weights_only=False
                                              )

tensorboard = tf.keras.callbacks.TensorBoard(log_dir=log_dir,
                                          histogram_freq=0,
                                          write_graph=True,
                                          write_images=True
                                          )

# append=True keeps the log of earlier runs.
csv_logger = tf.keras.callbacks.CSVLogger(os.path.join(log_dir, 'keras_log.csv'),
                                       append=True)

# Stop after 5 epochs without val_loss improvement and roll back to the best weights.
es_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, verbose=1, restore_best_weights=True)

In [None]:
# Run only when training the autoencoder.
epochs=50
autoencoder.fit(train_data,
                steps_per_epoch = len(train_data),
                epochs=epochs,
                verbose=1,
                validation_data=valid_data,
                # One pass over the validation iterator (was len(train_data),
                # a copy-paste slip that asked for more validation batches
                # than the validation subset holds).
                validation_steps = len(valid_data),
                callbacks=[modelchk,tensorboard, csv_logger, es_callback]
                )

Epoch 1/100


In [5]:
# Load a trained autoencoder from Drive.
# NOTE(review): the checkpoint callback in this notebook writes
# 'dataweights.{epoch:02d}.hdf5'; 'best_model_autoencoder.hdf5' is presumably a
# manually renamed copy of the best checkpoint -- confirm.
autoencoder = tf.keras.models.load_model(os.path.join('/content/drive/MyDrive','saved_models_chall3', 'best_model_autoencoder.hdf5'))

In [4]:
# Run only when extracting the compressed image features.

def extract_layers(main_model, starting_layer_ix, ending_layer_ix):
    """Return a Sequential model made of a contiguous slice of ``main_model``'s layers.

    Args:
        main_model: model whose layers are reused (the layer objects are
            shared with the new model, not copied).
        starting_layer_ix: index of the first layer to take.
        ending_layer_ix: index of the last layer to take (inclusive).
    """
    sub_model = tf.keras.Sequential()
    wanted_indexes = range(starting_layer_ix, ending_layer_ix + 1)
    # Layers are fetched lazily, one at a time, and appended in index order.
    for layer in (main_model.get_layer(index=position) for position in wanted_indexes):
        sub_model.add(layer)
    return sub_model

# Encoder = layers 0..24 of the autoencoder followed by global average pooling.
encoder = extract_layers(autoencoder, starting_layer_ix=0, ending_layer_ix=24)
encoder.add(tf.keras.layers.GlobalAveragePooling2D())
# Mark every encoder layer as non-trainable.
for encoder_layer in encoder.layers:
    encoder_layer.trainable = False

In [14]:
# Run only when extracting the compressed image features.

# Distinct images referenced by each split (several questions can share an image).
train_images = {el['image_id'] for el in data_train}
valid_images = {el['image_id'] for el in data_valid}
len_train = len(train_images)
len_valid = len(valid_images)

print('taken the ' + str(len(valid_images)/len(train_images))+' of validation')

train_tensor_map = {}
for i, imagename in enumerate(train_images):
    # Progress over the *training* images (was divided by len_valid).
    print('{:3.2f} %'.format(i/len_train * 100),end = '\r')
    # Close the image file as soon as it has been decoded and resized.
    with Image.open(os.path.join(imagesPath, imagename + '.png')) as raw_image:
        image = raw_image.resize(img_size).convert('RGB')
    img = np.array(image).astype(np.float32) / 255
    res = encoder.predict(x = np.expand_dims(img,0))
    train_tensor_map[str(imagename)] = res.tolist()

# Write next to the dataset, which is where the training cells load it from.
with open(os.path.join(datasetName, "train_tensors_encoder.json"), "w") as tensors_file:
    json.dump(train_tensor_map, tensors_file, indent=2)

taken the 0.5738543939694505 of validation


In [16]:
# Run only when extracting the compressed image features.

valid_tensor_map = {}
for i, imagename in enumerate(valid_images):
    print('{:3.2f} %'.format(i/len_valid * 100),end = '\r')
    # Close the image file as soon as it has been decoded and resized.
    with Image.open(os.path.join(imagesPath, imagename + '.png')) as raw_image:
        image = raw_image.resize(img_size).convert('RGB')
    img = np.array(image).astype(np.float32) / 255
    res = encoder.predict(x = np.expand_dims(img,0))
    valid_tensor_map[str(imagename)] = res.tolist()

# Write next to the dataset, which is where the training cells load it from.
with open(os.path.join(datasetName, "valid_tensors_encoder.json"), "w") as tensors_file:
    json.dump(valid_tensor_map, tensors_file, indent=2)



In [None]:
# Run only when extracting the compressed image features.

test_images = {el['image_id'] for el in data_test}

len_test = len(test_images)
test_tensor_map = {}
for i, imagename in enumerate(test_images):
    # Progress over the *test* images (was divided by len_valid).
    print('{:3.2f} %'.format(i/len_test * 100),end = '\r')
    # Close the image file as soon as it has been decoded and resized.
    with Image.open(os.path.join(imagesPath, imagename + '.png')) as raw_image:
        image = raw_image.resize(img_size).convert('RGB')
    img = np.array(image).astype(np.float32) / 255
    res = encoder.predict(x = np.expand_dims(img,0))
    test_tensor_map[str(imagename)] = res.tolist()

# Write next to the dataset, which is where the prediction cell loads it from.
with open(os.path.join(datasetName, "test_tensors_encoder.json"), "w") as tensors_file:
    json.dump(test_tensor_map, tensors_file, indent=2)

In [12]:
# Answer vocabulary: each label is mapped to its position in this list,
# which is the class index used for the one-hot targets.
classes = {
    label: index
    for index, label in enumerate([
        '0', '1', '2', '3', '4', '5',
        'apple', 'baseball', 'bench', 'bike', 'bird', 'black', 'blanket',
        'blue', 'bone', 'book', 'boy', 'brown', 'cat', 'chair', 'couch',
        'dog', 'floor', 'food', 'football', 'girl', 'grass', 'gray', 'green',
        'left', 'log', 'man', 'monkey bars', 'no', 'nothing', 'orange', 'pie',
        'plant', 'playing', 'red', 'right', 'rug', 'sandbox', 'sitting',
        'sleeping', 'soccer', 'squirrel', 'standing', 'stool', 'sunny',
        'table', 'tree', 'watermelon', 'white', 'wine', 'woman', 'yellow',
        'yes',
    ])
}

# Batch sizes for the training (bst) and validation (bsv) generators.
bst, bsv = 300, 300

In [7]:
class CustomDataGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` yielding batches of ([image features, tokenized question], answer).

    Args:
        data: list of dicts with at least ``'image_id'`` and ``'question'``
            (and ``'answer'`` unless ``test=True``).
        batch_size: number of samples per batch.
        tokenizer: fitted tokenizer exposing ``texts_to_sequences``.
        featuresMap: mapping ``image_id -> precomputed feature vector``.
        maxSentenceLen: length every question is padded/truncated to.
        seed: seed applied to ``random`` and ``numpy.random`` (global state).
        num_classes: width of the one-hot answer vectors.
        shuffle: reshuffle the sample order at construction and after each epoch.
        test: when True, batches contain only the inputs (no answers).
        feature_dim: length of one image feature vector.
    """

    def __init__(self, data, batch_size, tokenizer, featuresMap, maxSentenceLen, seed=96, num_classes=58,
                 shuffle=True, test=False, feature_dim=512):
        super().__init__()
        self.data = data  # samples to iterate over
        self.batch_size = batch_size  # batch size
        self.featuresMap = featuresMap  # features of the images obtained from a pretrained model
        self.seed = seed  # seed for the shuffle operations
        self.num_classes = num_classes  # number of answer classes (58 by default)
        self.test = test
        self.shuffle = shuffle  # whether to reshuffle the sample order each epoch
        self.tok = tokenizer
        self.maxSentenceLen = maxSentenceLen
        self.feature_dim = feature_dim
        # Seed BEFORE the first shuffle, otherwise the initial order is not reproducible.
        random.seed(self.seed)
        np.random.seed(self.seed)
        self.on_epoch_end()

    def __len__(self):
        'number of batches per epoch; the last batch may be smaller than batch_size'
        # ceil instead of floor: no sample is silently dropped (this matters
        # for validation and for test predictions).
        return int(np.ceil(len(self.data) / self.batch_size))

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = np.arange(len(self.data))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def __getitem__(self, index):
        'returns a batch of (image, question) and answer'
        indexes = self.indexes[index * self.batch_size: (index + 1) * self.batch_size]
        data_temp = [self.data[k] for k in indexes]
        X = self._generate_X(data_temp)
        if self.test == False:
            y = self._generate_y(data_temp)
            return X, y
        else:
            return X

    def _generate_X(self, data_temp):
        'generates the batch of (image,question)'
        # Size the arrays by the actual number of samples so a short final
        # batch never contains uninitialised rows.
        n_samples = len(data_temp)
        img_array = np.empty((n_samples, self.feature_dim))
        question_array = np.empty((n_samples, self.maxSentenceLen))
        for i, dictionary in enumerate(data_temp):
            image = np.array(self.featuresMap[dictionary['image_id']])
            img_array[i] = image.reshape(-1)
            token = self.tok.texts_to_sequences([dictionary['question']])
            padded_sequence = tf.keras.preprocessing.sequence.pad_sequences(token, maxlen=self.maxSentenceLen)
            # pad_sequences returns one row per input text; a single text was passed.
            question_array[i] = padded_sequence[0]
        return [img_array, question_array]

    def _generate_y(self, data_temp):
        'generates the one hot encoding of the answer'
        # `classes` is the module-level answer -> index mapping.
        labels = [classes[dictionary['answer']] for dictionary in data_temp]
        return tf.keras.utils.to_categorical(labels, num_classes=self.num_classes)

In [8]:
# Precomputed encoder features for the train / validation images.
trainTensors = os.path.join(datasetName, "train_tensors_encoder.json")
validTensors = os.path.join(datasetName, "valid_tensors_encoder.json")

# Question/answer annotations (each `with` block closes its file on exit).
with open(trainJsonPath, 'r') as json_file_train:
    data_train = json.load(json_file_train)
with open(validJsonPath, 'r') as json_file_valid:
    data_valid = json.load(json_file_valid)

train_size, valid_size = len(data_train), len(data_valid)

# Image feature maps keyed by image id.
with open(trainTensors, 'r') as json_file_train:
    train_features = json.load(json_file_train)
with open(validTensors, 'r') as json_file_valid:
    valid_features = json.load(json_file_valid)

In [13]:
# Collect the vocabulary and the longest question (in words) of the training set.
questions = [el['question'] for el in data_train]
word_sequences = [tf.keras.preprocessing.text.text_to_word_sequence(question) for question in questions]

words = set()
for seq in word_sequences:
    words.update(seq)
maxLength = max((len(seq) for seq in word_sequences), default=0)

# Vocabulary size: number of distinct words in the training questions.
n_words = len(words)

# Tokenizer fitted on the training questions only.
# NOTE(review): Tokenizer(num_words=N) presumably keeps only word indices
# below N while indices start at 1, which would drop the rarest word -- confirm
# before changing, the Embedding layer uses the same n_words.
tok = tf.keras.preprocessing.text.Tokenizer(num_words=n_words)
tok.fit_on_texts(questions)

gen_train = CustomDataGenerator(data_train, bst, tok, train_features, maxSentenceLen=maxLength)
gen_val   = CustomDataGenerator(data_valid, bsv, tok, valid_features, shuffle = False, maxSentenceLen=maxLength)

In [None]:
# Two-branch VQA model: precomputed image features + question LSTM.

# Image branch: the input is the 512-d feature vector from the encoder, so
# this branch is just a dense projection (no convolution happens here).
# `shape` must be a tuple: (512) is the plain int 512.
inp1 = tf.keras.Input(shape=(512,))

dense1 = tf.keras.layers.Dense(units=256, activation= tf.keras.activations.relu, kernel_initializer = 'he_uniform')(inp1)

# Question branch: embedding followed by two stacked bidirectional LSTMs.
inp2 = tf.keras.Input(name='input_LSTM', shape=(maxLength,))
r = tf.keras.layers.Embedding(input_dim=n_words, output_dim=32)(inp2)
r = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(units=256, return_sequences=True))(r)
r = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(units=256, return_sequences=False))(r)
dense2 = tf.keras.layers.Dense(units=256, activation = tf.keras.activations.relu, kernel_initializer = 'he_uniform')(r)

# Fusion: concatenate both branches and classify over the 58 answers.
conc = tf.keras.layers.Concatenate()([dense1, dense2])
d = tf.keras.layers.Dense(units=1024, activation=tf.keras.activations.relu, kernel_initializer = 'he_uniform')(conc)
d = tf.keras.layers.Dropout(0.2)(d)
d = tf.keras.layers.Dense(units=1024, activation=tf.keras.activations.relu, kernel_initializer = 'he_uniform')(d)
d = tf.keras.layers.Dropout(0.2)(d)
out = tf.keras.layers.Dense(units=58, activation=tf.keras.activations.softmax)(d)

model = tf.keras.Model([inp1, inp2], out)
model.summary()

loss = tf.keras.losses.CategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam(learning_rate = 1e-4)
metrics = ['accuracy']
# Save the full model whenever the validation loss improves.
saved_weight = os.path.join('/content/drive/MyDrive','saved_models_chall3', 'model_{epoch:02d}.hdf5')
model_chk = tf.keras.callbacks.ModelCheckpoint(saved_weight,
                                              monitor='val_loss',
                                              verbose=1,
                                              save_best_only=True,
                                              save_weights_only=False
                                              )
# Stop after 10 epochs without val_loss improvement and roll back to the best weights.
es_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, verbose=1, restore_best_weights=True)


model.compile(metrics=metrics, optimizer=optimizer, loss=loss)

In [16]:
# Replace the freshly built model with the saved 'model_10.hdf5' checkpoint
# and continue training from there.
resume_checkpoint = os.path.join('/content/drive/MyDrive', 'saved_models_chall3', 'model_10.hdf5')
model = tf.keras.models.load_model(resume_checkpoint)
model.fit(
    gen_train,
    steps_per_epoch=len(gen_train),
    validation_data=gen_val,
    validation_steps=len(gen_val),
    epochs=100,
    callbacks=[model_chk, es_callback],
)

Epoch 1/100

Epoch 00001: val_loss improved from inf to 1.03202, saving model to /content/drive/MyDrive/saved_models_chall3/model_01.hdf5
Epoch 2/100

Epoch 00002: val_loss did not improve from 1.03202
Epoch 3/100

Epoch 00003: val_loss did not improve from 1.03202
Epoch 4/100

Epoch 00004: val_loss did not improve from 1.03202
Epoch 5/100

Epoch 00005: val_loss did not improve from 1.03202
Epoch 6/100

Epoch 00006: val_loss did not improve from 1.03202
Epoch 7/100

Epoch 00007: val_loss did not improve from 1.03202
Epoch 8/100

Epoch 00008: val_loss did not improve from 1.03202
Epoch 9/100

Epoch 00009: val_loss did not improve from 1.03202
Epoch 10/100

Epoch 00010: val_loss did not improve from 1.03202
Epoch 11/100

Epoch 00011: val_loss did not improve from 1.03202
Restoring model weights from the end of the best epoch.
Epoch 00011: early stopping


<tensorflow.python.keras.callbacks.History at 0x7fe87f9b71d0>

In [29]:
def create_csv(results, results_dir='./'):
    """Write ``results`` to a timestamped ``results_<date>.csv`` file.

    Args:
        results: mapping ``id -> category``; one ``id,category`` row is written
            per item, after an ``Id,Category`` header.
        results_dir: directory in which the file is created.
    """
    timestamp = datetime.now().strftime('%b%d_%H-%M-%S')
    target_path = os.path.join(results_dir, 'results_' + timestamp + '.csv')

    rows = ['Id,Category\n']
    rows.extend(str(key) + ',' + str(value) + '\n' for key, value in results.items())

    with open(target_path, 'w') as f:
        f.writelines(rows)

# From here on the test annotations are the list-form file (test.json).
testJsonName = 'test.json'
testJsonPath = os.path.join(datasetName, testJsonName)

testTensors = os.path.join(datasetName, "test_tensors_encoder.json")

with open(testJsonPath,'r') as json_file_test:
    data_test = json.load(json_file_test)

with open(testTensors,'r') as json_file_test:
    test_features = json.load(json_file_test)

print('Test set length:' + str(len(data_test)))
# Batch size 1 and no shuffling: prediction i belongs to question i.
test_gen = CustomDataGenerator(data_test, 1, tok, test_features, maxSentenceLen=maxLength,
                               shuffle=False, test=True)

predictions = model.predict(test_gen)
print('Predictions vector length:' + str(len(predictions)))

# Pairing by position only makes sense if every question got a prediction.
if len(predictions) != len(data_test):
    raise ValueError('Got ' + str(len(predictions)) + ' predictions for '
                     + str(len(data_test)) + ' test questions')

# Most probable class per question, computed in one vectorised call.
work_pr = np.argmax(predictions, axis=-1)

results = {question['question_id']: int(label) for question, label in zip(data_test, work_pr)}

create_csv(results)
print('CSV written!')


Test set length:6372
Predictions vector length:6372
CSV written!
