<a href="https://colab.research.google.com/github/maryamhashemi/Persian_VQA/blob/master/persian_VQA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Import prerequisites

In [0]:
import json
import os
from tensorflow.keras.models import Model 
from tensorflow.keras.layers import Dense, LSTM, Dropout, Embedding, Multiply
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import re

In [2]:
# Mount Google Drive at /content/drive (Colab-only); the dataset paths in the
# parameter cell are built under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### Set Parameters

In [0]:
# Model hyper-parameters: both are forwarded to vanilla() by Train().
DROPOUT_RATE = 0.5
EMBEDDING_DIM = 300
# Training schedule: both are forwarded to model.fit() by Train().
EPOCHS = 10
BATCH_SIZE = 256

# Dataset root on the mounted Google Drive; every path below is relative to it.
BASE_PATH = '/content/drive/My Drive/Persian_VQA/'
# NOTE(review): the question file name uses an underscore ("train_questions")
# while the annotation file uses a hyphen ("train-annotations") -- confirm both
# match the actual file names on Drive.
QUESTION_TRAIN_PATH =   os.path.join(BASE_PATH, 'google-train_questions.json')
ANNOTATION_TRAIN_PATH = os.path.join(BASE_PATH, 'google-train-annotations.json')
# Folder that read_image_paths() scans for the training .jpg files.
IMAGE_TRAIN_PATH = os.path.join(BASE_PATH, 'train_images_1000')

# TODO: '...' is a placeholder -- the validation file names are not filled in yet.
QUESTION_VAL_PATH =   os.path.join(BASE_PATH, '...')
ANNOTATION_VAL_PATH = os.path.join(BASE_PATH, '...')

### Reading Data

In [0]:
def read_image_paths(dir_path):
  """Map image ids to the paths of the ``.jpg`` files found in ``dir_path``.

  The id is parsed from the digit runs in each file name. When a name holds
  two or more digit runs the second one is used (presumably COCO-style names
  such as ``COCO_train2014_000000487025.jpg`` -- TODO confirm); when it holds
  exactly one, that run is used. ``.jpg`` files without any digits cannot be
  assigned an id and are skipped.

  Args:
    dir_path: Directory to scan (non-recursive).

  Returns:
    dict mapping ``int`` image id -> full file path.
  """
  ims = {}

  # Sorted so the mapping is built in a reproducible order; os.listdir() makes
  # no ordering guarantee.
  for filename in sorted(os.listdir(dir_path)):
    if not filename.endswith('.jpg'):
      continue

    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    digit_runs = re.findall(r'\d+', filename)
    if not digit_runs:
      continue

    # Keep the original "second run" rule, but fall back to the only run
    # instead of raising IndexError.
    id_text = digit_runs[1] if len(digit_runs) > 1 else digit_runs[0]
    ims[int(id_text)] = os.path.join(dir_path, filename)

  return ims

In [0]:
def load_and_proccess_image(image_path):
  """Load the image stored at ``image_path`` and return it converted by ``img_to_array``."""
  loaded_image = load_img(image_path)
  return img_to_array(loaded_image)

In [0]:
def read_images(paths):
  """Load every image listed in ``paths``.

  Args:
    paths: dict mapping image id -> image file path.

  Returns:
    dict with the same keys, each mapped to the result of
    ``load_and_proccess_image`` for its path.
  """
  return {
      image_id: load_and_proccess_image(image_path)
      for image_id, image_path in paths.items()
  }

In [0]:
def get_train_image_paths():
  """Return the ``{image_id: path}`` mapping for the folder ``IMAGE_TRAIN_PATH``."""
  train_paths = read_image_paths(IMAGE_TRAIN_PATH)
  return train_paths

In [14]:
# Load every training image into memory, keyed by image id, and print how many
# were read as a quick sanity check.
train_ims = read_images(get_train_image_paths())
print(len(train_ims))

1000


In [0]:
# Peek at the first training question to see the record layout.
# The questions are Persian text, so the encoding is given explicitly instead
# of relying on the platform default.
with open(QUESTION_TRAIN_PATH, encoding='utf-8') as json_file:
    data = json.load(json_file)
data['questions'][0]

{'image_id': 487025,
 'question': 'صندلی نیمکت چه شکلی است؟',
 'question_id': 4870250}

In [0]:
# Peek at the first training annotation to see the record layout.
# The answers are Persian text, so the encoding is given explicitly instead
# of relying on the platform default.
# NOTE: this rebinds `data`, replacing the questions loaded in the previous cell.
with open(ANNOTATION_TRAIN_PATH, encoding='utf-8') as json_file:
    data = json.load(json_file)
data['annotations'][0]

{'answer_type': 'other',
 'answers': [{'answer': 'بیضی', 'answer_confidence': 'yes', 'answer_id': 1},
  {'answer': 'نیمه دایره', 'answer_confidence': 'yes', 'answer_id': 2},
  {'answer': 'منحنی', 'answer_confidence': 'yes', 'answer_id': 3},
  {'answer': 'منحنی', 'answer_confidence': 'yes', 'answer_id': 4},
  {'answer': 'منحنی دوتایی', 'answer_confidence': 'yes', 'answer_id': 5},
  {'answer': 'موز', 'answer_confidence': 'maybe', 'answer_id': 6},
  {'answer': 'منحنی', 'answer_confidence': 'yes', 'answer_id': 7},
  {'answer': 'موج دار', 'answer_confidence': 'yes', 'answer_id': 8},
  {'answer': 'پیچش', 'answer_confidence': 'no', 'answer_id': 9},
  {'answer': 'منحنی', 'answer_confidence': 'maybe', 'answer_id': 10}],
 'image_id': 487025,
 'multiple_choice_answer': 'curved',
 'question_id': 4870250,
 'question_type': 'what'}

### Build Vanilla Network

In [0]:
def img_model(img_input):
  """Image branch: project the image input through a 1024-unit tanh Dense layer.

  Args:
    img_input: Keras tensor fed to the Dense layer; presumably a 4096-d image
      feature vector, matching ``input_dim = 4096`` -- TODO confirm.

  Returns:
    The output tensor of the Dense layer.
  """
  x = Dense(1024, activation='tanh', input_dim = 4096)(img_input)

  # Return the projected tensor; the previous `return model` referenced an
  # undefined name and raised NameError.
  return x


In [0]:
def qs_model(qs_input, num_words, embedding_dim, dropout_rate):
  """Question branch: Embedding -> LSTM -> Dropout -> LSTM -> Dropout -> Dense.

  Args:
    qs_input: Keras tensor fed to the Embedding layer.
    num_words: First argument of the Embedding layer (vocabulary size).
    embedding_dim: Second argument of the Embedding layer (vector size).
    dropout_rate: Rate used by both Dropout layers.

  Returns:
    The output tensor of the final 1024-unit tanh Dense layer.
  """
  # Layers are listed in the order they are applied.
  layer_stack = [
      Embedding(num_words, embedding_dim),
      # First LSTM keeps the whole sequence so the second LSTM can consume it.
      LSTM(units=512, return_sequences=True, input_shape= (None,embedding_dim)),
      Dropout(dropout_rate),
      # Second LSTM returns only its final output.
      LSTM(units=512, return_sequences=False),
      Dropout(dropout_rate),
      Dense(1024, activation='tanh'),
  ]

  features = qs_input
  for layer in layer_stack:
    features = layer(features)

  return features

In [0]:
def vanilla(num_classes, dropout_rate, num_words, embedding_dim):
  """Assemble the vanilla VQA classifier.

  The image branch (``img_model``) and the question branch (``qs_model``) are
  fused by element-wise multiplication, then passed through
  Dropout -> Dense(1000, tanh) -> Dropout -> Dense(num_classes, softmax).

  Args:
    num_classes: Number of units of the softmax output layer.
    dropout_rate: Rate for every Dropout layer, including those in ``qs_model``.
    num_words: Forwarded to ``qs_model``.
    embedding_dim: Forwarded to ``qs_model``.

  Returns:
    A Keras ``Model`` with inputs ``[qs_input, img_input]``.
  """
  # TODO: both inputs are still `...` placeholders; they must be replaced by
  # real Keras input tensors before this function can build a model.
  qs_input = ...
  img_input = ...

  CNN_model = img_model(img_input)
  LSTM_model = qs_model(qs_input, num_words, embedding_dim, dropout_rate)

  # Multiply is a layer: instantiate it first, then call it on the list of
  # tensors. Passing the list to the constructor does not merge anything.
  x = Multiply()([CNN_model, LSTM_model])
  x = Dropout(dropout_rate)(x)
  x = Dense(1000, activation='tanh')(x)
  x = Dropout(dropout_rate)(x)
  output = Dense(num_classes, activation='softmax')(x)

  model = Model(inputs= [qs_input, img_input], outputs= output)
  return model

### Train

In [0]:
def Train():
  """Build the vanilla model, compile it and fit it on the training data.

  Uses the notebook-level constants DROPOUT_RATE, EMBEDDING_DIM, EPOCHS and
  BATCH_SIZE. ``x_train`` and ``y_train`` are read from the enclosing scope;
  they are not defined in the visible cells -- TODO confirm where they are
  prepared.
  """
  # TODO: the three values below are still `...` placeholders.
  num_classes = ...
  num_words = ...
  checkpointer = ModelCheckpoint(...)

  model = vanilla(num_classes, DROPOUT_RATE, num_words, EMBEDDING_DIM)
  # Fixed typos: `comile` -> `compile`, `modle` -> `model`.
  model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
  model.fit(x_train,y_train, epochs = EPOCHS, batch_size = BATCH_SIZE, callbacks = [checkpointer])


### Evaluate

In [0]:
def Evaluation():
  