In [2]:
from PrepareOriginalData import PrepareData
import numpy as np

# Run configuration shared by the cells below
taskType = 'all'    # VQA task to load: 'OpenEnded', 'MultipleChoice' or 'all'
data_amount = 1     # forwarded as cut_data: 1 = all values, above 1 = #samples (debugging)
epochs = 50         # number of training epochs handed to the network constructor

# Restrict this run to a single question type
question_type = 'yes/no'

# Build the loader for the training split
p = PrepareData(
    path_images='data_vqa_feat',    # location of the image features
    subset='train2014',             # either train2014 or val2014
    taskType=taskType,
    cut_data=data_amount,
    output_path='data',             # where temporary data is written
    pad_length=32,                  # words per question (zero padded)
    question_threshold=0,           # thresholds: keep only the most common words
    answer_threshold=0,
    answers_sparse=True,
    questions_sparse=True,
    answer_type=question_type,
)
image_features, questions, answers, annotations = p.load_data()

# Summarise what was loaded
print("Image features", image_features.shape)
print("Question features", questions.shape)
print("Answers", answers.shape)
print("Dictionary size", p.dic_size)
# Labels are integer class ids, so the class count is the largest id plus one
print("Number of possible classes", answers.max() + 1)

# Write the dictionary out; the validation cell loads 'data/dictionary_yes_no.pkl'
p.dumpDictionary('dictionary_yes_no')

loading VQA annotations and questions into memory...
0:00:05.061790
creating index...
index created!
loading VQA annotations and questions into memory...
0:00:07.255484
creating index...
index created!
Image features (190604, 1024)
Question features (190604, 32)
Answers (190604, 1)
Dictionary size 10867
Number of possible classes 5


In [3]:
# Inspect the answer labels returned by p.load_data(): a column of integer class ids
answers

array([[4],
       [3],
       [3],
       ..., 
       [3],
       [3],
       [3]])

In [None]:
from NeuralNetworkYesNo import NeuralNetwork
# Network configured for the sparse (integer-label) representation,
# hence the sparse_categorical_crossentropy loss.
neuralnet = NeuralNetwork(
    image_features.shape[0],    # number of loaded samples
    1024,                       # equals image_features.shape[1] in the run above — presumably the image feature size; confirm against NeuralNetwork
    questions.shape[1],         # padded question length
    p.dic_size,                 # dictionary size reported by the loader
    np.max(answers) + 1,        # largest label id + 1, i.e. the class count
    epochs=epochs,
    batchSize=512,
    loss='sparse_categorical_crossentropy',
)

In [None]:
# Train network
# Fits on whatever image_features / questions / answers are currently bound to.
# NOTE(review): the validation-loading cell further down rebinds these same names,
# so this cell must be run before that one to train on the training split.
neuralnet.fit(image_features, questions, answers)

In [None]:
# Rebind the training arrays to an empty list before loading the validation split
# (presumably to let the training data be garbage-collected — confirm).
image_features = questions = answers = annotations = []
question_type = 'yes/no'

# Loader for the validation split, sharing the question dictionary built during training
pt = PrepareData(
    path_images='data_vqa_feat',    # location of the image features
    subset='val2014',               # either train2014 or val2014
    taskType=taskType,              # 'OpenEnded', 'MultipleChoice', 'all'
    cut_data=data_amount,           # 1 = all values, above 1 = #samples (debugging)
    output_path='data',             # where temporary data is written
    pad_length=32,                  # words per question (zero padded)
    question_threshold=0,           # thresholds: keep only the most common words
    answer_threshold=0,
    answers_sparse=True,
    questions_sparse=True,
    answer_type=question_type,
    precomputed_dic=p._question_dict,
)
# Use the same dictionary as in training
pt.loadDictionary('data/dictionary_yes_no.pkl')

# From here on these names refer to validation data
image_features, questions, answers, annotations = pt.load_data()
print("Image features", image_features.shape)
print("Question features", questions.shape)
print("Dictionary size", pt.dic_size)

In [None]:
# Predict
# Runs the trained network on the image features and questions currently in scope
# (the validation split, when the validation-loading cell was the last loader run).
pred = neuralnet.predict_current_state(image_features, questions)
# Bare last expression: displays the shape of the predictions as the cell output
pred.shape

In [None]:
from sklearn.metrics import accuracy_score
# TODO: can probably still improve this accuracy
# accuracy_score is documented as accuracy_score(y_true, y_pred): ground truth first,
# predictions second. Passing both by keyword keeps the roles explicit; the value is
# unchanged for accuracy, but the order matters if this is swapped for an asymmetric
# metric (precision, recall, confusion matrix, ...).
print('Answer type classification accuracy:', accuracy_score(y_true=answers, y_pred=pred))