In [1]:
import io
import h5py
import json
import torch
from matplotlib import pyplot
import PIL.Image
import numpy as np
import string

from collections import defaultdict as ddict


%matplotlib inline

In [2]:
data_root = '/data/milatmp1/bahdanau/flatqa-letters/long-tail/'
model_output_path = '/u/murtyjay/nmn-iwp/outputs/output_mac4.h5'

def parse_dat(data_root, part):
  features_path = data_root + part + 'features.h5'
  with h5py.File(features_path) as src:
    features = src['features'][:]
    
  data_path = data_root + part + 'questions.h5'
  with h5py.File(data_path) as src:
    questions = src['questions'][:]
    answers = src['answers'][:]
    image_idxs = src['image_idxs'][:]
  
  with open(data_root + 'vocab.json') as src:
    vocab = json.load(src)
    question_idx_to_token = {v: k for k, v in vocab['question_token_to_idx'].items()}
    answer_idx_to_token = {v: k for k, v in vocab['answer_token_to_idx'].items()}  
  return features, questions, answers, image_idxs, vocab, question_idx_to_token, answer_idx_to_token

def get_question_distribution(train_questions):
  relation_count = ddict(int)
  object_count = ddict(int)
  object_pairwise_count = ddict(int)

  for question in train_questions:
    relation_count[question[5]] += 1.0
    object_count[ question[4] ] += 1.0
    object_count[question[7]] += 1.0
    object_pairwise_count[(question[4], question[7])] += 1.0

  return relation_count, object_count, object_pairwise_count

train_features, train_questions, train_answers, train_image_idxs, vocab, question_idx_to_token, answer_idx_to_token = parse_dat(data_root, 'train_')
relation_count, object_count, object_pairwise_count = get_question_distribution(train_questions)

In [3]:
def pprint_question(questions, vocab):
  for q in questions:
    print(' '.join([vocab[idx] for idx in q]))

print(pprint_question(train_questions[:10], question_idx_to_token))
print(relation_count)




def print_image(img, features):
  image = np.array(PIL.Image.open(io.BytesIO(features[img]) ))
  pyplot.figure(figsize=(5, 5))
  pyplot.imshow(image, origin='lower')
  pyplot.show()

    
      


def get_confusion_matrix(model_path, data_root, correct = None):

  if correct is None:
    with h5py.File(model_path) as src:
      correct = src['correct'].value
    
  features, questions, answers, image_idxs, vocab, question_idx_to_token, answer_idx_to_token = parse_dat(data_root,'val_')
  confusion = {'TP' : 0.0, 'FP' : 0.0, 'FN' : 0.0, 'TN' : 0.0}
  acc = 0.0
  for i in range(1000):

    if answer_idx_to_token[answers[i]] == 'false':
      confusion['TN'] += 1.0
      if not correct[i]: confusion['FP'] += 1.0 
          
    else:
      confusion['TP'] += 1.0
      if not correct[i]: 
        confusion['FN'] += 1.0
        #print_image(image_idxs[i])
        #question_tokens = ' '.join([question_idx_to_token[idx] for idx in questions[i]])
        #print(question_tokens,  answer_idx_to_token[answers[i]])
  
        
    if correct[i]: 
      acc += 1.0
      continue

  return confusion


def get_pr(confusion_matrix):
    p = confusion_matrix['TP'] / (confusion_matrix['TP'] + confusion_matrix['FP'])
    r = confusion_matrix['TP'] / (confusion_matrix['TP'] + confusion_matrix['FN'])
    return p, r

is there a blue D right_of blue A
is there a blue D below purple A
is there a yellow A left_of purple C
is there a gray A left_of purple B
is there a purple N below yellow C
is there a brown C right_of purple A
is there a green E below purple D
is there a purple D right_of yellow F
is there a yellow D below gray C
is there a red F left_of yellow D
None
defaultdict(<class 'int'>, {40: 250051.0, 41: 250109.0, 42: 249960.0, 43: 249880.0})


In [4]:
# CONFUSION MATRIX ANALYSIS

model_output_path = '/u/murtyjay/nmn-iwp/outputs/output_film'

for i in range(1, 6):
    print(i)
    file="%s%s.h5" %(model_output_path, i)
    print(file)
    cm = get_confusion_matrix(file , data_root )
    print(get_pr(cm))




1
/u/murtyjay/nmn-iwp/outputs/output_film1.h5
(0.9191176470588235, 0.9861932938856016)
2
/u/murtyjay/nmn-iwp/outputs/output_film2.h5
(0.9057971014492754, 0.9823182711198428)
3
/u/murtyjay/nmn-iwp/outputs/output_film3.h5
(0.9174311926605505, 0.9900990099009901)
4
/u/murtyjay/nmn-iwp/outputs/output_film4.h5
(0.9276437847866419, 0.9920634920634921)
5
/u/murtyjay/nmn-iwp/outputs/output_film5.h5
(0.9157509157509157, 0.9940357852882704)


In [6]:
# identify answers based on just color information

freq_letter = lambda tok : question_idx_to_token[tok] in list(string.ascii_uppercase)[:6]
rare_letter = lambda tok : not freq_letter(tok) 


def read_json(data_root, part):
  json_data=open('%s/%s_scenes.json' %(data_root, part)).read()
  data = json.loads(json_data)
  return data


class ColorOnlyBaseline(object):
    def fit(self, data_root):
      self.json_reps = {}
      self.json_reps['train'] = read_json(data_root, 'train')
      self.json_reps['test']  = read_json(data_root, 'test')
      self.json_reps['val']   = read_json(data_root, 'val')
    
    def classify(self, question, idx, split):
      curr_img_colors = [obj['color'] for obj in self.json_reps[split][idx]]
      colors = question_idx_to_token[question[3]], question_idx_to_token[question[6]]
   
      return colors[0] in curr_img_colors and colors[1] in curr_img_colors   
    
class ColorAndShapeBaseline(object):
    def fit(self, data_root):
      self.json_reps = {}
      self.json_reps['train'] = read_json(data_root, 'train')
      self.json_reps['test']  = read_json(data_root, 'test')
      self.json_reps['val']   = read_json(data_root, 'val')
    
    def classify(self, question, idx, split):
      curr_img_colAndshape = [(obj['color'], obj['shape']) for obj in self.json_reps[split][idx]]
      lobj = (question_idx_to_token[question[3]], question_idx_to_token[question[4]])
      robj = (question_idx_to_token[question[6]], question_idx_to_token[question[7]])
        
      return lobj in curr_img_colAndshape and robj in curr_img_colAndshape

class ColorAndShapeTypeBaseline(object):
    def fit(self, data_root):
      self.json_reps = {}
      self.json_reps['train'] = read_json(data_root, 'train')
      self.json_reps['test']  = read_json(data_root, 'test')
      self.json_reps['val']   = read_json(data_root, 'val')
    
    def classify(self, question, idx, split):
      curr_img_colAndshape = [(obj['color'], obj['shape'] in list(string.ascii_uppercase)[:6] ) for obj in self.json_reps[split][idx]]
      lobj = (question_idx_to_token[question[3]], freq_letter(question[4]) )
      robj = (question_idx_to_token[question[6]], freq_letter(question[7]) )
        
      return lobj in curr_img_colAndshape and robj in curr_img_colAndshape

color_only_bot = ColorAndShapeBaseline()
color_only_bot.fit(data_root)
features, questions, answers, image_idxs, vocab, question_idx_to_token, answer_idx_to_token = parse_dat(data_root,'val_')


colorOnlyBaseline_correct = [color_only_bot.classify(question, _id,  'val') == answers[_id] for (_id, question) in enumerate(questions[:1000])]
print(len(colorOnlyBaseline_correct))
acc = sum(colorOnlyBaseline_correct) / 1000.0
print(acc)
hard_questions = [idx for idx in range(1000) if not colorOnlyBaseline_correct[idx]]
#print(get_confusion_matrix(None, data_root, colorOnlyBaseline_correct))

1000
0.886


In [29]:
# OOV analysis 



freq_freq_factor = lambda question, img :  freq_letter(question[4])  and freq_letter(question[7])
rare_rare_factor = lambda question, img :  rare_letter(question[4]) and rare_letter(question[7])
freq_rare_factor = lambda question, img : (rare_letter(question[4]) and freq_letter(question[7])) or (freq_letter(question[4]) and rare_letter(question[7])) 

model_output_path = '/u/murtyjay/nmn-iwp/outputs/output_ee'


def get_incorrect_questions(model_path, data_root):
  with h5py.File(model_path) as src:
    correct = src['correct'].value
  incorrect_ques = []
  features, questions, answers, image_idxs, vocab, question_idx_to_token, answer_idx_to_token = parse_dat(data_root,'val_')

  for i in range(1000):
    if not correct[i]:
        ques = questions[i]
        
    
    
def get_factor_posterior(model_path, data_root, factor):
  with h5py.File(model_path) as src:
    correct = src['correct'].value
    
  features, questions, answers, image_idxs, vocab, question_idx_to_token, answer_idx_to_token = parse_dat(data_root,'val_')
  factor_and_incorrect = 0.0
  factor_and_correct = 0.0
  prob_factor = 0.0

  for i in range(1000): 
    question = questions[i]
    factor_val = factor(question, image_idxs[i])
    
    prob_factor += factor_val
    if correct[i]: factor_and_correct += factor_val
    else: factor_and_incorrect += factor_val
    
  prob_factor_given_correct   = factor_and_correct   / sum(correct)
  prob_factor_given_incorrect = factor_and_incorrect / (1000.0 - sum(correct))
  prob_factor = prob_factor / 1000.0
  return prob_factor_given_correct ,  prob_factor_given_incorrect , prob_factor

def get_correct_given_factor(model_path, data_root, factor):
  with h5py.File(model_path) as src:
    correct = src['correct'].value
    
  features, questions, answers, image_idxs, vocab, question_idx_to_token, answer_idx_to_token = parse_dat(data_root,'val_')
  factor_true_dat = 0.0
  factor_total = 0.0
  factor_true_list = []
  acc = []
    
    
    
  for i in range(1000): 
    question = questions[i]
    acc.append( correct[i])
    factor_val = factor(question, image_idxs[i])
    if factor_val:
        factor_true_dat += correct[i]
        factor_total += 1.0
        factor_true_list.append(i)
    

  return factor_true_dat / factor_total , 1.0*sum(acc)/len(acc), factor_true_list



def get_factor_samples(model_path, data_root, factor_list):
  with h5py.File(model_path) as src:
    correct = src['correct'].value
    
  features, questions, answers, image_idxs, vocab, question_idx_to_token, answer_idx_to_token = parse_dat(data_root,'val_')

  confusion = {'TP' : 0.0, 'FP' : 0.0, 'FN' : 0.0, 'TN' : 0.0}
        
  for _id in factor_list:
    question = questions[_id]
    if answer_idx_to_token[answers[_id]] == 'false':
      confusion['TN'] += 1.0
      if not correct[_id]: confusion['FP'] += 1.0 
          
    else:
      confusion['TP'] += 1.0
      if not correct[_id]: 
        confusion['FN'] += 1.0
        
    if correct[_id]:
      print(' '.join([question_idx_to_token[idx] for idx in question]), bool(answers[_id]))
      print_image(image_idxs[_id], features)

  print(confusion)

from collections import Counter



avg_condtioned_accuracy = 0.0
avg_overall_accuracy = []
for i in range(1, 6):
    file="%s%s.h5" %(model_output_path, i)
    #prob_factor_given_correct , prob_factor_given_incorrect , prob_factor  = get_factor_posterior(file , data_root, rare_rare_factor )
  
    prob_correct_given_factor, prob_correct, factor_list = get_correct_given_factor(file, data_root, rare_rare_factor)
    #get_factor_samples(file, data_root, factor_list)
    #print(prob_factor, prob_factor_given_incorrect, prob_factor - prob_factor_given_correct,  prob_factor - prob_factor_given_incorrect)
    
    avg_condtioned_accuracy += prob_correct_given_factor
    avg_overall_accuracy .append( prob_correct)
    print(prob_correct)

print(np.std(avg_overall_accuracy)*100.0)
print(avg_condtioned_accuracy/5, 1.0*sum(avg_overall_accuracy)/len(avg_overall_accuracy))

0.963
0.969
0.963
0.955
0.955
0.53665631459995
0.9525368248772506 0.961


In [8]:
6. * 6 / 26 / 26

0.05325443786982249