In [3]:
import os
import sys
import json
import pickle

import nltk
from tqdm.autonotebook import tqdm
from PIL import Image

def process_question(root, split, word_dic=None, answer_dic=None):
    if word_dic is None:
        word_dic = {}

    if answer_dic is None:
        answer_dic = {}

    with open(os.path.join(root, 'questions', 'CLEVR_{}_questions.json'.format(split))) as f:
        data = json.load(f)

    result = []
    word_index = 1
    answer_index = 0

    for question in tqdm(data['questions']):
        words = nltk.word_tokenize(question['question'])
        question_token = []

        for word in words:
            try:
                question_token.append(word_dic[word])

            except:
                question_token.append(word_index)
                word_dic[word] = word_index
                word_index += 1

        answer_word = question['answer']

        try:
            answer = answer_dic[answer_word]

        except:
            answer = answer_index
            answer_dic[answer_word] = answer_index
            answer_index += 1

        result.append((question['image_filename'], question_token, answer, question['question_family_index']))

    with open(os.path.join(root, 'data/{}.pkl'.format(split)), 'wb') as f:
        pickle.dump(result, f)

    return word_dic, answer_dic

# if __name__ == '__main__':
#    root = sys.argv[1]
#
#    word_dic, answer_dic = process_question(root, 'train')
#    process_question(root, 'val', word_dic, answer_dic)
#
#    with open('../data/dic.pkl', 'wb') as f:
#        pickle.dump({'word_dic': word_dic, 'answer_dic': answer_dic}, f)



In [168]:
with open('/Users/sebamenabar/Documents/datasets/CLEVR/CLEVR_v1.0/data/dic.pkl', 'rb') as f:
    original_dic = pickle.load(f)
    
word_dic = original_dic['word_dic']
answer_dic = original_dic['answer_dic']

In [171]:
root = '/Users/sebamenabar/Documents/datasets/CLEVR/CLEVR_CoGenT_v1.0/'
split = 'trainA'
word_dic, answer_dic = process_question(root, split, word_dic=word_dic, answer_dic=answer_dic)

HBox(children=(IntProgress(value=0, max=699960), HTML(value='')))

In [169]:
root = '/Users/sebamenabar/Documents/datasets/CLEVR/CLEVR_CoGenT_v1.0/'
split = 'valA'
process_question(root, split, word_dic=word_dic, answer_dic=answer_dic);

HBox(children=(IntProgress(value=0, max=150000), HTML(value='')))

In [170]:
root = '/Users/sebamenabar/Documents/datasets/CLEVR/CLEVR_CoGenT_v1.0/'
split = 'valB'
process_question(root, split, word_dic=word_dic, answer_dic=answer_dic);

HBox(children=(IntProgress(value=0, max=149991), HTML(value='')))

In [5]:
root = '/Users/sebamenabar/Documents/datasets/CLEVR/CLEVR_CoGenT_v1.0/'
split = 'trainA'
with open(os.path.join(root, 'data/dic.pkl'), 'wb') as f:
    pickle.dump({'word_dic': word_dic, 'answer_dic': answer_dic}, f)

In [11]:
with open(os.path.join(root, f'data/{split}.pkl'), 'rb') as f:
    questions = pickle.load(f)

In [13]:
len(questions)

149991

In [7]:
import numpy as np
import random

def make_sample(root, split, n_samples):
    with open(os.path.join(root, f'data/{split}.pkl'), 'rb') as f:
        questions = pickle.load(f)
       
    sample = random.sample(questions, k=n_samples)
    
    with open(os.path.join(root, f'data/{split}_sample.pkl'), 'wb') as f:
        pickle.dump(sample, f)
        
    return sample

In [8]:
root = '/Users/sebamenabar/Documents/datasets/CLEVR/CLEVR_v1.0/'
make_sample(root, 'train', 256);

In [9]:
make_sample(root, 'val', 128);

In [10]:
root = '/Users/sebamenabar/Documents/datasets/CLEVR/CLEVR_CoGenT_v1.0/'
make_sample(root, 'trainA', 256);
make_sample(root, 'valA', 128);
make_sample(root, 'valB', 128);

In [33]:
split = 'valA_sample'
with open(os.path.join(root, f'data/{split}.pkl'), 'rb') as f:
    questions = pickle.load(f)
    
print(questions)

[('CLEVR_valA_011712.png', [59, 2, 47, 10, 13, 43, 6, 49, 8, 9, 10, 2, 45, 21, 89, 77, 2, 47, 10, 69, 78, 79, 22], 0, 5)]


In [46]:
%load_ext autoreload
%autoreload 2

In [47]:
import os
import sys

sys.path.insert(0, 'code')

In [88]:
import torch
from torch.utils.data import DataLoader
from torchsummaryX import summary

from tqdm.autonotebook import tqdm
from easydict import EasyDict as edict

from mac import MACNetwork
from utils import load_vocab
from datasets import ClevrDataset, collate_fn

In [54]:
cfg = edict({
    'GPU_ID': '-1',
    'CUDA': False,
    'WORKERS': 4,
    'TRAIN': {'FLAG': True,
    'LEARNING_RATE': 0.0001,
    'BATCH_SIZE': 8,
    'MAX_EPOCHS': 25,
    'SNAPSHOT_INTERVAL': 5,
    'WEIGHT_INIT': 'xavier_uniform',
    'CLIP_GRADS': True,
    'CLIP': 8,
    # 'MAX_STEPS': 4,
    'EALRY_STOPPING': True,
    'PATIENCE': 5,
    'VAR_DROPOUT': False},
    'DATASET': {
        # 'DATA_DIR': '/mnt/nas2/GrimaRepo/datasets/CLEVR_v1.0/features',
        'DATA_DIR': '/Users/sebamenabar/Documents/datasets/CLEVR/CLEVR_v1.0/data',
    },
    'model': {
        'max_step': 4,
        'separate_syntax_semantics': False,
        'common': {
            'module_dim': 256,
        },
        'input_unit': {
            'wordvec_dim': 256,
            'rnn_dim': 256, 
            'bidirectional': True,
            'separate_syntax_semantics_embeddings': False,
        },
        'control_unit': {
        },
        'read_unit': {},
        'write_unit': {
            'rtom': True,
        },
        'output_unit': {},
    }
})

vocab = load_vocab(cfg)

In [58]:
cogent = ''
ds = ClevrDataset(
    data_dir=cfg.DATASET.DATA_DIR,
    split='val' + cogent,
    sample=True,
)

In [73]:
model = MACNetwork(cfg=cfg, vocab=vocab)
state = torch.load('data/big_bsz_adam_baseline/Model/model_checkpoint_000025.pth', map_location='cpu')
print(model.load_state_dict(state['model']))

model_ema = MACNetwork(cfg=cfg, vocab=vocab)
state = torch.load('data/big_bsz_adam_baseline/Model/model_ema_checkpoint_000025.pth', map_location='cpu')
model_ema.load_state_dict(state['model'])

<All keys matched successfully>


<All keys matched successfully>

In [76]:
bsz = 16
loader = DataLoader(
    dataset=ds, batch_size=bsz, drop_last=False,
    shuffle=False, num_workers=cfg.WORKERS, collate_fn=collate_fn,
)

In [90]:
def calc_accuracy(model, model_ema, loader, mode='val', max_samples=None):

    if mode == 'train':
        model.train()
        model_ema.train()
    else:
        model.eval()
        model_ema.eval()
    # else:
    #     loader = self.dataloader_val

    total_correct = 0
    total_correct_ema = 0
    total_samples = 0
    # all_accuracies_ema = []
    pbar = tqdm(loader, total=len(loader), desc=mode.upper())
    for data in pbar:
        image, question, question_len, answer = data['image'], data['question'], data['question_length'], data['answer']
        answer = answer.long()
        # question = Variable(question)
        # answer = Variable(answer)

        # if self.cfg.CUDA:
        #    image = image.cuda()
        #    question = question.cuda()
        #    answer = answer.cuda().squeeze()

        with torch.no_grad():
            scores = model(image, question, question_len)
            scores_ema = model_ema(image, question, question_len)

        correct_ema = scores_ema.detach().argmax(1) == answer
        total_correct_ema += correct_ema.sum().cpu().item()

        correct = scores.detach().argmax(1) == answer
        total_correct += correct.sum().cpu().item()
        total_samples += answer.size(0)

        # print('total_correct', total_correct, 'total_correct_ema', total_correct_ema, 'total_samples', total_samples)

        # pbar.set_description(
        #     'Avg Acc: {:.5f}; Avg Acc: {:.5f}'.format(total_correct / total_samples, total_correct_ema / total_samples)
        # )
        pbar.set_postfix({
            'Acc': f'{total_correct / total_samples:.5f}',
            'Ema Acc': f'{total_correct_ema / total_samples:.5f}',
        })

    accuracy_ema = total_correct_ema / total_samples
    accuracy = total_correct / total_samples

    return accuracy, accuracy_ema

In [160]:
bsz = 16
loader = DataLoader(
    dataset=ds, batch_size=bsz, drop_last=False,
    shuffle=True, num_workers=1, collate_fn=collate_fn,
)
for _ in range(5):
    print(calc_accuracy(model, model_ema, loader))

HBox(children=(IntProgress(value=0, description='VAL', max=8, style=ProgressStyle(description_width='initial')…

(0.984375, 0.984375)


HBox(children=(IntProgress(value=0, description='VAL', max=8, style=ProgressStyle(description_width='initial')…

(0.984375, 0.984375)


HBox(children=(IntProgress(value=0, description='VAL', max=8, style=ProgressStyle(description_width='initial')…

(0.984375, 0.984375)


HBox(children=(IntProgress(value=0, description='VAL', max=8, style=ProgressStyle(description_width='initial')…

(0.984375, 0.984375)


HBox(children=(IntProgress(value=0, description='VAL', max=8, style=ProgressStyle(description_width='initial')…

(0.984375, 0.984375)


In [129]:
(torch.tensor([0]) == torch.tensor([1])).all()

tensor(False)

In [152]:
prev = torch.FloatTensor([0])
for _ in tqdm(range(100)):
    bsz = 1
    loader = DataLoader(
        dataset=ds, batch_size=bsz, drop_last=False,
        shuffle=False, num_workers=2, collate_fn=collate_fn,
    )
    b = next(iter(loader))
    model.eval()
    with torch.no_grad():
        res = model(b['image'], b['question'], b['question_length'])
        if (prev != res).any():
            print('DISTINTO')
            print(prev)
            print(res)
        prev = res

HBox(children=(IntProgress(value=0), HTML(value='')))

DISTINTO
tensor([0.])
tensor([[ 29.5146,   2.1618,  21.1591,  -2.2148,   0.6288, -12.3224,   4.7054,
         -10.7861,  -5.4904,  -0.3733, -10.8270,  -3.6604, -10.6570, -12.3457,
          -4.9649, -11.6541, -15.1843,   4.7506,  -1.4528,  -8.6764, -10.4358,
         -22.4362, -14.3401, -16.4160, -32.7953, -30.9391, -35.0976, -25.7792]])


In [120]:
bsz = 16
loader = DataLoader(
    dataset=ds, batch_size=bsz, drop_last=False,
    shuffle=False, num_workers=cfg.WORKERS, collate_fn=collate_fn,
)
b = next(iter(loader))
summary(model, b['image'], b['question'], b['question_length'])

                                              Kernel Shape  \
Layer                                                        
0_input_unit.stem.Dropout_0                              -   
1_input_unit.stem.Conv2d_1               [1024, 256, 3, 3]   
2_input_unit.stem.ELU_2                                  -   
3_input_unit.stem.Dropout_3                              -   
4_input_unit.stem.Conv2d_4                [256, 256, 3, 3]   
5_input_unit.stem.ELU_5                                  -   
6_input_unit.Embedding_encoder_embed             [256, 90]   
7_input_unit.Dropout_embedding_dropout                   -   
8_input_unit.LSTM_encoder                                -   
9_input_unit.Dropout_question_dropout                    -   
10_mac.control.control_input_u.Linear_0         [256, 256]   
11_mac.control.Linear_attn                        [256, 1]   
12_mac.read.Dropout_dropout                              -   
13_mac.read.Dropout_dropout                              -   
14_mac.r

Unnamed: 0_level_0,Kernel Shape,Output Shape,Params,Mult-Adds
Layer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0_input_unit.stem.Dropout_0,-,"[16, 1024, 14, 14]",,
1_input_unit.stem.Conv2d_1,"[1024, 256, 3, 3]","[16, 256, 14, 14]",2359552.0,462422016.0
2_input_unit.stem.ELU_2,-,"[16, 256, 14, 14]",,
3_input_unit.stem.Dropout_3,-,"[16, 256, 14, 14]",,
4_input_unit.stem.Conv2d_4,"[256, 256, 3, 3]","[16, 256, 14, 14]",590080.0,115605504.0
5_input_unit.stem.ELU_5,-,"[16, 256, 14, 14]",,
6_input_unit.Embedding_encoder_embed,"[256, 90]","[16, 35, 256]",23040.0,23040.0
7_input_unit.Dropout_embedding_dropout,-,"[16, 35, 256]",,
8_input_unit.LSTM_encoder,-,"[323, 256]",395264.0,393216.0
9_input_unit.Dropout_question_dropout,-,"[16, 256]",,


In [1]:
import h5py

In [2]:
split = 'val'
with h5py.File(f'/Users/sebamenabar/Documents/datasets/CLEVR/CLEVR_v1.0/data/{split}_features.hdf5', 'w') as f:
    f['data'] = f['features']
    del f['features']
    # print(f['features'].move)

OSError: Unable to create file (unable to lock file, errno = 35, error message = 'Resource temporarily unavailable')

In [191]:
with h5py.File(f'/Users/sebamenabar/Documents/datasets/CLEVR/CLEVR_v1.0/data/test_features.hdf5', 'r') as f:
    print(len(f['data']))
    print((f['data'][-1]))    

15000
[[[0.         0.05087278 0.         ... 0.         0.         0.        ]
  [0.01763823 0.5509663  0.02942866 ... 0.12289741 0.40298975 0.5492783 ]
  [0.         0.03658335 0.         ... 0.         0.         0.05156684]
  ...
  [0.         0.33695713 0.         ... 0.         0.         0.13060549]
  [0.         0.19138068 0.         ... 0.         0.         0.        ]
  [0.         0.26273248 0.         ... 0.22389077 0.         0.18434143]]

 [[0.13486437 0.02146848 0.         ... 0.         0.0232568  0.17847401]
  [0.33560872 0.47945076 0.13680963 ... 0.058296   0.33762506 0.30320403]
  [0.13650647 0.03535173 0.         ... 0.         0.07748105 0.02331108]
  ...
  [0.08189411 0.18688494 0.00785208 ... 0.         0.03150245 0.11974703]
  [0.0954212  0.14652827 0.         ... 0.         0.05891643 0.14895979]
  [0.21022311 0.38624477 0.15710187 ... 0.26107907 0.20620145 0.1328    ]]

 [[0.         0.         0.         ... 0.         0.         0.        ]
  [0.         0.