In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys

sys.path.insert(0, 'code/')

In [3]:
import h5py
import numpy as np

import torch
import torch
from torch import nn
from torch.utils.data import DataLoader
import torchvision
from torchsummaryX import summary

from tqdm import tqdm_notebook as tqdm

from mac import MACNetwork
from utils import load_vocab
from datasets import ClevrDataset, collate_fn as whole_collate_fn, QOnlyDataset, qonly_collate_fn

In [30]:
cfg

{'GPU_ID': '-1',
 'CUDA': False,
 'WORKERS': 4,
 'TRAIN': {'FLAG': True,
  'LEARNING_RATE': 0.0001,
  'BATCH_SIZE': 64,
  'MAX_EPOCHS': 25,
  'SNAPSHOT_INTERVAL': 5,
  'WEIGHT_INIT': 'xavier_uniform',
  'CLIP_GRADS': True,
  'CLIP': 8,
  'MAX_STEPS': 4,
  'EALRY_STOPPING': True,
  'PATIENCE': 5,
  'VAR_DROPOUT': False},
 'DATASET': {'DATA_DIR': '/Users/sebamenabar/Documents/datasets/CLEVR/data'}}

In [4]:
from config import cfg_from_file, __C, cfg

cfg_from_file('cfg/local.yml')
__C.CUDA = False
__C.GPU_ID = '-1'
vocab = load_vocab(cfg)
# cfg.TRAIN.RECV_OBJECTS = False

  yaml_cfg = edict(yaml.load(f))


In [6]:
model = MACNetwork(cfg=cfg, max_step=4, vocab=vocab)
model.load_state_dict(torch.load('/Users/sebamenabar/Documents/vanilla_mac.pth', map_location='cpu')['model'])
# model(b['image'], b['question'], b['question_length'])

IncompatibleKeys(missing_keys=[], unexpected_keys=[])

In [7]:
ds = QOnlyDataset(
    data_dir='/Users/sebamenabar/Documents/datasets/CLEVR/data',
    # img_dir='/Users/sebamenabar/Documents/datasets/CLEVR/CLEVR_v1.0/images/',
    # scenes_json='/Users/sebamenabar/Documents/TAIA/individual/sm/data/clevr/train/scenes.json',
    # raw_image=True,
    split='train',
)

In [8]:
def forward_q(model, question, question_len):
    embed = model.input_unit.encoder_embed(question)
    embed = model.input_unit.embedding_dropout(embed)
    embed = nn.utils.rnn.pack_padded_sequence(embed, question_len, batch_first=True)

    contextual_words, (question_embedding, _) = model.input_unit.encoder(embed)
    if model.input_unit.bidirectional:
        question_embedding = torch.cat([question_embedding[0], question_embedding[1]], -1)
    question_embedding = model.input_unit.question_dropout(question_embedding)

    contextual_words, _ = nn.utils.rnn.pad_packed_sequence(contextual_words, batch_first=True)
    
    return question_embedding, contextual_words

def idxs_to_q(questions, vocab):
    return [
        ' '.join([
            vocab[idx.item()] for idx in question if idx.item() !=0
        ]) for question in questions
    ]

In [9]:
def get_cw_attention(model, question, context):
    ret = []
    _question = model.mac.control.control_input(question)
    
    for step in range(model.mac.max_step):
        control = model.mac.control.control_input_u[step](_question)
        control = torch.unsqueeze(control, 1)
        interactions = control * context

        logits = model.mac.control.attn(interactions)

        attn = torch.softmax(logits, 1)
        
        ret.append(attn)
        
    return ret

In [11]:
loader = DataLoader(dataset=ds, batch_size=32, shuffle=False,
                                       num_workers=2, drop_last=False, collate_fn=qonly_collate_fn)

In [12]:
context_words = []
question_embeddings = []
question_lengths = []

model.eval()
with torch.no_grad():
    for i, b in tqdm(enumerate(loader), total=len(loader)):
        qembs, cws = forward_q(model, b['question'], b['question_length'])
        
        context_words.append(cws)
        question_embeddings.append(qembs)
        question_lengths.append(b['question_length'])
        
        if i >= 1000:
            break

HBox(children=(IntProgress(value=0, max=21875), HTML(value='')))

In [13]:
len(context_words), len(question_embeddings)

(1001, 1001)

In [14]:
attentions = []

model.eval()
with torch.no_grad():
    for qembs, cws in tqdm(zip(question_embeddings, context_words), total=len(question_embeddings)):
        attentions.append(get_cw_attention(model, qembs, cws))

HBox(children=(IntProgress(value=0, max=1001), HTML(value='')))

In [15]:
_context_words = []
for cws in context_words:
    _context_words += [t.squeeze(0) for t in cws.split(1)]

In [16]:
_question_embeddings = []
for qembs in question_embeddings:
    _question_embeddings += [t.squeeze(0) for t in qembs.split(1)]

In [17]:
_attentions = []
for attn in attentions:
    for attn1, attn2, attn3, attn4 in zip(*attn):
        _attentions.append(torch.stack((attn1, attn2, attn3, attn4)).squeeze(2))
        # print(torch.stack((attn1, attn2, attn3, attn4)).size())
        # break
    # break

In [18]:
print(len(_context_words), len(_question_embeddings), len(_attentions))
print(_context_words[0].size(), _question_embeddings[0].size(), _attentions[0].size())

32032 32032 32032
torch.Size([30, 512]) torch.Size([512]) torch.Size([4, 30])


In [19]:
for i, (cw, qemb, attn) in enumerate(zip(_context_words, _question_embeddings, _attentions)):
    non_pad = cw.sum(dim=1) != 0
    _context_words[i] = _context_words[i][non_pad]
    _attentions[i] = _attentions[i][:, non_pad]

    # print(non_pad)

In [20]:
print(len(_context_words), len(_question_embeddings), len(_attentions))
print(_context_words[1].size(), _question_embeddings[1].size(), _attentions[1].size())

32032 32032 32032
torch.Size([29, 512]) torch.Size([512]) torch.Size([4, 29])


In [21]:
ds = list(zip(_context_words, _attentions))

In [22]:
def custom_pad(sequences, batch_first=True, padding_value=0):
    max_size = sequences[0].size()
    trailing_dims = max_size[1:]
    max_len = max([s.size(0) for s in sequences])
    
    if batch_first:
        out_dims = (len(sequences), max_len) + trailing_dims
    else:
        out_dims = (max_len, len(sequences)) + trailing_dims

    out_tensor = sequences[0].data.new(*out_dims) # .fill_(padding_value)
    for i, tensor in enumerate(sequences):
        length = tensor.size(0)
        # use index notation to prevent duplicate references to the tensor
        if batch_first:
            out_tensor[i, :length, ...] = tensor
            if length < max_len:
                out_tensor[i, length:, ...] = (1 - tensor.sum(dim=0)) / (max_len - length)
            else:
                out_tensor[i] = torch.softmax(out_tensor[i], dim=0)
        else:
            out_tensor[:length, i, ...] = tensor
            if length < max_len:
                out_tensor[length:, i, ...] = (1 - tensor.sum(dim=0)) / (max_len - length)
            else:
                out_tensor[i] = torch.softmax(out_tensor[i], dim=0)

    return out_tensor

In [23]:
def collate_fn(batch):
    cws = torch.nn.utils.rnn.pad_sequence([b[0] for b in batch], batch_first=True)
    attns = custom_pad([b[1].t() for b in batch], batch_first=True)

    return cws, attns

In [24]:
train_size = int(len(ds) * 0.8)
print('Train size:', train_size, 'Val size:', len(ds) - train_size)

train_loader = DataLoader(ds[:train_size], batch_size=32, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(ds[train_size:], batch_size=32, shuffle=True, collate_fn=collate_fn)

len(train_loader) * 32 , len(val_loader) * 32

Train size: 25625 Val size: 6407


(25632, 6432)

In [25]:
def step(model, optimizer, loader, train=True, bar=None, step=0):
    total_loss = 0.
    total_samples = 0
    total_ndcg = 0.

    model.train(train)
    
    loss_fn = nn.KLDivLoss(reduction='batchmean')
    
    
    with torch.set_grad_enabled(train):
        # for b in loader:
        lbar = tqdm(loader, total=len(loader))
        for b in lbar:
        
            ret = model(b[0]).squeeze(2)
            
            
            loss = loss_fn(torch.log_softmax(ret, dim=1), b[1][:,:,step])

            if train:
                loss.backward()
                optimizer.step()
                model.zero_grad()

            # pbar.set_postfix(loss=loss.item())

            total_loss += loss.item()
            total_samples += b[0].size(0)
    
            ndcg = calc_ndcg(ret.detach().cpu().numpy(), b[1][:,:,step], k=5)
            total_ndcg += ndcg * b[0].size(0)
    
            if train:
                lbar.set_postfix(train_loss=total_loss / total_samples, train_ndcg=total_ndcg / total_samples)
            else:
                lbar.set_postfix(val_loss=total_loss / total_samples, val_ndcg=total_ndcg / total_samples)
    
    return total_loss / total_samples, total_ndcg / total_samples

In [28]:
models = {}
epochs = 2

for s in range(4):
    pred_model = nn.Sequential(
        nn.Linear(512, 128),
        nn.ReLU(),
        nn.Linear(128, 1),
    )
    models[s] = pred_model
    
    optimizer = torch.optim.Adam(pred_model.parameters(), lr=0.01, weight_decay=1e-6)
    loss_fn = nn.KLDivLoss(reduction='batchmean')
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.5, patience=5, verbose=True)

    print(f'------ STEP {s} ------')
    ebar = tqdm(range(epochs))
    for epoch in ebar:
        train_loss, train_ndcg = step(pred_model, optimizer, train_loader, train=True, step=s)
        val_loss, val_ndcg = step(pred_model, optimizer, val_loader, train=False, step=s)

        ebar.set_postfix(train_loss=train_loss, val_loss=val_loss, train_ndcg=train_ndcg, val_ndcg=val_ndcg)
        scheduler.step(val_loss)
        if epoch % 10 == 0:
            print('Epoch', epoch, 'Train loss:', train_loss, 'Val loss:', val_loss)

------ STEP 0 ------


HBox(children=(IntProgress(value=0, max=2), HTML(value='')))

HBox(children=(IntProgress(value=0, max=801), HTML(value='')))

HBox(children=(IntProgress(value=0, max=201), HTML(value='')))

Epoch 0 Train loss: 0.004443228411383745 Val loss: 0.0037066052662418285


HBox(children=(IntProgress(value=0, max=801), HTML(value='')))

HBox(children=(IntProgress(value=0, max=201), HTML(value='')))

------ STEP 1 ------


HBox(children=(IntProgress(value=0, max=2), HTML(value='')))

HBox(children=(IntProgress(value=0, max=801), HTML(value='')))

HBox(children=(IntProgress(value=0, max=201), HTML(value='')))

Epoch 0 Train loss: 0.006734322998581863 Val loss: 0.0047644713304242645


HBox(children=(IntProgress(value=0, max=801), HTML(value='')))

HBox(children=(IntProgress(value=0, max=201), HTML(value='')))

------ STEP 2 ------


HBox(children=(IntProgress(value=0, max=2), HTML(value='')))

HBox(children=(IntProgress(value=0, max=801), HTML(value='')))

HBox(children=(IntProgress(value=0, max=201), HTML(value='')))

Epoch 0 Train loss: 0.004785724133689229 Val loss: 0.0032633835490806363


HBox(children=(IntProgress(value=0, max=801), HTML(value='')))

HBox(children=(IntProgress(value=0, max=201), HTML(value='')))

------ STEP 3 ------


HBox(children=(IntProgress(value=0, max=2), HTML(value='')))

HBox(children=(IntProgress(value=0, max=801), HTML(value='')))

HBox(children=(IntProgress(value=0, max=201), HTML(value='')))

Epoch 0 Train loss: 0.004928564643569109 Val loss: 0.0037830763212860848


HBox(children=(IntProgress(value=0, max=801), HTML(value='')))

HBox(children=(IntProgress(value=0, max=201), HTML(value='')))

In [29]:
torch.save({key: m.state_dict() for key, m in models.items()}, '/Users/sebamenabar/Documents/attn_preds.pth')

In [27]:
"""Information Retrieval metrics
Useful Resources:
http://www.cs.utexas.edu/~mooney/ir-course/slides/Evaluation.ppt
http://www.nii.ac.jp/TechReports/05-014E.pdf
http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
http://hal.archives-ouvertes.fr/docs/00/72/67/60/PDF/07-busa-fekete.pdf
Learning to Rank for Information Retrieval (Tie-Yan Liu)
"""
import numpy as np


def mean_reciprocal_rank(rs):
    """Score is reciprocal of the rank of the first relevant item
    First element is 'rank 1'.  Relevance is binary (nonzero is relevant).
    Example from http://en.wikipedia.org/wiki/Mean_reciprocal_rank
    >>> rs = [[0, 0, 1], [0, 1, 0], [1, 0, 0]]
    >>> mean_reciprocal_rank(rs)
    0.61111111111111105
    >>> rs = np.array([[0, 0, 0], [0, 1, 0], [1, 0, 0]])
    >>> mean_reciprocal_rank(rs)
    0.5
    >>> rs = [[0, 0, 0, 1], [1, 0, 0], [1, 0, 0]]
    >>> mean_reciprocal_rank(rs)
    0.75
    Args:
        rs: Iterator of relevance scores (list or numpy) in rank order
            (first element is the first item)
    Returns:
        Mean reciprocal rank
    """
    rs = (np.asarray(r).nonzero()[0] for r in rs)
    return np.mean([1. / (r[0] + 1) if r.size else 0. for r in rs])


def r_precision(r):
    """Score is precision after all relevant documents have been retrieved
    Relevance is binary (nonzero is relevant).
    >>> r = [0, 0, 1]
    >>> r_precision(r)
    0.33333333333333331
    >>> r = [0, 1, 0]
    >>> r_precision(r)
    0.5
    >>> r = [1, 0, 0]
    >>> r_precision(r)
    1.0
    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)
    Returns:
        R Precision
    """
    r = np.asarray(r) != 0
    z = r.nonzero()[0]
    if not z.size:
        return 0.
    return np.mean(r[:z[-1] + 1])


def precision_at_k(r, k):
    """Score is precision @ k
    Relevance is binary (nonzero is relevant).
    >>> r = [0, 0, 1]
    >>> precision_at_k(r, 1)
    0.0
    >>> precision_at_k(r, 2)
    0.0
    >>> precision_at_k(r, 3)
    0.33333333333333331
    >>> precision_at_k(r, 4)
    Traceback (most recent call last):
        File "<stdin>", line 1, in ?
    ValueError: Relevance score length < k
    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)
    Returns:
        Precision @ k
    Raises:
        ValueError: len(r) must be >= k
    """
    assert k >= 1
    r = np.asarray(r)[:k] != 0
    if r.size != k:
        raise ValueError('Relevance score length < k')
    return np.mean(r)


def average_precision(r):
    """Score is average precision (area under PR curve)
    Relevance is binary (nonzero is relevant).
    >>> r = [1, 1, 0, 1, 0, 1, 0, 0, 0, 1]
    >>> delta_r = 1. / sum(r)
    >>> sum([sum(r[:x + 1]) / (x + 1.) * delta_r for x, y in enumerate(r) if y])
    0.7833333333333333
    >>> average_precision(r)
    0.78333333333333333
    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)
    Returns:
        Average precision
    """
    r = np.asarray(r) != 0
    out = [precision_at_k(r, k + 1) for k in range(r.size) if r[k]]
    if not out:
        return 0.
    return np.mean(out)


def mean_average_precision(rs):
    """Score is mean average precision
    Relevance is binary (nonzero is relevant).
    >>> rs = [[1, 1, 0, 1, 0, 1, 0, 0, 0, 1]]
    >>> mean_average_precision(rs)
    0.78333333333333333
    >>> rs = [[1, 1, 0, 1, 0, 1, 0, 0, 0, 1], [0]]
    >>> mean_average_precision(rs)
    0.39166666666666666
    Args:
        rs: Iterator of relevance scores (list or numpy) in rank order
            (first element is the first item)
    Returns:
        Mean average precision
    """
    return np.mean([average_precision(r) for r in rs])


def dcg_at_k(r, k, method=0):
    """Score is discounted cumulative gain (dcg)
    Relevance is positive real values.  Can use binary
    as the previous methods.
    Example from
    http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
    >>> r = [3, 2, 3, 0, 0, 1, 2, 2, 3, 0]
    >>> dcg_at_k(r, 1)
    3.0
    >>> dcg_at_k(r, 1, method=1)
    3.0
    >>> dcg_at_k(r, 2)
    5.0
    >>> dcg_at_k(r, 2, method=1)
    4.2618595071429155
    >>> dcg_at_k(r, 10)
    9.6051177391888114
    >>> dcg_at_k(r, 11)
    9.6051177391888114
    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)
        k: Number of results to consider
        method: If 0 then weights are [1.0, 1.0, 0.6309, 0.5, 0.4307, ...]
                If 1 then weights are [1.0, 0.6309, 0.5, 0.4307, ...]
    Returns:
        Discounted cumulative gain
    """
    r = np.asfarray(r)[:k]
    if r.size:
        if method == 0:
            return r[0] + np.sum(r[1:] / np.log2(np.arange(2, r.size + 1)))
        elif method == 1:
            return np.sum(r / np.log2(np.arange(2, r.size + 2)))
        else:
            raise ValueError('method must be 0 or 1.')
    return 0.


def ndcg_at_k(r, k, method=0):
    """Score is normalized discounted cumulative gain (ndcg)
    Relevance is positive real values.  Can use binary
    as the previous methods.
    Example from
    http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
    >>> r = [3, 2, 3, 0, 0, 1, 2, 2, 3, 0]
    >>> ndcg_at_k(r, 1)
    1.0
    >>> r = [2, 1, 2, 0]
    >>> ndcg_at_k(r, 4)
    0.9203032077642922
    >>> ndcg_at_k(r, 4, method=1)
    0.96519546960144276
    >>> ndcg_at_k([0], 1)
    0.0
    >>> ndcg_at_k([1], 2)
    1.0
    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)
        k: Number of results to consider
        method: If 0 then weights are [1.0, 1.0, 0.6309, 0.5, 0.4307, ...]
                If 1 then weights are [1.0, 0.6309, 0.5, 0.4307, ...]
    Returns:
        Normalized discounted cumulative gain
    """
    dcg_max = dcg_at_k(sorted(r, reverse=True), k, method)
    if not dcg_max:
        return 0.
    return dcg_at_k(r, k, method) / dcg_max

def calc_ndcg(pred, gt, k=5, method=1):
    # pred nparray WITH softmax
    count = 0
    total = 0.
    
    sorted_pred = (-pred).argsort(axis=1)
    for i, sample in enumerate(sorted_pred):
        curr = ndcg_at_k(gt[i, sample], k=k, method=method)
        total += curr
        count += 1
        
        # print(curr, gt[i, sample])
        
    return total / count

In [486]:
ndcg_at_k([0.05] * 20, 5)

1.0

In [374]:
ndcg_at_k([0.5, 0.3, 0.2, 0.1], 1)

1.0

In [477]:
ndcg_at_k([0.4, 0.5, 0.05, 0.05], 1)

0.8

In [481]:
ndcg_at_k([0.25, 0.25, 0.25, 0.25], k=4)

1.0

In [480]:
ndcg_at_k([0.1, 0.05, 0.05, 0.8], k=4)

0.6079646887731703

In [379]:
pred = np.array([0.3, 0.1, 0.5])
gt = np.array([0.1, 0.2, 0.7])

In [382]:
gt[np.argsort(-pred, )]

array([0.7, 0.1, 0.2])

In [383]:
ndcg_at_k(gt[np.argsort(-pred, )], k=3)

0.9616786482850547

In [None]:
b = next(iter(val_loader))

In [45]:
pred = pred_model(b[0]).squeeze(2)

In [54]:
(-pred).argsort()

tensor([[ 9, 13, 26,  ..., 12,  4, 11],
        [11, 12, 10,  ...,  3,  7,  8],
        [ 9, 14, 26,  ...,  7, 12,  6],
        ...,
        [ 0, 20, 26,  ..., 18,  6,  9],
        [ 8,  9,  2,  ...,  4,  5,  6],
        [10, 14, 25,  ..., 12,  5,  6]])

In [57]:
(-b[1][:,:,0]).argsort()

tensor([[ 9, 13, 26,  ...,  4, 11, 12],
        [11, 12, 19,  ...,  3,  9,  8],
        [ 9, 14, 26,  ...,  7, 11, 12],
        ...,
        [ 0, 20, 11,  ..., 18, 17,  9],
        [ 8,  2,  9,  ...,  5,  3,  6],
        [10, 14,  3,  ..., 12, 13,  6]])

In [431]:
b[1][:,:,0].size()

torch.Size([32, 38])

In [58]:
(-pred).argsort() == (-b[1][:,:,0]).argsort()

tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 0,  ..., 1, 0, 1],
        [1, 1, 1,  ..., 1, 0, 0],
        ...,
        [1, 1, 0,  ..., 1, 0, 1],
        [1, 0, 0,  ..., 0, 0, 1],
        [1, 1, 0,  ..., 1, 0, 1]], dtype=torch.uint8)

In [444]:
(-pred.softmax(dim=1).cpu().detach().numpy()).argsort(axis=1)[0]

array([22, 24, 21, 23, 25, 10, 20, 26, 11,  0, 13, 34, 14,  9,  1, 35, 36,
       37, 27,  2, 33, 15,  3,  5, 12,  8,  4,  6,  7, 31, 32, 19, 16, 28,
       18, 30, 17, 29])

In [457]:
b[1][:,:,0][:].size()

torch.Size([32, 38])

In [462]:
(-pred.softmax(dim=1).detach()).argsort()[0]

tensor([22, 24, 21, 23, 25, 10, 20, 26, 11,  0, 13, 34, 14,  9,  1, 35, 36, 37,
        27,  2, 33, 15,  3,  5, 12,  8,  4,  6,  7, 31, 32, 19, 16, 28, 18, 30,
        17, 29])

In [460]:
b[1][:,:,0].numpy().shape

(32, 38)

In [463]:
(-pred.softmax(dim=1).detach()).argsort()

tensor([[22, 24, 21,  ..., 30, 17, 29],
        [ 7,  9,  8,  ..., 14,  5,  4],
        [27, 28, 26,  ..., 11,  9, 13],
        ...,
        [ 4, 18,  2,  ..., 16, 12, 17],
        [12, 16, 13,  ...,  5,  4,  2],
        [11, 15, 12,  ...,  5,  2,  3]])

In [476]:
ndcg_at_k([0.1, 0.1, 0.1, 0.1, 0.6], k=5, method=1)

0.6142980774040249

In [474]:
calc_ndcg(pred.softmax(dim=1).detach().cpu().numpy(), b[1][:,:,0])

0.9976750697978618 tensor([3.3479e-01, 2.4326e-01, 9.4054e-02, 1.1488e-01, 8.9872e-02, 1.8376e-02,
        1.3426e-02, 2.2863e-02, 1.6301e-02, 9.2898e-03, 3.9664e-03, 5.7711e-03,
        1.5857e-03, 3.5235e-03, 3.4244e-03, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        6.7526e-03, 2.7234e-03, 2.2329e-03, 6.7731e-04, 7.0492e-04, 4.9589e-04,
        3.0534e-03, 8.4697e-04, 1.0258e-03, 3.2900e-04, 4.6015e-04, 3.9638e-04,
        5.3188e-04, 6.8161e-04, 2.8981e-04, 1.1153e-03, 9.6075e-04, 4.8343e-04,
        1.9040e-04, 6.6829e-04])
0.9912203350330319 tensor([0.2669, 0.1131, 0.0716, 0.1152, 0.0893, 0.0378, 0.0420, 0.0033, 0.0377,
        0.0620, 0.0257, 0.0166, 0.0093, 0.0059, 0.0029, 0.0079, 0.0050, 0.0113,
        0.0029, 0.0019, 0.0021, 0.0010, 0.0060, 0.0035, 0.0102, 0.0042, 0.0061,
        0.0009, 0.0067, 0.0035, 0.0035, 0.0035, 0.0035, 0.0007, 0.0062, 0.0041,
        0.0030, 0.0030])
0.9993238875533926 tensor([5.5919e-01, 2.6426e-01, 7.5054e-02, 8.2902e-02, 1.2917e-02, 5.2201e-04,
     

0.983520252040605

In [464]:
b[1][:,:,0].numpy()[(-pred.softmax(dim=1).detach()).argsort().numpy()]

IndexError: index 34 is out of bounds for axis 0 with size 32

In [447]:
b[1][:,:,0]

tensor([[9.2898e-03, 3.4244e-03, 2.7234e-03,  ..., 0.0000e+00, 0.0000e+00,
         0.0000e+00],
        [3.7698e-02, 1.1295e-02, 1.0159e-02,  ..., 3.5411e-03, 3.5411e-03,
         3.5411e-03],
        [4.9808e-05, 4.9266e-05, 2.0854e-05,  ..., 2.1191e-04, 2.1191e-04,
         2.1191e-04],
        ...,
        [3.2635e-02, 2.9457e-02, 4.0475e-02,  ..., 3.3415e-02, 3.3415e-02,
         3.3415e-02],
        [1.0017e-02, 2.7052e-03, 1.3543e-03,  ..., 1.9447e-02, 1.9447e-02,
         1.9447e-02],
        [1.0312e-02, 3.4165e-03, 2.1006e-03,  ..., 2.4301e-02, 2.4301e-02,
         2.4301e-02]])

In [445]:
b[1][:,:,0][(-pred.softmax(dim=1).cpu().detach().numpy()).argsort(axis=1)]

IndexError: index 34 is out of bounds for dimension 0 with size 32

In [432]:
b[1][:,:,0][(-pred.softmax(dim=1).cpu().detach().numpy()).argsort(axis=1)]

IndexError: index 34 is out of bounds for dimension 0 with size 32

In [62]:
cfg

{'GPU_ID': '-1',
 'CUDA': False,
 'WORKERS': 4,
 'TRAIN': {'FLAG': True,
  'LEARNING_RATE': 0.0001,
  'BATCH_SIZE': 64,
  'MAX_EPOCHS': 25,
  'SNAPSHOT_INTERVAL': 5,
  'WEIGHT_INIT': 'xavier_uniform',
  'CLIP_GRADS': True,
  'CLIP': 8,
  'MAX_STEPS': 4,
  'EALRY_STOPPING': True,
  'PATIENCE': 5,
  'VAR_DROPOUT': False},
 'DATASET': {'DATA_DIR': '/Users/sebamenabar/Documents/datasets/CLEVR/data'}}

In [70]:
cfg.ATTNS_PATH = '/Users/sebamenabar/Documents/attn_preds.pth'
model = MACNetwork(cfg=cfg, max_step=4, vocab=vocab)
model.load_state_dict(torch.load('/Users/sebamenabar/Documents/vanilla_mac.pth', map_location='cpu')['model'], strict=False)
# model(b['image'], b['question'], b['question_length'])

[(0, Sequential(
  (0): Linear(in_features=512, out_features=128, bias=True)
  (1): ReLU()
  (2): Linear(in_features=128, out_features=1, bias=True)
)), (1, Sequential(
  (0): Linear(in_features=512, out_features=128, bias=True)
  (1): ReLU()
  (2): Linear(in_features=128, out_features=1, bias=True)
)), (2, Sequential(
  (0): Linear(in_features=512, out_features=128, bias=True)
  (1): ReLU()
  (2): Linear(in_features=128, out_features=1, bias=True)
)), (3, Sequential(
  (0): Linear(in_features=512, out_features=128, bias=True)
  (1): ReLU()
  (2): Linear(in_features=128, out_features=1, bias=True)
))]
uno
uno
uno
uno


IncompatibleKeys(missing_keys=['mac.control.attns.0.0.weight', 'mac.control.attns.0.0.bias', 'mac.control.attns.0.2.weight', 'mac.control.attns.0.2.bias', 'mac.control.attns.1.0.weight', 'mac.control.attns.1.0.bias', 'mac.control.attns.1.2.weight', 'mac.control.attns.1.2.bias', 'mac.control.attns.2.0.weight', 'mac.control.attns.2.0.bias', 'mac.control.attns.2.2.weight', 'mac.control.attns.2.2.bias', 'mac.control.attns.3.0.weight', 'mac.control.attns.3.0.bias', 'mac.control.attns.3.2.weight', 'mac.control.attns.3.2.bias'], unexpected_keys=['mac.control.concept_memory.categories', 'mac.control.concept_memory.attributes', 'mac.control.concept_memory.values'])

In [72]:
whole_ds = ClevrDataset(
    data_dir='/Users/sebamenabar/Documents/datasets/CLEVR/data',
    # img_dir='/Users/sebamenabar/Documents/datasets/CLEVR/CLEVR_v1.0/images/',
    # scenes_json='/Users/sebamenabar/Documents/TAIA/individual/sm/data/clevr/train/scenes.json',
    # raw_image=True,
    split='val',
)

In [90]:
whole_loader = DataLoader(whole_ds, batch_size=32, shuffle=False, collate_fn=whole_collate_fn)

In [88]:
b = next(iter(whole_loader))

In [92]:
model.eval()
with torch.no_grad():
    pbar = tqdm(whole_loader, total=len(whole_loader))
    total_accuracy = 0.
    total_samples = 0
    for data in pbar:

        image, question, question_len, answer = data['image'], data['question'], data['question_length'], data['answer']
        answer = answer.long()

        with torch.no_grad():
            scores = model(image, question, question_len)
            # scores_ema = self.model_ema(image, question, question_len)

        # correct_ema = scores_ema.detach().argmax(1) == answer
        # accuracy_ema = correct_ema.sum().cpu().numpy() / answer.shape[0]
        # all_accuracies_ema.append(accuracy_ema)

        correct = scores.detach().argmax(1) == answer
        accuracy = correct.sum().cpu().numpy() / answer.shape[0]
        # all_accuracies.append(accuracy)
        total_accuracy += (accuracy * image.size(0))
        total_samples += image.size(0)
        
        pbar.set_postfix(avg_accuracy=total_accuracy / total_samples)

        # accuracy_ema = sum(all_accuracies_ema) / float(len(all_accuracies_ema))
# accuracy = sum(all_accuracies) / float(len(all_accuracies))

# accuracy

HBox(children=(IntProgress(value=0, max=4688), HTML(value='')))

KeyboardInterrupt: 