attention.py (forked from SKTBrain/BAN-KVQA)
"""
Bilinear Attention Networks
Jin-Hwa Kim, Jaehyun Jun, Byoung-Tak Zhang
https://arxiv.org/abs/1805.07932
This code is written by Jin-Hwa Kim.
"""
import torch.nn as nn
from torch.nn.utils.weight_norm import weight_norm

from bc import BCNet


class BiAttention(nn.Module):
    """Bilinear attention over visual regions and question tokens, with multiple glimpses."""

    def __init__(self, x_dim, y_dim, z_dim, glimpse, dropout=[.2, .5]):
        super(BiAttention, self).__init__()

        self.glimpse = glimpse
        # Low-rank bilinear network (BCNet) produces one attention logit per
        # (glimpse, visual region, question token) triple; weight normalization
        # is applied to its 'h_mat' parameter.
        self.logits = weight_norm(
            BCNet(x_dim, y_dim, z_dim, glimpse, dropout=dropout, k=3),
            name='h_mat', dim=None)

    def forward(self, v, q, v_mask=True):
        """
        v: [batch, v_num, vdim]  visual region features
        q: [batch, q_num, qdim]  question token features
        """
        p, logits = self.forward_all(v, q, v_mask)
        return p, logits

    def forward_all(self, v, q, v_mask=True):
        v_num = v.size(1)
        q_num = q.size(1)
        logits = self.logits(v, q)  # [batch, glimpse, v_num, q_num]

        if v_mask:
            # Mask out padded visual regions (all-zero feature vectors) so they
            # receive zero attention weight after the softmax.
            mask = (0 == v.abs().sum(2)).unsqueeze(1).unsqueeze(3).expand(logits.size())
            logits.data.masked_fill_(mask.data, -float('inf'))

        # Normalize jointly over all (region, token) pairs within each glimpse.
        p = nn.functional.softmax(logits.view(-1, self.glimpse, v_num * q_num), 2)
        return p.view(-1, self.glimpse, v_num, q_num), logits
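

# Usage sketch (not part of the original file): a minimal, hedged example of how
# BiAttention might be exercised, assuming `bc.BCNet` from this repository is on
# the path and behaves as in the BAN reference code. The batch size, region/token
# counts, and feature dimensions below are illustrative assumptions, not the
# values used by BAN-KVQA.
if __name__ == '__main__':
    import torch

    batch, v_num, q_num = 4, 36, 14                    # regions per image, tokens per question (assumed)
    v_dim, q_dim, h_dim, glimpse = 2048, 1024, 1024, 8  # illustrative dimensions (assumed)

    att = BiAttention(v_dim, q_dim, h_dim, glimpse)
    v = torch.randn(batch, v_num, v_dim)   # visual features  [batch, v_num, v_dim]
    q = torch.randn(batch, q_num, q_dim)   # question features [batch, q_num, q_dim]

    p, logits = att(v, q)
    print(p.shape)       # expected: [4, 8, 36, 14], normalized attention over (region, token) pairs per glimpse
    print(logits.shape)  # expected: [4, 8, 36, 14], unnormalized bilinear attention logits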