In [1]:
import torch 
import torch.nn as nn
import torch.nn.functional as F

In [2]:
#self-attention 
class SA(torch.nn.Module):
    ''' Self Attention 自注意力 / Scaled Dot-Product Attention 缩放点积注意力

        Vaswani A , Shazeer N , Parmar N , et al. Attention Is All You Need[J]. NIST, 2017.
    '''
    def __init__(self, ndim, kdim, vdim):
        '''
        ndim(int): input dimension
        kdim(int): K dimension == Q dimension
        vdim(int): V dimension
        '''
        super().__init__()

        self.q1x1 = torch.nn.Conv1d(in_channels=ndim, out_channels=kdim, kernel_size=1)
        self.k1x1 = torch.nn.Conv1d(in_channels=ndim, out_channels=kdim, kernel_size=1)
        self.v1x1 = torch.nn.Conv1d(in_channels=ndim, out_channels=vdim, kernel_size=1)

    def forward(self, x):
        '''
        x(float): shape [B, ndim, T]
        '''
        q = self.q1x1(x) # [B, qdim, Tq=T]
        k = self.k1x1(x) # [B, kdim, Tk=T]
        v = self.v1x1(x) # [B, vdim, Tv=T]
        o = torch.matmul(k.permute(0,2,1), q).softmax(1) # [B, Tk, Tq]
        o = torch.matmul(v, o)  # [B, kdim, Tq], kernel_size = 1所以Time是不变的,

        return o


In [3]:
x = torch.rand(10,39,110)

In [4]:
x.shape

torch.Size([10, 39, 110])

In [5]:
q = torch.nn.Conv1d(in_channels = 39, out_channels = 512, kernel_size = 1)
k = torch.nn.Conv1d(in_channels = 39, out_channels = 512, kernel_size = 1)
v = torch.nn.Conv1d(in_channels = 39, out_channels = 256, kernel_size = 1)

In [6]:
q_matrix = q(x)
q_matrix.shape

torch.Size([10, 512, 110])

In [7]:
k_matrix = k(x)
k_matrix.shape

torch.Size([10, 512, 110])

In [8]:
v_matrix = v(x)
v_matrix.shape

torch.Size([10, 256, 110])

In [9]:
k_matrix_transpose = k_matrix.permute(0,2,1)

In [10]:
k_matrix_transpose.shape

torch.Size([10, 110, 512])

In [11]:
o = torch.matmul(k_matrix_transpose, q_matrix) # [B, Tk, Tq]

In [12]:
o.shape#两个维度都分别变成时间维度了

torch.Size([10, 110, 110])

In [13]:
o_softmax = o.softmax(1)#注意，1的话是按行扫描，所以是对一列的数据进行softmax

In [14]:
o_softmax.shape

torch.Size([10, 110, 110])

In [15]:
o = torch.matmul(v_matrix,o_softmax)#注意，第一个乘数是

In [16]:
o.shape

torch.Size([10, 256, 110])

In [17]:
import glob

In [18]:
glob.glob(r"../*.zip")

[]

In [19]:
x.shape

torch.Size([10, 39, 110])

In [20]:
conv1 = nn.Conv1d(39, 39,kernel_size = 1)

In [21]:
temp1 = conv1(x)

In [22]:
temp1.shape

torch.Size([10, 39, 110])

In [28]:
bias_e = torch.randn(1)

In [29]:
bias_e

tensor([0.3634])

In [25]:
temp2 = torch.ones(2,3,4)

In [26]:
temp2

tensor([[[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]],

        [[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]])

In [27]:
temp2 + bias_e

tensor([[[1.9789, 1.9789, 1.9789, 1.9789],
         [1.9789, 1.9789, 1.9789, 1.9789],
         [1.9789, 1.9789, 1.9789, 1.9789]],

        [[1.9789, 1.9789, 1.9789, 1.9789],
         [1.9789, 1.9789, 1.9789, 1.9789],
         [1.9789, 1.9789, 1.9789, 1.9789]]])

In [30]:
temp2.shape

torch.Size([2, 3, 4])

In [35]:
key = torch.rand(2,3,4)

In [36]:
key

tensor([[[0.4441, 0.8244, 0.4262, 0.3668],
         [0.4122, 0.8291, 0.5976, 0.9598],
         [0.5176, 0.6342, 0.1710, 0.7016]],

        [[0.9586, 0.4913, 0.3767, 0.9273],
         [0.8638, 0.5397, 0.4672, 0.2715],
         [0.1455, 0.7241, 0.7961, 0.5724]]])

In [37]:
key + torch.

tensor([[[1.4441, 1.8244, 1.4262, 1.3668],
         [1.4122, 1.8291, 1.5976, 1.9598],
         [1.5176, 1.6342, 1.1710, 1.7016]],

        [[1.9586, 1.4913, 1.3767, 1.9273],
         [1.8638, 1.5397, 1.4672, 1.2715],
         [1.1455, 1.7241, 1.7961, 1.5724]]])

In [38]:
key.shape

torch.Size([2, 3, 4])

In [39]:
tanh_key = torch.tanh(key)

In [40]:
tanh_key.shape

torch.Size([2, 3, 4])

In [41]:
v = torch.rand(4)
alpha_t = torch.matmul(tanh_key, v)  # [B, kdim, Tq], kernel_size = 1所以Time是不变的,

In [42]:
alpha_t.shape

torch.Size([2, 3])

In [44]:
alpha_t

tensor([[0.9110, 0.9516, 0.8791],
        [1.0329, 0.9762, 0.7004]])

In [47]:
alpha_t_unseq = alpha_t.unsqueeze(2)

In [49]:
alpha_t_unseq

tensor([[[0.9110],
         [0.9516],
         [0.8791]],

        [[1.0329],
         [0.9762],
         [0.7004]]])

In [50]:
final_alpha_t = alpha_t_unseq.expand(-1,-1,4)

In [51]:
final_alpha_t

tensor([[[0.9110, 0.9110, 0.9110, 0.9110],
         [0.9516, 0.9516, 0.9516, 0.9516],
         [0.8791, 0.8791, 0.8791, 0.8791]],

        [[1.0329, 1.0329, 1.0329, 1.0329],
         [0.9762, 0.9762, 0.9762, 0.9762],
         [0.7004, 0.7004, 0.7004, 0.7004]]])

In [53]:
tanh_key

tensor([[[0.4170, 0.6774, 0.4021, 0.3511],
         [0.3904, 0.6800, 0.5353, 0.7442],
         [0.4758, 0.5610, 0.1694, 0.6054]],

        [[0.7436, 0.4552, 0.3599, 0.7293],
         [0.6982, 0.4927, 0.4359, 0.2650],
         [0.1445, 0.6195, 0.6619, 0.5171]]])

In [54]:
output = final_alpha_t * tanh_key

In [55]:
u = torch.sum(output,dim =1)

In [56]:
u

tensor([[1.1697, 1.7573, 1.0246, 1.5602],
        [1.5509, 1.3850, 1.2608, 1.3742]])

In [57]:
u.shape

torch.Size([2, 4])

In [84]:
u = torch.sum(output,dim =1)

In [60]:
key_2 = key*key 

In [61]:
key_2.shape

torch.Size([2, 3, 4])

In [63]:
sigma_left = key_2 * final_alpha_t

In [64]:
sigma_left.shape

torch.Size([2, 3, 4])

In [65]:
sigma_left_sum = torch.sum(sigma_left, dim = 1) 

In [66]:
sigma_left_sum.shape

torch.Size([2, 4])

In [67]:
sigma = torch.sqrt(sigma_left_sum - u*u)

In [68]:
sigma.shape

torch.Size([2, 4])

In [72]:
sigma

tensor([[nan, nan, nan, nan],
        [nan, nan, nan, nan]])

In [69]:
u

tensor([[1.1697, 1.7573, 1.0246, 1.5602],
        [1.5509, 1.3850, 1.2608, 1.3742]])

In [73]:
C_acnn = torch.cat([u,sigma],dim = 1)

In [74]:
C_acnn.shape

torch.Size([2, 8])

In [75]:
linear1 = nn.Linear(8, 3) 

In [76]:
beta = linear1(C_acnn)

In [77]:
beta

tensor([[nan, nan, nan],
        [nan, nan, nan]], grad_fn=<AddmmBackward>)

In [78]:
beta.shape

torch.Size([2, 3])

In [90]:
#感觉好像是手动的
in_cha = 3
out_cha = 3
ker_size = 1
# weight = nn.Parameter(torch.Tensor(
#                 in_channels, out_channels, kernel_size))

In [92]:
convs = nn.ModuleList()  # 创建多个一维卷积层
N = 3
for i in range(N):
    convs.append(nn.Conv1d(in_channels = in_cha, out_channels = out_cha, kernel_size = ker_size))

In [96]:
torch.cat([self.pool(F.relu(conv(embeddings))).squeeze(-1) for conv in convs], dim=1)

Conv1d(3, 3, kernel_size=(1,), stride=(1,))

In [97]:
my_beta = torch.tensor([[1,2,3],[4,5,6]])

In [98]:
my_beta.shape

torch.Size([2, 3])

In [99]:
my_beta

tensor([[1, 2, 3],
        [4, 5, 6]])

In [100]:
input_feature = torch.rand(2,3,4)

In [101]:
input_feature

tensor([[[0.4519, 0.6362, 0.3710, 0.5947],
         [0.5907, 0.8330, 0.1735, 0.0698],
         [0.2487, 0.0451, 0.1134, 0.7435]],

        [[0.3901, 0.2463, 0.8774, 0.1199],
         [0.9949, 0.4607, 0.1898, 0.0633],
         [0.5997, 0.0593, 0.3830, 0.8622]]])

In [104]:
#输入就是batch_size, channels, seq_len
conv1 = nn.Conv1d(3,5,kernel_size = 1)
output1 = conv1(input_feature) 

In [107]:
output1.shape

torch.Size([2, 5, 4])

In [108]:
conv1_output = torch.ones(2,3,4)

In [109]:
conv1_output

tensor([[[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]],

        [[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]])

In [111]:
conv2_output = torch.full((2,3,4),2.0)

In [112]:
conv2_output

tensor([[[2., 2., 2., 2.],
         [2., 2., 2., 2.],
         [2., 2., 2., 2.]],

        [[2., 2., 2., 2.],
         [2., 2., 2., 2.],
         [2., 2., 2., 2.]]])

In [113]:
conv3_output = torch.full((2,3,4),3.0)
conv3_output

tensor([[[3., 3., 3., 3.],
         [3., 3., 3., 3.],
         [3., 3., 3., 3.]],

        [[3., 3., 3., 3.],
         [3., 3., 3., 3.],
         [3., 3., 3., 3.]]])

In [114]:
my_beta

tensor([[1, 2, 3],
        [4, 5, 6]])

In [115]:
my_beta_unsq = my_beta.unsqueeze(2)

In [116]:
my_beta_unsq

tensor([[[1],
         [2],
         [3]],

        [[4],
         [5],
         [6]]])

In [117]:
my_beta_unsq_expand = my_beta_unsq.expand(-1,-1, 4)

In [137]:
my_beta_unsq_expand

tensor([[[1, 1, 1, 1],
         [2, 2, 2, 2],
         [3, 3, 3, 3]],

        [[4, 4, 4, 4],
         [5, 5, 5, 5],
         [6, 6, 6, 6]]])

In [138]:
all_cnn_result = [conv1_output,conv2_output,conv3_output]

In [139]:
all_cnn_result

[tensor([[[1., 1., 1., 1.],
          [1., 1., 1., 1.],
          [1., 1., 1., 1.]],
 
         [[1., 1., 1., 1.],
          [1., 1., 1., 1.],
          [1., 1., 1., 1.]]]), tensor([[[2., 2., 2., 2.],
          [2., 2., 2., 2.],
          [2., 2., 2., 2.]],
 
         [[2., 2., 2., 2.],
          [2., 2., 2., 2.],
          [2., 2., 2., 2.]]]), tensor([[[3., 3., 3., 3.],
          [3., 3., 3., 3.],
          [3., 3., 3., 3.]],
 
         [[3., 3., 3., 3.],
          [3., 3., 3., 3.],
          [3., 3., 3., 3.]]])]

In [153]:
for cnn_result, beta in zip(all_cnn_result,my_beta.T):
    print(cnn_result)
    print('-'*30)
    print(beta.unsqueeze(1).unsqueeze(2).expand(-1,3,4))
    print('-'*30)
    print(beta.unsqueeze(1).unsqueeze(2).expand(-1,3,4) * cnn_result)
    print('-'*30)

tensor([[[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]],

        [[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]])
------------------------------
tensor([[[1, 1, 1, 1],
         [1, 1, 1, 1],
         [1, 1, 1, 1]],

        [[4, 4, 4, 4],
         [4, 4, 4, 4],
         [4, 4, 4, 4]]])
------------------------------
tensor([[[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]],

        [[4., 4., 4., 4.],
         [4., 4., 4., 4.],
         [4., 4., 4., 4.]]])
------------------------------
tensor([[[2., 2., 2., 2.],
         [2., 2., 2., 2.],
         [2., 2., 2., 2.]],

        [[2., 2., 2., 2.],
         [2., 2., 2., 2.],
         [2., 2., 2., 2.]]])
------------------------------
tensor([[[2, 2, 2, 2],
         [2, 2, 2, 2],
         [2, 2, 2, 2]],

        [[5, 5, 5, 5],
         [5, 5, 5, 5],
         [5, 5, 5, 5]]])
------------------------------
tensor([[[ 4.,  4.,  4.,  4.],
         [ 4.,  4.,  4.,  4.],
 

In [158]:
 temp_result = [ conv* (beta.unsqueeze(1).unsqueeze(2).expand(-1,conv.shape[1],conv.shape[2])) for conv,beta in zip(all_cnn_result, my_beta.T)]

In [164]:
(sum(temp_result)/len(temp_result))

tensor([[[ 4.6667,  4.6667,  4.6667,  4.6667],
         [ 4.6667,  4.6667,  4.6667,  4.6667],
         [ 4.6667,  4.6667,  4.6667,  4.6667]],

        [[10.6667, 10.6667, 10.6667, 10.6667],
         [10.6667, 10.6667, 10.6667, 10.6667],
         [10.6667, 10.6667, 10.6667, 10.6667]]])

In [169]:
import numpy as np
np.array(temp_result)

ValueError: only one element tensors can be converted to Python scalars

In [126]:
my_beta_unsq_2 = my_beta_unsq.unsqueeze(3)

In [127]:
my_beta_unsq_2

tensor([[[[1]],

         [[2]],

         [[3]]],


        [[[4]],

         [[5]],

         [[6]]]])

In [128]:
my_beta_unsq_2.shape

torch.Size([2, 3, 1, 1])

In [140]:
z = torch.zeros(2,3,4)

In [141]:
z

tensor([[[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]],

        [[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]])

In [172]:
# batch_size, in_channels, seq_len
class ACNN(nn.Module):
    def __init__(self,in_channels, out_channels, kernel_size, dilation, seq_len, N):
        super(ACNN,self).__init__()
        self.W_e = Conv1d(in_channels, out_channels,kernel_size)
        self.W_a = Conv1d(in_channels, out_channels,kernel_size)
        self.params = nn.ParameterDict({
                'b_e': nn.Parameter(torch.randn(1)),
                'b_a': nn.Parameter(torch.randn(1)),
                'v':nn.Parameter(torch.rand(seq_len))
        })
        self.W_b_list = nn.ModuleList()
        self.N = N
        for i in range(N):
            self.W_b_list.append(nn.Conv1d(in_channels = in_channels, 
                                        out_channels = out_channels, 
                                        kernel_size = kernel_size, bias = True))

        # self.b_e = b_e
        # self.b_a = b_a
        # self.v = v #(seq_len,)
        self.linear_combination = nn.Linear(seq_len,N)
    def forward(self, x):
        e_t = self.W_e(x) + self.params['b_e']#好像Bias = True就可以了
        a_t = torch.matmul(self.params['v'], torch.tanh(self.W_a(x) + self.params['b_a']))
        a_t = a_t.unsqueeze(2).expand(-1,-1,e_t.shape[2])
        u = torch.sum(a_t * e_t, dim = 1)
        sigma = torch.sqrt(torch.sum(a_t*(e_t*e_t),dim = 1) - u*u)#batch,seq_len
        C_acnn = torch.cat([u,sigma], dim = 1)#batch,seq_len*2
        beta = linear_combination(C_acnn)#batch,N 有N个filter
        out = sum([ conv(x)* beta.unsqueeze(1).unsqueeze(2).expand(-1,conv.shape[1],conv.shape[2]) for conv in zip(self.W_b_list, beta.T)])/self.N
        return out 

In [71]:
torch.exp(u)

tensor([[3.2209, 5.7971, 2.7860, 4.7599],
        [4.7155, 3.9948, 3.5281, 3.9520]])

In [98]:
u.mean()

tensor(2.3935)

In [105]:
t2.shape

torch.Size([2, 2, 3])

In [108]:
t2

tensor([[[0.5539, 0.7566, 0.5373],
         [0.4297, 0.3929, 0.9804]],

        [[0.9643, 0.3211, 0.6145],
         [0.6518, 0.0360, 0.1140]]])

In [111]:
mean_exp = torch.mean(t2,dim = 2)

In [112]:
mean_exp

tensor([[0.6159, 0.6010],
        [0.6333, 0.2673]])

In [118]:
sum_ = torch.sum(mean_exp,dim = 1).unsqueeze(1).expand(-1,2)

In [119]:
mean_exp / sum_

tensor([[0.5061, 0.4939],
        [0.7032, 0.2968]])