In [27]:
import torch
import torch.nn as nn



In [50]:

class AttentivePooling(nn.Module):
    def __init__(self, dim1: int, dim2: int):
        super(AttentivePooling, self).__init__()
        self.dim1 = dim1
        self.dim2 = dim2

        self.dropout = nn.Dropout(0.2)
        self.dense  = nn.Linear(dim2, 200)
        self.tanh = nn.Tanh()
        self.flatten = nn.Linear(200, 1)
        self.softmax = nn.Softmax(dim=1)
       

    def forward(self, x):
        user_vecs = self.dropout(x)
        user_att = self.tanh(self.dense(user_vecs))
        user_att = self.flatten(user_att)
        user_att = self.softmax(user_att)
        result = torch.einsum('ijk,ijk->ik', user_vecs, user_att)
        return result



In [30]:
dim1 = 5
dim2 = 10
x = torch.randn((2, dim1, dim2))

pool = AttentivePooling(dim1, dim2)

pool(x).shape



torch.Size([2, 10])

In [51]:
import tensorflow as tf
import keras
from keras.layers import *
from keras.models import Model


def tfAttentivePooling(dim1,dim2):
    vecs_input = Input(shape=(dim1,dim2),dtype='float32')
    user_vecs =Dropout(0.2)(vecs_input)
    user_att = Dense(200,activation='tanh')(user_vecs)
    user_att = keras.layers.Flatten()(Dense(1)(user_att))
    user_att = Activation('softmax')(user_att)    
    user_vec = keras.layers.Dot((1,1))([user_vecs,user_att])
    model = Model(vecs_input,user_vec)
    return model

def testDot(dim1, dim2):
    input = Input(shape=(dim1,dim2), dtype='float32')
    output = keras.layers.Dot((2,2))([input, input])
    return Model(input, output)


In [33]:
t = testDot(2,3)
x_tf = tf.convert_to_tensor([[[1,2,3],[4,5,6]]])
t(x_tf).eval(session=tf.compat.v1.Session())

2022-05-12 02:54:20.549877: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1512] Adding visible gpu devices: 0
2022-05-12 02:54:20.549931: I tensorflow/core/common_runtime/gpu/gpu_device.cc:984] Device interconnect StreamExecutor with strength 1 edge matrix:
2022-05-12 02:54:20.549937: I tensorflow/core/common_runtime/gpu/gpu_device.cc:990]      0 
2022-05-12 02:54:20.549941: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 0:   N 
2022-05-12 02:54:20.550022: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 3314 MB memory) -> physical GPU (device: 0, name: Tesla T4, pci bus id: 0001:00:00.0, compute capability: 7.5)


array([[[14, 32],
        [32, 77]]], dtype=int32)

In [52]:
pool = tfAttentivePooling(dim1, dim2)

def print_model(m):
    for layer in m.layers:
        print(layer.name, layer.output_shape) 

print_model(pool)
x_tf = tf.convert_to_tensor(x.numpy())

pool(x_tf) #.eval(session=tf.compat.v1.Session())

input_16 (None, 30, 300)
dropout_15 (None, 30, 300)
dense_19 (None, 30, 200)
dense_20 (None, 30, 1)
flatten_10 (None, 30)
activation_11 (None, 30)
dot_14 (None, 300)


<tf.Tensor 'model_15/dot_14/Squeeze:0' shape=(2, 300) dtype=float32>

In [36]:
from models import Attention

def get_doc_encoder():
    sentence_input = Input(shape=(30,300), dtype='float32')
    droped_vecs = Dropout(0.2)(sentence_input)

    l_cnnt = Conv1D(400,3,activation='relu')(droped_vecs)
    l_cnnt = Dropout(0.2)(l_cnnt)
    l_cnnt = Attention(20,20)([l_cnnt,l_cnnt,l_cnnt])
    l_cnnt = keras.layers.Activation('relu')(l_cnnt)
    
    droped_rep = Dropout(0.2)(l_cnnt)
    title_vec = AttentivePooling(30,400)(droped_rep)
    sentEncodert = Model(sentence_input, title_vec)
    return sentEncodert

d = get_doc_encoder()
print_model(d)


input_11 (None, 30, 300)
dropout_8 (None, 30, 300)
conv1d_2 (None, 28, 400)
dropout_9 (None, 28, 400)
attention_1 (None, 28, 400)
activation_6 (None, 28, 400)
dropout_10 (None, 28, 400)
model_10 (None, 400)


In [61]:
class Permute(nn.Module):
    def __init__(self, *dims):
        super(Permute, self).__init__()
        self.dims = dims
    
    def forward(self, x):
        return x.permute(*self.dims)

class DocEncoder(nn.Module):
    def __init__(self):        
        super(DocEncoder,self).__init__()
        self.phase1 = nn.Sequential(
            nn.Dropout(0.2),
            Permute(0,2,1),            
            nn.Conv1d(300,400,3),
            nn.ReLU(),
            nn.Dropout(0.2),
            Permute(0,2,1)
        )
        self.attention = nn.MultiheadAttention(400,20)
        self.phase2 = nn.Sequential(
            nn.ReLU(),
            nn.Dropout(0.2),
            AttentivePooling(30,400)
        )
    
    def forward(self, x):
        l_cnnt = self.phase1(x)
        print(l_cnnt.shape)
        l_cnnt, attention_weights = self.attention(l_cnnt, l_cnnt, l_cnnt)
        print(l_cnnt.shape)
        result = self.phase2(l_cnnt)
        print(result.shape)
        return result


In [62]:
d_pt = DocEncoder()
dim1 = 30
dim2 = 300
x = torch.randn((2, dim1, dim2))

d_pt.forward(x)

torch.Size([2, 28, 400])
torch.Size([2, 28, 400])
torch.Size([2, 400])


tensor([[2.3963e-03, 4.0591e-02, 0.0000e+00, 8.7075e-02, 1.4391e-02, 3.6700e-02,
         3.3882e-02, 1.2511e-01, 5.3376e-03, 9.0946e-03, 6.6884e-02, 2.0321e-03,
         0.0000e+00, 1.3617e-01, 2.1996e-02, 2.0637e-01, 1.9337e-01, 6.6152e-02,
         7.2891e-02, 7.8411e-02, 1.2602e-01, 2.0873e-02, 9.7937e-03, 1.0884e-01,
         0.0000e+00, 4.0968e-02, 2.3909e-02, 1.7039e-01, 6.3651e-02, 1.2156e-01,
         3.3234e-02, 2.9967e-02, 1.2508e-02, 8.0818e-02, 4.1254e-02, 1.1412e-01,
         2.1410e-02, 1.3321e-01, 1.5358e-02, 1.9235e-01, 5.7579e-04, 1.0120e-01,
         7.8862e-02, 6.2540e-02, 1.6081e-02, 3.7133e-02, 1.9012e-01, 5.8300e-02,
         2.1432e-01, 1.7429e-02, 1.4498e-01, 2.8760e-01, 0.0000e+00, 1.0378e-01,
         7.4191e-02, 9.5585e-02, 1.1763e-03, 1.0734e-01, 1.1476e-01, 2.4840e-02,
         5.0252e-02, 7.1241e-02, 1.2763e-01, 7.8959e-02, 2.2823e-02, 1.1548e-02,
         6.8733e-02, 0.0000e+00, 2.0620e-01, 6.6682e-02, 6.1807e-04, 7.8888e-02,
         1.3336e-02, 4.1974e

In [65]:

def get_user_encoder():
    news_vecs_input = Input(shape=(50,400), dtype='float32')
    
    news_vecs = Dropout(0.2)(news_vecs_input)
    gru_input = keras.layers.Lambda(lambda x:x[:,-15:,:])(news_vecs)
    vec1 = GRU(400)(gru_input)
    vecs2 = Attention(20,20)([news_vecs]*3)
    vec2 = tfAttentivePooling(50,400)(vecs2)

    user_vecs2 = Attention(20,20)([news_vecs_input]*3)
    user_vecs2 = Dropout(0.2)(user_vecs2)
    user_vec2 = tfAttentivePooling(50,400)(user_vecs2)
    user_vec2 = keras.layers.Reshape((1,400))(user_vec2)
        
    user_vecs1 = Lambda(lambda x:x[:,-20:,:])(news_vecs_input)
    user_vec1 = GRU(400)(user_vecs1)
    user_vec1 = keras.layers.Reshape((1,400))(user_vec1)

    user_vecs = keras.layers.Concatenate(axis=-2)([user_vec1,user_vec2])
    vec = tfAttentivePooling(2,400)(user_vecs)
        
    sentEncodert = Model(news_vecs_input, vec)
    return sentEncodert

u = get_user_encoder()
print_model(u)

input_18 (None, 50, 400)
attention_4 (None, 50, 400)
lambda_3 (None, 20, 400)
dropout_19 (None, 50, 400)
gru_3 (None, 400)
model_17 (None, 400)
reshape_2 (None, 1, 400)
reshape_1 (None, 1, 400)
concatenate_1 (None, 2, 400)
model_18 (None, 400)


In [79]:
class VecTail(nn.Module):
    def __init__(self, n):
        super(VecTail, self).__init__()
        self.n = n

    def forward(self, x):
        return x[:,-self.n,:]

class UserEncoder(nn.Module):
    def __init__(self):        
        super(UserEncoder,self).__init__()
        # news_vecs_input = Input(shape=(50,400), dtype='float32')
        #self.dropout1 = nn.Dropout(0.2)
        #self.tail = VecTail(15)
        #self.gru = nn.GRU(400, 400)
        #self.attention = nn.MultiheadAttention(400, 20)
        #self.pool = AttentivePooling(50, 400)
        self.attention2 = nn.MultiheadAttention(400, 20)
        self.dropout2 = nn.Dropout(0.2)
        self.pool2 = AttentivePooling(50, 400)
        self.tail2 = VecTail(20)
        self.gru2 = nn.GRU(400,400)
        self.pool3 = AttentivePooling(2, 400)

    def forward(self, news_vecs_input):    
        #news_vecs =self.dropout1(news_vecs_input)
        #gru_input = self.tail(news_vecs)
        #vec1 = self.gru(gru_input)
        #vecs2 = self.attention(*[news_vecs]*3)
        #vec2 = self.pool(vecs2)
    
        user_vecs2, _u_weights = self.attention2(*[news_vecs_input]*3)
        user_vecs2 = self.dropout2(user_vecs2)
        user_vec2 = self.pool2(user_vecs2)
        print(user_vec2.shape)
        #user_vec2 = keras.layers.Reshape((1,400))(user_vec2)
        #user_vec2 = user_vec2.unsqueeze(1)

        user_vecs1 = self.tail2(news_vecs_input)
        user_vec1, _u_hidden = self.gru2(user_vecs1)
        #user_vec1 = keras.layers.Reshape((1,400))(user_vec1)
        #user_vec1 = user_vec1.unsqueeze(1)
        
        user_vecs = torch.stack([user_vec1, user_vec2], dim=1) #keras.layers.Concatenate(axis=-2)([user_vec1,user_vec2])
        print(user_vecs.shape)
        vec = self.pool3(user_vecs)
        print(vec.shape)
        return vec
        


    


In [80]:
u_pt = UserEncoder()
dim1 = 50
dim2 = 400
x = torch.randn((2, dim1, dim2))

u_pt.forward(x)

torch.Size([2, 400])
torch.Size([2, 2, 400])
torch.Size([2, 400])


tensor([[ 2.4103e-01, -2.2155e-01,  5.5414e-02, -1.0528e-01, -2.6470e-02,
          5.6696e-02,  1.4110e-01, -6.1008e-02,  4.1602e-02, -1.0160e-01,
         -2.2682e-02,  2.5239e-01,  2.3476e-02,  4.7963e-02, -5.2091e-02,
          1.6705e-01,  9.3662e-02, -2.2356e-01,  6.5630e-02, -9.0097e-02,
          1.7363e-01,  1.4190e-01,  1.9042e-01,  2.0884e-01,  1.0162e-03,
          1.2115e-01, -2.8425e-01, -1.1501e-01, -7.7650e-02,  4.8579e-02,
          3.3464e-02,  1.2099e-02,  1.1184e-01, -2.0019e-01, -1.2009e-01,
         -6.9716e-02,  2.5229e-01, -2.3580e-02, -1.8622e-01,  6.8790e-02,
         -1.5805e-02, -8.6447e-02, -2.0113e-01,  6.0035e-02, -4.2379e-03,
         -1.2508e-01,  9.4129e-02,  1.6371e-01,  4.1479e-02,  1.8373e-01,
         -2.6810e-01, -2.2285e-01, -4.2221e-03,  3.3998e-01, -2.5080e-01,
          2.3551e-03,  2.1028e-01, -2.1904e-01,  3.0840e-01, -2.1326e-01,
          2.3559e-02,  5.4087e-02, -3.3355e-02, -9.3945e-03,  4.3261e-02,
          1.8866e-01, -7.0549e-02, -3.