In [1]:
"""
https://buomsoo-kim.github.io/attention/2020/03/26/Attention-mechanism-16.md/

sentiment scores:
very positive = 5
slightly positive = 4
neutral = 3
slightly negative = 2
very negative = 1

"""

'\nhttps://buomsoo-kim.github.io/attention/2020/03/26/Attention-mechanism-16.md/\n\nsentiment scores:\nvery positive = 5\nslightly positive = 4\nneutral = 3\nslightly negative = 2\nvery negative = 1\n\n'

In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from tqdm import tqdm
import re

In [2]:
data = pd.read_csv("Twitter-sentiment-self-drive-DFE.csv", encoding = 'latin-1')
print(data.head())

    _unit_id  _golden _unit_state  _trusted_judgments _last_judgment_at  \
0  724227031     True      golden                 236               NaN   
1  724227032     True      golden                 231               NaN   
2  724227033     True      golden                 233               NaN   
3  724227034     True      golden                 240               NaN   
4  724227035     True      golden                 240               NaN   

  sentiment  sentiment:confidence  our_id sentiment_gold  \
0         5                0.7579   10001           5\n4   
1         5                0.8775   10002           5\n4   
2         2                0.6805   10003           2\n1   
3         2                0.8820   10004           2\n1   
4         3                1.0000   10005              3   

                               sentiment_gold_reason  \
0  Author is excited about the development of the...   
1  Author is excited that driverless cars will be...   
2  The author is ske

Preprocessing

In [3]:
NUM_INSTANCES = 3000
MAX_SENT_LEN = 10
tweets = []
sent_scores = []
unique_tokens = set()

for i in tqdm(range(NUM_INSTANCES)):
    rand_idx = np.random.randint(len(data))

    tweet = []
    sentences = data['text'].iloc[rand_idx].split(".")
    for sent in sentences:
        if len(sent) != 0:
            # Get only words
            sent = [x.lower() for x in re.findall(r"\w+", sent)]
            if len(sent) >= MAX_SENT_LEN:
                sent = sent[:MAX_SENT_LEN]
            else:
                for _ in range(MAX_SENT_LEN - len(sent)):
                    sent.append("<pad>")

            tweet.append(sent)
            unique_tokens.update(sent)
    tweets.append(tweet)
    if data['sentiment'].iloc[rand_idx] == "not_relevant":
        sent_scores.append(0)
    else:
        sent_scores.append(int(data["sentiment"].iloc[rand_idx]))

100%|██████████| 3000/3000 [00:00<00:00, 5604.88it/s]


In [4]:
print(len(tweets))
tweet_id = 14
print(f"Text from tweet {tweet_id}")
print(tweets[tweet_id])
print("Number of sentences in tweet")
print(len(tweets[tweet_id]))
print("Sentiment score for the tweet")
print(sent_scores[tweet_id])

3000
Text from tweet 14
[['everyone', 'will', 'have', 'self', 'driving', 'cars', 'by', '2026', 'analyst', 'says'], ['co', 'vzu7fmlb5k', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']]
Number of sentences in tweet
2
Sentiment score for the tweet
3


Unique tokens

In [5]:
unique_tokens = list(unique_tokens)
print("Number of unique tokens")
print(len(unique_tokens))
print("Example of the first 20 tokens")
print(unique_tokens[:20])

Number of unique tokens
6330
Example of the first 20 tokens
['music', 'ralphpaglia', 'cyberdyne', 'otcccc', 'laws', 'd8g9obxryq', 'lookin', 'robo', 'newyorker', 'effect', 'scottkirsner', 'bbnqzhcvc2ì', 'hi', 'jpfpces3us', 'tv', 'story', 'yes', 'reply', 'zuhwrs7j', 'sets']


Numericalize each token

In [6]:
# encode each token into index
for i in tqdm(range(len(tweets))):
#for i in range(len(tweets)):
    for j in range(len(tweets[i])):
        tweets[i][j] = [unique_tokens.index(x) for x in tweets[i][j]]

100%|██████████| 3000/3000 [00:09<00:00, 321.30it/s]


In [7]:
print(f"Tweet {tweet_id} in numbers")
print(tweets[tweet_id])
print("Number of sentences in tweet")
print(len(tweets[tweet_id]))

Tweet 14 in numbers
[[5617, 1271, 4636, 3121, 5990, 2488, 4515, 4774, 4294, 276], [2718, 5160, 6184, 6184, 6184, 6184, 6184, 6184, 6184, 6184]]
Number of sentences in tweet
2


Setting parameters

In [8]:
VOCAB_SIZE = len(unique_tokens)
NUM_CLASSES = len(set(sent_scores))
LEARNING_RATE = 1e-3
NUM_EPOCHS = 1#0
HIDDEN_SIZE = 16
EMBEDDING_DIM = 30
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print("Classes", NUM_CLASSES)

Classes 6


### Encoders

In [9]:
class wordEncoder(nn.Module):
  def __init__(self, vocab_size, hidden_size, embedding_dim):
    super(wordEncoder, self).__init__()
    self.hidden_size = hidden_size
    self.vocab_size = vocab_size

    self.embedding = nn.Embedding(vocab_size, embedding_dim)
    self.gru = nn.GRU(embedding_dim, hidden_size, bidirectional = True)

  def forward(self, word, h0):
    word = self.embedding(word).unsqueeze(0).unsqueeze(1)
    out, h0 = self.gru(word, h0)
    return out, h0

In [10]:
#torch.autograd.set_detect_anomaly(True)

In [11]:
class sentEncoder(nn.Module):
  def __init__(self, hidden_size):
    super(sentEncoder, self).__init__()
    self.hidden_size = hidden_size
    self.gru = nn.GRU(hidden_size, hidden_size, bidirectional = True)

  def forward(self, sentence, h0):
    sentence = sentence.unsqueeze(0).unsqueeze(1)
    out, h0 = self.gru(sentence)
    return out, h0

### Hierarchical Attention Network

In [53]:
class HAN(nn.Module):
  def __init__(self, wordEncoder, sentEncoder, num_classes, device):
    super(HAN, self).__init__()
    self.wordEncoder = wordEncoder
    self.sentEncoder = sentEncoder
    self.device = device
    #self.softmax = nn.Softmax(dim=1)
    self.softmax = nn.Softmax(dim=0)

    # word-level attention
    self.word_attention = nn.Linear(self.wordEncoder.hidden_size*2, self.wordEncoder.hidden_size*2)
    self.u_w = nn.Linear(self.wordEncoder.hidden_size*2, 1, bias = False)

    # sentence-level attention
    self.sent_attention = nn.Linear(self.sentEncoder.hidden_size * 2, self.sentEncoder.hidden_size*2)
    self.u_s = nn.Linear(self.sentEncoder.hidden_size*2, 1, bias = False)

    # final layer
    self.dense_out = nn.Linear(self.sentEncoder.hidden_size*2, num_classes)
    self.log_softmax = nn.LogSoftmax(dim=0)

  def forward(self, document):
    word_attention_weights = []
    sentenc_out = torch.zeros((document.size(0), 2, self.sentEncoder.hidden_size)).to(self.device)
    # iterate on sentences
    h0_sent = torch.zeros(2, 1, self.sentEncoder.hidden_size, dtype = torch.float).to(self.device)
    for i in range(document.size(0)):
      sent = document[i]
      wordenc_out = torch.zeros((sent.size(0), 2, self.wordEncoder.hidden_size)).to(self.device)
      h0_word = torch.zeros(2, 1, self.wordEncoder.hidden_size, dtype = torch.float).to(self.device)
      # iterate on words
      for j in range(sent.size(0)):
        _, h0_word = self.wordEncoder(sent[j], h0_word)
        wordenc_out[j] = h0_word.squeeze()
      #print(wordenc_out)
      wordenc_out = wordenc_out.view(wordenc_out.size(0), -1)
      u_word = torch.tanh(self.word_attention(wordenc_out))
      #print()
      #print("u_word")
      #print(u_word)

      x = self.u_w(u_word)
      #print("~~~~~ x ~~~~~")
      #print(x)
      #print(x.shape)
      #word_weights = self.softmax(self.u_w(u_word))
      #aligned_weights_ = F.softmax(aligned_weights.unsqueeze(0))
      word_weights = self.softmax(x)

      #print()
      #print("word_weights")
      #print(word_weights)
      word_attention_weights.append(word_weights)

      sent_summ_vector = (u_word * word_weights).sum(axis=0)

      _, h0_sent = self.sentEncoder(sent_summ_vector, h0_sent)
      sentenc_out[i] = h0_sent.squeeze()
    sentenc_out = sentenc_out.view(sentenc_out.size(0), -1)
    u_sent = torch.tanh(self.sent_attention(sentenc_out))
    sent_weights = self.softmax(self.u_s(u_sent))
    doc_summ_vector = (u_sent * sent_weights).sum(axis=0)
    out = self.dense_out(doc_summ_vector)
    return word_attention_weights, sent_weights, self.log_softmax(out)

## Training

In [52]:
word_encoder = wordEncoder(VOCAB_SIZE, HIDDEN_SIZE, EMBEDDING_DIM).to(DEVICE)
sent_encoder = sentEncoder(HIDDEN_SIZE * 2).to(DEVICE)
model = HAN(word_encoder, sent_encoder, NUM_CLASSES, DEVICE).to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr = LEARNING_RATE)
criterion = nn.NLLLoss()
losses = []
weights = []

for i in tqdm(range(NUM_EPOCHS)):
    current_loss = 0
    for j in range(len(tweets[:50])):
        tweet, score = torch.tensor(tweets[j], dtype = torch.long).to(DEVICE), torch.tensor(sent_scores[j]).to(DEVICE)
        word_weights, sent_weights, output = model(tweet)
        optimizer.zero_grad()
        #current_loss += criterion(output.unsqueeze(0), score.unsqueeze(0))
        current_loss = criterion(output.unsqueeze(0), score.unsqueeze(0))
        current_loss.backward(retain_graph=True)
        optimizer.step()

    print(f"epoch {i+1}/{NUM_EPOCHS}, loss: {current_loss}")
    losses.append(current_loss.item()/(j+1))

100%|██████████| 1/1 [00:31<00:00, 31.50s/it]



word_weights
tensor([[0.0861],
        [0.0833],
        [0.1047],
        [0.1055],
        [0.1011],
        [0.1135],
        [0.1072],
        [0.1005],
        [0.0989],
        [0.0993]], grad_fn=<SoftmaxBackward>)

word_weights
tensor([[0.1048],
        [0.1181],
        [0.1057],
        [0.1000],
        [0.0972],
        [0.0958],
        [0.0951],
        [0.0946],
        [0.0944],
        [0.0943]], grad_fn=<SoftmaxBackward>)

word_weights
tensor([[0.0840],
        [0.1063],
        [0.0999],
        [0.0965],
        [0.1111],
        [0.1059],
        [0.1007],
        [0.1086],
        [0.1059],
        [0.0812]], grad_fn=<SoftmaxBackward>)

word_weights
tensor([[0.1049],
        [0.1004],
        [0.0984],
        [0.0981],
        [0.0985],
        [0.0991],
        [0.0996],
        [0.1000],
        [0.1004],
        [0.1006]], grad_fn=<SoftmaxBackward>)

word_weights
tensor([[0.0985],
        [0.0812],
        [0.0785],
        [0.0997],
        [0.1110],
        

In [82]:
with torch.no_grad():
    tweet, score = torch.tensor(tweets[50], dtype = torch.long).to(DEVICE), torch.tensor(sent_scores[j]).to(DEVICE)
    print(tweet)
    print("Class:", score.item())

    print("~~~ RESULTS ~~~")
    word_weights, sent_weights, output = model(tweet)
    print(output)
    print(word_weights)
    print(sent_weights)
    print("~~~ Prediction ~~~")
    _, idx = torch.max(output, 0)
    print("Class:",idx.item())

tensor([[3796, 1081, 4739, 5202, 4182, 4182, 4182, 4182, 4182, 4182],
        [5084, 3871, 5270,  711, 5173, 3668,  777,  968, 4182, 4182],
        [1615, 4962, 3188, 3808,  893, 2970, 4182, 4182, 4182, 4182],
        [3244,  293, 4182, 4182, 4182, 4182, 4182, 4182, 4182, 4182]])
Class: 3
~~~ RESULTS ~~~

word_weights
tensor([[0.0771],
        [0.0996],
        [0.0989],
        [0.0870],
        [0.0989],
        [0.1040],
        [0.1067],
        [0.1083],
        [0.1094],
        [0.1101]])

word_weights
tensor([[0.0886],
        [0.0913],
        [0.0833],
        [0.0881],
        [0.1001],
        [0.0974],
        [0.0992],
        [0.1079],
        [0.1194],
        [0.1247]])

word_weights
tensor([[0.0879],
        [0.0874],
        [0.0925],
        [0.0901],
        [0.0887],
        [0.0811],
        [0.1070],
        [0.1172],
        [0.1225],
        [0.1256]])

word_weights
tensor([[0.0721],
        [0.0747],
        [0.0947],
        [0.1023],
        [0.1061],
     

In [83]:
for t in tweet:
    t = t.numpy()
    sent = " ".join([unique_tokens[w] for w in t])
    print(sent)

if score.item() == idx.item():
    print(f"Correct: {score.item()}")
else:
    print(f"Truth: {score.item()}, Predicted:{idx.item()}")

testing a rover nasaames <pad> <pad> <pad> <pad> <pad> <pad>
it uses same tech as self driving cars <pad> <pad>
stateofnasa nasasocial nasa û_ http t <pad> <pad> <pad> <pad>
co pin2j8fusj <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>
Correct: 3


# ~ ~ ~ T E S T S  ~ ~ ~

Get a document

In [19]:
tweet_id = 9
print(tweets[tweet_id])
doc = tweets[tweet_id]



for t in doc:
    print(t)
    sent = " ".join([unique_tokens[w] for w in t])
    print(sent)
    print()

print("Total of sentences in doc:",len(doc))

print("Score:", sent_scores[tweet_id])

[[3569, 2433, 1940, 5684, 6000, 1145, 480, 4786, 2378, 3436], [4985, 92, 1147, 3515, 2246, 5117, 6184, 6184, 6184, 6184], [4776, 2312, 6184, 6184, 6184, 6184, 6184, 6184, 6184, 6184], [2718, 2051, 6184, 6184, 6184, 6184, 6184, 6184, 6184, 6184]]
[3569, 2433, 1940, 5684, 6000, 1145, 480, 4786, 2378, 3436]
what intersections would look like in a world with only

[4985, 92, 1147, 3515, 2246, 5117, 6184, 6184, 6184, 6184]
i literally winced the whole time <pad> <pad> <pad> <pad>

[4776, 2312, 6184, 6184, 6184, 6184, 6184, 6184, 6184, 6184]
http t <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>

[2718, 2051, 6184, 6184, 6184, 6184, 6184, 6184, 6184, 6184]
co 1ekgcqdm <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>

Total of sentences in doc: 4
Score: 2


Transform the document and its score into a tensor

In [20]:
print("Device:", DEVICE)
document = torch.tensor(doc, dtype = torch.long).to(DEVICE)
score = torch.tensor(sent_scores[j]).to(DEVICE)

print(document)
print(document.type())
print(document.shape)
print()
print(score)
print(score.type())
print(score.shape)


Device: cpu
tensor([[3569, 2433, 1940, 5684, 6000, 1145,  480, 4786, 2378, 3436],
        [4985,   92, 1147, 3515, 2246, 5117, 6184, 6184, 6184, 6184],
        [4776, 2312, 6184, 6184, 6184, 6184, 6184, 6184, 6184, 6184],
        [2718, 2051, 6184, 6184, 6184, 6184, 6184, 6184, 6184, 6184]])
torch.LongTensor
torch.Size([4, 10])

tensor(3)
torch.LongTensor
torch.Size([])


Initializing encoders

In [21]:
word_encoder = wordEncoder(VOCAB_SIZE, HIDDEN_SIZE, EMBEDDING_DIM).to(DEVICE)


In [22]:
print(word_encoder.hidden_size)
print(word_encoder.embedding)
print(word_encoder.vocab_size)
print(word_encoder.gru)
print(word_encoder)

16
Embedding(6330, 30)
6330
GRU(30, 16, bidirectional=True)
wordEncoder(
  (embedding): Embedding(6330, 30)
  (gru): GRU(30, 16, bidirectional=True)
)


In [23]:
word_attention_weights = []

In [44]:
sentenc_out = torch.zeros((document.size(0), 2, sent_encoder.hidden_size)).to(DEVICE)
print(sentenc_out)
print(sentenc_out.shape)

tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.

In [24]:
import sys
for i in range(document.size(0)):
    print(f"Sentence {i} in document")
    sent = document[i]
    print("~~~ Creating a new wordenc_out")
    wordenc_out = torch.zeros((sent.size(0), 2, word_encoder.hidden_size)).to(DEVICE)
    h0_word = torch.zeros(2, 1, word_encoder.hidden_size, dtype = torch.float).to(DEVICE)
    # iterate on words
    for j in range(sent.size(0)):
        print("word", sent[j])
        _, h0_word = word_encoder(sent[j], h0_word)

        #print(_)
        #print(_.shape)
        #print("#"*50)
        #print(h0_word)
        print(h0_word.shape)
        print("#" * 50)
        wordenc_out[j] = h0_word.squeeze()
        #print(wordenc_out)
    print("#_#" * 50)
    print(f"Continuing in sentence {i}")


Sentence 0 in document
~~~ Creating a new wordenc_out
word tensor(3569)
torch.Size([2, 1, 16])
##################################################
word tensor(2433)
torch.Size([2, 1, 16])
##################################################
word tensor(1940)
torch.Size([2, 1, 16])
##################################################
word tensor(5684)
torch.Size([2, 1, 16])
##################################################
word tensor(6000)
torch.Size([2, 1, 16])
##################################################
word tensor(1145)
torch.Size([2, 1, 16])
##################################################
word tensor(480)
torch.Size([2, 1, 16])
##################################################
word tensor(4786)
torch.Size([2, 1, 16])
##################################################
word tensor(2378)
torch.Size([2, 1, 16])
##################################################
word tensor(3436)
torch.Size([2, 1, 16])
##################################################
#_##_##_##_##_##_##_##_##_#

In [25]:
    wordenc_out = wordenc_out.view(wordenc_out.size(0), -1)
    print(wordenc_out.shape)


torch.Size([10, 32])


In [26]:
    # word-level attention
    word_attention = nn.Linear(word_encoder.hidden_size*2, word_encoder.hidden_size*2)
    u_w = nn.Linear(word_encoder.hidden_size*2, 1, bias = False)

In [27]:
    x = word_attention(wordenc_out)
    print(x)
    print(x.shape)

tensor([[ 0.1042, -0.2369, -0.3620, -0.1506,  0.4012, -0.2512, -0.1348, -0.0056,
          0.0571,  0.1853, -0.1059,  0.0846,  0.0639,  0.0702,  0.2185,  0.3496,
         -0.0015, -0.2413,  0.1023, -0.2145, -0.3328, -0.2059,  0.1359, -0.0793,
         -0.0425, -0.1120, -0.1226,  0.3884,  0.2770, -0.2538,  0.1783,  0.0865],
        [ 0.3290,  0.0959, -0.3581, -0.1266,  0.5102,  0.0231, -0.2772, -0.0332,
          0.0640,  0.0406, -0.0917, -0.0729, -0.1020,  0.0492,  0.2190,  0.1950,
         -0.1644, -0.1984,  0.0360, -0.3093, -0.1349, -0.1019, -0.0518, -0.0663,
         -0.3275, -0.0023, -0.1576,  0.2930,  0.2203, -0.0012,  0.0010,  0.2678],
        [ 0.1045, -0.2553, -0.3143,  0.0171,  0.1703, -0.1433, -0.4102,  0.1004,
         -0.0268, -0.1532, -0.0734,  0.0041, -0.0481,  0.1687,  0.1280, -0.0411,
         -0.3815, -0.0985, -0.1021,  0.0643, -0.0470,  0.0625,  0.0605, -0.0558,
          0.1663,  0.0799, -0.0094,  0.0759,  0.2678, -0.1145,  0.1214,  0.3028],
        [ 0.0122, -0.3534

In [28]:
    u_word = torch.tanh(x)
    print(u_word)
    print(u_word.shape)

tensor([[ 0.1038, -0.2326, -0.3469, -0.1494,  0.3810, -0.2461, -0.1340, -0.0056,
          0.0571,  0.1832, -0.1055,  0.0844,  0.0638,  0.0701,  0.2151,  0.3360,
         -0.0015, -0.2367,  0.1019, -0.2113, -0.3211, -0.2031,  0.1350, -0.0792,
         -0.0425, -0.1116, -0.1220,  0.3700,  0.2702, -0.2485,  0.1764,  0.0863],
        [ 0.3177,  0.0956, -0.3435, -0.1259,  0.4701,  0.0231, -0.2703, -0.0332,
          0.0639,  0.0405, -0.0914, -0.0727, -0.1017,  0.0492,  0.2156,  0.1925,
         -0.1630, -0.1958,  0.0360, -0.2998, -0.1341, -0.1015, -0.0518, -0.0662,
         -0.3162, -0.0023, -0.1563,  0.2849,  0.2168, -0.0012,  0.0010,  0.2616],
        [ 0.1042, -0.2499, -0.3044,  0.0171,  0.1686, -0.1423, -0.3887,  0.1000,
         -0.0268, -0.1520, -0.0733,  0.0041, -0.0480,  0.1671,  0.1273, -0.0411,
         -0.3640, -0.0981, -0.1018,  0.0642, -0.0469,  0.0624,  0.0605, -0.0558,
          0.1648,  0.0797, -0.0094,  0.0758,  0.2616, -0.1140,  0.1208,  0.2939],
        [ 0.0122, -0.3394

In [29]:
    y = u_w(u_word)
    print(y)
    print(y.shape)

tensor([[-0.0833],
        [-0.0854],
        [-0.0749],
        [-0.0837],
        [-0.0884],
        [-0.0896],
        [-0.0891],
        [-0.0878],
        [-0.0865],
        [-0.0854]], grad_fn=<MmBackward>)
torch.Size([10, 1])


In [30]:
    softmax = nn.Softmax(dim=0)

In [31]:
    word_weights = softmax(y)
    print(word_weights)
    print(word_weights.shape)
    print(word_weights.sum(dim=0))

tensor([[0.1002],
        [0.1000],
        [0.1011],
        [0.1002],
        [0.0997],
        [0.0996],
        [0.0996],
        [0.0998],
        [0.0999],
        [0.1000]], grad_fn=<SoftmaxBackward>)
torch.Size([10, 1])
tensor([1.0000], grad_fn=<SumBackward1>)


In [79]:
    word_attention_weights.append(word_weights)
    print(len(word_attention_weights))
    print(word_attention_weights)

1
[tensor([[0.0962],
        [0.0797],
        [0.0865],
        [0.0958],
        [0.1019],
        [0.1054],
        [0.1074],
        [0.1085],
        [0.1091],
        [0.1095]], grad_fn=<SoftmaxBackward>)]


Sentence Representation

In [32]:
    sent_summ_vector = (u_word * word_weights).sum(axis=0)
    print(sent_summ_vector)
    print(sent_summ_vector.shape)

tensor([ 0.0137, -0.3047, -0.3298,  0.0779,  0.0664, -0.2713, -0.3330,  0.3125,
        -0.1091, -0.1997, -0.1551,  0.0580,  0.0185,  0.2675,  0.1825, -0.1381,
        -0.3260, -0.2014, -0.0316,  0.1954, -0.0466,  0.1398,  0.1192,  0.0787,
         0.2843,  0.0927,  0.0130,  0.1462,  0.2271, -0.1703,  0.2011,  0.2857],
       grad_fn=<SumBackward1>)
torch.Size([32])


With the sentence Representation, do the sentence encoding

In [38]:
    sent_encoder = sentEncoder(HIDDEN_SIZE * 2).to(DEVICE)

sentEncoder(
  (gru): GRU(32, 32, bidirectional=True)
)


In [41]:
    print(sent_encoder)
    print(sent_encoder.hidden_size)
    print(sent_encoder.gru)
    print(sent_encoder)

sentEncoder(
  (gru): GRU(32, 32, bidirectional=True)
)
32
GRU(32, 32, bidirectional=True)
sentEncoder(
  (gru): GRU(32, 32, bidirectional=True)
)


In [39]:
    h0_sent = torch.zeros(2, 1, sent_encoder.hidden_size, dtype=float).to(DEVICE)

In [40]:
    print(h0_sent)
    print(h0_sent.shape)

tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0.]]], dtype=torch.float64)
torch.Size([2, 1, 32])


In [42]:
    _, h0_sent = sent_encoder(sent_summ_vector, h0_sent)
    print(_)
    print(_.shape)
    print()
    print(h0_sent)
    print(h0_sent.shape)

tensor([[[ 1.0205e-02, -4.0110e-02, -3.1047e-02, -1.0309e-01,  6.1025e-02,
           9.7053e-02,  2.3997e-02, -1.1006e-01, -7.9249e-02,  1.4116e-01,
          -1.1850e-01,  7.6383e-03, -6.2079e-02, -5.2774e-02,  4.4817e-02,
          -4.2933e-02,  1.0216e-02, -8.4887e-02,  5.0992e-02, -8.3613e-02,
           2.5807e-02,  1.3175e-01,  2.9545e-02,  4.9353e-02, -2.2592e-02,
          -6.6765e-02,  5.2205e-02,  1.1331e-01, -4.4384e-02,  9.8815e-03,
           9.3066e-02, -2.8365e-02, -1.3375e-01, -1.3185e-02,  2.2988e-02,
          -1.7301e-02,  1.2917e-01,  1.4210e-01, -9.3730e-02,  6.1013e-02,
          -4.9668e-02, -5.0703e-02,  9.2445e-02, -9.9274e-02,  8.8130e-02,
           6.3865e-02,  4.4382e-02, -7.2982e-02,  5.8645e-02, -6.2847e-02,
          -1.4688e-02,  7.2641e-02, -3.0726e-02, -7.3274e-02,  1.0716e-01,
           2.6822e-03, -1.5771e-01,  2.1897e-02,  3.5497e-02, -3.3932e-02,
           6.1915e-02, -1.2432e-04, -9.6328e-02,  7.9967e-02]]],
       grad_fn=<CatBackward>)
torch

Almacena h0 de cada sentencia

In [46]:
    print(h0_sent.squeeze().shape)
    sentenc_out[i] = h0_sent.squeeze()
    print(sentenc_out)

torch.Size([2, 32])
tensor([[[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
           0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
           0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
           0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
           0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
           0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
           0.0000e+00,  0.0000e+00],
         [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
           0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
           0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
           0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
           0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
           0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
           0.0000e+00,  0.0000e+00]],

    

In [49]:
print(sentenc_out.view(sentenc_out.size(0), -1).shape)

torch.Size([4, 64])


In [51]:
sentenc_out = sentenc_out.view(sentenc_out.size(0), -1)
print(sentenc_out)
print(sentenc_out.shape)

tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0

## Sentence Attention

In [54]:
sent_attention = nn.Linear(sent_encoder.hidden_size * 2, sent_encoder.hidden_size*2)
u_s = nn.Linear(sent_encoder.hidden_size*2, 1, bias = False)

In [55]:
print(sent_attention)
print()
print(u_s)

Linear(in_features=64, out_features=64, bias=True)

Linear(in_features=64, out_features=1, bias=False)


In [56]:
x = sent_attention(sentenc_out)
print(x)
print(x.shape)

tensor([[ 0.0028, -0.1099,  0.0240, -0.0190,  0.1199, -0.0506, -0.1016, -0.0022,
         -0.0449, -0.0360, -0.0733, -0.0750,  0.0149,  0.0531, -0.0629, -0.0245,
         -0.0260, -0.0269,  0.0742, -0.1036, -0.1236,  0.0388, -0.0268,  0.0266,
          0.0487,  0.1230, -0.1080,  0.0132, -0.0438, -0.0700,  0.0354, -0.0659,
         -0.0402,  0.0860,  0.1248, -0.1116, -0.1109,  0.0659, -0.0903, -0.0599,
          0.0497,  0.1199, -0.0085, -0.0229,  0.1020,  0.0017, -0.0310, -0.1151,
         -0.0694, -0.1083,  0.0136, -0.0538, -0.0688,  0.0474,  0.1114, -0.0669,
         -0.0592,  0.0922,  0.0179,  0.0563,  0.1020,  0.1081, -0.0231,  0.1137],
        [ 0.0028, -0.1099,  0.0240, -0.0190,  0.1199, -0.0506, -0.1016, -0.0022,
         -0.0449, -0.0360, -0.0733, -0.0750,  0.0149,  0.0531, -0.0629, -0.0245,
         -0.0260, -0.0269,  0.0742, -0.1036, -0.1236,  0.0388, -0.0268,  0.0266,
          0.0487,  0.1230, -0.1080,  0.0132, -0.0438, -0.0700,  0.0354, -0.0659,
         -0.0402,  0.0860, 

In [57]:
u_sent = torch.tanh(x)
print(u_sent)
print(u_sent.shape)

tensor([[ 0.0028, -0.1094,  0.0240, -0.0190,  0.1193, -0.0505, -0.1012, -0.0022,
         -0.0449, -0.0360, -0.0731, -0.0749,  0.0149,  0.0531, -0.0628, -0.0245,
         -0.0260, -0.0269,  0.0741, -0.1032, -0.1230,  0.0388, -0.0268,  0.0266,
          0.0487,  0.1224, -0.1076,  0.0132, -0.0438, -0.0699,  0.0353, -0.0658,
         -0.0401,  0.0858,  0.1242, -0.1111, -0.1105,  0.0658, -0.0901, -0.0598,
          0.0497,  0.1193, -0.0085, -0.0229,  0.1017,  0.0017, -0.0310, -0.1146,
         -0.0693, -0.1078,  0.0136, -0.0538, -0.0687,  0.0473,  0.1109, -0.0668,
         -0.0591,  0.0920,  0.0179,  0.0562,  0.1017,  0.1077, -0.0231,  0.1132],
        [ 0.0028, -0.1094,  0.0240, -0.0190,  0.1193, -0.0505, -0.1012, -0.0022,
         -0.0449, -0.0360, -0.0731, -0.0749,  0.0149,  0.0531, -0.0628, -0.0245,
         -0.0260, -0.0269,  0.0741, -0.1032, -0.1230,  0.0388, -0.0268,  0.0266,
          0.0487,  0.1224, -0.1076,  0.0132, -0.0438, -0.0699,  0.0353, -0.0658,
         -0.0401,  0.0858, 

In [58]:
y = u_s(u_sent)
print(y)
print(y.shape)

tensor([[0.0157],
        [0.0157],
        [0.0157],
        [0.0575]], grad_fn=<MmBackward>)
torch.Size([4, 1])


Sentence weights

In [59]:
sent_weights = softmax(y)
print(sent_weights)
print(sent_weights.shape)

tensor([[0.2474],
        [0.2474],
        [0.2474],
        [0.2579]], grad_fn=<SoftmaxBackward>)
torch.Size([4, 1])


In [60]:
print(sent_weights.sum(dim=0))

tensor([1.], grad_fn=<SumBackward1>)


## Document vector

In [61]:
doc_summ_vector = (u_sent * sent_weights)
print(doc_summ_vector)
print(doc_summ_vector.shape)
print()
doc_summ_vector = doc_summ_vector.sum(axis=0)
print(doc_summ_vector)
print(doc_summ_vector.shape)

tensor([[ 0.0007, -0.0271,  0.0059, -0.0047,  0.0295, -0.0125, -0.0250, -0.0006,
         -0.0111, -0.0089, -0.0181, -0.0185,  0.0037,  0.0131, -0.0155, -0.0061,
         -0.0064, -0.0067,  0.0183, -0.0255, -0.0304,  0.0096, -0.0066,  0.0066,
          0.0120,  0.0303, -0.0266,  0.0033, -0.0108, -0.0173,  0.0087, -0.0163,
         -0.0099,  0.0212,  0.0307, -0.0275, -0.0273,  0.0163, -0.0223, -0.0148,
          0.0123,  0.0295, -0.0021, -0.0057,  0.0251,  0.0004, -0.0077, -0.0284,
         -0.0172, -0.0267,  0.0034, -0.0133, -0.0170,  0.0117,  0.0274, -0.0165,
         -0.0146,  0.0228,  0.0044,  0.0139,  0.0252,  0.0266, -0.0057,  0.0280],
        [ 0.0007, -0.0271,  0.0059, -0.0047,  0.0295, -0.0125, -0.0250, -0.0006,
         -0.0111, -0.0089, -0.0181, -0.0185,  0.0037,  0.0131, -0.0155, -0.0061,
         -0.0064, -0.0067,  0.0183, -0.0255, -0.0304,  0.0096, -0.0066,  0.0066,
          0.0120,  0.0303, -0.0266,  0.0033, -0.0108, -0.0173,  0.0087, -0.0163,
         -0.0099,  0.0212, 

In [62]:
z= (u_sent * sent_weights)

Final Layer


In [64]:
dense_out = nn.Linear(sent_encoder.hidden_size*2, NUM_CLASSES)
print(dense_out)

Linear(in_features=64, out_features=6, bias=True)


In [65]:
out = dense_out(doc_summ_vector)
print(out)
print(out.shape)

tensor([-0.1060,  0.0195, -0.0629,  0.0993, -0.1410, -0.0569],
       grad_fn=<AddBackward0>)
torch.Size([6])
