In [12]:
import torch
import torch.nn as nn
import utils
from utils import END_TOKEN, UNKNOWN_TOKEN

class seq2seq(nn.Module):
    def __init__(self, post_embeddings, comment_embeddings, device):
        super().__init__()
        self.post_embeddings = post_embeddings
        self.comment_embeddings = comment_embeddings
        self.encoder = Encoder(self.post_embeddings)
        self.decoder = Decoder(self.comment_embeddings)
        self.device = device
    
    def forward(self, encoded_post_body, encoded_target_comment, tf_ratio=0.5):
        embedded = self.post_embeddings(encoded_post_body)
        out, context = self.encoder(embedded)
        
        return context

class Encoder(nn.Module):
    def __init__(self, post_embeddings):
        super().__init__()
        self.post_embeddings = post_embeddings
        self.encoder = nn.GRU(30, 1200, 3, batch_first=True, bidirectional=False)
        
    def forward(self, x):
        embedded = self.post_embedding(x)
        # push vector through encoder
        # then take just the hidden vectors as the context vectors
        out, h_n = self.encoder(embedded)

        return h_n

# only for proof of concept-- cant work with batches
class Decoder(nn.Module):
    def __init__(self, comment_embeddings):
        super().__init__()
        self.comment_embeddings = comment_embeddings
        self.decoder = nn.GRU(30, 1200, 3, batch_first=True, bidirectional=False)
        self.fc = nn.Linear(1200, comment_embeddings.num_embeddings)

    def forward(self, context, last_output_word):
        """
        Since this function gets called once at a time rather than taking in
        a sequence of vectors, we need to pass it the last output. This will be just
        a vector of numbers that can be converted to the embedding representing that last output
        """
        embedded = self.comment_embeddings(last_output_word)
        out, h_n = self.decoder(context, embedded)

        return h_n


In [2]:
post_word_to_idx, post_embeddings = utils.get_embeddings("post_embeddings_W2V_30_1000")
comment_word_to_idx, comment_embeddings = utils.get_embeddings("comment_embeddings_W2V_30_1000")


  weights = torch.FloatTensor([gse.wv.get_vector(word) for word in gse.wv.index_to_key])


In [3]:

df = utils.read_json("example.json")
df = utils.preprocess(df)


In [13]:
m = seq2seq(post_embeddings, comment_embeddings, "cpu")
# Decoder(comment_embeddings)
# comment_embeddings.num_embeddings

In [None]:
m(torch.LongTensor([[1,2]]), )

In [76]:
data = df.iloc[31]["post_body"]
data = [post_word_to_idx[word] if word in post_word_to_idx else post_word_to_idx[UNKNOWN_TOKEN] for word in data]
data = torch.LongTensor([data, data])
# emb = post_embeddings(data)
# print(emb.shape)
# m(data)
n = nn.GRU(30, 1200, 3, batch_first=True, bidirectional=False)
m(data)
# data.shape

torch.Size([2, 258, 30])
tensor([[[ 0.0045, -0.0141, -0.0066,  ..., -0.0049, -0.0082, -0.0111],
         [-0.0032, -0.0130, -0.0112,  ..., -0.0062, -0.0115, -0.0153],
         [ 0.0050, -0.0119, -0.0173,  ...,  0.0010, -0.0089, -0.0127],
         ...,
         [ 0.0468, -0.0095, -0.0345,  ...,  0.0194,  0.0504, -0.0430],
         [ 0.0382, -0.0046, -0.0411,  ...,  0.0134,  0.0509, -0.0429],
         [ 0.0283, -0.0049, -0.0542,  ...,  0.0088,  0.0707, -0.0401]],

        [[ 0.0045, -0.0141, -0.0066,  ..., -0.0049, -0.0082, -0.0111],
         [-0.0032, -0.0130, -0.0112,  ..., -0.0062, -0.0115, -0.0153],
         [ 0.0050, -0.0119, -0.0173,  ...,  0.0010, -0.0089, -0.0127],
         ...,
         [ 0.0468, -0.0095, -0.0345,  ...,  0.0194,  0.0504, -0.0430],
         [ 0.0382, -0.0046, -0.0411,  ...,  0.0134,  0.0509, -0.0429],
         [ 0.0283, -0.0049, -0.0542,  ...,  0.0088,  0.0707, -0.0401]]],
       grad_fn=<TransposeBackward1>)
____+_
tensor([[[-0.0676, -0.3967, -0.1826,  ..., -0.1

tensor([[[ 0.0045, -0.0141, -0.0066,  ..., -0.0049, -0.0082, -0.0111],
         [-0.0032, -0.0130, -0.0112,  ..., -0.0062, -0.0115, -0.0153],
         [ 0.0050, -0.0119, -0.0173,  ...,  0.0010, -0.0089, -0.0127],
         ...,
         [ 0.0468, -0.0095, -0.0345,  ...,  0.0194,  0.0504, -0.0430],
         [ 0.0382, -0.0046, -0.0411,  ...,  0.0134,  0.0509, -0.0429],
         [ 0.0283, -0.0049, -0.0542,  ...,  0.0088,  0.0707, -0.0401]],

        [[ 0.0045, -0.0141, -0.0066,  ..., -0.0049, -0.0082, -0.0111],
         [-0.0032, -0.0130, -0.0112,  ..., -0.0062, -0.0115, -0.0153],
         [ 0.0050, -0.0119, -0.0173,  ...,  0.0010, -0.0089, -0.0127],
         ...,
         [ 0.0468, -0.0095, -0.0345,  ...,  0.0194,  0.0504, -0.0430],
         [ 0.0382, -0.0046, -0.0411,  ...,  0.0134,  0.0509, -0.0429],
         [ 0.0283, -0.0049, -0.0542,  ...,  0.0088,  0.0707, -0.0401]]],
       grad_fn=<TransposeBackward1>)

In [42]:
print(d.shape)

torch.Size([1, 2, 30])
