In [0]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


https://www.analyticsvidhya.com/blog/2020/01/first-text-classification-in-pytorch/

https://github.com/Shawn1993/cnn-text-classification-pytorch/blob/master/train.py

https://arxiv.org/abs/1408.5882

https://www.kaggle.com/leighplt/pytorch-torchtext-glove

https://pytorch.org/docs/stable/nn.html#conv2d

In [0]:
from __future__ import print_function, division
import os
import torch
import pandas as pd
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
from torchvision import transforms, utils
import random
SEED = 1200

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  

In [0]:
import pandas as pd
clintontrump_path = "/content/drive/My Drive/6.864/finalproject/clinton_trump_clean.csv"
alltweets_path = "/content/drive/My Drive/6.864/finalproject/ExtractedTweets.csv"

clintontrump = pd.read_csv(clintontrump_path)
alltweets = pd.read_csv(alltweets_path)
# clintontrump = clintontrump[['id', 'handle', 'text']].rename(columns={'id':'Party', 'handle':'Handle', 'text':'Tweet'})


In [0]:
print(alltweets[0:1]["Tweet"])
print(alltweets.columns)

0    Today, Senate Dems vote to #SaveTheInternet. P...
Name: Tweet, dtype: object
Index(['Party', 'Handle', 'Tweet'], dtype='object')


In [0]:
from torchtext import data    
TEXT = data.Field(tokenize='spacy',batch_first=True,include_lengths=True)
LABEL = data.LabelField(dtype = torch.long, batch_first=True)

#CLTR VS ALL
fields = [('Party', None), ('Handle',LABEL),('Tweet', TEXT)]
# fields = [('Party', LABEL), ('Handle',None),('Tweet', TEXT)]

#CLTR VS ALL
#loading custom dataset
# all_data=data.TabularDataset(path = clintontrump_path,format = 'csv',fields = fields,skip_header = True)
all_data=data.TabularDataset(path = alltweets_path,format = 'csv',fields = fields,skip_header = True)

In [0]:
#CLTR VS ALL
train_data, valid_data, test_data = all_data.split(split_ratio=[0.7, 0.2, 0.1], stratified=True, strata_field = 'Handle', random_state = random.seed(SEED))
# train_data, valid_data, test_data = all_data.split(split_ratio=[0.7, 0.2, 0.1], stratified=True, strata_field = 'Party', random_state = random.seed(SEED))

TEXT.build_vocab(train_data, min_freq=3 , vectors = "glove.6B.100d")  
LABEL.build_vocab(train_data)


In [0]:
print(len(train_data))

60518


In [0]:
print("Size of TEXT vocabulary:",len(TEXT.vocab))
print("Size of LABEL vocabulary:",len(LABEL.vocab))
print(TEXT.vocab.freqs.most_common(30))  
print(TEXT.vocab.stoi)   

VOCAB_SIZE = len(TEXT.vocab)
NUM_CATEGORIES = len(LABEL.vocab)

embs_vocab = TEXT.vocab.vectors

print(NUM_CATEGORIES)

In [0]:
BATCH_SIZE = 64

train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
    (train_data, valid_data, test_data), 
    batch_sizes = (BATCH_SIZE, BATCH_SIZE, len(test_data)),
    sort_key = lambda x: len(x.Tweet),
    sort_within_batch=True,
    device = device)

In [0]:
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

class CNN(nn.Module):
  def __init__(self, embs_vocab, embed_size, hidden_size, num_conv_layers, num_categories, dropout=0.):
    super(CNN, self).__init__()

    input_channels = 1
    vocab_size = len(embs_vocab)
    embed_size = embed_size #TODO
    hidden_size = hidden_size
    num_conv_layers = num_conv_layers #UNUSED
    num_categories = num_categories 

    self.embed = nn.Embedding.from_pretrained(embs_vocab)
  
    self.conv13 = nn.Conv2d(input_channels, hidden_size, (3, embed_size))
    self.conv14 = nn.Conv2d(input_channels, hidden_size, (4, embed_size))
    self.conv15 = nn.Conv2d(input_channels, hidden_size, (5, embed_size))

    self.dropout = nn.Dropout(dropout)
    self.fc = nn.Linear(num_conv_layers*hidden_size, num_categories)

  def conv_and_pool(self, x, conv):
    x = F.relu(conv(x)).squeeze(3)  # (batch_size, hidden_size, sent_length)
    x = F.max_pool1d(x, x.size(2)).squeeze(2)
    return x

  def forward(self, x, x_length):
    #TODO: PADDING ???? 
    x = self.embed(x)  # (batch_size, sent_length, embed_dim) ??
    x = x.unsqueeze(1)  # (batch_size, input_channels, sent_length, embed_dim) ??
    # print('after unsqueeze', x.size())

    x1 = self.conv_and_pool(x,self.conv13) #(batch_size, hidden_size)
    x2 = self.conv_and_pool(x,self.conv14) #(batch_size, hidden_size)
    x3 = self.conv_and_pool(x,self.conv15) #(batch_size, hidden_size)
    x = torch.cat((x1, x2, x3), 1) # (batch_size, 3*hidden_size)

    x = self.dropout(x)  # (batch_size, 3*hidden_size)
    logits = self.fc(x)  # (batch_size, num_categories)
    return logits


In [0]:
def train(model, iterator, optimizer, criterion):
    epoch_loss = 0
    iter_count = 0
    print_every = 50

    model.train()

    for batch in iterator:
        optimizer.zero_grad()   

        text, text_lengths = batch.Tweet   
        
        if text.size(1) < 5:
          continue 

        predictions = model(text, text_lengths)
        
        #CLTR VS ALL
        loss = criterion(predictions, batch.Handle)   
        # loss = criterion(predictions, batch.Party)             
        loss.backward()       
        optimizer.step()      
        
        epoch_loss += loss.item()  

        if iter_count % print_every == 0:
            print('%d %.4f' % (iter_count, loss))
        iter_count+=1

    return epoch_loss / len(iterator)

In [0]:
def evaluate(model, iterator, criterion):
    epoch_loss = 0

    #no dropout layers
    model.eval()
    
    #no autograd
    with torch.no_grad():
        for batch in iterator:
            #retrieve text 
            text, text_lengths = batch.Tweet   
            
                    
            if text.size(1) < 5:
              continue 

            predictions = model(text, text_lengths)
            
             #CLTR VS ALL
            #compute loss and accuracy
            loss = criterion(predictions, batch.Handle)   
            # loss = criterion(predictions, batch.Party)  
            epoch_loss += loss.item()

    return epoch_loss / len(iterator)        

In [0]:
def train_and_validate(model, train_iterator, valid_iterator, optimizer, criterion, n_epochs):
  N_EPOCHS = n_epochs
  best_valid_loss = float('inf')

  for epoch in range(N_EPOCHS):
      
      train_loss = train(model, train_iterator, optimizer, criterion)
      valid_loss = evaluate(model, valid_iterator, criterion)
      
      #save the best model
      if valid_loss < best_valid_loss:
          best_valid_loss = valid_loss
          torch.save(model.state_dict(), 'saved_weights.pt')
      
      print(f'\t Train Loss: {train_loss:.3f}')
      print(f'\t Val. Loss: {valid_loss:.3f}')

In [0]:


import sklearn.metrics

def test_scores(model, test_iterator):
  # correct = 0
  # total = 0
  with torch.no_grad():
    #there should be only one batch
      for batch in test_iterator:
        text, text_lengths = batch.Tweet
        
        #CLTR VS ALL
        labels = batch.Handle
        # labels = batch.Party

        if text.size(1) < 5:
          continue 
        predictions = model(text, text_lengths)
        _, pred_classes = torch.max(predictions, 1)

        print(text[pred_classes != labels][0])



        # for val in range(200):
                  
        #   w2id = TEXT.vocab.stoi
        #   id2w = dict([(value, key) for key, value in w2id.items()])
        #   textttt = [id2w[i.item()] for i in text[pred_classes != labels][val]] 
        #   if textttt[0]!="":
        #     print(" ".join(textttt))
        #     print(labels[pred_classes != labels][val])
        #     # print(labels[pred_classes != labels][val])


        # total += labels.size(0)
        # correct += (pred_classes == labels).sum().item()

        print("ACCURACY", sklearn.metrics.accuracy_score(labels.cpu().numpy(),  pred_classes.cpu().numpy()).round(3))
        print("F1 SCORE", sklearn.metrics.f1_score(labels.cpu().numpy(),  pred_classes.cpu().numpy(), average='micro').round(3))
        print("PRECISION", sklearn.metrics.precision_score(labels.cpu().numpy(),  pred_classes.cpu().numpy(), average='micro').round(3))
        print("RECALL", sklearn.metrics.recall_score(labels.cpu().numpy(),  pred_classes.cpu().numpy(), average='micro').round(3))

        # return sklearn.metrics.f1_score(labels.cpu().numpy(),  pred_classes.cpu().numpy()).round(3)



In [0]:
CNN_EMBED_DIM = 100
# CNN_HIDDEN_DIM = 100
CNN_NUM_CONV_LAYERS = 3
CNN_DROPOUT = 0.3
CNN_LEARNING_RATE = 0.01
NUM_EPOCHS = 5

f1s = []

for i in [5, 10, 20, 50, 100, 500]:
  CNN_HIDDEN_DIM = i 
  cnn_model = CNN(embs_vocab, CNN_EMBED_DIM, CNN_HIDDEN_DIM, CNN_NUM_CONV_LAYERS, NUM_CATEGORIES, dropout=CNN_DROPOUT).to(device)
  cnn_optimizer = optim.Adam(cnn_model.parameters(), lr = CNN_LEARNING_RATE)
  cnn_criterion = nn.CrossEntropyLoss()
  train_and_validate(cnn_model, train_iterator, valid_iterator, cnn_optimizer, cnn_criterion, NUM_EPOCHS)
  f1 = test_scores(cnn_model, test_iterator)
  f1s.append(f1)

In [0]:
# test_scores(cnn_model, test_iterator)     

In [0]:
print(f1s)

[0.584, 0.791, 0.69, 0.817, 0.786, 0.675]


#Bi-Directional LSTM

In [0]:
class BiDiLSTM(nn.Module):
  def __init__(self, embs_vocab, embed_size, hidden_size, num_layers, num_categories, bidirectional=True, dropout=0.):
    super(BiDiLSTM, self).__init__()

    vocab_size = len(embs_vocab)
    embed_size = embed_size #TODO
    hidden_size = hidden_size
    num_layers = num_layers
    num_categories = num_categories 
    self.bidirectional = bidirectional

    self.embed = nn.Embedding.from_pretrained(embs_vocab)
  
    #lstm layer
    self.lstm = nn.LSTM(embed_size, 
                        hidden_size, 
                        num_layers=num_layers, 
                        bidirectional=self.bidirectional, 
                        dropout=dropout,
                        batch_first=True)
    
    #dense layer
    if self.bidirectional:
      self.fc = nn.Linear(hidden_size * 2, num_categories)
    else:
      self.fc = nn.Linear(hidden_size, num_categories)


  def forward(self, text, text_lengths):
    embedded = self.embed(text) #[batch size, sent_len, emb dim]
    packed = nn.utils.rnn.pack_padded_sequence(embedded, text_lengths, batch_first=True)
    
    output, (hidden, cell) = self.lstm(packed)

    #hidden = [batch size, num layers * num directions,hid dim]
    #cell = [batch size, num layers * num directions,hid dim]
    
    #concat the final forward and backward hidden state
    if self.bidirectional:
      hidden = torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim = 1)
    else:
      hidden = hidden.squeeze()   
       
    #hidden = [batch size, hid dim * num directions]
    logits = self.fc(hidden)

    return logits


In [0]:
LSTM_EMBED_DIM = 100
# LSTM_HIDDEN_DIM = 82
LSTM_NUM_LAYERS = 1
LSTM_DROPOUT = 0.3
LSTM_LEARNING_RATE = 0.001
BIDIRECTIONAL = True
NUM_EPOCHS = 5


f1s = []

for i in [20, 50]:
  LSTM_HIDDEN_DIM = i
  lstm_model = BiDiLSTM(embs_vocab, LSTM_EMBED_DIM, LSTM_HIDDEN_DIM, LSTM_NUM_LAYERS, NUM_CATEGORIES,bidirectional = BIDIRECTIONAL, dropout=LSTM_DROPOUT).to(device)
  lstm_optimizer = optim.Adam(lstm_model.parameters(), lr = LSTM_LEARNING_RATE)
  lstm_criterion = nn.CrossEntropyLoss()
  train_and_validate(lstm_model, train_iterator, valid_iterator, lstm_optimizer, lstm_criterion, NUM_EPOCHS)
  f1 = test_scores(lstm_model, test_iterator)
  f1s.append(f1)


In [0]:
f1s

[0.63, 0.124, 0.312, 0.152, 0.047, 0.815]

In [0]:
test_scores(lstm_model, test_iterator)

ACCURACY 0.841
F1 SCORE 0.837
PRECISION 0.791
RECALL 0.889


0.837

In [0]:
#No. of trainable parameters
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)
    
print(f'The LSTM model has {count_parameters(lstm_model):,} trainable parameters')
print(f'The CNN model has {count_parameters(cnn_model):,} trainable parameters')

The LSTM model has 2,410,002 trainable parameters
The CNN model has 604,502 trainable parameters


In [0]:
w2id = TEXT.vocab.stoi
l2id = LABEL.vocab.stoi
id2l = dict([(value, key) for key, value in l2id.items()])

text = "Make america great again."
# "We need to provide better healthcare"
# "We need to tax less"
# "We need to tax more"


import torchtext
from torchtext.data import get_tokenizer

tokenizer = get_tokenizer("spacy")
tokens = tokenizer(text)

encoding = torch.tensor([[w2id[token] for token in tokens]]).to(device)
length = torch.tensor([encoding.size(1)])

predictions = lstm_model(encoding, length)
_, pred_classes = torch.max(predictions, 1)



cnn_predictions = cnn_model(encoding, length)
_, cnn_pred_classes = torch.max(cnn_predictions, 1)

print('CNN')
print(F.softmax(cnn_predictions))
print(text, " LIKELY COMES FROM ", id2l[cnn_pred_classes.item()])

print('LSTM')
print(F.softmax(predictions))
print(text, " LIKELY COMES FROM ", id2l[pred_classes.item()])


CNN
tensor([[0.6308, 0.3692]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Make america great again.  LIKELY COMES FROM  realDonaldTrump
LSTM
tensor([[0.5658, 0.4342]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Make america great again.  LIKELY COMES FROM  realDonaldTrump


In [0]:
l2id = LABEL.vocab.stoi
l2id

defaultdict(<function torchtext.vocab._default_unk_index>,
            {'Democrat': 1, 'Republican': 0})

In [0]:
w2id = TEXT.vocab.stoi
w2id

In [0]:
test_scores(lstm_model, test_iterator)


tensor([  14, 2565,    9,   41, 1432,   21,    4,   21,   67,   41, 1078, 1707,
          41,  198,   21,    4,   21,   67,   21,   13,   21,  906,    5,   41,
        3170,   11,  198,   21,    4,   21,   67,  733, 1717, 1037,  847,   16,
        1271,    6,   10,    2,    2,    3], device='cuda:0')
In 2017 # Nevadans in 10 counties , 7 in # NV02 , will have just 1 choice , it ’s unfair for them pay a penalty bc of Obama 's failed law ( 3/3 ) <pad> <pad> <pad> <pad> <pad> <pad>
tensor(0, device='cuda:0')
I am sorry to hear about my friend & amp ; colleague @DorisMatsui ’s car accident .   I ’m glad to know she is doing well & amp ; wish her a speedy recovery <pad> <pad> <pad> <pad> <pad> <pad> <pad>
tensor(1, device='cuda:0')
Travel for def & amp ; mil personnel is not a https://t.co/ucKdra1SGZ , often it is mandatory & amp ; imperative to our security . I 'm glad we addressed # https://t.co/ucKdra1SGZ in # NDAA17 <pad> <pad> <pad> <pad> <pad> <pad> <pad>
tensor(1, device='cuda:0')
Li

0.669

# Attention Encoder

In [0]:
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

class Encoder(nn.Module):
  def __init__(self, input_size, hidden_size, dropout=0.):
    """
    Inputs: 
      - `input_size`: an int representing the RNN input size.
      - `hidden_size`: an int representing the RNN hidden size.
    """
    super(Encoder, self).__init__()
    self.rnn = nn.GRU(input_size, hidden_size, num_layers=1, batch_first=True,
                      dropout=dropout, bidirectional=True)

  def forward(self, inputs, lengths):
    """
    Inputs:
      - `inputs`: a 3d-tensor of shape (batch_size, max_seq_length, embed_size)
          representing a batch of padded embedded word vectors of source
          sentences.
      - `lengths`: a 1d-tensor of shape (batch_size,) representing the sequence
          lengths of `inputs`.

    Returns:
      - `outputs`: a 3d-tensor of shape
        (batch_size, max_seq_length, hidden_size).
      - `finals`: a 3d-tensor of shape (num_layers, batch_size, hidden_size).
    """
    outputs, finals = self.rnn(inputs)
    return outputs, finals

class Attention(nn.Module):
  def __init__(self, input_size, hidden_size):
    super(Attention, self).__init__()
    self.attn = nn.Linear(hidden_size, hidden_size)
    self.softmax = nn.Softmax(dim=1)

  def forward(self, dec_out, enc_outs):
    """
      - `dec_out`: a 3d-tensor of shape (1, batch_size, hidden_size) representing
          the decoder hidden state.
      - `enc_outs`: a 3d-tensor of shape
          (batch_size, max_seq_length, hidden_size) representing the encoder
          outputs for each decoding step to attend to. 
    """
    weighted_enc_hidden = self.attn(enc_outs) #dims: batch x seq_len X hidden
    energies = torch.bmm(weighted_enc_hidden, dec_out.permute(1, 2, 0)) #dims: batch x seq_len x 1
    alphas = self.softmax(energies)
    return alphas #dims: batch x seq_len x 1
    
class AttentionDecoder(nn.Module):
  """An attention-based RNN decoder."""

  def __init__(self, input_size, hidden_size, attention=None, dropout=0.):
    """
      Inputs:
        - `input_size`, `hidden_size`, and `dropout` the same as in Encoder.
        - `attention`: this is your self-defined Attention object. You can
            either define an individual class for your Attention and pass it
            here or leave `attention` as None and just implement everything
            here.
    """
    super(AttentionDecoder, self).__init__()

    self.rnn = nn.GRU(input_size, hidden_size, num_layers=1, batch_first=True,
                      dropout=dropout, bidirectional=False)
    
    self.attn = attention

    if attention is None:
      self.attn = Attention(input_size, hidden_size)

    self.w = nn.Linear(hidden_size * 2, hidden_size)
    
  def forward(self, inputs, encoder_hiddens, encoder_finals,  src_mask,
              trg_mask, hidden=None, max_len=None):
    """Unroll the decoder one step at a time.
    
    Inputs:
      - `inputs`: a 3d-tensor of shape (batch_size, max_seq_length, embed_size)
          representing a batch of padded embedded word vectors of target
          sentences (for teacher-forcing during training).
      - `encoder_hiddens`: a 3d-tensor of shape
          (batch_size, max_seq_length, hidden_size) representing the encoder
          outputs for each decoding step to attend to. 
      - `encoder_finals`: a 3d-tensor of shape
          (num_enc_layers, batch_size, hidden_size) representing the final
          encoder hidden states used to initialize the initial decoder hidden
          states.
      - `src_mask`: a 3d-tensor of shape (batch_size, 1, max_seq_length)
          representing the mask for source sentences.
      - `trg_mask`: a 3d-tensor of shape (batch_size, 1, max_seq_length)
          representing the mask for target sentences.
      - `hidden`: a 3d-tensor of shape (1, batch_size, hidden_size) representing
          the value to be used to initialize the initial decoder hidden states.
          If None, then use `encoder_finals`.
      - `max_len`: an int representing the maximum decoding length.

    Returns:
      - `outputs`: (same as in Decoder) a 3d-tensor of shape
          (batch_size, max_seq_length, hidden_size) representing the raw
          decoder outputs (before converting to a `trg_vocab_size`-dim vector).
      - `hidden`: a 3d-tensor of shape (1, batch_size, hidden_size)
          representing the last decoder hidden state.
    """

    # The maximum number of steps to unroll the RNN.
    if max_len is None:
      # max_len = trg_mask.size(-1)
        max_len = inputs.size(1)

    if hidden is None:
      hidden = self.init_hidden(encoder_finals)


    outputs = []
    all_alphas = []
    
    #todo: add connection of context to next hidden state
    #todo maybe: use dec_out instead of hidden for attention 

    for i in range(max_len):
      dec_out, hidden = self.rnn(inputs[:, i:i+1, :], hidden) # hidden dims: (1, batch_size, hidden_size)

      alphas = self.attn(hidden, encoder_hiddens)  #dims: batch x seq_len x 1
      all_alphas.append(alphas)
      context = torch.bmm(encoder_hiddens.permute(0, 2, 1), alphas)  #dims: batch x hidden_size x 1
      out = self.w(torch.cat((context.permute(2,0,1), hidden), dim=2)).squeeze(0) #dims: 1 x batch x hidden_size
      outputs.append(out)

    outputs = torch.stack(outputs, dim=1)  
    return hidden, outputs, all_alphas

  def init_hidden(self, encoder_finals):
    """Use encoder final hidden state to initialize decoder's first hidden
    state."""
    decoder_init_hiddens = encoder_finals
    ### Your code here!

    return decoder_init_hiddens

class EncoderAttentionDecoder(nn.Module):
  """A Encoder-Decoder architecture with attention.
  """
  def __init__(self, encoder, decoder, src_embed , trg_embed, generator):
    """
    Inputs:
      - `encoder`: an `Encoder` object.
      - `decoder`: an `AttentionDecoder` object.
      - `src_embed`: an nn.Embedding object representing the lookup table for
          input (source) sentences.
      - `trg_embed`: an nn.Embedding object representing the lookup table for
          output (target) sentences.
      - `generator`: a `Generator` object. Essentially a linear mapping. See
          the next code cell.
    """
    super(EncoderAttentionDecoder, self).__init__()

    self.encoder = encoder
    self.decoder = decoder
    self.src_embed = src_embed
    self.trg_embed = trg_embed
    self.generator = generator

  def forward(self, src_ids, trg_ids, src_lengths):
    """Take in and process masked source and tar get sequences.

    Inputs:
      `src_ids`: a 2d-tensor of shape (batch_size, max_seq_length) representing
        a batch of source sentences of word ids.
      `trg_ids`: a 2d-tensor of shape (batch_size, max_seq_length) representing
        a batch of target sentences of word ids.
      `src_lengths`: a 1d-tensor of shape (batch_size,) representing the
        sequence length of `src_ids`.

    Returns the decoder outputs, see the above cell.
    """
    ### Your code here!
    # You can refer to `EncoderDecoder` and extend from it.
    encoder_hiddens, encoder_finals = self.encode(src_ids, src_lengths)
    return self.decode(encoder_hiddens, encoder_finals, trg_ids[:, :-1])

  def encode(self, src_ids, src_lengths):
    return self.encoder(self.src_embed(src_ids), src_lengths)
    
  def decode(self, encoder_hiddens, encoder_finals, trg_ids, decoder_hidden=None):
    return self.decoder(self.trg_embed(trg_ids), encoder_hiddens, encoder_finals, None, None)

In [0]:

LSTM_EMBED_DIM = 100
LSTM_HIDDEN_DIM = 82
LSTM_NUM_LAYERS = 1
LSTM_DROPOUT = 0.3
LSTM_LEARNING_RATE = 0.001
BIDIRECTIONAL = True

encoder = Encoder(LSTM_EMBED_DIM, LSTM_HIDDEN_DIM, LSTM_DROPOUT)
decoder = AttentionDecoder(LSTM_EMBED_DIM, LSTM_HIDDEN_DIM, dropout = LSTM_DROPOUT)
encdec  = EncoderAttentionDecoder(encoder, decoder, nn.Embedding.from_pretrained(embs_vocab), nn.Embedding.from_pretrained(embs_vocab) )

# lstm_model = BiDiLSTM(embs_vocab, LSTM_EMBED_DIM, LSTM_HIDDEN_DIM, LSTM_NUM_LAYERS, NUM_CATEGORIES,bidirectional = BIDIRECTIONAL, dropout=LSTM_DROPOUT).to(device)
# lstm_optimizer = optim.Adam(lstm_model.parameters(), lr = LSTM_LEARNING_RATE)
# lstm_criterion = nn.CrossEntropyLoss()
