<a href="https://colab.research.google.com/github/mohripan/Belajar-NLP/blob/main/NLP_and_Sequence_Model_Part_6_LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import spacy
from gensim.models import Word2Vec

In [15]:
# Our data: the examples labelled 1 come first, followed by the examples labelled 0
positive_examples = [
  'Believe in yourself and your abilities.',
  'Never give up on your dreams, no matter how big or small they may seem.',
  'Keep going, even when things get tough.',
]
negative_examples = [
  'A flock of flamingos.',
  'An octopus eight arms.',
  'A popsicle on a hot day.',
]

sentences = positive_examples + negative_examples

# One binary target per sentence, in the same order as `sentences`
labels = [1] * len(positive_examples) + [0] * len(negative_examples)

In [16]:
# Text preprocessing: run every sentence through spaCy's small English
# pipeline and keep the lemma of each token it produces.
nlp = spacy.load('en_core_web_sm')
tokenized_sentences = [
  [token.lemma_ for token in nlp(sentence)]
  for sentence in sentences
]

In [17]:
# Training word2vec
# Word2Vec training is stochastic: fix the seed and train on a single worker
# thread so the learned vectors can be reproduced from run to run
# (multi-threaded training adds scheduling-dependent ordering jitter).
# NOTE: reproducibility across separate interpreter launches additionally
# requires a fixed PYTHONHASHSEED in the environment.
w2v = Word2Vec(tokenized_sentences, vector_size = 50, min_count = 1, window = 5,
               seed = 42, workers = 1)

In [18]:
# Getting Embeddings: look up the trained vector of every token while keeping
# the per-sentence grouping (one inner list of vectors per sentence).
embeddings = [
  [w2v.wv[token] for token in tokens]
  for tokens in tokenized_sentences
]

In [19]:
# Convert each sentence (a list of word vectors) into one
# (num_tokens, embedding_dim) float32 tensor.
# torch.tensor copies every vector explicitly; this replaces the legacy
# torch.FloatTensor constructor, which takes a slow path when it is handed a
# Python list of NumPy arrays.
embeddings = [
  torch.stack([torch.tensor(vector, dtype = torch.float32) for vector in sentence])
  for sentence in embeddings
]

# Padding sequences: shorter sentences are zero-padded at the end so the
# batch becomes a single (batch, max_tokens, embedding_dim) tensor.
embeddings = nn.utils.rnn.pad_sequence(embeddings, batch_first = True)

# Converting label to tensor
labels = torch.tensor(labels)

# Every sample needs exactly one label; fail early if they ever drift apart.
assert embeddings.shape[0] == labels.shape[0], 'number of sentences and labels differ'

In [20]:
# Inspect the padded batch: (batch, max_tokens, embedding_dim)
embeddings.shape

torch.Size([6, 17, 50])

In [21]:
# LSTM model
class LSTM(nn.Module):
  """Single-layer LSTM followed by a linear head applied to the final state.

  Args:
    input_size: Size of each input vector (last dimension of the input).
    hidden_size: Number of features in the LSTM hidden state.
    output_size: Number of values produced by the linear head.
  """

  def __init__(self, input_size, hidden_size, output_size):
    super().__init__()
    self.hidden_size = hidden_size
    self.lstm = nn.LSTM(input_size, hidden_size, batch_first = True)
    self.fc = nn.Linear(hidden_size, output_size)

  def forward(self, x, lengths = None):
    """Run the LSTM over a batch and project its final state.

    Args:
      x: Input of shape (batch, seq_len, input_size).
      lengths: Optional true (unpadded) length of every sequence in `x`, as a
        list or 1-D integer tensor. When given, the batch is packed so the
        state used for the prediction is the one reached at each sequence's
        real last token rather than after the trailing padding. When omitted,
        the output at the last time step of `x` is used, padding included.

    Returns:
      Tensor of shape (batch, output_size).
    """
    if lengths is None:
      lstm_out, _ = self.lstm(x)
      last_out = lstm_out[:, -1, :]
    else:
      # pack_padded_sequence requires the lengths on the CPU as int64.
      lengths = torch.as_tensor(lengths, dtype = torch.int64).cpu()
      packed = nn.utils.rnn.pack_padded_sequence(
        x, lengths, batch_first = True, enforce_sorted = False)
      _, (h_n, _) = self.lstm(packed)
      # h_n is (num_layers, batch, hidden); the last layer's entry is the
      # hidden state after each sequence's final real token.
      last_out = h_n[-1]
    return self.fc(last_out)

In [22]:
# Model Initialization
# Seed PyTorch's RNG before building the model so the initial weights, and
# with them the training curve, are the same on every run.
torch.manual_seed(42)

# The input width is read from the padded batch instead of being repeated as
# a literal, so it cannot drift from the Word2Vec vector size.
model = LSTM(embeddings.shape[-1], 32, 1)

# BCEWithLogitsLoss applies the sigmoid itself, so the model outputs raw logits.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr = 0.01)

In [23]:
# Training the model (full-batch gradient descent on the whole toy corpus)
NUM_EPOCHS = 50

# BCEWithLogitsLoss expects float targets; convert once instead of every epoch.
targets = labels.float()

model.train()
for epoch in range(NUM_EPOCHS):
  optimizer.zero_grad()
  output = model(embeddings)
  # Drop only the trailing output dimension: a bare squeeze() would also
  # collapse the batch dimension for a batch of size one and break the loss.
  loss = criterion(output.squeeze(-1), targets)
  loss.backward()
  optimizer.step()

  print(f'Epoch: {epoch} | Loss: {loss.item():.4f}')

Epoch: 0 | Loss: 0.6970
Epoch: 1 | Loss: 0.6940
Epoch: 2 | Loss: 0.6924
Epoch: 3 | Loss: 0.6920
Epoch: 4 | Loss: 0.6921
Epoch: 5 | Loss: 0.6915
Epoch: 6 | Loss: 0.6907
Epoch: 7 | Loss: 0.6900
Epoch: 8 | Loss: 0.6895
Epoch: 9 | Loss: 0.6889
Epoch: 10 | Loss: 0.6882
Epoch: 11 | Loss: 0.6872
Epoch: 12 | Loss: 0.6859
Epoch: 13 | Loss: 0.6843
Epoch: 14 | Loss: 0.6824
Epoch: 15 | Loss: 0.6799
Epoch: 16 | Loss: 0.6759
Epoch: 17 | Loss: 0.6707
Epoch: 18 | Loss: 0.6594
Epoch: 19 | Loss: 0.6474
Epoch: 20 | Loss: 0.6627
Epoch: 21 | Loss: 0.6404
Epoch: 22 | Loss: 0.6339
Epoch: 23 | Loss: 0.6293
Epoch: 24 | Loss: 0.6147
Epoch: 25 | Loss: 0.5584
Epoch: 26 | Loss: 0.4606
Epoch: 27 | Loss: 0.7264
Epoch: 28 | Loss: 0.4784
Epoch: 29 | Loss: 0.4522
Epoch: 30 | Loss: 0.2923
Epoch: 31 | Loss: 0.4316
Epoch: 32 | Loss: 0.3557
Epoch: 33 | Loss: 0.1698
Epoch: 34 | Loss: 0.2131
Epoch: 35 | Loss: 0.1080
Epoch: 36 | Loss: 0.0607
Epoch: 37 | Loss: 0.0520
Epoch: 38 | Loss: 0.0668
Epoch: 39 | Loss: 0.0436
Epoch: 40 