# LSTM and GRU 


# PyTorch Implementation

In [31]:
# importing required libraries 

import pandas as pd

# for pytorch imports
import torch

# for functional dependencies like activation function 
import torch.nn.functional as F

# nn is the core PyTorch module that provides neural-network layers and architectures
import torch.nn as nn

# for optimizer
import torch.optim as optim

# CountVectorizer builds the bag-of-words vocabulary
from sklearn.feature_extraction.text import CountVectorizer

# Padding/packing helpers: the LSTM consumes sequences, and giving every input
# the same fixed length lets them be batched, which speeds up computation.
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm, tqdm_notebook
from torch.utils.data import random_split

In [32]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Bare expression: shown as the notebook cell output.
device

### Class Sequences :
- It takes the text dataset as input, tokenizes each review into a sequence of vocabulary indices, and pads every sequence to a fixed length.
- `__getitem__` returns the (sequence, label) pair at a particular index.
- `__len__` returns the number of sequences in the dataset.

In [33]:
class Sequences(Dataset):
    """Fixed-length, index-encoded review dataset.

    A ``CountVectorizer`` is fitted on the ``review`` column to build the
    vocabulary. Every review is then encoded as a list of vocabulary indices,
    truncated to ``max_seq_len`` and right-padded with the ``<PAD>`` index.
    Reviews without a single in-vocabulary token are dropped together with
    their label.

    Args:
        path: A ``pandas.DataFrame`` with ``review`` and ``label`` columns, or
            anything ``pandas.read_csv`` accepts that yields such a frame.
        max_seq_len: Length every stored sequence is truncated/padded to.

    Raises:
        ValueError: If no review contains any in-vocabulary token.
    """

    PAD_TOKEN = '<PAD>'

    def __init__(self, path, max_seq_len):
        self.max_seq_len = max_seq_len
        # The parameter is called ``path`` but callers pass a DataFrame;
        # accept both so the name is no longer misleading.
        df = path if isinstance(path, pd.DataFrame) else pd.read_csv(path)
        reviews = df.review.tolist()

        # Bag-of-words vocabulary: English stop words removed, and terms that
        # appear in fewer than 1.5% of the reviews discarded.
        vectorizer = CountVectorizer(stop_words='english', min_df=0.015)
        vectorizer.fit(reviews)

        # Copy the vocabulary so adding <PAD> does not mutate the vectorizer.
        self.token2idx = dict(vectorizer.vocabulary_)
        # <PAD> gets the first index after the real tokens.
        self.token2idx[self.PAD_TOKEN] = max(self.token2idx.values()) + 1

        self._tokenizer = vectorizer.build_analyzer()

        encoded = (self.encode(review)[:max_seq_len] for review in reviews)
        # Keep only reviews that still contain at least one known token.
        kept = [
            (sequence, label)
            for sequence, label in zip(encoded, df.label.tolist())
            if sequence
        ]
        if not kept:
            raise ValueError(
                'no review contains a token from the fitted vocabulary'
            )
        sequences, self.labels = zip(*kept)
        self.sequences = [self.pad(sequence) for sequence in sequences]

    def encode(self, x):
        """Return the vocabulary indices of the known tokens of text ``x``."""
        return [
            self.token2idx[token]
            for token in self._tokenizer(x)
            if token in self.token2idx
        ]

    def pad(self, x):
        """Right-pad index list ``x`` with ``<PAD>`` up to ``max_seq_len``.

        A list that is already longer than ``max_seq_len`` is returned
        unchanged (it is not truncated here).
        """
        missing = self.max_seq_len - len(x)
        return x + missing * [self.token2idx[self.PAD_TOKEN]]

    def __getitem__(self, i):
        """Return the ``(padded_sequence, label)`` pair stored at index ``i``."""
        # Internal invariant: every stored sequence was truncated and padded.
        assert len(self.sequences[i]) == self.max_seq_len
        return self.sequences[i], self.labels[i]

    def __len__(self):
        """Return the number of stored sequences."""
        return len(self.sequences)

In [34]:
# Load the training CSV and expose its 'sentiment' column under the name 'label'.
data = pd.read_csv(r'../input/traindataset/Train dataset.csv')
# pop() removes 'sentiment' and returns it; assigning it back appends 'label'.
data['label'] = data.pop('sentiment')
# Preview the first rows as the cell output.
data.head()

__Encoding positive as 1 and negative as 0__

In [35]:
# Maps each sentiment string to the integer class used as the training target.
labeling = {
    'positive':1, 
    'negative':0
}

In [36]:
# Replace every sentiment string by its integer code; a value missing from
# `labeling` raises KeyError.
data['label'] = data['label'].apply(lambda x : labeling[x])


In [37]:
# (rows, columns) of the loaded frame, shown as the cell output.
data.shape

In [38]:
# Build the dataset from the DataFrame; every review is encoded and then
# truncated/padded to 200 token indices.
dataset = Sequences(data, max_seq_len=200)


In [39]:
# Vocabulary size, including the <PAD> entry, shown as the cell output.
len(dataset.token2idx)


In [40]:
def collate(batch):
    """Stack a list of ``(sequence, label)`` items into two batch tensors.

    Returns:
        A ``(inputs, target)`` pair: ``inputs`` is a LongTensor built from the
        first element of every item, ``target`` a FloatTensor built from the
        second element of every item.
    """
    token_rows = []
    labels = []
    for item in batch:
        token_rows.append(item[0])
        labels.append(item[1])
    return torch.LongTensor(token_rows), torch.FloatTensor(labels)

batch_size = 512
# Sequences drops reviews that contain no in-vocabulary token, so the dataset
# length is not guaranteed to be 40000. Derive the split sizes from the real
# length instead of hard-coding them; random_split raises when the sizes do
# not sum to len(dataset). One eighth is held out for validation, which gives
# the former 35000/5000 split when the dataset has 40000 items.
n_validation = len(dataset) // 8
n_training = len(dataset) - n_validation
training, validation = random_split(dataset, [n_training, n_validation])

In [41]:
# Batch iterators over the two splits; both reshuffle on every pass and use
# `collate` to turn lists of (sequence, label) items into tensors.
train_loader = DataLoader(
    training,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=collate,
)
val_loader = DataLoader(
    validation,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=collate,
)

In [42]:


class RNN(nn.Module):
    """LSTM-based binary classifier over sequences of token indices.

    Pipeline: embedding -> LSTM (``batch_first=True``) -> linear layer applied
    to the LSTM output of the last time step. The result is one raw logit per
    sequence (no sigmoid is applied here).

    Args:
        vocab_size: Number of rows in the embedding table.
        batch_size: Batch size assumed by ``init_hidden`` until ``forward``
            has seen a batch.
        embedding_dimension: Size of each embedding vector.
        hidden_size: Size of the LSTM hidden state.
        n_layers: Number of stacked LSTM layers.
        device: Device on which the initial LSTM states are created.
    """

    def __init__(
        self,
        vocab_size,
        batch_size,
        embedding_dimension=100,
        hidden_size=200,
        n_layers=1,
        device='cpu'
    ):
        super().__init__()
        self.n_layers = n_layers
        self.hidden_size = hidden_size
        self.device = device
        self.batch_size = batch_size
        self.encoder = nn.Embedding(vocab_size, embedding_dimension)
        self.rnn = nn.LSTM(
            embedding_dimension,
            hidden_size,
            num_layers=n_layers,
            batch_first=True,
        )
        self.decoder = nn.Linear(hidden_size, 1)

    def init_hidden(self, batch_size=None):
        """Return the initial ``(h_0, c_0)`` LSTM states.

        Both tensors have shape ``(n_layers, batch_size, hidden_size)``. In
        training mode they are drawn from a standard normal, as before. In
        eval mode they are zeros, so that repeated inference on the same input
        gives the same result.

        Args:
            batch_size: Batch dimension of the states; defaults to
                ``self.batch_size``.
        """
        if batch_size is None:
            batch_size = self.batch_size
        shape = (self.n_layers, batch_size, self.hidden_size)
        make_state = torch.randn if self.training else torch.zeros
        # Allocate directly on the target device instead of creating on the
        # CPU and copying.
        return (
            make_state(*shape, device=self.device),
            make_state(*shape, device=self.device),
        )

    def forward(self, inputs):
        """Return one logit per input sequence.

        Args:
            inputs: Integer tensor of token indices, ``(batch, seq_len)``.

        Returns:
            Float tensor of shape ``(batch,)`` holding raw logits.
        """
        # The last batch of an epoch may be smaller; remember the size that
        # was actually seen so init_hidden() without arguments stays in sync.
        batch_size = inputs.size(0)
        self.batch_size = batch_size

        encoded = self.encoder(inputs)
        output, _ = self.rnn(encoded, self.init_hidden(batch_size))
        # With batch_first=True the LSTM output is (batch, seq_len, hidden).
        # Select the last time step. The previous `output[:, :, -1]` selected
        # the last hidden unit of every step and only ran when seq_len
        # happened to equal hidden_size.
        # NOTE(review): inputs are right-padded, so the last step may sit on a
        # <PAD> token; packing the sequences would avoid that.
        last_step = output[:, -1, :]
        # squeeze(-1) drops only the feature dimension, so a batch of one
        # keeps shape (1,) and still matches its target.
        return self.decoder(last_step).squeeze(-1)

In [43]:
# Instantiate the classifier; the embedding table gets one row per vocabulary
# entry (the <PAD> token included).
model = RNN(
    hidden_size=200,
    vocab_size=len(dataset.token2idx),
    device=device,
    batch_size=batch_size
)
# Move the parameters to the selected device.
model = model.to(device)
# Bare expression: prints the module structure as the cell output.
model

In [44]:
# Binary cross-entropy on raw logits: the sigmoid is applied inside the loss,
# so the model output is fed in unnormalised.
criterion = nn.BCEWithLogitsLoss()
# Adam over the trainable parameters only.
optimizer = optim.Adam([p for p in model.parameters() if p.requires_grad], lr=0.01)

In [45]:
# Train for 10 epochs. After every epoch the model is evaluated on the
# validation split and the mean losses plus the validation accuracy are logged.
train_losses = []
val_losses = []
for epoch in range(10):
    # ---- training pass ----
    # Re-enter training mode each epoch because validation switches to eval.
    model.train()
    progress_bar = tqdm_notebook(train_loader, leave=False)
    losses = []
    for inputs, target in progress_bar:
        inputs, target = inputs.to(device), target.to(device)
        optimizer.zero_grad()

        output = model(inputs)
        loss = criterion(output, target)
        loss.backward()

        # Clip the global gradient norm to 3 to guard against exploding
        # gradients in the recurrent layer.
        nn.utils.clip_grad_norm_(model.parameters(), 3)
        optimizer.step()

        progress_bar.set_description(f'Loss: {loss.item():.3f}')
        losses.append(loss.item())

    epoch_loss = sum(losses) / len(losses)
    train_losses.append(epoch_loss)

    # ---- validation pass ----
    # Eval mode and no_grad: no autograd graph is built and nothing is updated.
    model.eval()
    progress_bar_val = tqdm_notebook(val_loader, leave=False)
    losses = []
    correct = 0
    seen = 0
    with torch.no_grad():
        for inputs, target in progress_bar_val:
            inputs, target = inputs.to(device), target.to(device)

            output = model(inputs)
            losses.append(criterion(output, target).item())

            # Threshold the probabilities at 0.5 to get hard 0/1 predictions.
            prediction = (torch.sigmoid(output) > 0.5).float()
            # Count per sample so a smaller last batch is not over-weighted,
            # as it was when averaging per-batch accuracies.
            correct += (prediction == target).sum().item()
            seen += target.numel()

    accuracy = 100 * correct / seen
    epoch_loss_val = sum(losses) / len(losses)
    val_losses.append(epoch_loss_val)

    tqdm.write(f'Epoch #{epoch + 1}\tTrain Loss: {epoch_loss:.3f}\tVal Loss: {epoch_loss_val:.3f}\tAccuracy: {accuracy:.3f}')


In [46]:
def predict_sentiment(text):
    """Print the predicted sentiment of ``text``.

    The text is encoded with the dataset vocabulary, truncated and padded to
    ``dataset.max_seq_len`` and passed through the model in eval mode. The
    sigmoid of the resulting logit is printed together with the label
    ("Positive" when it is above 0.5, "Negative" otherwise).

    Args:
        text: Raw review text.
    """
    model.eval()
    # Truncate the same way the training sequences were truncated;
    # dataset.pad() leaves over-long sequences untouched.
    tokens = dataset.encode(text)[:dataset.max_seq_len]
    with torch.no_grad():
        test_vector = torch.LongTensor([dataset.pad(tokens)]).to(device)

        output = model(test_vector)
        prediction = torch.sigmoid(output).item()

    label = 'Positive' if prediction > 0.5 else 'Negative'
    print(f'{prediction:0.3}: {label} sentiment')

In [47]:
# Sanity check on a very short phrase built around a positive word.
text= "that's nice"
predict_sentiment(text)


In [48]:
# Sanity check on a very short phrase built around a negative word.
text= "that's worst"
predict_sentiment(text)

In [49]:
# Longer, mixed-tone passage: a quote from "Rich Dad Poor Dad".
text= "In school we learn that mistakes are bad, and we are punished for making them. Yet, if you look at the way humans are designed to learn, we learn by making mistakes. We learn to walk by falling down. If we never fell down, we would never walk"
print(text)

# Score the quote with the trained model.
predict_sentiment(text)

In [50]:
# Several short sentences scored together as a single input.
text = """I love this car.
This view is amazing.
I feel great this morning.
I am so excited about the concert.
He is my best friend
"""
predict_sentiment(text)

In [51]:
# Negated/negative counterparts of the previous sentences, scored together.
text="""
I do not like this car.
This view is horrible.
I feel tired this morning.
I am not looking forward to the concert.
He is my enemy
"""
predict_sentiment(text)

References : 

https://towardsdatascience.com/illustrated-guide-to-lstms-and-gru-s-a-step-by-step-explanation-44e9eb85bf21

https://towardsdatascience.com/the-exploding-and-vanishing-gradients-problem-in-time-series-6b87d558d22

https://towardsdatascience.com/multi-class-text-classification-with-lstm-1590bee1bd17

#### DO UPVOTE ⬆️⬆️⬆️⬆️⬆️⬆️
#### DO COMMENT 💬💬💬💬💬💬💬💬
#### Feel free to post for suggestions 💬💬💬💬💬💬💬💬
<img src ="https://i.pinimg.com/originals/f5/f1/26/f5f12634f6378186aa4f88455b122eda.gif" width=1000 height=800>