 ## Sentiment Analysis with GRU and LSTM using PyTorch

In [14]:
import os

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset, DataLoader

In [15]:
# Location of the IMDB reviews CSV. Set the IMDB_CSV_PATH environment variable
# to point at another copy; when it is unset the original local path is used.
DATA_PATH = os.environ.get("IMDB_CSV_PATH", r"C:/Datasets/IMDB Dataset.csv")
df = pd.read_csv(DATA_PATH)
print(df.head())

                                              review sentiment
0  One of the other reviewers has mentioned that ...  positive
1  A wonderful little production. <br /><br />The...  positive
2  I thought this was a wonderful way to spend ti...  positive
3  Basically there's a family where a little boy ...  negative
4  Petter Mattei's "Love in the Time of Money" is...  positive


In [16]:
# Encode the sentiment strings as 1 (positive) / 0 (negative).
SENTIMENT_TO_LABEL = {'positive': 1, 'negative': 0}

# Only encode while the column still holds the raw strings: mapping the
# already-encoded 0/1 values a second time (e.g. when this cell is re-run)
# would turn every label into NaN.
if not pd.api.types.is_numeric_dtype(df['sentiment']):
    encoded_sentiment = df['sentiment'].map(SENTIMENT_TO_LABEL)
    if encoded_sentiment.isna().any():
        # Unmapped values would otherwise flow into the loss as NaN targets.
        unknown_values = df.loc[encoded_sentiment.isna(), 'sentiment'].unique()
        raise ValueError(f"Unexpected sentiment values: {unknown_values!r}")
    df['sentiment'] = encoded_sentiment.astype('int64')

texts = df['review'].values
labels = df['sentiment'].values

In [27]:
# Bag-of-words counts over the 2000 most frequent non-stop-word terms.
# The counts are requested as float32 because they are converted to float32
# tensors later anyway; this avoids materialising a dense int64 matrix of
# twice the size.
# NOTE(review): the vocabulary is fitted on all reviews before the train/test
# split, so term selection also sees the test reviews.
vectorizer = CountVectorizer(max_features=2000, stop_words='english', dtype=np.float32)
X = vectorizer.fit_transform(texts).toarray()


In [28]:
# Hold out 20% of the samples for testing; the fixed random_state keeps the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    labels,
    test_size=0.2,
    random_state=42,
)


In [29]:
# Convert every split to a float32 tensor (features and labels alike; the
# labels are compared against float model outputs in the loss).
X_train, y_train, X_test, y_test = (
    torch.tensor(split, dtype=torch.float32)
    for split in (X_train, y_train, X_test, y_test)
)

In [30]:
BATCH_SIZE = 64

# Pair each feature row with its label.
train_data = TensorDataset(X_train, y_train)
test_data = TensorDataset(X_test, y_test)

# Training batches are reshuffled on every pass; test batches keep dataset order.
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False)


In [31]:
class LSTMClassifier(nn.Module):
    """Single-layer LSTM followed by a linear layer and a sigmoid.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the LSTM hidden state.
        output_dim: Number of sigmoid outputs per sample.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return sigmoid outputs computed from the final hidden state.

        Args:
            x: Either ``[batch, features]``, which is treated as a sequence
                of length 1, or an already sequenced
                ``[batch, seq_len, features]`` tensor.

        Returns:
            Tensor of shape ``[batch, output_dim]`` with values in [0, 1].
        """
        # Inputs that are not already 3-D get a sequence axis inserted at
        # position 1; 3-D inputs are passed to the LSTM unchanged.
        if x.dim() != 3:
            x = x.unsqueeze(1)  # [batch, seq_len=1, features]
        _, (hidden, _) = self.lstm(x)
        # hidden[-1] selects the hidden state of the last (here: only) layer.
        out = self.fc(hidden[-1])
        return self.sigmoid(out)

# Or use GRU
class GRUClassifier(nn.Module):
    """Single-layer GRU followed by a linear layer and a sigmoid.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the GRU hidden state.
        output_dim: Number of sigmoid outputs per sample.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.gru = nn.GRU(input_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return sigmoid outputs computed from the final hidden state.

        Args:
            x: Either ``[batch, features]``, which is treated as a sequence
                of length 1, or an already sequenced
                ``[batch, seq_len, features]`` tensor.

        Returns:
            Tensor of shape ``[batch, output_dim]`` with values in [0, 1].
        """
        # Inputs that are not already 3-D get a sequence axis inserted at
        # position 1; 3-D inputs are passed to the GRU unchanged.
        if x.dim() != 3:
            x = x.unsqueeze(1)  # [batch, seq_len=1, features]
        _, hidden = self.gru(x)
        # hidden[-1] selects the hidden state of the last (here: only) layer.
        out = self.fc(hidden[-1])
        return self.sigmoid(out)




In [None]:
# Model sizes: the input width is the number of columns in X_train.
input_dim, hidden_dim, output_dim = X_train.shape[1], 64, 1

model = LSTMClassifier(
    input_dim=input_dim,
    hidden_dim=hidden_dim,
    output_dim=output_dim,
)

# Adam optimizer over all model parameters, with binary cross-entropy as the
# loss on the model's sigmoid outputs.
optimizer = optim.Adam(params=model.parameters(), lr=0.01)
criterion = nn.BCELoss()

In [None]:

# Seed PyTorch's global RNG before the model is built so that the weight
# initialisation is repeatable. The value matches the random_state used for
# the train/test split.
# NOTE(review): the shuffled DataLoader presumably draws from the same global
# RNG, so batch order should also repeat when the cells run in order - confirm.
torch.manual_seed(42)

input_dim = X_train.shape[1]  # number of feature columns in X_train
hidden_dim = 64
output_dim = 1

# Swap in GRUClassifier(input_dim, hidden_dim, output_dim) for the GRU variant.
model = LSTMClassifier(input_dim, hidden_dim, output_dim)

# Binary cross-entropy on the model's sigmoid outputs, optimized with Adam.
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

In [33]:
EPOCHS = 10

for epoch in range(EPOCHS):
    model.train()
    # Accumulate per-sample totals so the epoch metrics are exact averages
    # over samples even when the last batch is smaller than the others.
    loss_sum, n_correct, n_seen = 0.0, 0, 0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        # squeeze(1) drops only the output axis: [batch, 1] -> [batch].
        # A bare squeeze() would also drop the batch axis for a batch of one
        # sample and no longer match y_batch's shape.
        outputs = model(X_batch).squeeze(1)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()

        batch_len = y_batch.size(0)
        preds = (outputs.detach() >= 0.5).float()
        # loss.item() is the batch mean, so weight it by the batch size.
        loss_sum += loss.item() * batch_len
        n_correct += (preds == y_batch).sum().item()
        n_seen += batch_len

    print(f"Epoch [{epoch+1}/{EPOCHS}] | Loss: {loss_sum/n_seen:.4f} | Acc: {n_correct/n_seen:.4f}")




Epoch [1/10] | Loss: 0.3292 | Acc: 0.8585
Epoch [2/10] | Loss: 0.2451 | Acc: 0.8962
Epoch [3/10] | Loss: 0.1483 | Acc: 0.9425
Epoch [4/10] | Loss: 0.0638 | Acc: 0.9801
Epoch [5/10] | Loss: 0.0258 | Acc: 0.9933
Epoch [6/10] | Loss: 0.0139 | Acc: 0.9965
Epoch [7/10] | Loss: 0.0054 | Acc: 0.9991
Epoch [8/10] | Loss: 0.0036 | Acc: 0.9993
Epoch [9/10] | Loss: 0.0021 | Acc: 0.9997
Epoch [10/10] | Loss: 0.0020 | Acc: 0.9996


In [34]:
# Evaluate on the held-out test set with gradients disabled.
model.eval()
with torch.no_grad():
    # Count correct predictions per sample rather than averaging batch
    # means: the last test batch can be smaller than the others, which would
    # otherwise give its samples extra weight.
    n_correct, n_seen = 0, 0
    for X_batch, y_batch in test_loader:
        # squeeze(1) keeps the batch axis even for a batch of one sample.
        probs = model(X_batch).squeeze(1)
        preds = (probs >= 0.5).float()
        n_correct += (preds == y_batch).sum().item()
        n_seen += y_batch.size(0)
    print(f"\nTest Accuracy: {n_correct/n_seen:.4f}")


Test Accuracy: 0.8590


# GRU

In [35]:

# Model sizes: the input width is the number of columns in X_train.
input_dim, hidden_dim, output_dim = X_train.shape[1], 64, 1

model = GRUClassifier(
    input_dim=input_dim,
    hidden_dim=hidden_dim,
    output_dim=output_dim,
)

# Adam optimizer over all model parameters, with binary cross-entropy as the
# loss on the model's sigmoid outputs.
optimizer = optim.Adam(params=model.parameters(), lr=0.01)
criterion = nn.BCELoss()

In [36]:

# Seed PyTorch's global RNG before the model is built so that the weight
# initialisation is repeatable. The value matches the random_state used for
# the train/test split.
# NOTE(review): the shuffled DataLoader presumably draws from the same global
# RNG, so batch order should also repeat when the cells run in order - confirm.
torch.manual_seed(42)

input_dim = X_train.shape[1]  # number of feature columns in X_train
hidden_dim = 64
output_dim = 1

model = GRUClassifier(input_dim, hidden_dim, output_dim)

# Binary cross-entropy on the model's sigmoid outputs, optimized with Adam.
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

In [37]:
EPOCHS = 10

for epoch in range(EPOCHS):
    model.train()
    # Accumulate per-sample totals so the epoch metrics are exact averages
    # over samples even when the last batch is smaller than the others.
    loss_sum, n_correct, n_seen = 0.0, 0, 0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        # squeeze(1) drops only the output axis: [batch, 1] -> [batch].
        # A bare squeeze() would also drop the batch axis for a batch of one
        # sample and no longer match y_batch's shape.
        outputs = model(X_batch).squeeze(1)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()

        batch_len = y_batch.size(0)
        preds = (outputs.detach() >= 0.5).float()
        # loss.item() is the batch mean, so weight it by the batch size.
        loss_sum += loss.item() * batch_len
        n_correct += (preds == y_batch).sum().item()
        n_seen += batch_len

    print(f"Epoch [{epoch+1}/{EPOCHS}] | Loss: {loss_sum/n_seen:.4f} | Acc: {n_correct/n_seen:.4f}")




Epoch [1/10] | Loss: 0.3290 | Acc: 0.8559
Epoch [2/10] | Loss: 0.2437 | Acc: 0.8984
Epoch [3/10] | Loss: 0.1491 | Acc: 0.9425
Epoch [4/10] | Loss: 0.0671 | Acc: 0.9784
Epoch [5/10] | Loss: 0.0282 | Acc: 0.9920
Epoch [6/10] | Loss: 0.0114 | Acc: 0.9969
Epoch [7/10] | Loss: 0.0056 | Acc: 0.9987
Epoch [8/10] | Loss: 0.0036 | Acc: 0.9991
Epoch [9/10] | Loss: 0.0031 | Acc: 0.9993
Epoch [10/10] | Loss: 0.0406 | Acc: 0.9851


In [38]:
# Evaluate on the held-out test set with gradients disabled.
model.eval()
with torch.no_grad():
    # Count correct predictions per sample rather than averaging batch
    # means: the last test batch can be smaller than the others, which would
    # otherwise give its samples extra weight.
    n_correct, n_seen = 0, 0
    for X_batch, y_batch in test_loader:
        # squeeze(1) keeps the batch axis even for a batch of one sample.
        probs = model(X_batch).squeeze(1)
        preds = (probs >= 0.5).float()
        n_correct += (preds == y_batch).sum().item()
        n_seen += y_batch.size(0)
    print(f"\nTest Accuracy: {n_correct/n_seen:.4f}")


Test Accuracy: 0.8470
