# Sentiment Analyisis for product review

In [4]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset
import nltk
from nltk.tokenize import word_tokenize
import numpy as np

nltk.download('punkt')

# Load dataset
df = pd.read_csv("data\Amazon-Product-Reviews-Sentiment-Analysis-in-Python-Dataset.csv") 


def map_sentiment(rating):
    return 0 if rating in [1,2] else (1 if rating == 3 else 2)

df["Sentiment"] = df["Sentiment"].apply(map_sentiment)

df["tokens"] = df["Review"].apply(lambda x: word_tokenize(str(x).lower()))


word_to_index = {"<PAD>": 0, "<UNK>": 1}
index = 2
for tokens in df["tokens"]:
    for word in tokens:
        if word not in word_to_index:
            word_to_index[word] = index
            index += 1


max_len = 50 
def encode_review(tokens):
    encoded = [word_to_index.get(word, 1) for word in tokens]  
    return encoded[:max_len] + [0] * (max_len - len(encoded))

df["encoded"] = df["tokens"].apply(encode_review)


train_texts, test_texts, train_labels, test_labels = train_test_split(
    df["encoded"].tolist(), df["Sentiment"].tolist(), test_size=0.2, random_state=42
)


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [5]:
class SentimentDataset(Dataset):
    def __init__(self, texts, labels):
        self.texts = torch.tensor(texts, dtype=torch.long)
        self.labels = torch.tensor(labels, dtype=torch.long)

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        return self.texts[idx], self.labels[idx]


train_dataset = SentimentDataset(train_texts, train_labels)
test_dataset = SentimentDataset(test_texts, test_labels)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [6]:
class LSTMSentiment(nn.Module):
    def __init__(self, vocab_size, embedding_dim=128, hidden_dim=256, output_dim=3):
        super(LSTMSentiment, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        embedded = self.embedding(x)
        lstm_out, _ = self.lstm(embedded)
        out = self.fc(lstm_out[:, -1, :])  
        return self.softmax(out)


vocab_size = len(word_to_index)
model = LSTMSentiment(vocab_size)


In [7]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for texts, labels in train_loader:
        texts, labels = texts.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(texts)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss / len(train_loader)}")


Epoch 1/10, Loss: 1.0578098149299622
Epoch 2/10, Loss: 0.9321005096435547
Epoch 3/10, Loss: 0.8718769039154053
Epoch 4/10, Loss: 0.8403631739616394
Epoch 5/10, Loss: 0.8181456240653991
Epoch 6/10, Loss: 0.7990634164810181
Epoch 7/10, Loss: 0.7865391444206238
Epoch 8/10, Loss: 0.7671841526985168
Epoch 9/10, Loss: 0.7498221593856812
Epoch 10/10, Loss: 0.7311647711753845


In [8]:
from sklearn.metrics import classification_report

model.eval()
all_preds, all_labels = [], []

with torch.no_grad():
    for texts, labels in test_loader:
        texts, labels = texts.to(device), labels.to(device)
        outputs = model(texts)
        preds = torch.argmax(outputs, dim=1)
        
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

print(classification_report(all_labels, all_preds, target_names=["Negative", "Neutral", "Positive"]))


              precision    recall  f1-score   support

    Negative       0.71      0.75      0.73      2021
     Neutral       0.34      0.28      0.30       985
    Positive       0.74      0.76      0.75      1994

    accuracy                           0.66      5000
   macro avg       0.60      0.60      0.60      5000
weighted avg       0.65      0.66      0.65      5000



In [None]:
sentiment_map = {
    0: "Negative 😡👎",
    1: "Neutral 😐🤔",
    2: "Positive 😀🔥"
}

def predict_sentiment(review):
    tokens = word_tokenize(review.lower())
    encoded = encode_review(tokens)
    input_tensor = torch.tensor([encoded], dtype=torch.long).to(device)

    with torch.no_grad():
        output = model(input_tensor)
        sentiment = torch.argmax(output).item()

    return sentiment_map[sentiment]

review_text = "Fast shipping but the product is cheaply made."
print(f"Review: {review_text}")
print(f"Sentiment: {predict_sentiment(review_text)}")


Review: Fast shipping but the product is cheaply made.
Sentiment: Negative 😡👎


In [None]:

# torch.save(model.state_dict(), "lstm_sentiment_model.pth")
# #
# with open("word_to_index.pkl", "wb") as f:
#     pickle.dump(word_to_index, f)

# print("Model and vocabulary saved successfully! ✅")


Model and vocabulary saved successfully! ✅
