In [29]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from nltk.tokenize import word_tokenize
from collections import Counter
import nltk

In [31]:
df = pd.read_excel('Comment_Datasets.xlsx')
paragraphs = df['Text'].tolist()
sentiment_scores = df['Sentiment Score'].tolist()

In [33]:
tokenized_paragraphs = [word_tokenize(paragraph) for paragraph in paragraphs]
vocab = Counter()
for paragraph in tokenized_paragraphs:
    vocab.update(paragraph)
word_to_index = {word: idx for idx, word in enumerate(vocab, 1)}
index_to_word = {idx: word for word, idx in word_to_index.items()}
max_length = max(len(paragraph) for paragraph in tokenized_paragraphs)
numerical_paragraphs = [[word_to_index.get(word, 0) for word in paragraph] + [0] * (max_length - len(paragraph)) for paragraph in tokenized_paragraphs]

In [35]:
paragraphs_tensor = torch.tensor(numerical_paragraphs, dtype=torch.long)
sentiment_scores_tensor = torch.tensor(sentiment_scores, dtype=torch.float).view(-1, 1)

In [37]:
class RNNModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_size, output_size):
        super(RNNModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, dtype=torch.float)
        self.rnn = nn.LSTM(embedding_dim, hidden_size)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = self.embedding(x)
        x, _ = self.rnn(x)
        x = x[:, -1, :]  # Take the last hidden state
        x = self.fc(x)
        return x

In [39]:
vocab_size = len(vocab) + 1
embedding_dim = 128
hidden_size = 256
output_size = 1  

In [41]:
model = RNNModel(vocab_size, embedding_dim, hidden_size, output_size)

In [43]:
criterion = nn.MSELoss()  # For regression
optimizer = optim.Adam(model.parameters())

# Training loop
num_epochs = 50
for epoch in range(num_epochs):
    model.train()
    outputs = model(paragraphs_tensor)
    loss = criterion(outputs, sentiment_scores_tensor)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item():.4f}')

Epoch 1/50, Loss: 54.7079
Epoch 2/50, Loss: 49.3383
Epoch 3/50, Loss: 44.3355
Epoch 4/50, Loss: 39.4274
Epoch 5/50, Loss: 34.5115
Epoch 6/50, Loss: 29.6135
Epoch 7/50, Loss: 24.8559
Epoch 8/50, Loss: 20.4077
Epoch 9/50, Loss: 16.4256
Epoch 10/50, Loss: 13.0156
Epoch 11/50, Loss: 10.2260
Epoch 12/50, Loss: 8.0605
Epoch 13/50, Loss: 6.4916
Epoch 14/50, Loss: 5.4660
Epoch 15/50, Loss: 4.9075
Epoch 16/50, Loss: 4.7204
Epoch 17/50, Loss: 4.7969
Epoch 18/50, Loss: 5.0249
Epoch 19/50, Loss: 5.3023
Epoch 20/50, Loss: 5.5521
Epoch 21/50, Loss: 5.7285
Epoch 22/50, Loss: 5.8129
Epoch 23/50, Loss: 5.8052
Epoch 24/50, Loss: 5.7169
Epoch 25/50, Loss: 5.5656
Epoch 26/50, Loss: 5.3717
Epoch 27/50, Loss: 5.1554
Epoch 28/50, Loss: 4.9348
Epoch 29/50, Loss: 4.7245
Epoch 30/50, Loss: 4.5349
Epoch 31/50, Loss: 4.3726
Epoch 32/50, Loss: 4.2401
Epoch 33/50, Loss: 4.1370
Epoch 34/50, Loss: 4.0605
Epoch 35/50, Loss: 4.0061
Epoch 36/50, Loss: 3.9681
Epoch 37/50, Loss: 3.9406
Epoch 38/50, Loss: 3.9178
Epoch 39/5

In [45]:
model.eval()
with torch.no_grad():
    outputs = model(paragraphs_tensor)
    test_loss = criterion(outputs, sentiment_scores_tensor)
    print(f'Test Loss: {test_loss.item():.4f}')

Test Loss: 3.1215


In [53]:
new_paragraph = str(input("Enter the sentence: "))
tokenized_new_paragraph = word_tokenize(new_paragraph)
numerical_new_paragraph = [word_to_index.get(word, 0) for word in tokenized_new_paragraph] + [0] * (max_length - len(tokenized_new_paragraph))
numerical_new_paragraph_tensor = torch.tensor(numerical_new_paragraph, dtype=torch.long).unsqueeze(0)
predicted_sentiment_score = model(numerical_new_paragraph_tensor).item()
print(f"Predicted Sentiment Score: {predicted_sentiment_score:.2f}")

Enter the sentence:  i am the best guy


Predicted Sentiment Score: 4.05
