In [18]:
import pandas as pd
import numpy as np
import nltk
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report

In [21]:

# Read the labelled news CSV into a DataFrame and preview its first five rows.
data = pd.read_csv(filepath_or_buffer='sentiment_labeled_dataset.csv')
data.head(n=5)

Unnamed: 0,News,news,sentiment
0,The Ukrainian president says the country will ...,the ukrainian president says the country will ...,neutral
1,"Jeremy Bowen was on the frontline in Irpin, as...","jeremy bowen was on the frontline in irpin, as...",neutral
2,One of the world's biggest fertiliser firms sa...,one of the world's biggest fertiliser firms sa...,neutral
3,The parents of the Manchester Arena bombing's ...,the parents of the manchester arena bombing's ...,negative
4,Consumers are feeling the impact of higher ene...,consumers are feeling the impact of higher ene...,positive


In [24]:

# Loading the labeled news dataset (a CSV whose lower-cased article text is in the
# 'news' column and whose label is in the 'sentiment' column)
data = pd.read_csv('sentiment_labeled_dataset.csv')

# Text preprocessing
# NLTK ships without its corpora/models. Fetch them only when the lookup fails so
# that this cell also runs on a fresh environment, and stays offline otherwise.
try:
    stop_words = set(stopwords.words('english'))
except LookupError:
    nltk.download('stopwords', quiet=True)
    stop_words = set(stopwords.words('english'))

# word_tokenize needs the Punkt sentence tokenizer; the resource is named 'punkt'
# or 'punkt_tab' depending on the installed NLTK release, so request both.
try:
    word_tokenize('tokenizer warm-up')
except LookupError:
    nltk.download('punkt', quiet=True)
    nltk.download('punkt_tab', quiet=True)

def preprocess_text(text):
    """Normalise one document into a space-separated string of content words.

    The text is tokenised with ``word_tokenize``; tokens that are not purely
    alphabetic are dropped, the rest are lower-cased, and any token found in
    the module-level ``stop_words`` set is removed.

    Args:
        text: The raw document. Non-string values (e.g. ``None`` or the float
            ``NaN`` pandas uses for empty CSV cells) are treated as empty.

    Returns:
        str: The surviving tokens joined by single spaces; ``''`` when nothing
        survives or when ``text`` is not a string.
    """
    # Missing cells arrive as float NaN / None; the tokenizer would raise on them.
    if not isinstance(text, str):
        return ''

    kept = []
    for token in word_tokenize(text):
        # The alphabetic test runs on the original token, before lower-casing.
        if not token.isalpha():
            continue
        lowered = token.lower()
        if lowered not in stop_words:
            kept.append(lowered)
    return ' '.join(kept)

# Clean every article; the result is stored next to the raw 'news' column.
data['text'] = data['news'].apply(preprocess_text)

# Encode labels: string sentiment classes are replaced by integer ids
# (label_encoder keeps the mapping for inverse_transform later on).
label_encoder = LabelEncoder()
data['sentiment'] = label_encoder.fit_transform(data['sentiment'])

# Split the data into training and testing sets (80% / 20%, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    data['text'],
    data['sentiment'],
    test_size=0.2,
    random_state=42,
)

# Tokenization and Padding
# Both helpers are imported from tensorflow.keras so they come from the same
# Keras build (previously Tokenizer came from the standalone `keras` package).
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

max_words = 5000  # vocabulary cap handed to the Tokenizer
max_len = 100     # every sequence is padded/truncated to this many tokens

# The vocabulary is fitted on the training split only.
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(X_train)

X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

X_train_pad = pad_sequences(X_train_seq, maxlen=max_len)
X_test_pad = pad_sequences(X_test_seq, maxlen=max_len)


# Converting to PyTorch tensors (integer token ids / class ids)
X_train_tensor = torch.tensor(X_train_pad, dtype=torch.long)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.long)
X_test_tensor = torch.tensor(X_test_pad, dtype=torch.long)

# Creating PyTorch datasets and dataloaders
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Defining a simple feedforward neural network
class SentimentClassifier(nn.Module):
    """Feed-forward classifier over flattened token embeddings.

    Pipeline: embedding lookup -> flatten all positions of a sample into one
    vector -> linear -> ReLU -> linear. The output is the raw (un-normalised)
    score per class.

    Args:
        input_size: Width of the flattened embedding vector fed to the first
            linear layer, i.e. ``sequence_length * embedding_dim``.
        hidden_size: Number of units in the hidden layer.
        num_classes: Number of output classes.
        vocab_size: Number of rows in the embedding table. When ``None``
            (default) the module-level ``max_words`` is used, which keeps the
            original three-argument call working unchanged.
        embedding_dim: Size of each embedding vector (default 100, the value
            that used to be hard-coded).
    """

    def __init__(self, input_size, hidden_size, num_classes,
                 vocab_size=None, embedding_dim=100):
        super().__init__()
        if vocab_size is None:
            # Backward-compatible fallback to the notebook-level vocabulary cap.
            vocab_size = max_words
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return class scores for a batch of token-id sequences.

        Args:
            x: Integer tensor of token ids with the batch on dimension 0.

        Returns:
            Tensor with one row per sample and ``num_classes`` columns.
        """
        embedded = self.embedding(x)
        # flatten(start_dim=1) merges every non-batch dimension; unlike
        # view(batch, -1) it also works when the batch is empty.
        flat = embedded.flatten(start_dim=1)
        hidden = self.relu(self.fc1(flat))
        return self.fc2(hidden)

# Seed PyTorch's global RNG so that weight initialisation and the DataLoader's
# shuffling order are repeatable from one run of this cell to the next.
torch.manual_seed(42)

# Initializing the model, loss function, and optimizer
# input_size is max_len * 100 because each of the max_len token positions is
# embedded into 100 dimensions and the result is flattened per sample.
model = SentimentClassifier(input_size=max_len * 100, hidden_size=128, num_classes=len(label_encoder.classes_))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training the model
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    for inputs, labels in train_loader:
        optimizer.zero_grad()  # clear gradients left over from the previous step
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

# Evaluation: one forward pass over the whole test set, without gradient tracking
model.eval()
with torch.no_grad():
    y_pred = model(X_test_tensor).argmax(dim=1).numpy()

# Inverse transform labels to original sentiment labels
y_pred = label_encoder.inverse_transform(y_pred)
y_test_original = label_encoder.inverse_transform(y_test)

# Calculating accuracy and classification report
accuracy = accuracy_score(y_test_original, y_pred)
report = classification_report(y_test_original, y_pred)

print(f"Accuracy: {accuracy}")
print(report)

Accuracy: 0.7479964381121995
              precision    recall  f1-score   support

    negative       0.68      0.62      0.65       905
     neutral       0.74      0.80      0.77      1860
    positive       0.79      0.76      0.78      1727

    accuracy                           0.75      4492
   macro avg       0.74      0.73      0.73      4492
weighted avg       0.75      0.75      0.75      4492

