In [1]:
!pip install -U datasets torch transformers



In [2]:
from datasets import load_dataset

dataset = load_dataset("fhamborg/news_sentiment_newsmtsc", trust_remote_code=True)

In [3]:
from transformers import AutoTokenizer

# Replace 'bert-base-uncased' with your specific model's name
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

In [24]:
def tokenize_function(examples):
    return tokenizer(
        examples['sentence'],   # Adjust key based on your dataset
        padding="max_length",  # Pad to max_length (helps batching)
        truncation=True,       # Truncate sequences longer than max_length
        max_length=512         # Set a suitable max_length
        
    )

# Tokenize the dataset
tokenized_dataset = dataset.map(tokenize_function, batched=True)

Map:   0%|          | 0/8739 [00:00<?, ? examples/s]

In [50]:
train_data = tokenized_dataset['train']
test_data = tokenized_dataset['test']
validation_data = tokenized_dataset['validation']




In [51]:
train_data.set_format(type='torch', columns=['input_ids', 'attention_mask', 'polarity'])

test_data.set_format(type='torch', columns=['input_ids', 'attention_mask', 'polarity'])

validation_data.set_format(type='torch', columns=['input_ids', 'attention_mask', 'polarity'])

In [63]:
train_data['polarity']

tensor([ 0, -1, -1,  ...,  0,  1, -1])

In [53]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

# Define the RNN model
import torch
import torch.nn as nn

class RNNModel(nn.Module):
    def __init__(self, 
                 input_size: int, 
                 hidden_size: int = 128,
                 num_layers: int = 2,
                 num_classes: int = 2,
                 dropout_rate: float = 0.3):
        
        super(RNNModel, self).__init__()
        
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        
        # RNN Layer
        self.rnn = nn.RNN(input_size, 
                          hidden_size, 
                          num_layers, 
                          batch_first=True, 
                          dropout=dropout_rate if num_layers > 1 else 0.0)
        
        # Fully connected layer
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        # Initialize hidden state with zeros
        h0 = torch.zeros(self.num_layers, 
                         x.size(0), 
                         self.hidden_size).to(x.device)
        
        # Forward propagate through RNN
        out, _ = self.rnn(x, h0)  # out: [batch_size, seq_length, hidden_size]
        
        # Get the output from the last time step
        out = out[:, -1, :]  # [batch_size, hidden_size]
        
        # Pass through the fully connected layer
        out = self.fc(out)  # [batch_size, num_classes]
        return out


class GRUModel(nn.Module):
    def __init__(self, input_size: int, hidden_size: int = 128, num_layers: int = 2,
                 num_classes: int = 2, dropout_rate: float = 0.3):
        """
        GRU Model for sequence classification
        """
        super(GRUModel, self).__init__()
        
        # Determine number of directions
        
        # GRU Layer
        self.gru = nn.GRU(
            input_size=input_size, 
            hidden_size=hidden_size, 
            num_layers=num_layers, 
            batch_first=True,
            dropout=dropout_rate if num_layers > 1 else 0,
        )
        
        # Dropout and normalization
        self.dropout = nn.Dropout(dropout_rate)
        
        # Fully connected layers
        fc_input_size = hidden_size
        self.fc1 = nn.Linear(fc_input_size, fc_input_size // 2)
        self.fc2 = nn.Linear(fc_input_size // 2, num_classes)
        
        # Layer normalization
        self.layer_norm = nn.LayerNorm(fc_input_size // 2)
    
    def forward(self, x):
        """
        Forward pass of the GRU model
        """
        # Initialize hidden state
        batch_size = x.size(0)
        h0 = torch.zeros(self.gru.num_layers, batch_size, 
            self.gru.hidden_size, device=x.device
        
        )
        
        # GRU processing
        out, hidden = self.gru(x, h0)
        
        # Extract the last hidden state
     
        hidden = hidden[-1]
        
        # Apply dropout and fully connected layers
        x = self.dropout(hidden)
        x = F.relu(self.fc1(x))
        x = self.layer_norm(x)
        x = self.dropout(x)
        logits = self.fc2(x)
        
        return logits

In [54]:
class LSTMModel(nn.Module):
    def __init__(self, input_size: int, hidden_size: int = 128,num_layers: int = 3,
                 num_classes: int = 3,dropout_rate: float = 0.3):
        
        super(LSTMModel, self).__init__()
        
        # LSTM Layer
        self.lstm = nn.LSTM(
            input_size=input_size, 
            hidden_size=hidden_size, 
            num_layers=num_layers, 
            batch_first=True,
            dropout=dropout_rate,
        )
        
        # Dropout and normalization
        self.dropout = nn.Dropout(dropout_rate)
        
        # Fully connected layers
        fc_input_size = hidden_size
        self.fc1 = nn.Linear(fc_input_size, fc_input_size // 2)
        self.fc = nn.Linear(fc_input_size // 2, fc_input_size // 2)
        self.fc2 = nn.Linear(fc_input_size // 2, num_classes)
        
        # Layer normalization
        self.layer_norm = nn.LayerNorm(fc_input_size // 2)
    
    def forward(self, x):
        """
        Forward pass of the LSTM model
        
        """
        # Initialize hidden and cell states
        batch_size = x.size(0)
        h0 = torch.zeros(self.lstm.num_layers, batch_size, 
            self.lstm.hidden_size, device=x.device)
        c0 = torch.zeros(self.lstm.num_layers, batch_size, 
            self.lstm.hidden_size, device=x.device)
        
        # LSTM processing
        _, (hidden, _) = self.lstm(x, (h0, c0))
        
        hidden = hidden[-1]
        
        # Apply dropout and fully connected layers
        x = self.dropout(hidden)
        x = F.relu(self.fc1(x))
        x = self.layer_norm(x)
        x = self.dropout(x)
        # x = F.relu(self.fc(x))
        # x = self.layer_norm(x)
        # x = self.dropout(x)
        logits = self.fc2(x)
        
        return logits

In [55]:
# parameters
parameters = {
    'input_size': train_data['input_ids'].shape[1],
    'hidden_size': 128,
    'num_layers': 5,
    'num_classes': 3,
    'dropout_rate': 0.3,
    
}
num_epochs = 20
learning_rate = 0.001


In [56]:
RNN = RNNModel(**parameters)
LSTM = LSTMModel(**parameters)
GRU = GRUModel(**parameters)

criterion = nn.CrossEntropyLoss()



In [68]:
def train_and_evaluate(model):
   optimizer = optim.Adam(model.parameters(), lr=learning_rate)
   
   # Training loop

   model.train()  # Set model to training mode

   for epoch in range(num_epochs):
      
      train_inputs = train_data['input_ids'].float()
      train_labels = train_data['polarity']


      # Forward pass
      outputs = model(train_inputs.unsqueeze(1))  # Shape: [batch_size, seq_length, input_size]

      train_labels[train_labels == -1] = 2
      outputs[outputs == -1] = 2
      

      # Calculate the loss
      loss = criterion(outputs, train_labels)

      # Backward pass and optimization
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

      # Calculate accuracy
      _, predicted = torch.max(outputs, dim=1)
      correct = (predicted == train_labels).sum().item()
      accuracy = correct / train_labels.size(0)

      print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}, Accuracy: {accuracy:.4f}')

   # Evaluation loop
   # Evaluation
   model.eval()  # Set model to evaluation mode
   with torch.no_grad():
      test_inputs = test_data['input_ids'].float()
      test_labels = test_data['polarity']

      test_labels[test_labels == -1] = 2
      
      
      test_outputs = model(test_inputs.unsqueeze(1))
      test_outputs[test_outputs == -1] = 2
      print(test_outputs)
      _, predicted = torch.max(test_outputs, 1)
      
      
      accuracy = (predicted == test_labels).sum().item() / test_labels.size(0)
      print(f'Test Accuracy: {accuracy:.4f}')

   




In [60]:
print(train_data['polarity'].unique())  # Check unique values in labels


tensor([-1,  0,  1])


In [69]:
train_and_evaluate(RNN)

Epoch [1/20], Loss: 1.0785, Accuracy: 0.4084
Epoch [2/20], Loss: 1.0784, Accuracy: 0.4101
Epoch [3/20], Loss: 1.0803, Accuracy: 0.4082
Epoch [4/20], Loss: 1.0776, Accuracy: 0.4143
Epoch [5/20], Loss: 1.0793, Accuracy: 0.4089
Epoch [6/20], Loss: 1.0791, Accuracy: 0.4121
Epoch [7/20], Loss: 1.0777, Accuracy: 0.4158
Epoch [8/20], Loss: 1.0789, Accuracy: 0.4083
Epoch [9/20], Loss: 1.0778, Accuracy: 0.4173
Epoch [10/20], Loss: 1.0762, Accuracy: 0.4188
Epoch [11/20], Loss: 1.0766, Accuracy: 0.4155
Epoch [12/20], Loss: 1.0769, Accuracy: 0.4169
Epoch [13/20], Loss: 1.0782, Accuracy: 0.4101
Epoch [14/20], Loss: 1.0779, Accuracy: 0.4131
Epoch [15/20], Loss: 1.0752, Accuracy: 0.4165
Epoch [16/20], Loss: 1.0752, Accuracy: 0.4179
Epoch [17/20], Loss: 1.0773, Accuracy: 0.4165
Epoch [18/20], Loss: 1.0748, Accuracy: 0.4156
Epoch [19/20], Loss: 1.0787, Accuracy: 0.4121
Epoch [20/20], Loss: 1.0763, Accuracy: 0.4170
tensor([[-0.2434, -0.1919,  0.2016],
        [ 0.2463, -0.2491, -0.0076],
        [ 0.276

In [70]:
train_and_evaluate(LSTM)

Epoch [1/20], Loss: 1.2293, Accuracy: 0.2964
Epoch [2/20], Loss: 1.1691, Accuracy: 0.3238
Epoch [3/20], Loss: 1.1382, Accuracy: 0.3401
Epoch [4/20], Loss: 1.1409, Accuracy: 0.3609
Epoch [5/20], Loss: 1.1536, Accuracy: 0.3602
Epoch [6/20], Loss: 1.1513, Accuracy: 0.3687
Epoch [7/20], Loss: 1.1511, Accuracy: 0.3708
Epoch [8/20], Loss: 1.1428, Accuracy: 0.3610
Epoch [9/20], Loss: 1.1358, Accuracy: 0.3618
Epoch [10/20], Loss: 1.1259, Accuracy: 0.3632
Epoch [11/20], Loss: 1.1229, Accuracy: 0.3603
Epoch [12/20], Loss: 1.1268, Accuracy: 0.3491
Epoch [13/20], Loss: 1.1265, Accuracy: 0.3423
Epoch [14/20], Loss: 1.1270, Accuracy: 0.3350
Epoch [15/20], Loss: 1.1248, Accuracy: 0.3450
Epoch [16/20], Loss: 1.1300, Accuracy: 0.3350
Epoch [17/20], Loss: 1.1213, Accuracy: 0.3384
Epoch [18/20], Loss: 1.1224, Accuracy: 0.3391
Epoch [19/20], Loss: 1.1185, Accuracy: 0.3472
Epoch [20/20], Loss: 1.1206, Accuracy: 0.3455
tensor([[ 0.2540, -0.0115,  0.3126],
        [ 0.2618, -0.0159,  0.3067],
        [ 0.259

In [71]:
train_and_evaluate(GRU)

Epoch [1/20], Loss: 1.3013, Accuracy: 0.3744
Epoch [2/20], Loss: 1.1653, Accuracy: 0.3600
Epoch [3/20], Loss: 1.1358, Accuracy: 0.3490
Epoch [4/20], Loss: 1.1574, Accuracy: 0.3440
Epoch [5/20], Loss: 1.1621, Accuracy: 0.3400
Epoch [6/20], Loss: 1.1508, Accuracy: 0.3440
Epoch [7/20], Loss: 1.1358, Accuracy: 0.3486
Epoch [8/20], Loss: 1.1255, Accuracy: 0.3410
Epoch [9/20], Loss: 1.1196, Accuracy: 0.3600
Epoch [10/20], Loss: 1.1190, Accuracy: 0.3606
Epoch [11/20], Loss: 1.1188, Accuracy: 0.3646
Epoch [12/20], Loss: 1.1209, Accuracy: 0.3603
Epoch [13/20], Loss: 1.1185, Accuracy: 0.3719
Epoch [14/20], Loss: 1.1212, Accuracy: 0.3695
Epoch [15/20], Loss: 1.1151, Accuracy: 0.3772
Epoch [16/20], Loss: 1.1130, Accuracy: 0.3681
Epoch [17/20], Loss: 1.1110, Accuracy: 0.3688
Epoch [18/20], Loss: 1.1077, Accuracy: 0.3725
Epoch [19/20], Loss: 1.1099, Accuracy: 0.3633
Epoch [20/20], Loss: 1.1045, Accuracy: 0.3674
tensor([[-0.0799, -0.3015, -0.0253],
        [-0.0428, -0.3625, -0.0571],
        [ 0.060