#WITHOUT CUDA


In [3]:

# Import necessary libraries
import numpy as np
import torch
import torch.nn as nn
from torch.nn import init
import torch.optim as optim
import math
import random
import os
import time
from tqdm import tqdm
import json
import string
import pickle
import pandas as pd

unk = '<UNK>'
# Consult the PyTorch documentation for information on the functions used below:
# https://pytorch.org/docs/stable/torch.html
class RNN(nn.Module):
    def __init__(self, input_dim, h):  # Add relevant parameters
        super(RNN, self).__init__()
        self.h = h
        self.numOfLayer = 1
        self.rnn = nn.RNN(input_dim, h, self.numOfLayer, nonlinearity='tanh')
        self.W = nn.Linear(h, 5)
        self.softmax = nn.LogSoftmax(dim=1)
        self.loss = nn.NLLLoss()

    def compute_Loss(self, predicted_vector, gold_label):
        return self.loss(predicted_vector, gold_label)

    def forward(self, inputs):
        # obtain hidden layer representation
        # inputs shape: (seq_len, batch_size, input_dim)
        _, hidden = self.rnn(inputs)  # hidden shape: (num_layers, batch_size, hidden_size)

        # obtain output layer representations
        # We take the last hidden state and pass it through the linear layer
        output = self.W(hidden[-1])  # shape: (batch_size, 5)

        # obtain probability distribution using log softmax
        predicted_vector = self.softmax(output)  # shape: (batch_size, 5)

        return predicted_vector


def load_data(train_data, val_data, test_data):
  # Load training data
  with open(train_data) as training_f:
      training = json.load(training_f)

  # Load validation data
  with open(val_data) as valid_f:
      validation = json.load(valid_f)

  # Load test data
  with open(test_data) as test_f:
      testing = json.load(test_f)

  tra = []
  val = []
  test = []

  # Process training data
  for elt in training:
      tra.append((elt["text"].split(), int(elt["stars"]-1)))

  # Process validation data
  for elt in validation:
      val.append((elt["text"].split(), int(elt["stars"]-1)))

  # Process test data
  for elt in testing:
      test.append((elt["text"].split(), int(elt["stars"]-1)))

  return tra, val, test



In [4]:
# Define hyperparameter combinations
hidden_dims = [16, 64, 256]
epochs_list = [5]#[5, 10, 30]

# Create a directory for results if it doesn't exist
!mkdir -p results

# Function to train and evaluate model
def train_and_evaluate(hidden_dim, num_epochs, train_data, valid_data, test_data, word_embedding):
    model = RNN(50, hidden_dim)
    optimizer = optim.Adam(model.parameters(), lr=0.01)

    training_accuracies = []
    validation_accuracies = []
    training_losses = []

    stopping_condition = False
    epoch = 0
    last_train_accuracy = 0
    last_validation_accuracy = 0

    max_epochs = num_epochs

    while not stopping_condition and epoch < max_epochs:
            # Training Phase
            random.shuffle(train_data)
            model.train()
            # You will need further code to operationalize training, ffnn.py may be helpful
            print("Training started for epoch {}".format(epoch + 1))
            train_data = train_data
            correct = 0
            total = 0
            minibatch_size = 16
            N = len(train_data)

            loss_total = 0
            loss_count = 0
            for minibatch_index in tqdm(range(N // minibatch_size)):
                optimizer.zero_grad()
                loss = None
                for example_index in range(minibatch_size):
                    idx = minibatch_index * minibatch_size + example_index
                    if idx >= len(train_data):
                      continue

                    input_words, gold_label = train_data[minibatch_index * minibatch_size + example_index]
                    input_words = " ".join(input_words)

                    # Remove punctuation
                    input_words = input_words.translate(input_words.maketrans("", "", string.punctuation)).split()

                    # Look up word embedding dictionary
                    vectors = [word_embedding[i.lower()] if i.lower() in word_embedding.keys() else word_embedding['unk'] for i in input_words ]

                    # Transform the input into required shape
                    vectors = torch.tensor(vectors).view(len(vectors), 1, -1)
                    output = model(vectors)

                    # Get loss
                    example_loss = model.compute_Loss(output.view(1,-1), torch.tensor([gold_label]))

                    # Get predicted label
                    predicted_label = torch.argmax(output)

                    correct += int(predicted_label == gold_label)
                    # print(predicted_label, gold_label)
                    total += 1
                    if loss is None:
                        loss = example_loss
                    else:
                        loss += example_loss

                loss = loss / minibatch_size
                loss_total += loss.data
                loss_count += 1
                loss.backward()
                optimizer.step()

            epoch_loss = loss_total/loss_count if loss_count > 0 else 0
            training_loss = float(epoch_loss)
            training_accuracy = correct/total

            print(loss_total/loss_count)
            print("Training completed for epoch {}".format(epoch + 1))
            print("Training accuracy for epoch {}: {}".format(epoch + 1, correct / total))

            #VALIDATION PHASE

            model.eval()
            correct = 0
            total = 0
            random.shuffle(valid_data)
            print("Validation started for epoch {}".format(epoch + 1))
            valid_data = valid_data

            with torch.no_grad():  # Add this to prevent gradient computation during validation
                for input_words, gold_label in tqdm(valid_data):
                    input_words = " ".join(input_words)
                    input_words = input_words.translate(input_words.maketrans("", "", string.punctuation)).split()
                    vectors = [word_embedding[i.lower()] if i.lower() in word_embedding.keys() else word_embedding['unk'] for i
                            in input_words]

                    vectors = torch.tensor(vectors).view(len(vectors), 1, -1)
                    output = model(vectors)
                    predicted_label = torch.argmax(output)
                    correct += int(predicted_label == gold_label)
                    total += 1
                    # print(predicted_label, gold_label)
            validation_accuracy = correct/total
            print("Validation completed for epoch {}".format(epoch + 1))
            print("Validation accuracy for epoch {}: {}".format(epoch + 1, correct / total))

            # Store metrics
            training_accuracies.append(training_accuracy)
            validation_accuracies.append(validation_accuracy)
            training_losses.append(training_loss)

            if validation_accuracy < last_validation_accuracy and training_accuracy > last_train_accuracy:
                stopping_condition=True
                print("Training done to avoid overfitting!")
                print("Best validation accuracy is:", last_validation_accuracy)
            else:
                last_validation_accuracy = validation_accuracy
                last_train_accuracy = training_accuracy

            epoch += 1

    print("========== Training completed ==========")

    return training_accuracies, validation_accuracies, training_losses, test_accuracy



In [6]:
# Main execution
if __name__ == "__main__":
    # Load data
    print("========== Loading data ==========")
    train_data, valid_data, test_data = load_data(
        '/content/drive/MyDrive/NLPA2/training.json',
        '/content/drive/MyDrive/NLPA2/validation.json',
        '/content/drive/MyDrive/NLPA2/test.json'
    )

    # Load word embeddings
    word_embedding = pickle.load(open('/content/drive/MyDrive/NLPA2/word_embedding.pkl', 'rb'))

    # Run all combinations
    all_results = []

    for hidden_dim in hidden_dims:
        for num_epochs in epochs_list:
            print(f"\n========== Training model with hidden_dim={hidden_dim}, epochs={num_epochs} ==========")

            # Train and evaluate model
            train_accs, val_accs, train_losses, test_acc = train_and_evaluate(
                hidden_dim, num_epochs, train_data, valid_data, test_data, word_embedding
            )

            # Create results DataFrame
            results_df = pd.DataFrame({
                'epoch': range(1, len(train_accs) + 1),
                'training_accuracy': train_accs,
                'validation_accuracy': val_accs,
                'training_loss': train_losses,
                'test_accuracy': [test_acc] * len(train_accs),
                'hidden_dim': [hidden_dim] * len(train_accs),
                'max_epochs': [num_epochs] * len(train_accs)
            })

            # Save individual result
            filename = f"results/rnn_{num_epochs}_{hidden_dim}.csv"
            results_df.to_csv(filename, index=False)
            print(f"Results saved to {filename}")

            # Store summary for combined results
            all_results.append({
                'hidden_dim': hidden_dim,
                'epochs': num_epochs,
                'final_train_acc': train_accs[-1],
                'final_val_acc': val_accs[-1],
                'final_test_acc': test_acc,
                'best_val_acc': max(val_accs),
                'total_epochs_run': len(train_accs)
            })

    # Create and save combined results
    combined_results = pd.DataFrame(all_results)
    combined_results.to_csv('results/combined_results.csv', index=False)
    print("\nCombined results saved to results/combined_results.csv")

    # Display combined results
    print("\nSummary of all experiments:")
    print(combined_results)


Training started for epoch 1


  vectors = torch.tensor(vectors).view(len(vectors), 1, -1)
  6%|▌         | 61/1000 [00:09<02:32,  6.17it/s]


KeyboardInterrupt: 

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# CUDA ENABLED TRAINING

In [12]:
import numpy as np
import torch
import torch.nn as nn
from torch.nn import init
import torch.optim as optim
import math
import random
import os
import time
from tqdm import tqdm
import json
import string
import pickle
import pandas as pd

# Check if CUDA is available and set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

unk = '<UNK>'

class RNN(nn.Module):
    def __init__(self, input_dim, h):
        super(RNN, self).__init__()
        self.h = h
        self.numOfLayer = 1
        self.rnn = nn.RNN(input_dim, h, self.numOfLayer, nonlinearity='tanh', batch_first=False)
        self.W = nn.Linear(h, 5)
        self.softmax = nn.LogSoftmax(dim=1)
        self.loss = nn.NLLLoss()

        # Move model to GPU
        self.to(device)

    def compute_Loss(self, predicted_vector, gold_label):
        return self.loss(predicted_vector, gold_label.to(device))

    def forward(self, inputs):
        _, hidden = self.rnn(inputs)
        output = self.W(hidden[-1])
        predicted_vector = self.softmax(output)
        return predicted_vector

def load_data(train_data, val_data, test_data):
    # Load and process data (same as before)
    with open(train_data) as training_f:
        training = json.load(training_f)
    with open(val_data) as valid_f:
        validation = json.load(valid_f)
    with open(test_data) as test_f:
        testing = json.load(test_f)

    tra = [(elt["text"].split(), int(elt["stars"]-1)) for elt in training]
    val = [(elt["text"].split(), int(elt["stars"]-1)) for elt in validation]
    test = [(elt["text"].split(), int(elt["stars"]-1)) for elt in testing]

    return tra, val, test

def process_batch(batch_data, word_embedding):
    # Process a batch of data in parallel
    processed_inputs = []
    labels = []

    for input_words, gold_label in batch_data:
        input_words = " ".join(input_words)
        input_words = input_words.translate(str.maketrans("", "", string.punctuation)).split()
        vectors = [word_embedding[i.lower()] if i.lower() in word_embedding else word_embedding['unk']
                  for i in input_words]
        processed_inputs.append(vectors)
        labels.append(gold_label)

    # Pad sequences in the batch to same length
    max_len = max(len(seq) for seq in processed_inputs)
    padded_inputs = [seq + [word_embedding['unk']] * (max_len - len(seq)) for seq in processed_inputs]

    # Convert to tensors and move to GPU
    inputs_tensor = torch.tensor(padded_inputs, dtype=torch.float32).to(device)
    labels_tensor = torch.tensor(labels, dtype=torch.long).to(device)

    # Reshape for RNN input (seq_len, batch_size, input_dim)
    inputs_tensor = inputs_tensor.permute(1, 0, 2)

    return inputs_tensor, labels_tensor

def train_and_evaluate(hidden_dim, num_epochs, train_data, valid_data, test_data, word_embedding):
    model = RNN(50, hidden_dim)
    optimizer = optim.Adam(model.parameters(), lr=0.01)

    training_accuracies = []
    validation_accuracies = []
    training_losses = []

    stopping_condition = False
    epoch = 0
    last_train_accuracy = 0
    last_validation_accuracy = 0
    minibatch_size = 32  # Increased batch size for GPU

    while not stopping_condition and epoch < num_epochs:
        # Training Phase
        model.train()
        random.shuffle(train_data)
        print(f"Training started for epoch {epoch + 1}")

        correct = 0
        total = 0
        loss_total = 0
        loss_count = 0

        # Process data in batches
        for i in tqdm(range(0, len(train_data), minibatch_size)):
            batch_data = train_data[i:i + minibatch_size]
            vectors, labels = process_batch(batch_data, word_embedding)

            optimizer.zero_grad()
            output = model(vectors)
            loss = model.compute_Loss(output, labels)

            loss.backward()
            optimizer.step()

            predicted_labels = torch.argmax(output, dim=1)
            correct += (predicted_labels == labels).sum().item()
            total += len(labels)

            loss_total += loss.item()
            loss_count += 1

        training_accuracy = correct/total
        training_loss = loss_total/loss_count

        # Validation Phase
        model.eval()
        correct = 0
        total = 0
        print(f"Validation started for epoch {epoch + 1}")

        with torch.no_grad():
            for i in tqdm(range(0, len(valid_data), minibatch_size)):
                batch_data = valid_data[i:i + minibatch_size]
                vectors, labels = process_batch(batch_data, word_embedding)

                output = model(vectors)
                predicted_labels = torch.argmax(output, dim=1)
                correct += (predicted_labels == labels).sum().item()
                total += len(labels)

        validation_accuracy = correct/total

        # Store metrics
        training_accuracies.append(training_accuracy)
        validation_accuracies.append(validation_accuracy)
        training_losses.append(training_loss)

        print(f"Epoch {epoch + 1}:")
        print(f"Training accuracy: {training_accuracy:.4f}")
        print(f"Validation accuracy: {validation_accuracy:.4f}")
        print(f"Training loss: {training_loss:.4f}")

        # Early stopping check
        if validation_accuracy < last_validation_accuracy and training_accuracy > last_train_accuracy:
            stopping_condition = True
            print("Training stopped to avoid overfitting!")
            print(f"Best validation accuracy: {last_validation_accuracy:.4f}")
        else:
            last_validation_accuracy = validation_accuracy
            last_train_accuracy = training_accuracy

        epoch += 1

    # Test Phase
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for i in tqdm(range(0, len(test_data), minibatch_size)):
            batch_data = test_data[i:i + minibatch_size]
            vectors, labels = process_batch(batch_data, word_embedding)

            output = model(vectors)
            predicted_labels = torch.argmax(output, dim=1)
            correct += (predicted_labels == labels).sum().item()
            total += len(labels)

    test_accuracy = correct/total
    print(f"Test accuracy: {test_accuracy:.4f}")

    return training_accuracies, validation_accuracies, training_losses, test_accuracy

if __name__ == "__main__":
    train_data, valid_data, test_data = load_data(
        '/content/drive/MyDrive/NLPA2/training.json',
        '/content/drive/MyDrive/NLPA2/validation.json',
        '/content/drive/MyDrive/NLPA2/test.json'
    )

    # Load word embeddings
    word_embedding = pickle.load(open('/content/drive/MyDrive/NLPA2/word_embedding.pkl', 'rb'))

    # Create results directory
    os.makedirs('results', exist_ok=True)

    hidden_dims = [32] #[16, 32, 64, 128 ,256]
    epochs_list = [10] #[5, 10, 30]
    all_results = []

    for hidden_dim in hidden_dims:
        for num_epochs in epochs_list:
            print(f"\n========== Training model with hidden_dim={hidden_dim}, epochs={num_epochs} ==========")

            train_accs, val_accs, train_losses, test_acc = train_and_evaluate(
                hidden_dim, num_epochs, train_data, valid_data, test_data, word_embedding
            )

            # Save results
            results_df = pd.DataFrame({
                'epoch': range(1, len(train_accs) + 1),
                'training_accuracy': train_accs,
                'validation_accuracy': val_accs,
                'training_loss': train_losses,
                'test_accuracy': [test_acc] * len(train_accs),
                'hidden_dim': [hidden_dim] * len(train_accs),
                'max_epochs': [num_epochs] * len(train_accs)
            })

            filename = f"results/rnn_{num_epochs}_{hidden_dim}.csv"
            results_df.to_csv(filename, index=False)

            all_results.append({
                'hidden_dim': hidden_dim,
                'epochs': num_epochs,
                'final_train_acc': train_accs[-1],
                'final_val_acc': val_accs[-1],
                'final_test_acc': test_acc,
                'best_val_acc': max(val_accs),
                'total_epochs_run': len(train_accs)
            })

    # Save combined results
    combined_results = pd.DataFrame(all_results)
    combined_results.to_csv('results/combined_results.csv', index=False)
    print("\nCombined results saved to results/combined_results.csv")
    print("\nSummary of all experiments:")
    print(combined_results)

Using device: cuda

Training started for epoch 1


100%|██████████| 500/500 [01:06<00:00,  7.52it/s]


Validation started for epoch 1


100%|██████████| 25/25 [00:03<00:00,  7.86it/s]


Epoch 1:
Training accuracy: 0.2011
Validation accuracy: 0.4012
Training loss: 1.6361
Training started for epoch 2


100%|██████████| 500/500 [01:05<00:00,  7.62it/s]


Validation started for epoch 2


100%|██████████| 25/25 [00:03<00:00,  7.80it/s]


Epoch 2:
Training accuracy: 0.1973
Validation accuracy: 0.0163
Training loss: 1.6331
Training started for epoch 3


100%|██████████| 500/500 [01:05<00:00,  7.63it/s]


Validation started for epoch 3


100%|██████████| 25/25 [00:03<00:00,  7.93it/s]


Epoch 3:
Training accuracy: 0.2001
Validation accuracy: 0.4012
Training loss: 1.6323
Training started for epoch 4


100%|██████████| 500/500 [01:04<00:00,  7.72it/s]


Validation started for epoch 4


100%|██████████| 25/25 [00:03<00:00,  7.87it/s]


Epoch 4:
Training accuracy: 0.1984
Validation accuracy: 0.0150
Training loss: 1.6348
Training started for epoch 5


100%|██████████| 500/500 [01:06<00:00,  7.52it/s]


Validation started for epoch 5


100%|██████████| 25/25 [00:03<00:00,  7.82it/s]


Epoch 5:
Training accuracy: 0.1994
Validation accuracy: 0.2087
Training loss: 1.6280
Training started for epoch 6


100%|██████████| 500/500 [01:05<00:00,  7.59it/s]


Validation started for epoch 6


100%|██████████| 25/25 [00:03<00:00,  7.82it/s]


Epoch 6:
Training accuracy: 0.2034
Validation accuracy: 0.0187
Training loss: 1.6317
Training stopped to avoid overfitting!
Best validation accuracy: 0.2087


100%|██████████| 25/25 [00:02<00:00,  9.65it/s]

Test accuracy: 0.3875

Combined results saved to results/combined_results.csv

Summary of all experiments:
   hidden_dim  epochs  final_train_acc  final_val_acc  final_test_acc  \
0          32      10         0.203375        0.01875          0.3875   

   best_val_acc  total_epochs_run  
0       0.40125                 6  



