<a href="https://colab.research.google.com/github/tinayiluo0322/Computer-Engineering-Machine-Learning-and-Deep-Neural-Nets-Projects/blob/main/RNN%20and%20Transformers/LabRNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Implement and train a LSTM for sentiment analysis

### Luopeiwen Yi

## Step 0: set up the environment

In [37]:
import functools
import sys
import numpy as np
import pandas as pd
import random
import re
import matplotlib.pyplot as plt
import tqdm
import nltk
from sklearn.model_selection import train_test_split
from nltk.corpus import stopwords
from collections import Counter
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset

nltk.download('stopwords')

torch.backends.cudnn.benchmark = True

import os
os.makedirs("resources", exist_ok=True)

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [38]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [39]:
import sys
import os

In [40]:
# Change this to the absolute path where dataset.py and utils.py are stored
CODE_PATH = "/content/drive/MyDrive/ECE661 Assignment 3"

# Add this path to sys.path so Python can find it
sys.path.append(CODE_PATH)

# Check if Colab can see the files
print("Files in directory:", os.listdir(CODE_PATH))

Files in directory: ['LabRNN.ipynb', 'ECE_661__Homework_3_RNN_and_Transformers.pdf', 'LabLLM.ipynb', 'IMDBDataset.csv', '.DS_Store']


## Hyperparameters

In [41]:
class HyperParams:
    def __init__(self):
        # Constance hyperparameters. They have been tested and don't need to be tuned.
        self.PAD_INDEX = 0
        self.UNK_INDEX = 1
        self.PAD_TOKEN = '<pad>'
        self.UNK_TOKEN = '<unk>'
        self.STOP_WORDS = set(stopwords.words('english'))
        self.MAX_LENGTH = 256
        self.BATCH_SIZE = 96
        self.EMBEDDING_DIM = 1
        self.HIDDEN_DIM = 100
        self.OUTPUT_DIM = 2
        self.N_LAYERS = 1
        self.DROPOUT_RATE = 0.0
        self.LR = 0.001
        self.N_EPOCHS = 5
        self.WD = 0
        self.SEED = 12
        self.BIDIRECTIONAL = False

## Data Loader


In [42]:
def load_imdb(base_csv: str = '/content/drive/MyDrive/ECE661 Assignment 3/IMDBDataset.csv'):
    """
    Load the IMDB dataset
    :param base_csv: the path of the dataset file.
    :return: train, validation and test set.
    """
    # Load dataset
    df = pd.read_csv(base_csv)

    # Ensure the dataset has expected columns
    assert 'review' in df.columns and 'sentiment' in df.columns, "CSV file must have 'review' and 'sentiment' columns"

    # Convert labels to binary (0 for negative, 1 for positive)
    df['sentiment'] = df['sentiment'].map({'negative': 0, 'positive': 1})

    # Shuffle dataset for randomness
    df = df.sample(frac=1, random_state=HyperParams().SEED).reset_index(drop=True)

    # Split dataset: 70% train, 10% validation, 20% test
    train_ratio, valid_ratio = 0.7, 0.1
    train_size = int(len(df) * train_ratio)
    valid_size = int(len(df) * valid_ratio)

    x_train, y_train = df['review'][:train_size], df['sentiment'][:train_size]
    x_valid, y_valid = df['review'][train_size:train_size + valid_size], df['sentiment'][train_size:train_size + valid_size]
    x_test, y_test = df['review'][train_size + valid_size:], df['sentiment'][train_size + valid_size:]

    print(f'Shape of train data: {x_train.shape}')
    print(f'Shape of validation data: {x_valid.shape}')
    print(f'Shape of test data: {x_test.shape}')

    return x_train, x_valid, x_test, y_train, y_valid, y_test

In [43]:
# test the sample and print out
x_train, x_valid, x_test, y_train, y_valid, y_test = load_imdb()
print(x_train.head())
print(y_train.head())

Shape of train data: (35000,)
Shape of validation data: (5000,)
Shape of test data: (10000,)
0    It's hard to tell if Noonan and Marshall are t...
1    Well, where do I start...<br /><br />As one of...
2    "MY WIFE AND KIDS," in my opinion, is an absol...
3    What a surprise. A basic copycat of the comedy...
4    Josef Von Sternberg directs this magnificent s...
Name: review, dtype: object
0    0
1    1
2    1
3    1
4    1
Name: sentiment, dtype: int64


## Build a Vocabulary


In [44]:
def build_vocab(x_train: list, min_freq: int = 5, hparams=None) -> dict:
    """
    Build a vocabulary based on the training corpus.

    :param x_train: List. The training corpus. Each sample in the list is a string of text.
    :param min_freq: Int. The frequency threshold for selecting words.
    :param hparams: HyperParams object containing stopwords and special tokens.
    :return: Dictionary {word: index}
    """
    # Initialize Counter to track word frequencies
    word_freq = Counter()

    # Tokenize and count words while filtering out stop words
    for text in x_train:
        words = re.findall(r'\b\w+\b', text.lower())  # Simple word tokenization
        filtered_words = [word for word in words if word not in hparams.STOP_WORDS]
        word_freq.update(filtered_words)

    # Filter words by frequency threshold
    corpus = {word: freq for word, freq in word_freq.items() if freq >= min_freq}

    # Create vocab dictionary with word indices
    corpus_ = sorted(corpus.keys())  # Sort words for consistency
    vocab = {w: i + 2 for i, w in enumerate(corpus_)}  # Offset indices to reserve space for PAD and UNK tokens

    # Add special tokens
    vocab[hparams.PAD_TOKEN] = hparams.PAD_INDEX
    vocab[hparams.UNK_TOKEN] = hparams.UNK_INDEX

    return vocab

In [45]:
# test the function
hparams = HyperParams()
vocab = build_vocab(x_train, hparams=hparams)
print(vocab)



## Tokenize Function


In [46]:
def tokenize(vocab: dict, example: str) -> list:
    """
    Tokenize the given example string into a list of token indices.

    :param vocab: dict, the vocabulary.
    :param example: a string of text.
    :return: a list of token indices.
    """
    # Tokenize the text (convert to lowercase and extract words)
    words = re.findall(r'\b\w+\b', example.lower())

    # Convert words to indices, using UNK_INDEX if the word is not in vocab
    token_indices = [vocab.get(word, vocab['<unk>']) for word in words]

    return token_indices

In [47]:
# test the function
print(tokenize(vocab, x_train.iloc[0]))

[1, 1, 13661, 1, 30047, 1, 20712, 1, 18657, 1, 31203, 1, 1605, 400, 6818, 18669, 17527, 7319, 17181, 1, 1, 1, 6001, 29967, 1, 33119, 1, 1, 1, 1, 31203, 1, 14940, 1, 10868, 19501, 1, 1, 2588, 21152, 32896, 17235, 1, 1, 33120, 15209, 29119, 22477, 20712, 1, 13998, 33520, 1, 1110, 26461, 1, 18657, 1, 1, 5251, 33043, 18657, 26601, 1, 1, 31203, 1, 1, 7670, 18669, 31355, 1, 8286, 18850, 4672, 6678, 1, 1, 1, 1, 5993, 10442, 24828, 5716, 12816, 16866, 1, 20712, 30344, 1, 1, 1, 1, 1, 12187, 29427, 1, 1, 1, 1, 26601, 1, 30305, 1, 11482, 1, 1821, 1, 17400, 1, 17961, 1, 28624, 1, 1, 10461, 1, 10463, 1, 1, 13952, 1, 26437, 6001, 18855, 1, 1, 1, 1, 1, 1, 1, 1, 2155, 1, 24802, 26588, 1, 1, 1, 30234, 1, 1, 4509, 1, 1, 1, 1, 1, 3761, 1, 1, 9068, 3186, 1, 1, 30366, 33630, 21101, 16147, 5075, 33018, 22517, 1, 1, 27568, 1, 1, 23297, 12877, 1, 1, 30366, 28878, 27136, 26093, 4234, 1, 1, 1, 1, 1, 198, 19455, 1, 1, 30301, 1, 1, 16718, 1, 1, 2155, 1, 30393, 29091, 1, 1, 26437, 1, 1, 1, 28538, 31710, 1, 1, 1, 1

## IMDB class

In [48]:
class IMDB(Dataset):
    def __init__(self, x, y, vocab, max_length=256) -> None:
        """
        :param x: list of reviews
        :param y: list of labels
        :param vocab: vocabulary dictionary {word:index}.
        :param max_length: the maximum sequence length.
        """
        self.x = x
        self.y = y
        self.vocab = vocab
        self.max_length = max_length

    def __getitem__(self, idx: int):
        """
        Return the tokenized review and label by the given index.
        :param idx: index of the sample.
        :return: a dictionary containing three keys: 'ids', 'length', 'label'
                 which represent the list of token ids, the length of the sequence,
                 and the binary label.
        """
        # Get the review text and label
        review = self.x.iloc[idx]  # Pandas Series indexing
        label = self.y.iloc[idx]   # Convert 'positive'/'negative' to binary (1/0)

        # Tokenize the review
        token_ids = tokenize(self.vocab, review)

        # Truncate if longer than max_length, otherwise keep as is
        token_ids = token_ids[:self.max_length]

        # Store the length before padding
        length = len(token_ids)

        # Return dictionary containing the required keys
        return {
            'ids': token_ids,
            'length': length,
            'label': label
        }

    def __len__(self) -> int:
        return len(self.x)

def collate(batch, pad_index):
    batch_ids = [torch.LongTensor(i['ids']) for i in batch]
    batch_ids = nn.utils.rnn.pad_sequence(batch_ids, padding_value=pad_index, batch_first=True)
    batch_length = torch.Tensor([i['length'] for i in batch])
    batch_label = torch.LongTensor([i['label'] for i in batch])
    batch = {'ids': batch_ids, 'length': batch_length, 'label': batch_label}
    return batch

collate_fn = collate

In [49]:
# test the function
train_data = IMDB(x_train, y_train, vocab, hparams.MAX_LENGTH)
print(train_data[0])

{'ids': [1, 1, 13661, 1, 30047, 1, 20712, 1, 18657, 1, 31203, 1, 1605, 400, 6818, 18669, 17527, 7319, 17181, 1, 1, 1, 6001, 29967, 1, 33119, 1, 1, 1, 1, 31203, 1, 14940, 1, 10868, 19501, 1, 1, 2588, 21152, 32896, 17235, 1, 1, 33120, 15209, 29119, 22477, 20712, 1, 13998, 33520, 1, 1110, 26461, 1, 18657, 1, 1, 5251, 33043, 18657, 26601, 1, 1, 31203, 1, 1, 7670, 18669, 31355, 1, 8286, 18850, 4672, 6678, 1, 1, 1, 1, 5993, 10442, 24828, 5716, 12816, 16866, 1, 20712, 30344, 1, 1, 1, 1, 1, 12187, 29427, 1, 1, 1, 1, 26601, 1, 30305, 1, 11482, 1, 1821, 1, 17400, 1, 17961, 1, 28624, 1, 1, 10461, 1, 10463, 1, 1, 13952, 1, 26437, 6001, 18855, 1, 1, 1, 1, 1, 1, 1, 1, 2155, 1, 24802, 26588, 1, 1, 1, 30234, 1, 1, 4509, 1, 1, 1, 1, 1, 3761, 1, 1, 9068, 3186, 1, 1, 30366, 33630, 21101, 16147, 5075, 33018, 22517, 1, 1, 27568, 1, 1, 23297, 12877, 1, 1, 30366, 28878, 27136, 26093, 4234, 1, 1, 1, 1, 1, 198, 19455, 1, 1, 30301, 1, 1, 16718, 1, 1, 2155, 1, 30393, 29091, 1, 1, 26437, 1, 1, 1, 28538, 31710, 1,

## LSTM Model


In [50]:
def init_weights(m):
    if isinstance(m, nn.Embedding):
        nn.init.xavier_normal_(m.weight)
    elif isinstance(m, nn.Linear):
        nn.init.xavier_normal_(m.weight)
        nn.init.zeros_(m.bias)
    elif isinstance(m, nn.LSTM) or isinstance(m, nn.GRU):
        for name, param in m.named_parameters():
            if 'bias' in name:
                nn.init.zeros_(param)
            elif 'weight' in name:
                nn.init.orthogonal_(param)

class LSTM(nn.Module):
    def __init__(
        self,
        vocab_size: int,
        embedding_dim: int,
        hidden_dim: int,
        output_dim: int,
        n_layers: int,
        dropout_rate: float,
        pad_index: int,
        bidirectional: bool,
        **kwargs):
        """
        Create an LSTM model for classification.
        :param vocab_size: size of the vocabulary
        :param embedding_dim: dimension of embeddings
        :param hidden_dim: dimension of hidden features
        :param output_dim: dimension of the output layer which equals to the number of labels.
        :param n_layers: number of layers.
        :param dropout_rate: dropout rate.
        :param pad_index: index of the padding token.
        :param bidirectional: whether to use bidirectional LSTM.
        """
        super().__init__()

        # Embedding layer
        self.embedding = nn.Embedding(
            num_embeddings=vocab_size,
            embedding_dim=embedding_dim,
            padding_idx=pad_index  # Ignores PAD token during training
        )

        # LSTM layer
        self.lstm = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=hidden_dim,
            num_layers=n_layers,
            dropout=dropout_rate if n_layers > 1 else 0,  # Dropout only if more than 1 layer
            batch_first=True,
            bidirectional=bidirectional
        )

        # Fully connected output layer
        self.fc = nn.Linear(hidden_dim * (2 if bidirectional else 1), output_dim)

        # Dropout layer
        self.dropout = nn.Dropout(dropout_rate)

        # Weight initialization (DO NOT CHANGE)
        if "weight_init_fn" not in kwargs:
            self.apply(init_weights)
        else:
            self.apply(kwargs["weight_init_fn"])

    def forward(self, ids: torch.Tensor, length: torch.Tensor):
        """
        Forward pass through the LSTM model.
        :param ids: [batch size, seq len] batch of token ids.
        :param length: [batch size] batch of lengths of token ids.
        :return: prediction of size [batch size, output dim].
        """
        # Pass input through embedding layer
        embedded = self.embedding(ids)  # Shape: [batch_size, seq_len, embedding_dim]

        # Pack the sequences for variable-length handling
        packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, length.cpu(), batch_first=True, enforce_sorted=False)

        # Pass through LSTM
        packed_output, (hidden, cell) = self.lstm(packed_embedded)

        # If bidirectional, concatenate last forward and backward hidden states
        if self.lstm.bidirectional:
            hidden = torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim=1)
        else:
            hidden = hidden[-1,:,:]  # Take the last layer's hidden state

        # Apply dropout
        hidden = self.dropout(hidden)

        # Fully connected layer for classification
        prediction = self.fc(hidden)  # Shape: [batch_size, output_dim]

        return prediction

## Training Code

In [51]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


def train(dataloader, model, criterion, optimizer, scheduler, device):
    model.train()
    epoch_losses = []
    epoch_accs = []

    for batch in tqdm.tqdm(dataloader, desc='training...', file=sys.stdout):
        ids = batch['ids'].to(device)
        length = batch['length']
        label = batch['label'].to(device)
        prediction = model(ids, length)
        loss = criterion(prediction, label)
        accuracy = get_accuracy(prediction, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        epoch_losses.append(loss.item())
        epoch_accs.append(accuracy.item())
        scheduler.step()

    return epoch_losses, epoch_accs

def evaluate(dataloader, model, criterion, device):
    model.eval()
    epoch_losses = []
    epoch_accs = []

    with torch.no_grad():
        for batch in tqdm.tqdm(dataloader, desc='evaluating...', file=sys.stdout):
            ids = batch['ids'].to(device)
            length = batch['length']
            label = batch['label'].to(device)
            prediction = model(ids, length)
            loss = criterion(prediction, label)
            accuracy = get_accuracy(prediction, label)
            epoch_losses.append(loss.item())
            epoch_accs.append(accuracy.item())

    return epoch_losses, epoch_accs

def get_accuracy(prediction, label):
    batch_size, _ = prediction.shape
    predicted_classes = prediction.argmax(dim=-1)
    correct_predictions = predicted_classes.eq(label).sum()
    accuracy = correct_predictions / batch_size
    return accuracy

def predict_sentiment(text, model, vocab, device, hparams):
    tokens = tokenize(vocab, text)
    ids = [vocab[t] if t in vocab else hparams.UNK_INDEX for t in tokens]  # Use hparams.UNK_INDEX
    length = torch.LongTensor([len(ids)])
    tensor = torch.LongTensor(ids).unsqueeze(dim=0).to(device)
    prediction = model(tensor, length).squeeze(dim=0)
    probability = torch.softmax(prediction, dim=-1)
    predicted_class = prediction.argmax(dim=-1).item()
    predicted_probability = probability[predicted_class].item()
    return predicted_class, predicted_probability

### Learning Rate Warmup

In [52]:
class ConstantWithWarmup(torch.optim.lr_scheduler._LRScheduler):
    def __init__(
        self,
        optimizer,
        num_warmup_steps: int,
    ):
        self.num_warmup_steps = num_warmup_steps
        super().__init__(optimizer)

    def get_lr(self):
        if self._step_count <= self.num_warmup_steps:
            # warmup
            scale = 1.0 - (self.num_warmup_steps - self._step_count) / self.num_warmup_steps
            lr = [base_lr * scale for base_lr in self.base_lrs]
            self.last_lr = lr
        else:
            lr = self.base_lrs
        return lr

### Implement the Training / Validation Iteration

In [53]:
def train_and_test_model_with_hparams(hparams, model_type="lstm", **kwargs):
    # Seeding. DO NOT TOUCH! DO NOT TOUCH hparams.SEED!
    torch.manual_seed(hparams.SEED)
    random.seed(hparams.SEED)
    np.random.seed(hparams.SEED)

    # Load dataset
    x_train, x_valid, x_test, y_train, y_valid, y_test = load_imdb()
    vocab = build_vocab(x_train, hparams=hparams)
    vocab_size = len(vocab)
    print(f'Length of vocabulary is {vocab_size}')

    # Create datasets
    train_data = IMDB(x_train, y_train, vocab, hparams.MAX_LENGTH)
    valid_data = IMDB(x_valid, y_valid, vocab, hparams.MAX_LENGTH)
    test_data = IMDB(x_test, y_test, vocab, hparams.MAX_LENGTH)

    # Create dataloaders
    collate = functools.partial(collate_fn, pad_index=hparams.PAD_INDEX)
    train_dataloader = torch.utils.data.DataLoader(
        train_data, batch_size=hparams.BATCH_SIZE, collate_fn=collate, shuffle=True)
    valid_dataloader = torch.utils.data.DataLoader(
        valid_data, batch_size=hparams.BATCH_SIZE, collate_fn=collate)
    test_dataloader = torch.utils.data.DataLoader(
        test_data, batch_size=hparams.BATCH_SIZE, collate_fn=collate)

    # Initialize Model
    model = LSTM(
            vocab_size,
            hparams.EMBEDDING_DIM,
            hparams.HIDDEN_DIM,
            hparams.OUTPUT_DIM,
            hparams.N_LAYERS,
            hparams.DROPOUT_RATE,
            hparams.PAD_INDEX,
            hparams.BIDIRECTIONAL,
            **kwargs)

    num_params = count_parameters(model)
    print(f'The model has {num_params:,} trainable parameters')

    # Move model to device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)

    # Optimizer and loss function
    optimizer = optim.Adam(model.parameters(), lr=hparams.LR, weight_decay=hparams.WD, eps=1e-6)
    criterion = nn.CrossEntropyLoss().to(device)

    # Warmup Scheduler. DO NOT TOUCH!
    WARMUP_STEPS = 200
    lr_scheduler = ConstantWithWarmup(optimizer, WARMUP_STEPS)

    best_valid_loss = float('inf')
    best_model_state = None  # To save the best model

    for epoch in range(hparams.N_EPOCHS):
        # Training phase
        train_loss, train_acc = train(train_dataloader, model, criterion, optimizer, lr_scheduler, device)

        # Validation phase
        valid_loss, valid_acc = evaluate(valid_dataloader, model, criterion, device)

        epoch_train_loss = np.mean(train_loss)
        epoch_train_acc = np.mean(train_acc)
        epoch_valid_loss = np.mean(valid_loss)
        epoch_valid_acc = np.mean(valid_acc)

        # Save the model that achieves the smallest validation loss.
        if epoch_valid_loss < best_valid_loss:
            best_valid_loss = epoch_valid_loss
            best_model_state = model.state_dict().copy()  # Save model state

        print(f'epoch: {epoch+1}')
        print(f'train_loss: {epoch_train_loss:.3f}, train_acc: {epoch_train_acc:.3f}')
        print(f'valid_loss: {epoch_valid_loss:.3f}, valid_acc: {epoch_valid_acc:.3f}')

    # Load the best model's weights.
    model.load_state_dict(best_model_state)

    # Evaluate test loss on testing dataset (NOT validation)
    test_loss, test_acc = evaluate(test_dataloader, model, criterion, device)
    epoch_test_loss = np.mean(test_loss)
    epoch_test_acc = np.mean(test_acc)

    print(f'test_loss: {epoch_test_loss:.3f}, test_acc: {epoch_test_acc:.3f}')

    # Select one entry from the test set and predict its sentiment
    test_sample_text = x_test.iloc[0]
    predicted_class, predicted_prob = predict_sentiment(test_sample_text, model, vocab, device, hparams)

    # Print prediction results
    print("\nSample Test Review:")
    print(test_sample_text)
    print(f"Predicted Sentiment: {'Positive' if predicted_class == 1 else 'Negative'} ({predicted_prob:.3f})")

    # Free memory
    del model
    torch.cuda.empty_cache()

    return {
        'num_params': num_params,
        "test_loss": epoch_test_loss,
        "test_acc": epoch_test_acc,
    }

### Train LSTM model

Train the model with default hyperparameter settings.

In [54]:
org_hyperparams = HyperParams()
_ = train_and_test_model_with_hparams(org_hyperparams, "lstm_1layer_base_adam_e32_h100")

Shape of train data: (35000,)
Shape of validation data: (5000,)
Shape of test data: (10000,)
Length of vocabulary is 33873
The model has 75,275 trainable parameters
training...: 100%|██████████| 365/365 [00:10<00:00, 33.48it/s]
evaluating...: 100%|██████████| 53/53 [00:01<00:00, 51.66it/s]
epoch: 1
train_loss: 0.691, train_acc: 0.532
valid_loss: 0.678, valid_acc: 0.551
training...: 100%|██████████| 365/365 [00:10<00:00, 33.27it/s]
evaluating...: 100%|██████████| 53/53 [00:01<00:00, 52.15it/s]
epoch: 2
train_loss: 0.642, train_acc: 0.642
valid_loss: 0.610, valid_acc: 0.639
training...: 100%|██████████| 365/365 [00:10<00:00, 33.51it/s]
evaluating...: 100%|██████████| 53/53 [00:01<00:00, 52.50it/s]
epoch: 3
train_loss: 0.613, train_acc: 0.660
valid_loss: 0.591, valid_acc: 0.674
training...: 100%|██████████| 365/365 [00:10<00:00, 33.51it/s]
evaluating...: 100%|██████████| 53/53 [00:01<00:00, 52.69it/s]
epoch: 4
train_loss: 0.416, train_acc: 0.814
valid_loss: 0.365, valid_acc: 0.849
trainin

### **Observations on the Learning Dynamics of Training and Validation Loss**

#### **1. Steady and Consistent Decrease in Loss Values**
- **Training loss consistently decreases** from **0.691 → 0.254**, showing that the model is learning effectively.
- **Validation loss also decreases** from **0.678 → 0.330**, indicating that the model is generalizing well.
- **Accuracy steadily improves** from **53.2% to 89.8% (train)** and **55.1% to 86.8% (validation)**.

#### **2. Key Learning Phases**
- **Epoch 1-3**: The model is still learning basic sentiment features, and both training/validation loss decrease moderately.
- **Epoch 4-5**: The model significantly improves, achieving a much lower loss and higher accuracy.

**This suggests that the model is training correctly and converging well.**  
**No signs of overfitting**—validation accuracy remains close to training accuracy.

---

### **Test Performance Analysis**
- **Test loss**: **0.333**, which is close to the validation loss (**0.330**).
- **Test accuracy**: **86.9%**, consistent with the validation accuracy (**86.8%**).
- **Conclusion**: The model generalizes well on unseen test data.

---

### **Prediction on First Test Review**
#### **Actual Review Text:**
*"I first saw this in the movie theater when it came out, and the crowd was really into the movie which made the experience all the more fun. This is a great cast of characters, many big names in it, a few of which were not as recognized then as they are now. I think it's a great idea if you follow any of these actors, or have loved them in other movies, to add it to your watched list. Some of the scenes actually remind me of the type of well-done comedy as in The Birdcage or even The Clue, kind of odd spontaneous-appearing comedy, with some really professional delivery from these beloved actors. The movie did a great job at giving you some insight, perhaps even very realistic, into the culture of a daytime soap."*

**Expected Sentiment: Positive**  
**Predicted Sentiment: Negative (0.716 confidence)**  

---

### **Why Did the Model Get It Wrong?**
1. **Review Contains Positive Language**  
   - Words like **"great," "fun," "beloved actors," "well-done comedy," "professional delivery"** suggest a positive review.

2. **Possible Explanation for Misclassification**  
   - The review is **long and complex**, containing **many neutral descriptions**.
   - The model **might have focused on words like "soap" and "daytime"**, which could be **associated with negative reviews** in the training data.
   - If the model **wasn't trained on similar long reviews**, it might struggle with **contextually nuanced sentiment**.

---

### **Ways to Improve the Model**
1. **Train on More Diverse Examples**  
   - Ensure the dataset contains **long, descriptive reviews** with clear sentiment labels.

2. **Use Pretrained Word Embeddings (e.g., GloVe, Word2Vec)**  
   - Instead of randomly initialized embeddings, using **pretrained embeddings** might improve word understanding.

3. **Increase Model Complexity (e.g., BiLSTM, Attention Mechanism)**  
   - A **Bidirectional LSTM (BiLSTM)** could improve how the model captures long-range dependencies.
   - **Attention mechanisms** could help **focus on sentiment-heavy words** rather than treating all words equally.
