In [None]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

CHUNK_SIZE = 40960
DATA_SOURCE_MAPPING = 'glove-6b:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F5624978%2F9291509%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240901%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240901T083220Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3Da4b2be8990b8c605aa299392f6c0627c7c3b450d92714c7e6f71dc1e7cee28bb3275f6366e7f572b9e552ab63d419aebc0ff0262e9b2bf581994d4175bc83cc78affd214742e0c9b25fdd7c17c6f2402e31c74d7fd777f34f4efb99e8ec742e33da58e39ad843ac09f77372278396978ca543f02d48187c92e9c69774ee01956eb51473b1edfdd3fa81d2f4ba207c34e67e5650139ead97470b954e580f2b53264114145ebf0f624241c4d389a793d97dcc4a57c2ee2358f9af7d3b8fdcc4afb4b1037cb79e28b47df70c94bbcb13b01db8e2b8b01da96808598e2e915e0f0a804d41cc74f9150b6e68250383918ec341ad39f2b8023301df9bdb7f4f4209bf4,imdbdataset:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F5625457%2F9292186%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240901%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240901T083220Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3Dc5750c32bb54cd3b9eb1a708c3081258d0789aee56a69b2d5a6f0794a630eff09ca86784b21c01de7dd513137ab3cb528d39c35f92118907ba5bb058ff362d0919237ee0386881e88dcbbb041cd5af70dfb435aafd9a9c791a2df6fee22ae1ad47579bb0718d13c499fb8049061876e8eadf1ab391c834a5c28989a0fa75fd1a1a5e4e09d4257a7960e872ff8e4928235db65b09cbb504e0f32ffe65e29eafb0fbbaa75ccd7c6a644055f0cfe5d3faca0b30d9b39ddf63bfd42767cb2d8bd2db416078b709edb615b0783e374c105c20042ae99da6f43b0a9dcaa84c94cf06e9930d77b2841dc36a0551e8030c2fca2b9ad655a510dcb6ff6490cfccc8626a99'

KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    directory, download_url_encoded = data_source_mapping.split(':')
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            total_length = fileres.headers['content-length']
            print(f'Downloading {directory}, {total_length} bytes compressed')
            dl = 0
            data = fileres.read(CHUNK_SIZE)
            while len(data) > 0:
                dl += len(data)
                tfile.write(data)
                done = int(50 * dl / int(total_length))
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
                data = fileres.read(CHUNK_SIZE)
            if filename.endswith('.zip'):
              with ZipFile(tfile) as zfile:
                zfile.extractall(destination_path)
            else:
              with tarfile.open(tfile.name) as tarfile:
                tarfile.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError as e:
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError as e:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')


In [None]:
# Install necessary packages if not already installed
# !pip install torch torchvision torchtext pandas numpy scikit-learn nltk

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

import pandas as pd
import numpy as np
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

import re
from tqdm import tqdm
tqdm.pandas()

nltk.download('punkt')
nltk.download('stopwords')


[nltk_data] Downloading package punkt to /usr/share/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /usr/share/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [None]:
# Assuming the dataset is downloaded and available locally
df = pd.read_csv('/kaggle/input/imdbdataset/IMDB Dataset.csv')
df.head()


Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,positive
1,A wonderful little production. <br /><br />The...,positive
2,I thought this was a wonderful way to spend ti...,positive
3,Basically there's a family where a little boy ...,negative
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive


In [None]:
# Initialize stopwords
stop_words = set(stopwords.words('english'))

def preprocess_text(text):
    # Lowercase
    text = text.lower()
    # Remove HTML tags
    text = re.sub(r'<.*?>', '', text)
    # Remove special characters and numbers
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    # Tokenize
    tokens = word_tokenize(text)
    # Remove stopwords
    tokens = [word for word in tokens if word not in stop_words]
    return tokens

# Apply preprocessing
df['clean_review'] = df['review'].progress_apply(preprocess_text)
df.head()


100%|██████████| 50000/50000 [01:30<00:00, 554.49it/s]


Unnamed: 0,review,sentiment,clean_review
0,One of the other reviewers has mentioned that ...,positive,"[one, reviewers, mentioned, watching, oz, epis..."
1,A wonderful little production. <br /><br />The...,positive,"[wonderful, little, production, filming, techn..."
2,I thought this was a wonderful way to spend ti...,positive,"[thought, wonderful, way, spend, time, hot, su..."
3,Basically there's a family where a little boy ...,negative,"[basically, theres, family, little, boy, jake,..."
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive,"[petter, matteis, love, time, money, visually,..."


In [None]:
label_dict = {'positive': 1, 'negative': 0}
df['label'] = df['sentiment'].map(label_dict)
df.head()


Unnamed: 0,review,sentiment,clean_review,label
0,One of the other reviewers has mentioned that ...,positive,"[one, reviewers, mentioned, watching, oz, epis...",1
1,A wonderful little production. <br /><br />The...,positive,"[wonderful, little, production, filming, techn...",1
2,I thought this was a wonderful way to spend ti...,positive,"[thought, wonderful, way, spend, time, hot, su...",1
3,Basically there's a family where a little boy ...,negative,"[basically, theres, family, little, boy, jake,...",0
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive,"[petter, matteis, love, time, money, visually,...",1


In [None]:
X = df['clean_review']
y = df['label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [None]:
from collections import Counter

# Build vocabulary
all_words = [word for tokens in X_train for word in tokens]
word_counts = Counter(all_words)
vocab = sorted(word_counts, key=word_counts.get, reverse=True)
vocab_size = len(vocab)

print(f"Vocab size: {vocab_size}")

# Word to index mapping
word_to_idx = {word: idx+2 for idx, word in enumerate(vocab)}
word_to_idx["<PAD>"] = 0
word_to_idx["<UNK>"] = 1

# Index to word mapping
idx_to_word = {idx: word for word, idx in word_to_idx.items()}


Vocab size: 186211


In [None]:
def encode_text(text):
    return [word_to_idx.get(word, word_to_idx["<UNK>"]) for word in text]

# Encode training and testing data
X_train_enc = X_train.apply(encode_text)
X_test_enc = X_test.apply(encode_text)

In [None]:
from torch.nn.utils.rnn import pad_sequence

In [None]:
def pad_sequences(sequences, max_len):
    sequences = [torch.tensor(seq[:max_len]) for seq in sequences]
    sequences_padded = pad_sequence(sequences, batch_first=True, padding_value=word_to_idx["<PAD>"])
    return sequences_padded

max_len = 200  # Define max length

X_train_padded = pad_sequences(X_train_enc, max_len)
X_test_padded = pad_sequences(X_test_enc, max_len)

# Convert labels to tensors
y_train_tensor = torch.tensor(y_train.values)
y_test_tensor = torch.tensor(y_test.values)


In [None]:
train_dataset = torch.utils.data.TensorDataset(X_train_padded, y_train_tensor)
test_dataset = torch.utils.data.TensorDataset(X_test_padded, y_test_tensor)

batch_size = 64

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size)


In [None]:
embedding_dim = 100
glove_path = '/kaggle/input/glove-6b/glove.6B.100d.txt'

# Load GloVe embeddings
embeddings_index = {}
with open(glove_path, encoding='utf8') as f:
    for line in tqdm(f, desc="Loading GloVe"):
        values = line.strip().split()
        word = values[0]
        coeffs = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = coeffs

print(f"Loaded {len(embeddings_index)} word vectors.")

Loading GloVe: 400000it [00:12, 31667.74it/s]

Loaded 400000 word vectors.





In [None]:
embedding_matrix = np.zeros((vocab_size + 2, embedding_dim))

In [None]:
for word, idx in word_to_idx.items():
    if word in embeddings_index:
        embedding_matrix[idx] = embeddings_index[word]
    else:
        # Initialize random embedding for words not in GloVe
        embedding_matrix[idx] = np.random.normal(scale=0.6, size=(embedding_dim,))

In [None]:
# Convert to tensor
embedding_matrix = torch.tensor(embedding_matrix, dtype=torch.float32)
print(f"Embedding matrix shape: {embedding_matrix.shape}")


Embedding matrix shape: torch.Size([186213, 100])


In [None]:
class RNNClassifier(nn.Module):
    def __init__(self, embedding_matrix, hidden_dim, output_dim, n_layers, drop_prob=0.5):
        super(RNNClassifier, self).__init__()
        vocab_size, embedding_dim = embedding_matrix.shape

        self.embedding = nn.Embedding.from_pretrained(embedding_matrix, freeze=False)
        self.rnn = nn.RNN(embedding_dim, hidden_dim, num_layers=n_layers, batch_first=True, dropout=drop_prob)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.dropout = nn.Dropout(drop_prob)

    def forward(self, x):
        embeds = self.embedding(x)
        rnn_out, hidden = self.rnn(embeds)
        out = self.dropout(hidden[-1])
        out = self.fc(out)
        return out


In [None]:
class LSTMClassifier(nn.Module):
    def __init__(self, embedding_matrix, hidden_dim, output_dim, n_layers, drop_prob=0.5):
        super(LSTMClassifier, self).__init__()
        vocab_size, embedding_dim = embedding_matrix.shape

        self.embedding = nn.Embedding.from_pretrained(embedding_matrix, freeze=False)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers=n_layers, batch_first=True, dropout=drop_prob)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.dropout = nn.Dropout(drop_prob)

    def forward(self, x):
        embeds = self.embedding(x)
        lstm_out, (hidden, cell) = self.lstm(embeds)
        out = self.dropout(hidden[-1])
        out = self.fc(out)
        return out


In [None]:
def train_model(model, train_loader, valid_loader, criterion, optimizer, epochs, device):
    model.to(device)
    best_valid_loss = float('inf')

    for epoch in range(epochs):
        model.train()
        epoch_loss = 0
        epoch_acc = 0

        for texts, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}"):
            texts = texts.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            predictions = model(texts)
            loss = criterion(predictions.squeeze(), labels.float())
            preds = torch.round(torch.sigmoid(predictions.squeeze()))
            acc = (preds == labels).float().mean()

            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()
            epoch_acc += acc.item()

        valid_loss, valid_acc = evaluate_model(model, valid_loader, criterion, device)

        print(f'\nEpoch {epoch+1}/{epochs}')
        print(f'Train Loss: {epoch_loss/len(train_loader):.3f} | Train Acc: {epoch_acc/len(train_loader):.3f}')
        print(f'Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc:.3f}')

    return model


In [None]:
def evaluate_model(model, data_loader, criterion, device):
    model.eval()
    epoch_loss = 0
    epoch_acc = 0

    with torch.no_grad():
        for texts, labels in data_loader:
            texts = texts.to(device)
            labels = labels.to(device)

            predictions = model(texts)
            loss = criterion(predictions.squeeze(), labels.float())
            preds = torch.round(torch.sigmoid(predictions.squeeze()))
            acc = (preds == labels).float().mean()

            epoch_loss += loss.item()
            epoch_acc += acc.item()

    return epoch_loss / len(data_loader), epoch_acc / len(data_loader)


In [None]:
# Set parameters
hidden_dim = 128
output_dim = 1
n_layers = 2
epochs = 5
learning_rate = 0.001

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Initialize model, criterion and optimizer
rnn_model = RNNClassifier(embedding_matrix, hidden_dim, output_dim, n_layers)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(rnn_model.parameters(), lr=learning_rate)

# Train the model
trained_rnn_model = train_model(rnn_model, train_loader, test_loader, criterion, optimizer, epochs, device)


Epoch 1/5: 100%|██████████| 625/625 [00:04<00:00, 126.51it/s]



Epoch 1/5
Train Loss: 0.698 | Train Acc: 0.503
Val. Loss: 0.693 |  Val. Acc: 0.505


Epoch 2/5: 100%|██████████| 625/625 [00:04<00:00, 148.66it/s]



Epoch 2/5
Train Loss: 0.694 | Train Acc: 0.509
Val. Loss: 0.693 |  Val. Acc: 0.504


Epoch 3/5: 100%|██████████| 625/625 [00:04<00:00, 149.11it/s]



Epoch 3/5
Train Loss: 0.691 | Train Acc: 0.517
Val. Loss: 0.695 |  Val. Acc: 0.502


Epoch 4/5: 100%|██████████| 625/625 [00:04<00:00, 149.13it/s]



Epoch 4/5
Train Loss: 0.694 | Train Acc: 0.514
Val. Loss: 0.692 |  Val. Acc: 0.522


Epoch 5/5: 100%|██████████| 625/625 [00:04<00:00, 149.56it/s]



Epoch 5/5
Train Loss: 0.694 | Train Acc: 0.515
Val. Loss: 0.693 |  Val. Acc: 0.518


In [None]:
# Initialize model, criterion and optimizer
lstm_model = LSTMClassifier(embedding_matrix, hidden_dim, output_dim, n_layers)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(lstm_model.parameters(), lr=learning_rate)

# Train the model
trained_lstm_model = train_model(lstm_model, train_loader, test_loader, criterion, optimizer, epochs, device)


Epoch 1/5: 100%|██████████| 625/625 [00:10<00:00, 58.30it/s]



Epoch 1/5
Train Loss: 0.693 | Train Acc: 0.506
Val. Loss: 0.693 |  Val. Acc: 0.506


Epoch 2/5: 100%|██████████| 625/625 [00:10<00:00, 58.34it/s]



Epoch 2/5
Train Loss: 0.685 | Train Acc: 0.529
Val. Loss: 0.687 |  Val. Acc: 0.529


Epoch 3/5: 100%|██████████| 625/625 [00:10<00:00, 58.48it/s]



Epoch 3/5
Train Loss: 0.603 | Train Acc: 0.655
Val. Loss: 0.544 |  Val. Acc: 0.731


Epoch 4/5: 100%|██████████| 625/625 [00:10<00:00, 58.48it/s]



Epoch 4/5
Train Loss: 0.402 | Train Acc: 0.839
Val. Loss: 0.441 |  Val. Acc: 0.788


Epoch 5/5: 100%|██████████| 625/625 [00:10<00:00, 58.57it/s]



Epoch 5/5
Train Loss: 0.258 | Train Acc: 0.898
Val. Loss: 0.441 |  Val. Acc: 0.819


In [None]:
def test_model(model, data_loader, device):
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for texts, labels in data_loader:
            texts = texts.to(device)
            labels = labels.to(device)

            predictions = model(texts)
            preds = torch.round(torch.sigmoid(predictions.squeeze()))

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    acc = accuracy_score(all_labels, all_preds)
    report = classification_report(all_labels, all_preds, target_names=['Negative', 'Positive'])

    print(f'Accuracy: {acc:.3f}')
    print(report)


In [None]:
print("RNN Model Evaluation:")
test_model(trained_rnn_model, test_loader, device)

print("LSTM Model Evaluation:")
test_model(trained_lstm_model, test_loader, device)


RNN Model Evaluation:
Accuracy: 0.518
              precision    recall  f1-score   support

    Negative       0.51      0.65      0.57      4961
    Positive       0.53      0.39      0.45      5039

    accuracy                           0.52     10000
   macro avg       0.52      0.52      0.51     10000
weighted avg       0.52      0.52      0.51     10000

LSTM Model Evaluation:
Accuracy: 0.819
              precision    recall  f1-score   support

    Negative       0.96      0.66      0.78      4961
    Positive       0.74      0.97      0.84      5039

    accuracy                           0.82     10000
   macro avg       0.85      0.82      0.81     10000
weighted avg       0.85      0.82      0.81     10000



In [None]:
class RNNClassifierOnTheFly(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, n_layers, drop_prob=0.5):
        super(RNNClassifierOnTheFly, self).__init__()

        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.rnn = nn.RNN(embedding_dim, hidden_dim, num_layers=n_layers, batch_first=True, dropout=drop_prob)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.dropout = nn.Dropout(drop_prob)

    def forward(self, x):
        embeds = self.embedding(x)
        rnn_out, hidden = self.rnn(embeds)
        out = self.dropout(hidden[-1])
        out = self.fc(out)
        return out


In [None]:
class LSTMClassifierOnTheFly(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, n_layers, drop_prob=0.5):
        super(LSTMClassifierOnTheFly, self).__init__()

        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers=n_layers, batch_first=True, dropout=drop_prob)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.dropout = nn.Dropout(drop_prob)

    def forward(self, x):
        embeds = self.embedding(x)
        lstm_out, (hidden, cell) = self.lstm(embeds)
        out = self.dropout(hidden[-1])
        out = self.fc(out)
        return out


In [None]:
# Set parameters
embedding_dim = 100  # Same as GloVe
hidden_dim = 128
output_dim = 1
n_layers = 2
epochs = 5
learning_rate = 0.001

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Initialize model, criterion, and optimizer
rnn_model_otf = RNNClassifierOnTheFly(len(word_to_idx), embedding_dim, hidden_dim, output_dim, n_layers)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(rnn_model_otf.parameters(), lr=learning_rate)

# Train the model
trained_rnn_model_otf = train_model(rnn_model_otf, train_loader, test_loader, criterion, optimizer, epochs, device)


Epoch 1/5: 100%|██████████| 625/625 [00:04<00:00, 141.57it/s]



Epoch 1/5
Train Loss: 0.699 | Train Acc: 0.505
Val. Loss: 0.693 |  Val. Acc: 0.501


Epoch 2/5: 100%|██████████| 625/625 [00:04<00:00, 139.08it/s]



Epoch 2/5
Train Loss: 0.694 | Train Acc: 0.508
Val. Loss: 0.694 |  Val. Acc: 0.501


Epoch 3/5: 100%|██████████| 625/625 [00:04<00:00, 139.67it/s]



Epoch 3/5
Train Loss: 0.693 | Train Acc: 0.514
Val. Loss: 0.696 |  Val. Acc: 0.502


Epoch 4/5: 100%|██████████| 625/625 [00:04<00:00, 139.08it/s]



Epoch 4/5
Train Loss: 0.694 | Train Acc: 0.507
Val. Loss: 0.694 |  Val. Acc: 0.498


Epoch 5/5: 100%|██████████| 625/625 [00:04<00:00, 140.15it/s]



Epoch 5/5
Train Loss: 0.696 | Train Acc: 0.500
Val. Loss: 0.693 |  Val. Acc: 0.504


In [None]:
# Initialize model, criterion, and optimizer
lstm_model_otf = LSTMClassifierOnTheFly(len(word_to_idx), embedding_dim, hidden_dim, output_dim, n_layers)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(lstm_model_otf.parameters(), lr=learning_rate)

# Train the model
trained_lstm_model_otf = train_model(lstm_model_otf, train_loader, test_loader, criterion, optimizer, epochs, device)


Epoch 1/5: 100%|██████████| 625/625 [00:10<00:00, 58.33it/s]



Epoch 1/5
Train Loss: 0.693 | Train Acc: 0.504
Val. Loss: 0.688 |  Val. Acc: 0.518


Epoch 2/5: 100%|██████████| 625/625 [00:10<00:00, 58.01it/s]



Epoch 2/5
Train Loss: 0.680 | Train Acc: 0.553
Val. Loss: 0.686 |  Val. Acc: 0.528


Epoch 3/5: 100%|██████████| 625/625 [00:10<00:00, 58.06it/s]



Epoch 3/5
Train Loss: 0.667 | Train Acc: 0.571
Val. Loss: 0.681 |  Val. Acc: 0.548


Epoch 4/5: 100%|██████████| 625/625 [00:10<00:00, 58.25it/s]



Epoch 4/5
Train Loss: 0.561 | Train Acc: 0.724
Val. Loss: 0.508 |  Val. Acc: 0.780


Epoch 5/5: 100%|██████████| 625/625 [00:10<00:00, 58.14it/s]



Epoch 5/5
Train Loss: 0.436 | Train Acc: 0.808
Val. Loss: 0.436 |  Val. Acc: 0.818


In [None]:
print("RNN Model with On-the-Fly Embeddings Evaluation:")
test_model(trained_rnn_model_otf, test_loader, device)

print("LSTM Model with On-the-Fly Embeddings Evaluation:")
test_model(trained_lstm_model_otf, test_loader, device)


RNN Model with On-the-Fly Embeddings Evaluation:
Accuracy: 0.504
              precision    recall  f1-score   support

    Negative       0.48      0.01      0.02      4961
    Positive       0.50      0.99      0.67      5039

    accuracy                           0.50     10000
   macro avg       0.49      0.50      0.34     10000
weighted avg       0.49      0.50      0.35     10000

LSTM Model with On-the-Fly Embeddings Evaluation:
Accuracy: 0.818
              precision    recall  f1-score   support

    Negative       0.81      0.83      0.82      4961
    Positive       0.83      0.80      0.82      5039

    accuracy                           0.82     10000
   macro avg       0.82      0.82      0.82     10000
weighted avg       0.82      0.82      0.82     10000

