In [25]:
# Testing on the SST-2 dataset (25K review samples)

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import accuracy_score, classification_report, precision_score, recall_score, f1_score
from tqdm import tqdm
import json
from tokenizers import Tokenizer
import warnings
warnings.filterwarnings("ignore", category=UserWarning, message="The PyTorch API of nested tensors")

# =======================
# Load CONFIG
# =======================
# Settings shared by the model definition, the data loading and the evaluation loop.
CONFIG = {
    # Encoder hyper-parameters; they must match the checkpoint being loaded.
    "d_model": 512,
    "nhead": 8,
    "num_layers": 6,
    "dim_feedforward": 2048,
    "batch_size": 16,  # batch size handed to the DataLoader
    "max_seq_len": 768,  # number of rows in the learned positional-embedding table
    "device": "cuda" if torch.cuda.is_available() else "cpu",  # GPU when one is visible
    "tokenizer_path": "movie_review_tokenizer.json",  # read only to derive vocab_size
    # NOTE(review): the IMDb, MB and Amazon cells in this file load "epoch_62.pt";
    # confirm that a different checkpoint is intended for SST-2.
    "model_path": "CLS_epoch_62.pt",
    # Pre-tokenized, padded test tensors (loaded with torch.load).
    "test_ids": "SST-2/SST_padded_token_ids.pt",
    "test_attention_masks": "SST-2/SST_padded_attention_masks.pt",
    "test_sentiment": "SST-2/SST_sentiment_labels.pt"
}

# =======================
# 1. Classification Model Architecture
# =======================
class HybridClassificationHead(nn.Module):
    """Classification head that fuses mean pooling with a CNN max-pool branch.

    The sequence-axis mean of the token embeddings is concatenated with the
    global max of a ReLU-activated 1-D convolution over the same embeddings,
    and the doubled-width vector is fed to a two-layer MLP with dropout.

    The attribute names (``cnn``, ``classifier``) double as ``state_dict``
    keys, so they have to stay as they are for checkpoints to load.

    Args:
        hidden_size: Width of each token embedding.
        num_classes: Number of output logits.
    """

    def __init__(self, hidden_size=CONFIG["d_model"], num_classes=2):
        super().__init__()
        self.cnn = nn.Conv1d(
            in_channels=hidden_size,
            out_channels=hidden_size,
            kernel_size=3,
            padding=1,
        )
        self.relu = nn.ReLU()
        self.pool = nn.AdaptiveMaxPool1d(output_size=1)
        mlp_layers = [
            nn.Dropout(p=0.3),
            nn.Linear(2 * hidden_size, hidden_size),
            nn.ReLU(),
            nn.Dropout(p=0.3),
            nn.Linear(hidden_size, num_classes),
        ]
        self.classifier = nn.Sequential(*mlp_layers)

    def forward(self, token_embeddings):
        """Return class logits for ``token_embeddings`` (batch, seq, hidden)."""
        # NOTE(review): both pooling branches run over every position; this
        # method receives no mask, so padded positions take part as well.
        averaged = token_embeddings.mean(dim=1)
        # Conv1d wants channels before length, hence the transpose.
        channels_first = token_embeddings.transpose(1, 2)
        activated = self.relu(self.cnn(channels_first))
        peaks = self.pool(activated).squeeze(-1)
        fused = torch.cat((averaged, peaks), dim=-1)
        return self.classifier(fused)

class CustomMLM(nn.Module):
    """Transformer encoder backbone with learned token and position embeddings.

    Args:
        config: Mapping providing ``d_model``, ``nhead``, ``num_layers``,
            ``dim_feedforward`` and ``max_seq_len``. ``vocab_size`` is
            optional: when it is missing it is read from the tokenizer file at
            ``config["tokenizer_path"]`` and stored back into ``config``.
    """

    def __init__(self, config):
        super().__init__()
        if "vocab_size" not in config:
            # The derived size is written into the caller's dict on purpose;
            # this side effect is kept for backward compatibility.
            tokenizer = Tokenizer.from_file(config["tokenizer_path"])
            config["vocab_size"] = tokenizer.get_vocab_size()

        self.embedding = nn.Embedding(config["vocab_size"], config["d_model"])
        self.pos_encoder = nn.Embedding(config["max_seq_len"], config["d_model"])
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=config["d_model"],
            nhead=config["nhead"],
            dim_feedforward=config["dim_feedforward"],
            batch_first=True,
        )
        self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=config["num_layers"])
        self.config = config

    def forward(self, input_ids, attention_mask):
        """Encode a batch of token ids.

        Args:
            input_ids: Integer tensor of token ids, shaped (batch, seq).
            attention_mask: Tensor of the same shape; truthy entries mark
                positions to attend to. Its logical inverse is passed to the
                encoder as ``src_key_padding_mask``.

        Returns:
            The encoder output for the batch.

        Raises:
            IndexError: If the sequence is longer than the positional table.
        """
        seq_len = input_ids.size(1)
        max_positions = self.pos_encoder.num_embeddings
        if seq_len > max_positions:
            # Fail early with a readable message instead of an opaque
            # out-of-range lookup inside the embedding kernel.
            raise IndexError(
                f"input sequence length {seq_len} exceeds max_seq_len={max_positions} "
                "(size of the positional-embedding table)"
            )
        positions = torch.arange(seq_len, device=input_ids.device).unsqueeze(0)
        embeddings = self.embedding(input_ids) + self.pos_encoder(positions)
        embeddings = self.encoder(embeddings, src_key_padding_mask=~attention_mask.bool())
        return embeddings

class SentimentClassifier(nn.Module):
    """Sentiment classifier: an encoder backbone followed by a pooling head.

    Args:
        mlm_model: Encoder module called as ``mlm_model(input_ids,
            attention_mask)``; its output is passed straight to the head.
    """

    def __init__(self, mlm_model):
        super().__init__()
        self.mlm = mlm_model
        # Size the head from the encoder's own config when it exposes one, so
        # the two cannot silently disagree; otherwise keep the head's default.
        try:
            hidden_size = mlm_model.config["d_model"]
        except (AttributeError, KeyError, TypeError):
            self.head = HybridClassificationHead()
        else:
            self.head = HybridClassificationHead(hidden_size=hidden_size)

    def forward(self, input_ids, attention_mask):
        """Return the head's logits for the encoded batch."""
        embeddings = self.mlm(input_ids, attention_mask)
        return self.head(embeddings)

# =======================
# 2. Load model and data
# =======================
class SentimentDataset(Dataset):
    """Map-style dataset over pre-tokenized reviews and their labels.

    The three containers are indexed in parallel; the dataset length is taken
    from ``labels``.
    """

    def __init__(self, token_ids, attention_mask, labels):
        self.token_ids, self.attention_mask, self.labels = token_ids, attention_mask, labels

    def __len__(self):
        n_samples = len(self.labels)
        return n_samples

    def __getitem__(self, idx):
        """Return sample ``idx`` as a dict of ``input_ids``, ``attention_mask`` and ``labels``."""
        ids = self.token_ids[idx].long()
        mask = self.attention_mask[idx]  # handed over with its stored dtype
        # clone().detach() gives the caller a copy independent of the stored labels.
        label = self.labels[idx].clone().detach().long()
        return {"input_ids": ids, "attention_mask": mask, "labels": label}

# Loading model
# Build the architecture on the target device, then restore the trained weights.
mlm_model = CustomMLM(CONFIG).to(CONFIG["device"])
model = SentimentClassifier(mlm_model).to(CONFIG["device"])
# NOTE(security): torch.load unpickles the file, so only load checkpoints and
# tensors that come from a trusted source.
model.load_state_dict(torch.load(CONFIG["model_path"], map_location=torch.device(CONFIG["device"])))
model.eval()

# Loading test dataset
# The tensors are mapped to the CPU so files saved from a GPU session still
# load on a CPU-only machine; batches are moved to the device during evaluation.
test_dataset = SentimentDataset(
    torch.load(CONFIG["test_ids"], map_location="cpu"),
    torch.load(CONFIG["test_attention_masks"], map_location="cpu"),
    torch.load(CONFIG["test_sentiment"], map_location="cpu"),
)
test_loader = DataLoader(test_dataset, batch_size=CONFIG["batch_size"])

# =======================
# 3. Evaluation
# =======================
def evaluate_model(model, test_loader):
    """Run ``model`` over ``test_loader`` and return classification metrics.

    Args:
        model: Module called as ``model(input_ids, attention_mask)`` that
            returns per-class logits.
        test_loader: Iterable of batches, each a dict holding ``"input_ids"``,
            ``"attention_mask"`` and ``"labels"`` tensors.

    Returns:
        Dict with ``accuracy``, ``precision``, ``recall`` and ``f1``; the last
        three are support-weighted averages over the classes.
    """
    model.eval()
    # Follow the model's own device; fall back to CONFIG for a model that has
    # no parameters to inspect.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device(CONFIG["device"])

    all_preds = []
    all_labels = []

    with torch.no_grad():
        for batch in tqdm(test_loader, desc="Evaluating"):
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)

            logits = model(input_ids, attention_mask)
            preds = torch.argmax(logits, dim=-1)

            all_preds.extend(preds.cpu().tolist())
            # Labels are only compared on the host, so they skip the device round trip.
            all_labels.extend(batch["labels"].cpu().tolist())

    # zero_division=0 yields the same value scikit-learn reports by default for
    # an undefined per-class score, without emitting the ill-defined warning.
    averaging = {"average": "weighted", "zero_division": 0}
    return {
        "accuracy": accuracy_score(all_labels, all_preds),
        "precision": precision_score(all_labels, all_preds, **averaging),
        "recall": recall_score(all_labels, all_preds, **averaging),
        "f1": f1_score(all_labels, all_preds, **averaging),
    }

# Evaluate the loaded model on the test loader and report each metric on its own line.
metrics = evaluate_model(model, test_loader)
print("\nTest Metrics:")
print(
    f"Accuracy: {metrics['accuracy']:.4f}",
    f"Precision: {metrics['precision']:.4f}",
    f"Recall: {metrics['recall']:.4f}",
    f"F1 Score: {metrics['f1']:.4f}",
    sep="\n",
)


Evaluating: 100%|████████████████████████████████████████████████████████████████| 1563/1563 [3:53:01<00:00,  8.95s/it]


Test Metrics:
Accuracy: 0.4450
Precision: 0.1980
Recall: 0.4450
F1 Score: 0.2741



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [12]:
# Testing IMDb dataset 25K reviews

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import accuracy_score, classification_report, precision_score, recall_score, f1_score
from tqdm import tqdm
import json
from tokenizers import Tokenizer
import warnings
warnings.filterwarnings("ignore", category=UserWarning, message="The PyTorch API of nested tensors")

# =======================
# Load CONFIG
# =======================
# Settings shared by the model definition, the data loading and the evaluation loop.
CONFIG = {
    # Encoder hyper-parameters; they must match the checkpoint being loaded.
    "d_model": 512,
    "nhead": 8,
    "num_layers": 6,
    "dim_feedforward": 2048,
    "batch_size": 16,  # batch size handed to the DataLoader
    "max_seq_len": 768,  # number of rows in the learned positional-embedding table
    "device": "cuda" if torch.cuda.is_available() else "cpu",  # GPU when one is visible
    "tokenizer_path": "movie_review_tokenizer.json",  # read only to derive vocab_size
    "model_path": "epoch_62.pt",  # state dict restored into SentimentClassifier
    # Pre-tokenized, padded test tensors (loaded with torch.load).
    "test_ids": "IMDb/padded_token_ids_test.pt",
    "test_attention_masks": "IMDb/padded_attention_masks_test.pt",
    "test_sentiment": "IMDb/sentiment_labels_test.pt"
}

# =======================
# 1. Classification Model Architecture
# =======================
class HybridClassificationHead(nn.Module):
    """Classification head that fuses mean pooling with a CNN max-pool branch.

    The sequence-axis mean of the token embeddings is concatenated with the
    global max of a ReLU-activated 1-D convolution over the same embeddings,
    and the doubled-width vector is fed to a two-layer MLP with dropout.

    The attribute names (``cnn``, ``classifier``) double as ``state_dict``
    keys, so they have to stay as they are for checkpoints to load.

    Args:
        hidden_size: Width of each token embedding.
        num_classes: Number of output logits.
    """

    def __init__(self, hidden_size=CONFIG["d_model"], num_classes=2):
        super().__init__()
        self.cnn = nn.Conv1d(
            in_channels=hidden_size,
            out_channels=hidden_size,
            kernel_size=3,
            padding=1,
        )
        self.relu = nn.ReLU()
        self.pool = nn.AdaptiveMaxPool1d(output_size=1)
        mlp_layers = [
            nn.Dropout(p=0.3),
            nn.Linear(2 * hidden_size, hidden_size),
            nn.ReLU(),
            nn.Dropout(p=0.3),
            nn.Linear(hidden_size, num_classes),
        ]
        self.classifier = nn.Sequential(*mlp_layers)

    def forward(self, token_embeddings):
        """Return class logits for ``token_embeddings`` (batch, seq, hidden)."""
        # NOTE(review): both pooling branches run over every position; this
        # method receives no mask, so padded positions take part as well.
        averaged = token_embeddings.mean(dim=1)
        # Conv1d wants channels before length, hence the transpose.
        channels_first = token_embeddings.transpose(1, 2)
        activated = self.relu(self.cnn(channels_first))
        peaks = self.pool(activated).squeeze(-1)
        fused = torch.cat((averaged, peaks), dim=-1)
        return self.classifier(fused)

class CustomMLM(nn.Module):
    """Transformer encoder backbone with learned token and position embeddings.

    Args:
        config: Mapping providing ``d_model``, ``nhead``, ``num_layers``,
            ``dim_feedforward`` and ``max_seq_len``. ``vocab_size`` is
            optional: when it is missing it is read from the tokenizer file at
            ``config["tokenizer_path"]`` and stored back into ``config``.
    """

    def __init__(self, config):
        super().__init__()
        if "vocab_size" not in config:
            # The derived size is written into the caller's dict on purpose;
            # this side effect is kept for backward compatibility.
            tokenizer = Tokenizer.from_file(config["tokenizer_path"])
            config["vocab_size"] = tokenizer.get_vocab_size()

        self.embedding = nn.Embedding(config["vocab_size"], config["d_model"])
        self.pos_encoder = nn.Embedding(config["max_seq_len"], config["d_model"])
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=config["d_model"],
            nhead=config["nhead"],
            dim_feedforward=config["dim_feedforward"],
            batch_first=True,
        )
        self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=config["num_layers"])
        self.config = config

    def forward(self, input_ids, attention_mask):
        """Encode a batch of token ids.

        Args:
            input_ids: Integer tensor of token ids, shaped (batch, seq).
            attention_mask: Tensor of the same shape; truthy entries mark
                positions to attend to. Its logical inverse is passed to the
                encoder as ``src_key_padding_mask``.

        Returns:
            The encoder output for the batch.

        Raises:
            IndexError: If the sequence is longer than the positional table.
        """
        seq_len = input_ids.size(1)
        max_positions = self.pos_encoder.num_embeddings
        if seq_len > max_positions:
            # Fail early with a readable message instead of an opaque
            # out-of-range lookup inside the embedding kernel.
            raise IndexError(
                f"input sequence length {seq_len} exceeds max_seq_len={max_positions} "
                "(size of the positional-embedding table)"
            )
        positions = torch.arange(seq_len, device=input_ids.device).unsqueeze(0)
        embeddings = self.embedding(input_ids) + self.pos_encoder(positions)
        embeddings = self.encoder(embeddings, src_key_padding_mask=~attention_mask.bool())
        return embeddings

class SentimentClassifier(nn.Module):
    """Sentiment classifier: an encoder backbone followed by a pooling head.

    Args:
        mlm_model: Encoder module called as ``mlm_model(input_ids,
            attention_mask)``; its output is passed straight to the head.
    """

    def __init__(self, mlm_model):
        super().__init__()
        self.mlm = mlm_model
        # Size the head from the encoder's own config when it exposes one, so
        # the two cannot silently disagree; otherwise keep the head's default.
        try:
            hidden_size = mlm_model.config["d_model"]
        except (AttributeError, KeyError, TypeError):
            self.head = HybridClassificationHead()
        else:
            self.head = HybridClassificationHead(hidden_size=hidden_size)

    def forward(self, input_ids, attention_mask):
        """Return the head's logits for the encoded batch."""
        embeddings = self.mlm(input_ids, attention_mask)
        return self.head(embeddings)

# =======================
# 2. Load model and data
# =======================
class SentimentDataset(Dataset):
    """Map-style dataset over pre-tokenized reviews and their labels.

    The three containers are indexed in parallel; the dataset length is taken
    from ``labels``.
    """

    def __init__(self, token_ids, attention_mask, labels):
        self.token_ids, self.attention_mask, self.labels = token_ids, attention_mask, labels

    def __len__(self):
        n_samples = len(self.labels)
        return n_samples

    def __getitem__(self, idx):
        """Return sample ``idx`` as a dict of ``input_ids``, ``attention_mask`` and ``labels``."""
        ids = self.token_ids[idx].long()
        mask = self.attention_mask[idx]  # handed over with its stored dtype
        # clone().detach() gives the caller a copy independent of the stored labels.
        label = self.labels[idx].clone().detach().long()
        return {"input_ids": ids, "attention_mask": mask, "labels": label}

# Loading model
# Build the architecture on the target device, then restore the trained weights.
mlm_model = CustomMLM(CONFIG).to(CONFIG["device"])
model = SentimentClassifier(mlm_model).to(CONFIG["device"])
# NOTE(security): torch.load unpickles the file, so only load checkpoints and
# tensors that come from a trusted source.
model.load_state_dict(torch.load(CONFIG["model_path"], map_location=torch.device(CONFIG["device"])))
model.eval()

# Loading test dataset
# The tensors are mapped to the CPU so files saved from a GPU session still
# load on a CPU-only machine; batches are moved to the device during evaluation.
test_dataset = SentimentDataset(
    torch.load(CONFIG["test_ids"], map_location="cpu"),
    torch.load(CONFIG["test_attention_masks"], map_location="cpu"),
    torch.load(CONFIG["test_sentiment"], map_location="cpu"),
)
test_loader = DataLoader(test_dataset, batch_size=CONFIG["batch_size"])

# =======================
# 3. Evaluation
# =======================
def evaluate_model(model, test_loader):
    """Run ``model`` over ``test_loader`` and return classification metrics.

    Args:
        model: Module called as ``model(input_ids, attention_mask)`` that
            returns per-class logits.
        test_loader: Iterable of batches, each a dict holding ``"input_ids"``,
            ``"attention_mask"`` and ``"labels"`` tensors.

    Returns:
        Dict with ``accuracy``, ``precision``, ``recall`` and ``f1``; the last
        three are support-weighted averages over the classes.
    """
    model.eval()
    # Follow the model's own device; fall back to CONFIG for a model that has
    # no parameters to inspect.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device(CONFIG["device"])

    all_preds = []
    all_labels = []

    with torch.no_grad():
        for batch in tqdm(test_loader, desc="Evaluating"):
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)

            logits = model(input_ids, attention_mask)
            preds = torch.argmax(logits, dim=-1)

            all_preds.extend(preds.cpu().tolist())
            # Labels are only compared on the host, so they skip the device round trip.
            all_labels.extend(batch["labels"].cpu().tolist())

    # zero_division=0 yields the same value scikit-learn reports by default for
    # an undefined per-class score, without emitting the ill-defined warning.
    averaging = {"average": "weighted", "zero_division": 0}
    return {
        "accuracy": accuracy_score(all_labels, all_preds),
        "precision": precision_score(all_labels, all_preds, **averaging),
        "recall": recall_score(all_labels, all_preds, **averaging),
        "f1": f1_score(all_labels, all_preds, **averaging),
    }

# Evaluate the loaded model on the test loader and report each metric on its own line.
metrics = evaluate_model(model, test_loader)
print("\nTest Metrics:")
print(
    f"Accuracy: {metrics['accuracy']:.4f}",
    f"Precision: {metrics['precision']:.4f}",
    f"Recall: {metrics['recall']:.4f}",
    f"F1 Score: {metrics['f1']:.4f}",
    sep="\n",
)

Evaluating: 100%|████████████████████████████████████████████████████████████████| 1563/1563 [2:08:15<00:00,  4.92s/it]



Test Metrics:
Accuracy: 0.8610
Precision: 0.8617
Recall: 0.8610
F1 Score: 0.8610


In [14]:
# Testing on the MB dataset (2K reviews)

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import accuracy_score, classification_report, precision_score, recall_score, f1_score
from tqdm import tqdm
import json
from tokenizers import Tokenizer
import warnings
warnings.filterwarnings("ignore", category=UserWarning, message="The PyTorch API of nested tensors")

# =======================
# Load CONFIG
# =======================
# Settings shared by the model definition, the data loading and the evaluation loop.
CONFIG = {
    # Encoder hyper-parameters; they must match the checkpoint being loaded.
    "d_model": 512,
    "nhead": 8,
    "num_layers": 6,
    "dim_feedforward": 2048,
    "batch_size": 16,  # batch size handed to the DataLoader
    "max_seq_len": 768,  # number of rows in the learned positional-embedding table
    "device": "cuda" if torch.cuda.is_available() else "cpu",  # GPU when one is visible
    "tokenizer_path": "movie_review_tokenizer.json",  # read only to derive vocab_size
    "model_path": "epoch_62.pt",  # state dict restored into SentimentClassifier
    # Pre-tokenized, padded test tensors (loaded with torch.load).
    "test_ids": "MB/MB_padded_token_ids.pt",
    "test_attention_masks": "MB/MB_padded_attention_masks.pt",
    "test_sentiment": "MB/MB_sentiment_labels.pt"
}

# =======================
# 1. Classification Model Architecture
# =======================
class HybridClassificationHead(nn.Module):
    """Classification head that fuses mean pooling with a CNN max-pool branch.

    The sequence-axis mean of the token embeddings is concatenated with the
    global max of a ReLU-activated 1-D convolution over the same embeddings,
    and the doubled-width vector is fed to a two-layer MLP with dropout.

    The attribute names (``cnn``, ``classifier``) double as ``state_dict``
    keys, so they have to stay as they are for checkpoints to load.

    Args:
        hidden_size: Width of each token embedding.
        num_classes: Number of output logits.
    """

    def __init__(self, hidden_size=CONFIG["d_model"], num_classes=2):
        super().__init__()
        self.cnn = nn.Conv1d(
            in_channels=hidden_size,
            out_channels=hidden_size,
            kernel_size=3,
            padding=1,
        )
        self.relu = nn.ReLU()
        self.pool = nn.AdaptiveMaxPool1d(output_size=1)
        mlp_layers = [
            nn.Dropout(p=0.3),
            nn.Linear(2 * hidden_size, hidden_size),
            nn.ReLU(),
            nn.Dropout(p=0.3),
            nn.Linear(hidden_size, num_classes),
        ]
        self.classifier = nn.Sequential(*mlp_layers)

    def forward(self, token_embeddings):
        """Return class logits for ``token_embeddings`` (batch, seq, hidden)."""
        # NOTE(review): both pooling branches run over every position; this
        # method receives no mask, so padded positions take part as well.
        averaged = token_embeddings.mean(dim=1)
        # Conv1d wants channels before length, hence the transpose.
        channels_first = token_embeddings.transpose(1, 2)
        activated = self.relu(self.cnn(channels_first))
        peaks = self.pool(activated).squeeze(-1)
        fused = torch.cat((averaged, peaks), dim=-1)
        return self.classifier(fused)

class CustomMLM(nn.Module):
    """Transformer encoder backbone with learned token and position embeddings.

    Args:
        config: Mapping providing ``d_model``, ``nhead``, ``num_layers``,
            ``dim_feedforward`` and ``max_seq_len``. ``vocab_size`` is
            optional: when it is missing it is read from the tokenizer file at
            ``config["tokenizer_path"]`` and stored back into ``config``.
    """

    def __init__(self, config):
        super().__init__()
        if "vocab_size" not in config:
            # The derived size is written into the caller's dict on purpose;
            # this side effect is kept for backward compatibility.
            tokenizer = Tokenizer.from_file(config["tokenizer_path"])
            config["vocab_size"] = tokenizer.get_vocab_size()

        self.embedding = nn.Embedding(config["vocab_size"], config["d_model"])
        self.pos_encoder = nn.Embedding(config["max_seq_len"], config["d_model"])
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=config["d_model"],
            nhead=config["nhead"],
            dim_feedforward=config["dim_feedforward"],
            batch_first=True,
        )
        self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=config["num_layers"])
        self.config = config

    def forward(self, input_ids, attention_mask):
        """Encode a batch of token ids.

        Args:
            input_ids: Integer tensor of token ids, shaped (batch, seq).
            attention_mask: Tensor of the same shape; truthy entries mark
                positions to attend to. Its logical inverse is passed to the
                encoder as ``src_key_padding_mask``.

        Returns:
            The encoder output for the batch.

        Raises:
            IndexError: If the sequence is longer than the positional table.
        """
        seq_len = input_ids.size(1)
        max_positions = self.pos_encoder.num_embeddings
        if seq_len > max_positions:
            # Fail early with a readable message instead of an opaque
            # out-of-range lookup inside the embedding kernel.
            raise IndexError(
                f"input sequence length {seq_len} exceeds max_seq_len={max_positions} "
                "(size of the positional-embedding table)"
            )
        positions = torch.arange(seq_len, device=input_ids.device).unsqueeze(0)
        embeddings = self.embedding(input_ids) + self.pos_encoder(positions)
        embeddings = self.encoder(embeddings, src_key_padding_mask=~attention_mask.bool())
        return embeddings

class SentimentClassifier(nn.Module):
    """Sentiment classifier: an encoder backbone followed by a pooling head.

    Args:
        mlm_model: Encoder module called as ``mlm_model(input_ids,
            attention_mask)``; its output is passed straight to the head.
    """

    def __init__(self, mlm_model):
        super().__init__()
        self.mlm = mlm_model
        # Size the head from the encoder's own config when it exposes one, so
        # the two cannot silently disagree; otherwise keep the head's default.
        try:
            hidden_size = mlm_model.config["d_model"]
        except (AttributeError, KeyError, TypeError):
            self.head = HybridClassificationHead()
        else:
            self.head = HybridClassificationHead(hidden_size=hidden_size)

    def forward(self, input_ids, attention_mask):
        """Return the head's logits for the encoded batch."""
        embeddings = self.mlm(input_ids, attention_mask)
        return self.head(embeddings)

# =======================
# 2. Load model and data
# =======================
class SentimentDataset(Dataset):
    """Map-style dataset over pre-tokenized reviews and their labels.

    The three containers are indexed in parallel; the dataset length is taken
    from ``labels``.
    """

    def __init__(self, token_ids, attention_mask, labels):
        self.token_ids, self.attention_mask, self.labels = token_ids, attention_mask, labels

    def __len__(self):
        n_samples = len(self.labels)
        return n_samples

    def __getitem__(self, idx):
        """Return sample ``idx`` as a dict of ``input_ids``, ``attention_mask`` and ``labels``."""
        ids = self.token_ids[idx].long()
        mask = self.attention_mask[idx]  # handed over with its stored dtype
        # clone().detach() gives the caller a copy independent of the stored labels.
        label = self.labels[idx].clone().detach().long()
        return {"input_ids": ids, "attention_mask": mask, "labels": label}

# Loading model
# Build the architecture on the target device, then restore the trained weights.
mlm_model = CustomMLM(CONFIG).to(CONFIG["device"])
model = SentimentClassifier(mlm_model).to(CONFIG["device"])
# NOTE(security): torch.load unpickles the file, so only load checkpoints and
# tensors that come from a trusted source.
model.load_state_dict(torch.load(CONFIG["model_path"], map_location=torch.device(CONFIG["device"])))
model.eval()

# Loading test dataset
# The tensors are mapped to the CPU so files saved from a GPU session still
# load on a CPU-only machine; batches are moved to the device during evaluation.
test_dataset = SentimentDataset(
    torch.load(CONFIG["test_ids"], map_location="cpu"),
    torch.load(CONFIG["test_attention_masks"], map_location="cpu"),
    torch.load(CONFIG["test_sentiment"], map_location="cpu"),
)
test_loader = DataLoader(test_dataset, batch_size=CONFIG["batch_size"])

# =======================
# 3. Evaluation
# =======================
def evaluate_model(model, test_loader):
    """Run ``model`` over ``test_loader`` and return classification metrics.

    Args:
        model: Module called as ``model(input_ids, attention_mask)`` that
            returns per-class logits.
        test_loader: Iterable of batches, each a dict holding ``"input_ids"``,
            ``"attention_mask"`` and ``"labels"`` tensors.

    Returns:
        Dict with ``accuracy``, ``precision``, ``recall`` and ``f1``; the last
        three are support-weighted averages over the classes.
    """
    model.eval()
    # Follow the model's own device; fall back to CONFIG for a model that has
    # no parameters to inspect.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device(CONFIG["device"])

    all_preds = []
    all_labels = []

    with torch.no_grad():
        for batch in tqdm(test_loader, desc="Evaluating"):
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)

            logits = model(input_ids, attention_mask)
            preds = torch.argmax(logits, dim=-1)

            all_preds.extend(preds.cpu().tolist())
            # Labels are only compared on the host, so they skip the device round trip.
            all_labels.extend(batch["labels"].cpu().tolist())

    # zero_division=0 yields the same value scikit-learn reports by default for
    # an undefined per-class score, without emitting the ill-defined warning.
    averaging = {"average": "weighted", "zero_division": 0}
    return {
        "accuracy": accuracy_score(all_labels, all_preds),
        "precision": precision_score(all_labels, all_preds, **averaging),
        "recall": recall_score(all_labels, all_preds, **averaging),
        "f1": f1_score(all_labels, all_preds, **averaging),
    }

# Evaluate the loaded model on the test loader and report each metric on its own line.
metrics = evaluate_model(model, test_loader)
print("\nTest Metrics:")
print(
    f"Accuracy: {metrics['accuracy']:.4f}",
    f"Precision: {metrics['precision']:.4f}",
    f"Recall: {metrics['recall']:.4f}",
    f"F1 Score: {metrics['f1']:.4f}",
    sep="\n",
)

Evaluating: 100%|████████████████████████████████████████████████████████████████████| 125/125 [18:06<00:00,  8.70s/it]


Test Metrics:
Accuracy: 0.7715
Precision: 0.7868
Recall: 0.7715
F1 Score: 0.7684





In [24]:
# Testing on the Amazon dataset (25K reviews)

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import accuracy_score, classification_report, precision_score, recall_score, f1_score
from tqdm import tqdm
import json
from tokenizers import Tokenizer
import warnings
warnings.filterwarnings("ignore", category=UserWarning, message="The PyTorch API of nested tensors")

# =======================
# Load CONFIG
# =======================
# Settings shared by the model definition, the data loading and the evaluation loop.
CONFIG = {
    # Encoder hyper-parameters; they must match the checkpoint being loaded.
    "d_model": 512,
    "nhead": 8,
    "num_layers": 6,
    "dim_feedforward": 2048,
    "batch_size": 16,  # batch size handed to the DataLoader
    "max_seq_len": 768,  # number of rows in the learned positional-embedding table
    "device": "cuda" if torch.cuda.is_available() else "cpu",  # GPU when one is visible
    "tokenizer_path": "movie_review_tokenizer.json",  # read only to derive vocab_size
    "model_path": "epoch_62.pt",  # state dict restored into SentimentClassifier
    # Pre-tokenized, padded test tensors (loaded with torch.load).
    "test_ids": "Amazon/A_padded_token_ids.pt",
    "test_attention_masks": "Amazon/A_padded_attention_masks.pt",
    "test_sentiment": "Amazon/A_sentiment_labels.pt"
}

# =======================
# 1. Classification Model Architecture
# =======================
class HybridClassificationHead(nn.Module):
    """Classification head that fuses mean pooling with a CNN max-pool branch.

    The sequence-axis mean of the token embeddings is concatenated with the
    global max of a ReLU-activated 1-D convolution over the same embeddings,
    and the doubled-width vector is fed to a two-layer MLP with dropout.

    The attribute names (``cnn``, ``classifier``) double as ``state_dict``
    keys, so they have to stay as they are for checkpoints to load.

    Args:
        hidden_size: Width of each token embedding.
        num_classes: Number of output logits.
    """

    def __init__(self, hidden_size=CONFIG["d_model"], num_classes=2):
        super().__init__()
        self.cnn = nn.Conv1d(
            in_channels=hidden_size,
            out_channels=hidden_size,
            kernel_size=3,
            padding=1,
        )
        self.relu = nn.ReLU()
        self.pool = nn.AdaptiveMaxPool1d(output_size=1)
        mlp_layers = [
            nn.Dropout(p=0.3),
            nn.Linear(2 * hidden_size, hidden_size),
            nn.ReLU(),
            nn.Dropout(p=0.3),
            nn.Linear(hidden_size, num_classes),
        ]
        self.classifier = nn.Sequential(*mlp_layers)

    def forward(self, token_embeddings):
        """Return class logits for ``token_embeddings`` (batch, seq, hidden)."""
        # NOTE(review): both pooling branches run over every position; this
        # method receives no mask, so padded positions take part as well.
        averaged = token_embeddings.mean(dim=1)
        # Conv1d wants channels before length, hence the transpose.
        channels_first = token_embeddings.transpose(1, 2)
        activated = self.relu(self.cnn(channels_first))
        peaks = self.pool(activated).squeeze(-1)
        fused = torch.cat((averaged, peaks), dim=-1)
        return self.classifier(fused)

class CustomMLM(nn.Module):
    """Transformer encoder backbone with learned token and position embeddings.

    Args:
        config: Mapping providing ``d_model``, ``nhead``, ``num_layers``,
            ``dim_feedforward`` and ``max_seq_len``. ``vocab_size`` is
            optional: when it is missing it is read from the tokenizer file at
            ``config["tokenizer_path"]`` and stored back into ``config``.
    """

    def __init__(self, config):
        super().__init__()
        if "vocab_size" not in config:
            # The derived size is written into the caller's dict on purpose;
            # this side effect is kept for backward compatibility.
            tokenizer = Tokenizer.from_file(config["tokenizer_path"])
            config["vocab_size"] = tokenizer.get_vocab_size()

        self.embedding = nn.Embedding(config["vocab_size"], config["d_model"])
        self.pos_encoder = nn.Embedding(config["max_seq_len"], config["d_model"])
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=config["d_model"],
            nhead=config["nhead"],
            dim_feedforward=config["dim_feedforward"],
            batch_first=True,
        )
        self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=config["num_layers"])
        self.config = config

    def forward(self, input_ids, attention_mask):
        """Encode a batch of token ids.

        Args:
            input_ids: Integer tensor of token ids, shaped (batch, seq).
            attention_mask: Tensor of the same shape; truthy entries mark
                positions to attend to. Its logical inverse is passed to the
                encoder as ``src_key_padding_mask``.

        Returns:
            The encoder output for the batch.

        Raises:
            IndexError: If the sequence is longer than the positional table.
        """
        seq_len = input_ids.size(1)
        max_positions = self.pos_encoder.num_embeddings
        if seq_len > max_positions:
            # Fail early with a readable message instead of an opaque
            # out-of-range lookup inside the embedding kernel.
            raise IndexError(
                f"input sequence length {seq_len} exceeds max_seq_len={max_positions} "
                "(size of the positional-embedding table)"
            )
        positions = torch.arange(seq_len, device=input_ids.device).unsqueeze(0)
        embeddings = self.embedding(input_ids) + self.pos_encoder(positions)
        embeddings = self.encoder(embeddings, src_key_padding_mask=~attention_mask.bool())
        return embeddings

class SentimentClassifier(nn.Module):
    """Sentiment classifier: an encoder backbone followed by a pooling head.

    Args:
        mlm_model: Encoder module called as ``mlm_model(input_ids,
            attention_mask)``; its output is passed straight to the head.
    """

    def __init__(self, mlm_model):
        super().__init__()
        self.mlm = mlm_model
        # Size the head from the encoder's own config when it exposes one, so
        # the two cannot silently disagree; otherwise keep the head's default.
        try:
            hidden_size = mlm_model.config["d_model"]
        except (AttributeError, KeyError, TypeError):
            self.head = HybridClassificationHead()
        else:
            self.head = HybridClassificationHead(hidden_size=hidden_size)

    def forward(self, input_ids, attention_mask):
        """Return the head's logits for the encoded batch."""
        embeddings = self.mlm(input_ids, attention_mask)
        return self.head(embeddings)

# =======================
# 2. Load model and data
# =======================
class SentimentDataset(Dataset):
    """Map-style dataset over pre-tokenized reviews and their labels.

    The three containers are indexed in parallel; the dataset length is taken
    from ``labels``.
    """

    def __init__(self, token_ids, attention_mask, labels):
        self.token_ids, self.attention_mask, self.labels = token_ids, attention_mask, labels

    def __len__(self):
        n_samples = len(self.labels)
        return n_samples

    def __getitem__(self, idx):
        """Return sample ``idx`` as a dict of ``input_ids``, ``attention_mask`` and ``labels``."""
        ids = self.token_ids[idx].long()
        mask = self.attention_mask[idx]  # handed over with its stored dtype
        # clone().detach() gives the caller a copy independent of the stored labels.
        label = self.labels[idx].clone().detach().long()
        return {"input_ids": ids, "attention_mask": mask, "labels": label}

# Loading model
# Build the architecture on the target device, then restore the trained weights.
mlm_model = CustomMLM(CONFIG).to(CONFIG["device"])
model = SentimentClassifier(mlm_model).to(CONFIG["device"])
# NOTE(security): torch.load unpickles the file, so only load checkpoints and
# tensors that come from a trusted source.
model.load_state_dict(torch.load(CONFIG["model_path"], map_location=torch.device(CONFIG["device"])))
model.eval()

# Loading test dataset
# The tensors are mapped to the CPU so files saved from a GPU session still
# load on a CPU-only machine; batches are moved to the device during evaluation.
test_dataset = SentimentDataset(
    torch.load(CONFIG["test_ids"], map_location="cpu"),
    torch.load(CONFIG["test_attention_masks"], map_location="cpu"),
    torch.load(CONFIG["test_sentiment"], map_location="cpu"),
)
test_loader = DataLoader(test_dataset, batch_size=CONFIG["batch_size"])

# =======================
# 3. Evaluation
# =======================
def evaluate_model(model, test_loader):
    """Run ``model`` over ``test_loader`` and return classification metrics.

    Args:
        model: Module called as ``model(input_ids, attention_mask)`` that
            returns per-class logits.
        test_loader: Iterable of batches, each a dict holding ``"input_ids"``,
            ``"attention_mask"`` and ``"labels"`` tensors.

    Returns:
        Dict with ``accuracy``, ``precision``, ``recall`` and ``f1``; the last
        three are support-weighted averages over the classes.
    """
    model.eval()
    # Follow the model's own device; fall back to CONFIG for a model that has
    # no parameters to inspect.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device(CONFIG["device"])

    all_preds = []
    all_labels = []

    with torch.no_grad():
        for batch in tqdm(test_loader, desc="Evaluating"):
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)

            logits = model(input_ids, attention_mask)
            preds = torch.argmax(logits, dim=-1)

            all_preds.extend(preds.cpu().tolist())
            # Labels are only compared on the host, so they skip the device round trip.
            all_labels.extend(batch["labels"].cpu().tolist())

    # zero_division=0 yields the same value scikit-learn reports by default for
    # an undefined per-class score, without emitting the ill-defined warning.
    averaging = {"average": "weighted", "zero_division": 0}
    return {
        "accuracy": accuracy_score(all_labels, all_preds),
        "precision": precision_score(all_labels, all_preds, **averaging),
        "recall": recall_score(all_labels, all_preds, **averaging),
        "f1": f1_score(all_labels, all_preds, **averaging),
    }

# Evaluate the loaded model on the test loader and report each metric on its own line.
metrics = evaluate_model(model, test_loader)
print("\nTest Metrics:")
print(
    f"Accuracy: {metrics['accuracy']:.4f}",
    f"Precision: {metrics['precision']:.4f}",
    f"Recall: {metrics['recall']:.4f}",
    f"F1 Score: {metrics['f1']:.4f}",
    sep="\n",
)

Evaluating: 100%|████████████████████████████████████████████████████████████████| 1563/1563 [3:58:47<00:00,  9.17s/it]



Test Metrics:
Accuracy: 0.5080
Precision: 0.6806
Recall: 0.5080
F1 Score: 0.3535
