**PREPROCESSING**

In [4]:
!pip uninstall -y numpy gensim
!pip install --no-cache-dir numpy==1.23.5 gensim

Found existing installation: numpy 1.26.4
Uninstalling numpy-1.26.4:
  Successfully uninstalled numpy-1.26.4
Found existing installation: gensim 4.3.3
Uninstalling gensim-4.3.3:
  Successfully uninstalled gensim-4.3.3
Collecting numpy==1.23.5
  Downloading numpy-1.23.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.3 kB)
Collecting gensim
  Downloading gensim-4.3.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.1 kB)
Downloading numpy-1.23.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.1/17.1 MB[0m [31m239.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading gensim-4.3.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (26.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m26.7/26.7 MB[0m [31m284.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: numpy, gensim
[31mERROR: pip's dependency resolver does not

In [3]:
import re
import numpy as np
import gensim.downloader as api
from gensim.utils import simple_preprocess
from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS

word2vec_model = api.load("word2vec-google-news-300")



In [6]:
import pandas as pd
file_path = 'Combined Data.csv'
df = pd.read_csv(file_path)
def preprocess_and_tokenize(text):
    if not isinstance(text, str):
        return []

    text = re.sub(r"[^A-Za-z0-9\s]", "", text.lower())
    tokens = simple_preprocess(text, deacc=True)
    tokens = [word for word in tokens if word not in ENGLISH_STOP_WORDS]
    return tokens

df['tokens'] = df['statement'].apply(preprocess_and_tokenize)
def get_average_word2vec(tokens, model, vector_size=300):
    vectors = [model[word] for word in tokens if word in model]
    return np.mean(vectors, axis=0) if vectors else np.zeros(vector_size)

df['word2vec_vector'] = df['tokens'].apply(lambda x: get_average_word2vec(x, word2vec_model))


df['status'] = df['status'].apply(lambda x: 0 if x == 'Normal' else 1)

df['status'].value_counts()

Unnamed: 0_level_0,count
status,Unnamed: 1_level_1
1,36692
0,16351


**DATA SPLIT**

In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
import torch
import numpy as np

X = np.stack(df['word2vec_vector'].values)
y = df['status'].values

X_tensor = torch.tensor(X, dtype=torch.float32)
y_tensor = torch.tensor(y, dtype=torch.long)
X_train, X_test, y_train, y_test = train_test_split(X_tensor, y_tensor, test_size=0.2, random_state=42)

train_data = TensorDataset(X_train, y_train)
test_data = TensorDataset(X_test, y_test)

train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False)



**TRANSFORMER BASELINE**

In [14]:
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.d_model = d_model
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-torch.log(torch.tensor(10000.0)) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        self.pe = pe.unsqueeze(0)

    def forward(self, x):
        if x.shape[-1] != self.d_model:
            raise ValueError(f"Input has wrong last dimension {x.shape[-1]}, expected {self.d_model}")
        return x + self.pe[:, :x.size(1), :].to(x.device)

class TransformerModel(nn.Module):
    def __init__(self, input_size, d_model=128, num_heads=4, num_layers=2, num_classes=1):
        super(TransformerModel, self).__init__()

        self.embedding = nn.Linear(input_size, d_model)
        self.pos_encoder = PositionalEncoding(d_model)

        encoder_layers = nn.TransformerEncoderLayer(d_model=d_model, nhead=num_heads, dim_feedforward=256, dropout=0.1, batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers=num_layers)

        self.fc = nn.Linear(d_model, num_classes)
        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
      x = self.embedding(x)
      x = x.unsqueeze(1)
      x = self.pos_encoder(x)
      x = self.transformer_encoder(x)

      x = x.mean(dim=1)
      x = self.dropout(F.relu(x))
      x = torch.sigmoid(self.fc(x))
      return x

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

input_size = X_train.shape[1]
model = TransformerModel(input_size=input_size).to(device)

criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.float().unsqueeze(1).to(device)

        optimizer.zero_grad()
        outputs = model(inputs)

        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        predicted = (outputs > 0.5).float()
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

        running_loss += loss.item()

    accuracy = 100 * correct / total
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}, Accuracy: {accuracy:.2f}%")

model.eval()
correct = 0
total = 0
test_loss = 0.0

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.float().unsqueeze(1).to(device)

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        test_loss += loss.item()

        predicted = (outputs > 0.5).float()
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

test_accuracy = 100 * correct / total
print(f"Test Loss: {test_loss/len(test_loader):.4f}, Test Accuracy: {test_accuracy:.2f}%")

Epoch [1/10], Loss: 0.3614, Accuracy: 85.78%
Epoch [2/10], Loss: 0.3231, Accuracy: 87.77%
Epoch [3/10], Loss: 0.3109, Accuracy: 88.16%
Epoch [4/10], Loss: 0.2864, Accuracy: 89.04%
Epoch [5/10], Loss: 0.2771, Accuracy: 89.42%
Epoch [6/10], Loss: 0.2597, Accuracy: 89.89%
Epoch [7/10], Loss: 0.2440, Accuracy: 90.53%
Epoch [8/10], Loss: 0.2372, Accuracy: 90.74%
Epoch [9/10], Loss: 0.2305, Accuracy: 90.84%
Epoch [10/10], Loss: 0.2262, Accuracy: 91.16%
Test Loss: 0.2352, Test Accuracy: 90.32%


**Focal Loss**

In [17]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

class FocalLoss(nn.Module):
    def __init__(self, alpha=0.25, gamma=2.0, reduction='mean'):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction

    def forward(self, inputs, targets):
        bce_loss = F.binary_cross_entropy(inputs, targets, reduction="none")
        p_t = inputs * targets + (1 - inputs) * (1 - targets)
        focal_loss = self.alpha * (1 - p_t) ** self.gamma * bce_loss

        if self.reduction == "mean":
            return focal_loss.mean()
        elif self.reduction == "sum":
            return focal_loss.sum()
        return focal_loss

class TransformerModel(nn.Module):
    def __init__(self, input_size, d_model=128, num_heads=4, num_layers=2, num_classes=1):
        super(TransformerModel, self).__init__()
        self.embedding = nn.Linear(input_size, d_model)
        self.pos_encoder = PositionalEncoding(d_model)
        encoder_layers = nn.TransformerEncoderLayer(d_model=d_model, nhead=num_heads, dim_feedforward=256, dropout=0.1, batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers=num_layers)
        self.fc = nn.Linear(d_model, num_classes)
        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        x = self.embedding(x)
        x = x.unsqueeze(1)
        x = self.pos_encoder(x)
        x = self.transformer_encoder(x)
        x = x.mean(dim=1)
        x = self.dropout(F.relu(x))
        x = torch.sigmoid(self.fc(x))
        return x

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
input_size = X_train.shape[1]
model = TransformerModel(input_size=input_size).to(device)
criterion = FocalLoss(alpha=0.25, gamma=2.0)
optimizer = optim.Adam(model.parameters(), lr=0.001)
num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.float().unsqueeze(1).to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        predicted = (outputs > 0.5).float()
        correct += (predicted == labels).sum().item()
        total += labels.size(0)
        running_loss += loss.item()

    accuracy = 100 * correct / total
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}, Accuracy: {accuracy:.2f}%")

model.eval()
correct = 0
total = 0
test_loss = 0.0

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.float().unsqueeze(1).to(device)
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        test_loss += loss.item()
        predicted = (outputs > 0.5).float()
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

test_accuracy = 100 * correct / total
print(f"Test Loss: {test_loss/len(test_loader):.4f}, Test Accuracy: {test_accuracy:.2f}%")


Epoch [1/10], Loss: 0.0244, Accuracy: 85.22%
Epoch [2/10], Loss: 0.0217, Accuracy: 87.67%
Epoch [3/10], Loss: 0.0211, Accuracy: 88.00%
Epoch [4/10], Loss: 0.0211, Accuracy: 87.94%
Epoch [5/10], Loss: 0.0206, Accuracy: 88.37%
Epoch [6/10], Loss: 0.0184, Accuracy: 89.35%
Epoch [7/10], Loss: 0.0173, Accuracy: 90.06%
Epoch [8/10], Loss: 0.0165, Accuracy: 90.48%
Epoch [9/10], Loss: 0.0158, Accuracy: 90.76%
Epoch [10/10], Loss: 0.0155, Accuracy: 91.13%
Test Loss: 0.0164, Test Accuracy: 90.56%


**WEIGHTED BCE**

In [21]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np

class TransformerModel(nn.Module):
    def __init__(self, input_size, d_model=128, num_heads=4, num_layers=2, num_classes=1):
        super(TransformerModel, self).__init__()
        self.embedding = nn.Linear(input_size, d_model)
        encoder_layers = nn.TransformerEncoderLayer(d_model=d_model, nhead=num_heads, dim_feedforward=256, dropout=0.1, batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers=num_layers)
        self.fc = nn.Linear(d_model, num_classes)
        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        x = self.embedding(x)
        x = x.unsqueeze(1)
        x = self.transformer_encoder(x)
        x = x.mean(dim=1)
        x = self.dropout(F.relu(x))
        x = self.fc(x)
        return x

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
input_size = X_train.shape[1]
model = TransformerModel(input_size=input_size).to(device)

labels_np = np.concatenate([labels.numpy() for _, labels in train_loader])
num_pos = np.sum(labels_np == 1)
num_neg = np.sum(labels_np == 0)
total_samples = num_pos + num_neg

weight_pos = total_samples / (2 * num_pos)
weight_neg = total_samples / (2 * num_neg)
class_weights = torch.tensor([weight_neg, weight_pos], dtype=torch.float).to(device)

pos_weight = class_weights[1]
criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.float().unsqueeze(1).to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        predicted = (torch.sigmoid(outputs) > 0.5).float()
        correct += (predicted == labels).sum().item()
        total += labels.size(0)
        running_loss += loss.item()

    accuracy = 100 * correct / total
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}, Accuracy: {accuracy:.2f}%")

model.eval()
correct = 0
total = 0
test_loss = 0.0

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.float().unsqueeze(1).to(device)
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        test_loss += loss.item()
        predicted = (torch.sigmoid(outputs) > 0.5).float()
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

test_accuracy = 100 * correct / total
print(f"Test Loss: {test_loss/len(test_loader):.4f}, Test Accuracy: {test_accuracy:.2f}%")


Epoch [1/10], Loss: 0.2292, Accuracy: 89.32%
Epoch [2/10], Loss: 0.1891, Accuracy: 91.44%
Epoch [3/10], Loss: 0.1786, Accuracy: 91.93%
Epoch [4/10], Loss: 0.1743, Accuracy: 91.85%
Epoch [5/10], Loss: 0.1630, Accuracy: 92.52%
Epoch [6/10], Loss: 0.1585, Accuracy: 92.82%
Epoch [7/10], Loss: 0.1566, Accuracy: 92.91%
Epoch [8/10], Loss: 0.1543, Accuracy: 92.86%
Epoch [9/10], Loss: 0.1494, Accuracy: 93.26%
Epoch [10/10], Loss: 0.1492, Accuracy: 93.03%
Test Loss: 0.1626, Test Accuracy: 92.70%


**BATCH SIZE VARIATION**

In [23]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset

class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.d_model = d_model
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-torch.log(torch.tensor(10000.0)) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        self.pe = pe.unsqueeze(0)

    def forward(self, x):
        if x.shape[-1] != self.d_model:
            raise ValueError(f"Input has wrong last dimension {x.shape[-1]}, expected {self.d_model}")
        return x + self.pe[:, :x.size(1), :].to(x.device)

class TransformerModel(nn.Module):
    def __init__(self, input_size, d_model=128, num_heads=4, num_layers=2, num_classes=1):
        super(TransformerModel, self).__init__()
        self.embedding = nn.Linear(input_size, d_model)
        self.pos_encoder = PositionalEncoding(d_model)
        encoder_layers = nn.TransformerEncoderLayer(d_model=d_model, nhead=num_heads, dim_feedforward=256, dropout=0.1, batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers=num_layers)
        self.fc = nn.Linear(d_model, num_classes)
        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        x = self.embedding(x)
        x = x.unsqueeze(1)
        x = self.pos_encoder(x)
        x = self.transformer_encoder(x)
        x = x.mean(dim=1)
        x = self.dropout(F.relu(x))
        x = self.fc(x)
        return x

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)

batch_sizes = [32, 64, 256, 1024, 3200]
num_epochs = 10

for batch_size in batch_sizes:
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    model = TransformerModel(input_size=X_train.shape[1]).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.float().unsqueeze(1).to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            predicted = (torch.sigmoid(outputs) > 0.5).float()
            correct += (predicted == labels).sum().item()
            total += labels.size(0)
            running_loss += loss.item()

        accuracy = 100 * correct / total
        print(f"Batch Size: {batch_size}, Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}, Accuracy: {accuracy:.2f}%")

    model.eval()
    correct = 0
    total = 0
    test_loss = 0.0

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.float().unsqueeze(1).to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            test_loss += loss.item()
            predicted = (torch.sigmoid(outputs) > 0.5).float()
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    test_accuracy = 100 * correct / total
    print(f"Batch Size: {batch_size}, Test Loss: {test_loss/len(test_loader):.4f}, Test Accuracy: {test_accuracy:.2f}%")


Batch Size: 32, Epoch [1/10], Loss: 0.3613, Accuracy: 85.89%
Batch Size: 32, Epoch [2/10], Loss: 0.3220, Accuracy: 87.72%
Batch Size: 32, Epoch [3/10], Loss: 0.3148, Accuracy: 87.87%
Batch Size: 32, Epoch [4/10], Loss: 0.2898, Accuracy: 88.95%
Batch Size: 32, Epoch [5/10], Loss: 0.2754, Accuracy: 89.54%
Batch Size: 32, Epoch [6/10], Loss: 0.2642, Accuracy: 89.80%
Batch Size: 32, Epoch [7/10], Loss: 0.2578, Accuracy: 90.14%
Batch Size: 32, Epoch [8/10], Loss: 0.2462, Accuracy: 90.46%
Batch Size: 32, Epoch [9/10], Loss: 0.2383, Accuracy: 90.69%
Batch Size: 32, Epoch [10/10], Loss: 0.2327, Accuracy: 90.96%
Batch Size: 32, Test Loss: 0.2520, Test Accuracy: 90.07%
Batch Size: 64, Epoch [1/10], Loss: 0.3626, Accuracy: 85.78%
Batch Size: 64, Epoch [2/10], Loss: 0.3151, Accuracy: 88.00%
Batch Size: 64, Epoch [3/10], Loss: 0.2731, Accuracy: 89.57%
Batch Size: 64, Epoch [4/10], Loss: 0.2515, Accuracy: 90.34%
Batch Size: 64, Epoch [5/10], Loss: 0.2350, Accuracy: 90.83%
Batch Size: 64, Epoch [6/10