**Preprocessing**

In [None]:
!pip install gensim nltk



In [None]:
import pandas as pd
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import re
from gensim.models import Word2Vec

In [None]:
file_path = 'Combined Data.csv'
df = pd.read_csv(file_path)
nltk.download('punkt_tab')
nltk.download('wordnet')
nltk.download('stopwords')

lemmatizer = WordNetLemmatizer()

stop_words = set(stopwords.words('english'))

def preprocess_and_tokenize(text):
    if not isinstance(text, str):
        text = ""

    text = re.sub(r'[^A-Za-z0-9\s]', '', text)

    tokens = word_tokenize(text.lower())

    lemmatized_tokens = [lemmatizer.lemmatize(token) for token in tokens]

    return lemmatized_tokens

df['tokens'] = df['statement'].apply(preprocess_and_tokenize)

df[['statement', 'tokens']].head()

sentences = df['tokens'].tolist()

word2vec_model = Word2Vec.load("fine_tuned_word2vec.model")

import numpy as np

def get_average_word2vec(tokens, model, vector_size=300):
    vectors = []

    for word in tokens:
        if word in model.key_to_index:
            vectors.append(model[word])

    if vectors:
        return np.mean(vectors, axis=0)
    else:
        return np.zeros(vector_size)

df['word2vec_vector'] = df['tokens'].apply(lambda x: get_average_word2vec(x, word2vec_model.wv))

df['status'] = df['status'].apply(lambda x: 0 if x == 'Normal' else 1)
df['status'].value_counts()




[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Unnamed: 0_level_0,count
status,Unnamed: 1_level_1
1,36692
0,16351


**Data Split**

In [None]:
from sklearn.model_selection import train_test_split
import torch
import numpy as np
X = np.stack(df['word2vec_vector'].values)
y = df['status'].values
X_tensor = torch.tensor(X, dtype=torch.float32)
y_tensor = torch.tensor(y, dtype=torch.long)
X_train, X_test, y_train, y_test = train_test_split(X_tensor, y_tensor, test_size=0.2, random_state=42)
from torch.utils.data import DataLoader, TensorDataset
train_data = TensorDataset(X_train, y_train)
test_data = TensorDataset(X_test, y_test)
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False)

**BASELINE MODEL**

In [None]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class CNNModel(nn.Module):
    def __init__(self, input_size):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool1d(kernel_size=2)
        self.fc1 = nn.Linear(64 * (input_size // 2), 64)
        self.fc2 = nn.Linear(64, 1)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.pool(x)
        x = torch.flatten(x, 1)
        x = self.dropout(F.relu(self.fc1(x)))
        x = torch.sigmoid(self.fc2(x))
        return x

input_size = X_train.shape[1]
model = CNNModel(input_size)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in train_loader:
        inputs = inputs.unsqueeze(1)
        labels = labels.float().unsqueeze(1)

        optimizer.zero_grad()

        outputs = model(inputs)

        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        predicted = (outputs > 0.5).float()
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

        running_loss += loss.item()

    accuracy = 100 * correct / total
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}, Accuracy: {accuracy:.2f}%")

model.eval()
correct = 0
total = 0
y_true = []
y_pred = []

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.unsqueeze(1)
        labels = labels.float().unsqueeze(1)

        outputs = model(inputs)
        predicted = (outputs > 0.5).float()

        correct += (predicted == labels).sum().item()
        total += labels.size(0)

        y_true.extend(labels.numpy())
        y_pred.extend(predicted.numpy())

accuracy = 100 * correct / total
print(f"Test Accuracy: {accuracy:.2f}%")

from sklearn.metrics import classification_report, roc_auc_score

print(classification_report(y_true, y_pred))

roc_auc = roc_auc_score(y_true, y_pred)
print(f"ROC-AUC: {roc_auc:.4f}")






Epoch [1/10], Loss: 0.2612, Accuracy: 89.69%
Epoch [2/10], Loss: 0.2191, Accuracy: 91.65%
Epoch [3/10], Loss: 0.2050, Accuracy: 92.03%
Epoch [4/10], Loss: 0.1962, Accuracy: 92.29%
Epoch [5/10], Loss: 0.1931, Accuracy: 92.53%
Epoch [6/10], Loss: 0.1871, Accuracy: 92.68%
Epoch [7/10], Loss: 0.1812, Accuracy: 93.00%
Epoch [8/10], Loss: 0.1789, Accuracy: 93.04%
Epoch [9/10], Loss: 0.1778, Accuracy: 93.15%
Epoch [10/10], Loss: 0.1766, Accuracy: 93.12%
Test Accuracy: 92.97%
              precision    recall  f1-score   support

         0.0       0.85      0.94      0.89      3327
         1.0       0.97      0.93      0.95      7282

    accuracy                           0.93     10609
   macro avg       0.91      0.93      0.92     10609
weighted avg       0.93      0.93      0.93     10609

ROC-AUC: 0.9318


**FOCAL LOSS**

In [None]:
class CNNModel(nn.Module):
    def __init__(self, input_size):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool1d(kernel_size=2)
        self.fc1 = nn.Linear(64 * (input_size // 2), 64)
        self.fc2 = nn.Linear(64, 1)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.pool(x)
        x = torch.flatten(x, 1)
        x = self.dropout(F.relu(self.fc1(x)))
        x = torch.sigmoid(self.fc2(x))
        return x

input_size = X_train.shape[1]
model = CNNModel(input_size)

class FocalLoss(nn.Module):
    def __init__(self, alpha=0.25, gamma=2.0, reduction='mean'):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction
        self.bce = nn.BCELoss(reduction='none')

    def forward(self, inputs, targets):
        bce_loss = self.bce(inputs, targets)
        p_t = targets * inputs + (1 - targets) * (1 - inputs)  # p_t = P if y=1 else 1-P
        focal_weight = self.alpha * (1 - p_t) ** self.gamma  # Apply gamma factor
        loss = focal_weight * bce_loss

        if self.reduction == 'mean':
            return loss.mean()
        elif self.reduction == 'sum':
            return loss.sum()
        return loss

criterion = FocalLoss(alpha=0.75, gamma=3.0)

optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in train_loader:
        inputs = inputs.unsqueeze(1)
        labels = labels.float().unsqueeze(1)

        optimizer.zero_grad()

        outputs = model(inputs)

        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        predicted = (outputs > 0.5).float()
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

        running_loss += loss.item()

    accuracy = 100 * correct / total
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}, Accuracy: {accuracy:.2f}%")

model.eval()
correct = 0
total = 0
y_true = []
y_pred = []

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.unsqueeze(1)
        labels = labels.float().unsqueeze(1)

        outputs = model(inputs)
        predicted = (outputs > 0.5).float()

        correct += (predicted == labels).sum().item()
        total += labels.size(0)

        y_true.extend(labels.numpy())
        y_pred.extend(predicted.numpy())

accuracy = 100 * correct / total
print(f"Test Accuracy: {accuracy:.2f}%")

from sklearn.metrics import classification_report, roc_auc_score

print(classification_report(y_true, y_pred))

roc_auc = roc_auc_score(y_true, y_pred)
print(f"ROC-AUC: {roc_auc:.4f}")




Epoch [1/10], Loss: 0.0396, Accuracy: 78.25%
Epoch [2/10], Loss: 0.0354, Accuracy: 80.12%
Epoch [3/10], Loss: 0.0321, Accuracy: 85.13%
Epoch [4/10], Loss: 0.0302, Accuracy: 87.15%
Epoch [5/10], Loss: 0.0281, Accuracy: 88.74%
Epoch [6/10], Loss: 0.0265, Accuracy: 89.43%
Epoch [7/10], Loss: 0.0259, Accuracy: 89.74%
Epoch [8/10], Loss: 0.0252, Accuracy: 90.25%
Epoch [9/10], Loss: 0.0247, Accuracy: 90.36%
Epoch [10/10], Loss: 0.0236, Accuracy: 90.76%
Test Accuracy: 92.99%
              precision    recall  f1-score   support

         0.0       0.89      0.89      0.89      3327
         1.0       0.95      0.95      0.95      7282

    accuracy                           0.93     10609
   macro avg       0.92      0.92      0.92     10609
weighted avg       0.93      0.93      0.93     10609

ROC-AUC: 0.9179


**WEIGHTED BCE**

In [None]:
import torch.optim as optim

device='cpu'
pos_weight = torch.tensor([16351 / 36692]).to(device)
criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)

optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in train_loader:
        inputs = inputs.unsqueeze(1)
        labels = labels.float().unsqueeze(1)

        optimizer.zero_grad()

        outputs = model(inputs)

        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        predicted = (outputs > 0.5).float()
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

        running_loss += loss.item()

    accuracy = 100 * correct / total
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}, Accuracy: {accuracy:.2f}%")

model.eval()
correct = 0
total = 0
y_true = []
y_pred = []

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.unsqueeze(1)
        labels = labels.float().unsqueeze(1)

        outputs = model(inputs)
        predicted = (outputs > 0.5).float()

        correct += (predicted == labels).sum().item()
        total += labels.size(0)

        y_true.extend(labels.numpy())
        y_pred.extend(predicted.numpy())

accuracy = 100 * correct / total
print(f"Test Accuracy: {accuracy:.2f}%")

from sklearn.metrics import classification_report, roc_auc_score

print(classification_report(y_true, y_pred))

roc_auc = roc_auc_score(y_true, y_pred)
print(f"ROC-AUC: {roc_auc:.4f}")



Epoch [1/10], Loss: 0.3427, Accuracy: 88.78%
Epoch [2/10], Loss: 0.3379, Accuracy: 89.78%
Epoch [3/10], Loss: 0.3361, Accuracy: 90.26%
Epoch [4/10], Loss: 0.3361, Accuracy: 90.18%
Epoch [5/10], Loss: 0.3344, Accuracy: 90.50%
Epoch [6/10], Loss: 0.3341, Accuracy: 90.56%
Epoch [7/10], Loss: 0.3340, Accuracy: 90.66%
Epoch [8/10], Loss: 0.3341, Accuracy: 90.63%
Epoch [9/10], Loss: 0.3336, Accuracy: 90.89%
Epoch [10/10], Loss: 0.3338, Accuracy: 90.72%
Test Accuracy: 90.59%
              precision    recall  f1-score   support

         0.0       0.79      0.96      0.87      3327
         1.0       0.98      0.88      0.93      7282

    accuracy                           0.91     10609
   macro avg       0.88      0.92      0.90     10609
weighted avg       0.92      0.91      0.91     10609

ROC-AUC: 0.9216


**BATCH SIZE VARIATION**

BATCH_SIZE 32

In [None]:
train_data = TensorDataset(X_train, y_train)
test_data = TensorDataset(X_test, y_test)

train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False)

device='cpu'
criterion = nn.BCELoss()

optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in train_loader:
        inputs = inputs.unsqueeze(1)
        labels = labels.float().unsqueeze(1)

        optimizer.zero_grad()

        outputs = model(inputs)

        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        predicted = (outputs > 0.5).float()
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

        running_loss += loss.item()

    accuracy = 100 * correct / total
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}, Accuracy: {accuracy:.2f}%")

model.eval()
correct = 0
total = 0
y_true = []
y_pred = []

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.unsqueeze(1)
        labels = labels.float().unsqueeze(1)

        outputs = model(inputs)
        predicted = (outputs > 0.5).float()

        correct += (predicted == labels).sum().item()
        total += labels.size(0)

        y_true.extend(labels.numpy())
        y_pred.extend(predicted.numpy())

accuracy = 100 * correct / total
print(f"Test Accuracy: {accuracy:.2f}%")

from sklearn.metrics import classification_report, roc_auc_score

print(classification_report(y_true, y_pred))

roc_auc = roc_auc_score(y_true, y_pred)
print(f"ROC-AUC: {roc_auc:.4f}")



Epoch [1/10], Loss: 0.3921, Accuracy: 90.31%
Epoch [2/10], Loss: 0.2572, Accuracy: 90.88%
Epoch [3/10], Loss: 0.2257, Accuracy: 91.24%
Epoch [4/10], Loss: 0.2112, Accuracy: 91.72%
Epoch [5/10], Loss: 0.2072, Accuracy: 91.78%
Epoch [6/10], Loss: 0.2043, Accuracy: 92.07%
Epoch [7/10], Loss: 0.2025, Accuracy: 92.10%
Epoch [8/10], Loss: 0.1996, Accuracy: 92.20%
Epoch [9/10], Loss: 0.1948, Accuracy: 92.44%
Epoch [10/10], Loss: 0.1935, Accuracy: 92.49%
Test Accuracy: 93.31%
              precision    recall  f1-score   support

         0.0       0.90      0.88      0.89      3327
         1.0       0.95      0.96      0.95      7282

    accuracy                           0.93     10609
   macro avg       0.92      0.92      0.92     10609
weighted avg       0.93      0.93      0.93     10609

ROC-AUC: 0.9198


BATCH_SIZE 64

In [None]:
train_data = TensorDataset(X_train, y_train)
test_data = TensorDataset(X_test, y_test)

train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
test_loader = DataLoader(test_data, batch_size=64, shuffle=False)

device='cpu'
criterion = nn.BCELoss()

optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in train_loader:
        inputs = inputs.unsqueeze(1)
        labels = labels.float().unsqueeze(1)

        optimizer.zero_grad()

        outputs = model(inputs)

        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        predicted = (outputs > 0.5).float()
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

        running_loss += loss.item()

    accuracy = 100 * correct / total
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}, Accuracy: {accuracy:.2f}%")

model.eval()
correct = 0
total = 0
y_true = []
y_pred = []

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.unsqueeze(1)
        labels = labels.float().unsqueeze(1)

        outputs = model(inputs)
        predicted = (outputs > 0.5).float()

        correct += (predicted == labels).sum().item()
        total += labels.size(0)

        y_true.extend(labels.numpy())
        y_pred.extend(predicted.numpy())

accuracy = 100 * correct / total
print(f"Test Accuracy: {accuracy:.2f}%")

from sklearn.metrics import classification_report, roc_auc_score

print(classification_report(y_true, y_pred))

roc_auc = roc_auc_score(y_true, y_pred)
print(f"ROC-AUC: {roc_auc:.4f}")



Epoch [1/10], Loss: 0.1669, Accuracy: 93.54%
Epoch [2/10], Loss: 0.1646, Accuracy: 93.65%
Epoch [3/10], Loss: 0.1647, Accuracy: 93.67%
Epoch [4/10], Loss: 0.1613, Accuracy: 93.74%
Epoch [5/10], Loss: 0.1610, Accuracy: 93.80%
Epoch [6/10], Loss: 0.1568, Accuracy: 93.92%
Epoch [7/10], Loss: 0.1600, Accuracy: 93.75%
Epoch [8/10], Loss: 0.1566, Accuracy: 93.97%
Epoch [9/10], Loss: 0.1543, Accuracy: 94.08%
Epoch [10/10], Loss: 0.1564, Accuracy: 94.05%
Test Accuracy: 93.35%
              precision    recall  f1-score   support

         0.0       0.91      0.87      0.89      3327
         1.0       0.94      0.96      0.95      7282

    accuracy                           0.93     10609
   macro avg       0.93      0.92      0.92     10609
weighted avg       0.93      0.93      0.93     10609

ROC-AUC: 0.9162


BATCH SIZE 256

In [None]:
train_data = TensorDataset(X_train, y_train)
test_data = TensorDataset(X_test, y_test)

train_loader = DataLoader(train_data, batch_size=256, shuffle=True)
test_loader = DataLoader(test_data, batch_size=256, shuffle=False)

device='cpu'
criterion = nn.BCELoss()

optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in train_loader:
        inputs = inputs.unsqueeze(1)
        labels = labels.float().unsqueeze(1)

        optimizer.zero_grad()

        outputs = model(inputs)

        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        predicted = (outputs > 0.5).float()
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

        running_loss += loss.item()

    accuracy = 100 * correct / total
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}, Accuracy: {accuracy:.2f}%")

model.eval()
correct = 0
total = 0
y_true = []
y_pred = []

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.unsqueeze(1)
        labels = labels.float().unsqueeze(1)

        outputs = model(inputs)
        predicted = (outputs > 0.5).float()

        correct += (predicted == labels).sum().item()
        total += labels.size(0)

        y_true.extend(labels.numpy())
        y_pred.extend(predicted.numpy())

accuracy = 100 * correct / total
print(f"Test Accuracy: {accuracy:.2f}%")

from sklearn.metrics import classification_report, roc_auc_score

print(classification_report(y_true, y_pred))

roc_auc = roc_auc_score(y_true, y_pred)
print(f"ROC-AUC: {roc_auc:.4f}")



Epoch [1/10], Loss: 0.1573, Accuracy: 94.29%
Epoch [2/10], Loss: 0.1461, Accuracy: 94.44%
Epoch [3/10], Loss: 0.1443, Accuracy: 94.59%
Epoch [4/10], Loss: 0.1440, Accuracy: 94.57%
Epoch [5/10], Loss: 0.1425, Accuracy: 94.47%
Epoch [6/10], Loss: 0.1433, Accuracy: 94.63%
Epoch [7/10], Loss: 0.1403, Accuracy: 94.75%
Epoch [8/10], Loss: 0.1377, Accuracy: 94.76%
Epoch [9/10], Loss: 0.1394, Accuracy: 94.70%
Epoch [10/10], Loss: 0.1388, Accuracy: 94.72%
Test Accuracy: 93.41%
              precision    recall  f1-score   support

         0.0       0.92      0.87      0.89      3327
         1.0       0.94      0.96      0.95      7282

    accuracy                           0.93     10609
   macro avg       0.93      0.92      0.92     10609
weighted avg       0.93      0.93      0.93     10609

ROC-AUC: 0.9164


BATCH_SIZE 1024

In [None]:
from torch.utils.data import DataLoader, TensorDataset

train_data = TensorDataset(X_train, y_train)
test_data = TensorDataset(X_test, y_test)

train_loader = DataLoader(train_data, batch_size=1024, shuffle=True)
test_loader = DataLoader(test_data, batch_size=1024, shuffle=False)

import torch.optim as optim
import torch.nn as nn

device='cpu'
criterion = nn.BCELoss()

optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in train_loader:
        inputs = inputs.unsqueeze(1)
        labels = labels.float().unsqueeze(1)

        optimizer.zero_grad()

        outputs = model(inputs)

        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        predicted = (outputs > 0.5).float()
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

        running_loss += loss.item()

    accuracy = 100 * correct / total
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}, Accuracy: {accuracy:.2f}%")

model.eval()
correct = 0
total = 0
y_true = []
y_pred = []

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.unsqueeze(1)
        labels = labels.float().unsqueeze(1)

        outputs = model(inputs)
        predicted = (outputs > 0.5).float()

        correct += (predicted == labels).sum().item()
        total += labels.size(0)

        y_true.extend(labels.numpy())
        y_pred.extend(predicted.numpy())

accuracy = 100 * correct / total
print(f"Test Accuracy: {accuracy:.2f}%")

from sklearn.metrics import classification_report, roc_auc_score

print(classification_report(y_true, y_pred))

roc_auc = roc_auc_score(y_true, y_pred)
print(f"ROC-AUC: {roc_auc:.4f}")



Epoch [1/10], Loss: 0.1342, Accuracy: 94.96%
Epoch [2/10], Loss: 0.1331, Accuracy: 94.98%
Epoch [3/10], Loss: 0.1361, Accuracy: 94.80%
Epoch [4/10], Loss: 0.1324, Accuracy: 95.11%
Epoch [5/10], Loss: 0.1326, Accuracy: 94.96%
Epoch [6/10], Loss: 0.1312, Accuracy: 95.04%
Epoch [7/10], Loss: 0.1318, Accuracy: 94.97%
Epoch [8/10], Loss: 0.1297, Accuracy: 95.15%
Epoch [9/10], Loss: 0.1292, Accuracy: 95.08%
Epoch [10/10], Loss: 0.1280, Accuracy: 95.20%
Test Accuracy: 93.36%
              precision    recall  f1-score   support

         0.0       0.91      0.87      0.89      3327
         1.0       0.94      0.96      0.95      7282

    accuracy                           0.93     10609
   macro avg       0.93      0.92      0.92     10609
weighted avg       0.93      0.93      0.93     10609

ROC-AUC: 0.9171


BATCH_SIZE 3200

In [None]:

train_data = TensorDataset(X_train, y_train)
test_data = TensorDataset(X_test, y_test)

train_loader = DataLoader(train_data, batch_size=3200, shuffle=True)
test_loader = DataLoader(test_data, batch_size=3200, shuffle=False)

import torch.optim as optim
import torch.nn as nn

device='cpu'
criterion = nn.BCELoss()

optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in train_loader:
        inputs = inputs.unsqueeze(1)
        labels = labels.float().unsqueeze(1)

        optimizer.zero_grad()

        outputs = model(inputs)

        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        predicted = (outputs > 0.5).float()
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

        running_loss += loss.item()

    accuracy = 100 * correct / total
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}, Accuracy: {accuracy:.2f}%")

model.eval()
correct = 0
total = 0
y_true = []
y_pred = []

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.unsqueeze(1)
        labels = labels.float().unsqueeze(1)

        outputs = model(inputs)
        predicted = (outputs > 0.5).float()

        correct += (predicted == labels).sum().item()
        total += labels.size(0)

        y_true.extend(labels.numpy())
        y_pred.extend(predicted.numpy())

accuracy = 100 * correct / total
print(f"Test Accuracy: {accuracy:.2f}%")

from sklearn.metrics import classification_report, roc_auc_score

print(classification_report(y_true, y_pred))

roc_auc = roc_auc_score(y_true, y_pred)
print(f"ROC-AUC: {roc_auc:.4f}")



Epoch [1/10], Loss: 0.1342, Accuracy: 94.28%
Epoch [2/10], Loss: 0.1347, Accuracy: 94.07%
Epoch [3/10], Loss: 0.1359, Accuracy: 94.07%
Epoch [4/10], Loss: 0.1353, Accuracy: 94.18%
Epoch [5/10], Loss: 0.1365, Accuracy: 94.21%
Epoch [6/10], Loss: 0.1321, Accuracy: 94.28%
Epoch [7/10], Loss: 0.1439, Accuracy: 94.26%
Epoch [8/10], Loss: 0.1411, Accuracy: 94.15%
Epoch [9/10], Loss: 0.1362, Accuracy: 94.19%
Epoch [10/10], Loss: 0.1333, Accuracy: 94.23%
Test Accuracy: 93.52%
              precision    recall  f1-score   support

         0.0       0.89      0.90      0.90      3327
         1.0       0.96      0.95      0.95      7282

    accuracy                           0.94     10609
   macro avg       0.92      0.93      0.93     10609
weighted avg       0.94      0.94      0.94     10609

ROC-AUC: 0.9265
