In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report 


In [5]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)


cpu


In [6]:
# Synthetic classification data
X = np.random.rand(2000, 20).astype(np.float32)  # 20 features
y = np.random.randint(0, 3, size=(2000,))        # 3 classes

X_train, X_test = X[:1600], X[1600:]
y_train, y_test = y[:1600], y[1600:]

train_dataset = TensorDataset(torch.tensor(X_train), torch.tensor(y_train))
test_dataset  = TensorDataset(torch.tensor(X_test), torch.tensor(y_test))

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader  = DataLoader(test_dataset, batch_size=64, shuffle=False)


**##Attention Module**

In [7]:
class ChannelAttention(nn.Module):
    def __init__(self, features, reduction=16):
        super().__init__()

        self.attention = nn.Sequential(
            nn.Linear(features, features // reduction),
            nn.ReLU(),
            nn.Linear(features // reduction, features),
            nn.Sigmoid()
        )

    def forward(self, x):
        avg_pool = torch.mean(x, dim=0, keepdim=True)
        attn = self.attention(avg_pool)
        return x * attn


**Residual Connection With Attention**

In [8]:
class ResidualCBAMBlock(nn.Module):
    def __init__(self, features, dropout=0.3):
        super().__init__()

        self.bn1 = nn.BatchNorm1d(features)
        self.fc1 = nn.Linear(features, features)

        self.bn2 = nn.BatchNorm1d(features)
        self.fc2 = nn.Linear(features, features)

        self.cbam = ChannelAttention(features)
        self.dropout = nn.Dropout(dropout)
        self.relu = nn.ReLU()

    def forward(self, x):
        identity = x

        out = self.fc1(self.relu(self.bn1(x)))
        out = self.dropout(out)
        out = self.fc2(self.relu(self.bn2(out)))

        out = self.cbam(out)       # ðŸ”¥ Attention applied here
        out = out + identity       # Residual connection

        return self.relu(out)


**Deep Neural Network with Residual Connection and CBAM attention Module**

In [11]:
class DeepResidualCBAMNet(nn.Module):
    def __init__(self, input_size, num_classes):
        super().__init__()

        self.fc_in = nn.Linear(input_size, 128)

        self.res1 = ResidualCBAMBlock(128)
        self.res2 = ResidualCBAMBlock(128)
        self.res3 = ResidualCBAMBlock(128)  # ðŸ”¥ extra depth

        self.fc1 = nn.Linear(128, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc_out = nn.Linear(32, num_classes)

        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.fc_in(x))
        x = self.res1(x)
        x = self.res2(x)
        x = self.res3(x)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        return self.fc_out(x)


In [12]:
model = DeepResidualCBAMNet(input_size=20, num_classes=3).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [13]:
epochs = 80
train_losses = []

for epoch in range(epochs):
    model.train()
    total_loss = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    avg_loss = total_loss / len(train_loader)
    train_losses.append(avg_loss)

    print(f"Epoch [{epoch+1}/{epochs}] - Loss: {avg_loss:.4f}")


Epoch [1/80] - Loss: 1.0987
Epoch [2/80] - Loss: 1.0927
Epoch [3/80] - Loss: 1.0811
Epoch [4/80] - Loss: 1.0643
Epoch [5/80] - Loss: 1.0463
Epoch [6/80] - Loss: 1.0204
Epoch [7/80] - Loss: 1.0021
Epoch [8/80] - Loss: 0.9657
Epoch [9/80] - Loss: 0.9591
Epoch [10/80] - Loss: 0.9155
Epoch [11/80] - Loss: 0.8992
Epoch [12/80] - Loss: 0.8706
Epoch [13/80] - Loss: 0.8309
Epoch [14/80] - Loss: 0.8360
Epoch [15/80] - Loss: 0.7935
Epoch [16/80] - Loss: 0.7611
Epoch [17/80] - Loss: 0.7395
Epoch [18/80] - Loss: 0.6981
Epoch [19/80] - Loss: 0.6826
Epoch [20/80] - Loss: 0.6746
Epoch [21/80] - Loss: 0.6390
Epoch [22/80] - Loss: 0.6195
Epoch [23/80] - Loss: 0.5620
Epoch [24/80] - Loss: 0.6255
Epoch [25/80] - Loss: 0.5781
Epoch [26/80] - Loss: 0.5333
Epoch [27/80] - Loss: 0.5091
Epoch [28/80] - Loss: 0.5118
Epoch [29/80] - Loss: 0.5318
Epoch [30/80] - Loss: 0.4632
Epoch [31/80] - Loss: 0.4620
Epoch [32/80] - Loss: 0.4348
Epoch [33/80] - Loss: 0.4192
Epoch [34/80] - Loss: 0.4265
Epoch [35/80] - Loss: 0

In [14]:
model.eval()
y_true, y_pred = [], []

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.to(device)
        outputs = model(inputs)
        preds = torch.argmax(outputs, dim=1)

        y_true.extend(labels.numpy())
        y_pred.extend(preds.cpu().numpy())

print(classification_report(y_true, y_pred))


              precision    recall  f1-score   support

           0       0.28      0.27      0.27       132
           1       0.35      0.36      0.35       135
           2       0.38      0.38      0.38       133

    accuracy                           0.34       400
   macro avg       0.33      0.33      0.33       400
weighted avg       0.33      0.34      0.33       400

