In [9]:
import pickle
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import torch.nn.functional as F

# Load RadioML dataset
def load_radioml_data(filepath='RML2016.10a_dict.pkl'):
    with open(filepath, 'rb') as f:
        data_dict = pickle.load(f, encoding='latin1')

    data = []
    labels = []
    for key, value in data_dict.items():
        mod_type, snr = key
        data.append(value)
        labels.extend([mod_type] * value.shape[0])

    data = np.vstack(data)
    label_set = sorted(list(set(labels)))
    label_to_int = {label: i for i, label in enumerate(label_set)}
    labels = np.array([label_to_int[label] for label in labels])

    return data, labels, label_to_int

# Dataset class for PyTorch
class RadioMLDataset(Dataset):
    def __init__(self, data, labels):
        data = data[:, np.newaxis, :, :]  # Add channel dimension (1, 2, 1024)
        self.data = torch.tensor(data, dtype=torch.float32)
        self.labels = torch.tensor(labels, dtype=torch.long)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]

# Model based on the specified architecture with adjustments to avoid dimension reduction issues
class PaperAMCModel(nn.Module):
    def __init__(self, num_classes=24):
        super(PaperAMCModel, self).__init__()

        # Initial Convolution Layer with stride 2 to replace pooling
        self.conv1 = nn.Conv2d(1, 16, kernel_size=(3, 3), stride=2, padding=1)  # Output: (16, 512, 16)
        self.bn1 = nn.BatchNorm2d(16)

        # Second Convolution Layer with stride 2
        self.conv2 = nn.Conv2d(16, 32, kernel_size=(3, 3), stride=2, padding=1)  # Output: (32, 256, 32)
        self.bn2 = nn.BatchNorm2d(32)

        # Block 1
        self.block1 = nn.Sequential(
            nn.Conv2d(32, 32, kernel_size=(1, 1), padding=0),  # Output: (32, 256, 32)
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=(3, 1), padding=(1, 0)),  # Output: (32, 256, 32)
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=(1, 3), padding=(0, 1)),  # Output: (32, 256, 32)
            nn.BatchNorm2d(32),
            nn.ReLU()
        )

        # Block 2 with increased output channels and kernel adjustments
        self.block2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=(1, 1), padding=0),  # Output: (64, 256, 64)
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=(3, 1), padding=(1, 0)),  # Output: (64, 256, 64)
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=(1, 3), padding=(0, 1)),  # Output: (64, 256, 64)
            nn.BatchNorm2d(64),
            nn.ReLU()
        )

        # Block 3 with further increased output channels
        self.block3 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=(1, 1), padding=0),  # Output: (128, 256, 128)
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(128, 128, kernel_size=(3, 1), padding=(1, 0)),  # Output: (128, 256, 128)
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(128, 128, kernel_size=(1, 3), padding=(0, 1)),  # Output: (128, 256, 128)
            nn.BatchNorm2d(128),
            nn.ReLU()
        )

        # Global Pooling and Fully Connected Layer
        self.global_pool = nn.AdaptiveAvgPool2d((1, 1))  # Output: (1, 1, 128)
        self.fc = nn.Linear(128, num_classes)

    def forward(self, x):
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = self.global_pool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

# Weight Initialization Function
def init_weights(m):
    if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
        nn.init.xavier_normal_(m.weight)
        if m.bias is not None:
            nn.init.zeros_(m.bias)

# Training and evaluation functions with gradient clipping
def train_model(model, train_loader, criterion, optimizer, scheduler, test_loader, epochs=45):
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()

            # Apply gradient clipping
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

            optimizer.step()
            running_loss += loss.item()

            # Calculate accuracy for mini-batch
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        # Training accuracy for this epoch
        train_accuracy = 100 * correct / total
        print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss/len(train_loader):.4f}, Training Accuracy: {train_accuracy:.2f}%")

        # Step the learning rate scheduler
        scheduler.step()

        # Evaluate on validation set after each epoch
        val_accuracy = evaluate_model(model, test_loader)
        print(f"Validation Accuracy after Epoch {epoch+1}: {val_accuracy:.2f}%")

def evaluate_model(model, test_loader):
    model.eval()
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    return accuracy_score(all_labels, all_preds) * 100

# Main function for training and evaluation
def main():
    # Load data
    data, labels, label_to_int = load_radioml_data('RML2016.10a_dict.pkl')
    X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42)

    # DataLoaders
    train_dataset = RadioMLDataset(X_train, y_train)
    test_dataset = RadioMLDataset(X_test, y_test)
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

    # Model, criterion, optimizer, and scheduler
    model = PaperAMCModel(num_classes=len(label_to_int))
    model.apply(init_weights)  # Apply Xavier initialization
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=1e-5)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.01)  # Reduces LR by factor of 0.01 every 20 epochs

    # Train and evaluate
    train_model(model, train_loader, criterion, optimizer, scheduler, test_loader, epochs=45)

if __name__ == "__main__":
    main()


Epoch 1/45, Loss: 1.7477, Training Accuracy: 38.01%
Validation Accuracy after Epoch 1: 46.21%
Epoch 2/45, Loss: 2.0028, Training Accuracy: 26.47%
Validation Accuracy after Epoch 2: 35.33%
Epoch 3/45, Loss: 1.7728, Training Accuracy: 34.79%
Validation Accuracy after Epoch 3: 38.75%
Epoch 4/45, Loss: 1.6821, Training Accuracy: 37.65%
Validation Accuracy after Epoch 4: 42.05%
Epoch 5/45, Loss: 1.6380, Training Accuracy: 39.09%
Validation Accuracy after Epoch 5: 42.93%
Epoch 6/45, Loss: 1.6005, Training Accuracy: 40.44%
Validation Accuracy after Epoch 6: 43.25%
Epoch 7/45, Loss: 1.5406, Training Accuracy: 42.27%
Validation Accuracy after Epoch 7: 22.08%
Epoch 8/45, Loss: 1.5285, Training Accuracy: 42.82%
Validation Accuracy after Epoch 8: 47.00%
Epoch 9/45, Loss: 1.4881, Training Accuracy: 44.20%
Validation Accuracy after Epoch 9: 46.75%
Epoch 10/45, Loss: 1.4707, Training Accuracy: 44.93%
Validation Accuracy after Epoch 10: 47.35%
Epoch 11/45, Loss: 1.4609, Training Accuracy: 45.20%
Valid