In [3]:
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from ndlinear import NdLinear
import torch.nn.functional as F
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import DataLoader, random_split
from torchvision.transforms import autoaugment, transforms


train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    autoaugment.AutoAugment(autoaugment.AutoAugmentPolicy.CIFAR10),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])

test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])

# Load CIFAR-10 dataset
train_dataset = torchvision.datasets.CIFAR10(
    root='./data', 
    train=True,
    download=True, 
    transform=train_transform
)

# Split training data into training and validation sets
train_size = int(0.9 * len(train_dataset))
val_size = len(train_dataset) - train_size
train_dataset, val_dataset = random_split(train_dataset, [train_size, val_size])

# For validation set, we should use the test transforms (no augmentation)
val_dataset.dataset.transform = test_transform

# Test dataset
test_dataset = torchvision.datasets.CIFAR10(
    root='./data', 
    train=False,
    download=True, 
    transform=test_transform
)

# Create data loaders
train_loader = DataLoader(
    train_dataset, 
    batch_size=128, 
    shuffle=True, 
    num_workers=2,
    pin_memory=True
)

val_loader = DataLoader(
    val_dataset, 
    batch_size=128, 
    shuffle=False, 
    num_workers=2,
    pin_memory=True
)

test_loader = DataLoader(
    test_dataset, 
    batch_size=128, 
    shuffle=False, 
    num_workers=2,
    pin_memory=True
)


In [4]:

class EnhancedNdLinearCNN(nn.Module):
    def __init__(self, dropout_rate=0.4):
        super(EnhancedNdLinearCNN, self).__init__()
        
        # First block - keep input channels small but increase gradually
        self.block1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
            nn.Dropout(dropout_rate)
        )
        
        # Second block - increase channels
        self.block2 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
            nn.Dropout(dropout_rate)
        )
        
        # Third block - further increase channels
        self.block3 = nn.Sequential(
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
            nn.Dropout(dropout_rate)
        )
        
        # Fourth block - deep features
        self.block4 = nn.Sequential(
            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
            nn.Dropout(dropout_rate)
        )
        
        # Global average pooling
        self.global_avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        
        # NdLinear layers with proper dimensions
        self.ndlinear1 = NdLinear(input_dims=(512,), hidden_size=(256,))
        self.bn1 = nn.BatchNorm1d(256)
        self.dropout1 = nn.Dropout(dropout_rate)
        
        self.ndlinear2 = NdLinear(input_dims=(256,), hidden_size=(128,))
        self.bn2 = nn.BatchNorm1d(128)
        self.dropout2 = nn.Dropout(dropout_rate)
        
        self.ndlinear3 = NdLinear(input_dims=(128,), hidden_size=(10,))

    def forward(self, x):
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = self.block4(x)
        x = self.global_avg_pool(x)
        x = x.view(x.size(0), -1)
        
        x = self.ndlinear1(x)
        x = self.bn1(x)
        x = F.relu(x, inplace=True)
        x = self.dropout1(x)
        
        x = self.ndlinear2(x)
        x = self.bn2(x)
        x = F.relu(x, inplace=True)
        x = self.dropout2(x)
        
        x = self.ndlinear3(x)
        return x
    
model = EnhancedNdLinearCNN()


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.load_state_dict(torch.load('best_model.pth', map_location=device))
model.to(device)


# Loss and optimizer with stronger regularization
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=5e-4)

# Cosine annealing scheduler often works better than ReduceLROnPlateau
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)

In [6]:
# Training loop with early stopping
num_epochs = 100
best_acc = 0
patience = 20
counter = 0

for epoch in range(num_epochs):
    # Training phase
    train_loss, train_acc = train(model, train_loader, optimizer, criterion, device, epoch)
    
    # Validation phase
    val_loss, val_acc = validate(model, val_loader, criterion, device)
    
    # Update learning rate
    scheduler.step()
    current_lr = optimizer.param_groups[0]['lr']
    print(f'Current learning rate: {current_lr}')
    
    # Save best model
    if val_acc > best_acc:
        best_acc = val_acc
        torch.save(model.state_dict(), 'best_model.pth')
        print(f'Model saved with accuracy: {best_acc:.2f}%')
        counter = 0
    else:
        counter += 1
        
    # Early stopping
    if counter >= patience:
        print(f'Early stopping triggered after {epoch+1} epochs')
        break
    
    print(f'Epoch {epoch+1}/{num_epochs} completed')
    print(f'Best accuracy so far: {best_acc:.2f}%')

NameError: name 'train' is not defined

In [None]:
def test(model, test_loader, device):
    model.eval()
    correct = 0
    total = 0
    
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
    
    accuracy = 100. * correct / total
    print(f'Test Accuracy: {accuracy:.2f}%')
    return accuracy

# Load best model and evaluate
model.load_state_dict(torch.load('best_model.pth'))
test_acc = test(model, test_loader, device)

In [1]:
import torch
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")

if torch.cuda.is_available():
    print(f"CUDA version: {torch.version.cuda}")
    print(f"GPU device count: {torch.cuda.device_count()}")
    print(f"Current GPU: {torch.cuda.get_device_name(0)}")

PyTorch version: 2.8.0.dev20250324+cu128
CUDA available: True
CUDA version: 12.8
GPU device count: 1
Current GPU: NVIDIA GeForce GTX 1070
