In [1]:
import torch
from torch.utils.data import random_split, DataLoader
import torchvision.transforms as transforms
import torchvision

transform = transforms.Compose([
    transforms.ToTensor(),  # Convert images to tensors
    transforms.Normalize((0.1307,), (0.3081,)),  # Normalize to match MNIST stats
    transforms.Resize((256, 256)),
    transforms.RandomCrop((227, 227)),
    transforms.Lambda(lambda x: x.repeat(3, 1, 1))
])

# Load MNIST dataset
train_ds = torchvision.datasets.MNIST("/home/eagle/Projects/dl_from_scratch/mnist", train=True, download=True, transform=transform)
test_ds = torchvision.datasets.MNIST("/home/eagle/Projects/dl_from_scratch/mnist", train=False, download=True, transform=transform)

train_size = int(0.8 * len(train_ds))  # 80% for training
val_size = len(train_ds) - train_size  # 20% for validation

# Split the train_dataset into train and val
train_ds, val_ds = random_split(train_ds, [train_size, val_size])

batch_size = 80
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, pin_memory=True, num_workers=6)
val_loader = DataLoader(val_ds, batch_size=batch_size, shuffle=False, pin_memory=True, num_workers=6)
test_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=False, pin_memory=True, num_workers=6)

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class DCNN(nn.Module):
    def __init__(self):
        super(DCNN, self).__init__()
        self.quant = torch.ao.quantization.QuantStub()
        self.conv1 = nn.Conv2d(3, 96, kernel_size=11, stride=4)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.bn1 = nn.BatchNorm2d(96)

        self.conv2 = nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.bn2 = nn.BatchNorm2d(256)

        self.conv3 = nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1)
        self.relu3 = nn.ReLU()

        self.conv4 = nn.Conv2d(384, 384, kernel_size=3, stride=1, padding=1)
        self.relu4 = nn.ReLU()

        self.conv5 = nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1)
        self.relu5 = nn.ReLU()
        self.pool5 = nn.MaxPool2d(kernel_size=3, stride=2)

        self.fc6 = nn.Linear(256 * 6 * 6, 4096)  # Adjusted based on (227x227 input)
        self.relu6 = nn.ReLU()
        self.d6 = nn.Dropout()

        self.fc7 = nn.Linear(4096, 4096)
        self.relu7 = nn.ReLU()
        self.d7 = nn.Dropout()

        self.fc8 = nn.Linear(4096, 10)
        self.dequant = torch.ao.quantization.DeQuantStub()
    
    def forward(self, x):
        x = self.quant(x)
        x = self.bn1(self.pool1(self.relu1(self.conv1(x))))
        x = self.bn2(self.pool2(self.relu2(self.conv2(x))))
        x = self.relu3(self.conv3(x))
        x = self.relu4(self.conv4(x))
        x = self.pool5(self.relu5(self.conv5(x)))
        
        x = x.view(x.size(0), -1)
        
        x = self.d6(self.relu6(self.fc6(x)))
        x = self.d7(self.relu7(self.fc7(x)))
        x = self.fc8(x)
        x = self.dequant(x)
        return x  

device = "cuda" if torch.cuda.is_available() else "cpu"
model = DCNN().to(device)

In [3]:
from tqdm import tqdm  # Import tqdm for progress bars

loss_fn = torch.nn.CrossEntropyLoss()
initial_lr = 0.001
optimizer = torch.optim.SGD(model.parameters(), lr=initial_lr, momentum=0.9, weight_decay=0.0005)
scaler = torch.cuda.amp.GradScaler()

num_epochs = 30

for epoch in range(num_epochs):
    model.train()  # Set the model to training mode
    running_loss = 0.0

    # Adjust learning rate if epoch is greater than or equal to 20
    if epoch >= 20:
        for param_group in optimizer.param_groups:
            param_group['lr'] = 0.0001  # Set learning rate to 0.0001

    # Wrap the train_loader with tqdm for progress tracking
    for inputs, labels in tqdm(train_loader, desc=f"Training Epoch {epoch + 1}", unit="batch"):
        inputs, labels = inputs.to(device), labels.to(device)  
        optimizer.zero_grad()

        # Forward pass with mixed precision
        with torch.cuda.amp.autocast():  
            outputs = model(inputs)
            loss = loss_fn(outputs, labels)

        # Backward pass
        scaler.scale(loss).backward()  # Scale the loss for stable gradients
        scaler.step(optimizer)  # Update the parameters
        scaler.update()  # Update the scaler

        running_loss += loss.item()   
    
    # Calculate average loss for the epoch
    avg_loss = running_loss / len(train_loader)

    # Validation phase
    model.eval()  # Set the model to evaluation mode
    running_val_loss = 0.0
    correct = 0
    total = 0

    # Wrap the val_loader with tqdm for progress tracking
    with torch.no_grad():  # Disable gradient calculation
        for inputs, labels in tqdm(val_loader, desc="Validating", unit="batch"):
            inputs, labels = inputs.to(device), labels.to(device)

            with torch.cuda.amp.autocast():  # Enable autocasting for validation
                outputs = model(inputs)
                loss = loss_fn(outputs, labels)
                
            running_val_loss += loss.item()

            # Calculate accuracy
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    # Calculate average validation loss and accuracy
    avg_val_loss = running_val_loss / len(val_loader)
    accuracy = correct / total * 100  # Convert to percentage

    print(f"Epoch [{epoch + 1}/{num_epochs}], "
          f"Training Loss: {avg_loss:.4f}, "
          f"Validation Loss: {avg_val_loss:.4f}, "
          f"Validation Accuracy: {accuracy:.2f}%")

print("Training complete.")

  scaler = torch.cuda.amp.GradScaler()
  with torch.cuda.amp.autocast():
Training Epoch 1: 100%|████████████████████| 600/600 [00:30<00:00, 19.42batch/s]
  with torch.cuda.amp.autocast():  # Enable autocasting for validation
Validating: 100%|██████████████████████████| 150/150 [00:06<00:00, 22.84batch/s]


Epoch [1/30], Training Loss: 1.0348, Validation Loss: 0.1741, Validation Accuracy: 94.54%


Training Epoch 2: 100%|████████████████████| 600/600 [00:30<00:00, 19.68batch/s]
Validating: 100%|██████████████████████████| 150/150 [00:06<00:00, 23.73batch/s]


Epoch [2/30], Training Loss: 0.1531, Validation Loss: 0.0912, Validation Accuracy: 97.30%


Training Epoch 3: 100%|████████████████████| 600/600 [00:32<00:00, 18.72batch/s]
Validating: 100%|██████████████████████████| 150/150 [00:06<00:00, 22.94batch/s]


Epoch [3/30], Training Loss: 0.0990, Validation Loss: 0.0670, Validation Accuracy: 97.75%


Training Epoch 4: 100%|████████████████████| 600/600 [00:31<00:00, 19.18batch/s]
Validating: 100%|██████████████████████████| 150/150 [00:07<00:00, 21.23batch/s]


Epoch [4/30], Training Loss: 0.0768, Validation Loss: 0.0609, Validation Accuracy: 98.08%


Training Epoch 5: 100%|████████████████████| 600/600 [00:33<00:00, 17.99batch/s]
Validating: 100%|██████████████████████████| 150/150 [00:06<00:00, 24.50batch/s]


Epoch [5/30], Training Loss: 0.0663, Validation Loss: 0.0504, Validation Accuracy: 98.43%


Training Epoch 6: 100%|████████████████████| 600/600 [00:29<00:00, 20.07batch/s]
Validating: 100%|██████████████████████████| 150/150 [00:05<00:00, 26.63batch/s]


Epoch [6/30], Training Loss: 0.0570, Validation Loss: 0.0580, Validation Accuracy: 98.00%


Training Epoch 7: 100%|████████████████████| 600/600 [00:29<00:00, 20.09batch/s]
Validating: 100%|██████████████████████████| 150/150 [00:06<00:00, 23.73batch/s]


Epoch [7/30], Training Loss: 0.0503, Validation Loss: 0.0465, Validation Accuracy: 98.45%


Training Epoch 8: 100%|████████████████████| 600/600 [00:30<00:00, 19.64batch/s]
Validating: 100%|██████████████████████████| 150/150 [00:05<00:00, 26.71batch/s]


Epoch [8/30], Training Loss: 0.0470, Validation Loss: 0.0400, Validation Accuracy: 98.71%


Training Epoch 9: 100%|████████████████████| 600/600 [00:29<00:00, 20.28batch/s]
Validating: 100%|██████████████████████████| 150/150 [00:05<00:00, 26.33batch/s]


Epoch [9/30], Training Loss: 0.0432, Validation Loss: 0.0369, Validation Accuracy: 98.87%


Training Epoch 10: 100%|███████████████████| 600/600 [00:29<00:00, 20.40batch/s]
Validating: 100%|██████████████████████████| 150/150 [00:07<00:00, 19.85batch/s]


Epoch [10/30], Training Loss: 0.0388, Validation Loss: 0.0338, Validation Accuracy: 98.92%


Training Epoch 11: 100%|███████████████████| 600/600 [00:30<00:00, 19.60batch/s]
Validating: 100%|██████████████████████████| 150/150 [00:06<00:00, 22.38batch/s]


Epoch [11/30], Training Loss: 0.0371, Validation Loss: 0.0340, Validation Accuracy: 98.92%


Training Epoch 12: 100%|███████████████████| 600/600 [00:29<00:00, 20.37batch/s]
Validating: 100%|██████████████████████████| 150/150 [00:06<00:00, 23.19batch/s]


Epoch [12/30], Training Loss: 0.0346, Validation Loss: 0.0287, Validation Accuracy: 99.17%


Training Epoch 13: 100%|███████████████████| 600/600 [00:28<00:00, 20.73batch/s]
Validating: 100%|██████████████████████████| 150/150 [00:05<00:00, 26.71batch/s]


Epoch [13/30], Training Loss: 0.0328, Validation Loss: 0.0302, Validation Accuracy: 99.13%


Training Epoch 14: 100%|███████████████████| 600/600 [00:29<00:00, 20.49batch/s]
Validating: 100%|██████████████████████████| 150/150 [00:05<00:00, 28.68batch/s]


Epoch [14/30], Training Loss: 0.0311, Validation Loss: 0.0260, Validation Accuracy: 99.18%


Training Epoch 15: 100%|███████████████████| 600/600 [00:29<00:00, 20.31batch/s]
Validating: 100%|██████████████████████████| 150/150 [00:06<00:00, 23.90batch/s]


Epoch [15/30], Training Loss: 0.0288, Validation Loss: 0.0275, Validation Accuracy: 99.11%


Training Epoch 16: 100%|███████████████████| 600/600 [00:30<00:00, 20.00batch/s]
Validating: 100%|██████████████████████████| 150/150 [00:07<00:00, 20.50batch/s]


Epoch [16/30], Training Loss: 0.0266, Validation Loss: 0.0296, Validation Accuracy: 99.08%


Training Epoch 17: 100%|███████████████████| 600/600 [00:32<00:00, 18.73batch/s]
Validating: 100%|██████████████████████████| 150/150 [00:07<00:00, 21.26batch/s]


Epoch [17/30], Training Loss: 0.0274, Validation Loss: 0.0320, Validation Accuracy: 99.08%


Training Epoch 18: 100%|███████████████████| 600/600 [00:32<00:00, 18.60batch/s]
Validating: 100%|██████████████████████████| 150/150 [00:07<00:00, 19.60batch/s]


Epoch [18/30], Training Loss: 0.0262, Validation Loss: 0.0283, Validation Accuracy: 99.17%


Training Epoch 19: 100%|███████████████████| 600/600 [00:31<00:00, 18.78batch/s]
Validating: 100%|██████████████████████████| 150/150 [00:07<00:00, 20.90batch/s]


Epoch [19/30], Training Loss: 0.0237, Validation Loss: 0.0263, Validation Accuracy: 99.22%


Training Epoch 20: 100%|███████████████████| 600/600 [00:32<00:00, 18.59batch/s]
Validating: 100%|██████████████████████████| 150/150 [00:06<00:00, 21.91batch/s]


Epoch [20/30], Training Loss: 0.0233, Validation Loss: 0.0276, Validation Accuracy: 99.12%


Training Epoch 21: 100%|███████████████████| 600/600 [00:31<00:00, 18.76batch/s]
Validating: 100%|██████████████████████████| 150/150 [00:07<00:00, 19.88batch/s]


Epoch [21/30], Training Loss: 0.0186, Validation Loss: 0.0217, Validation Accuracy: 99.34%


Training Epoch 22: 100%|███████████████████| 600/600 [00:33<00:00, 18.02batch/s]
Validating: 100%|██████████████████████████| 150/150 [00:06<00:00, 22.07batch/s]


Epoch [22/30], Training Loss: 0.0175, Validation Loss: 0.0215, Validation Accuracy: 99.34%


Training Epoch 23: 100%|███████████████████| 600/600 [00:33<00:00, 17.88batch/s]
Validating: 100%|██████████████████████████| 150/150 [00:08<00:00, 18.74batch/s]


Epoch [23/30], Training Loss: 0.0159, Validation Loss: 0.0206, Validation Accuracy: 99.42%


Training Epoch 24: 100%|███████████████████| 600/600 [00:33<00:00, 17.76batch/s]
Validating: 100%|██████████████████████████| 150/150 [00:07<00:00, 21.40batch/s]


Epoch [24/30], Training Loss: 0.0160, Validation Loss: 0.0201, Validation Accuracy: 99.39%


Training Epoch 25: 100%|███████████████████| 600/600 [00:32<00:00, 18.51batch/s]
Validating: 100%|██████████████████████████| 150/150 [00:07<00:00, 20.34batch/s]


Epoch [25/30], Training Loss: 0.0168, Validation Loss: 0.0200, Validation Accuracy: 99.45%


Training Epoch 26: 100%|███████████████████| 600/600 [00:32<00:00, 18.68batch/s]
Validating: 100%|██████████████████████████| 150/150 [00:07<00:00, 20.37batch/s]


Epoch [26/30], Training Loss: 0.0159, Validation Loss: 0.0205, Validation Accuracy: 99.43%


Training Epoch 27: 100%|███████████████████| 600/600 [00:31<00:00, 18.76batch/s]
Validating: 100%|██████████████████████████| 150/150 [00:07<00:00, 21.36batch/s]


Epoch [27/30], Training Loss: 0.0149, Validation Loss: 0.0215, Validation Accuracy: 99.37%


Training Epoch 28: 100%|███████████████████| 600/600 [00:34<00:00, 17.52batch/s]
Validating: 100%|██████████████████████████| 150/150 [00:07<00:00, 20.79batch/s]


Epoch [28/30], Training Loss: 0.0149, Validation Loss: 0.0193, Validation Accuracy: 99.38%


Training Epoch 29: 100%|███████████████████| 600/600 [00:31<00:00, 18.90batch/s]
Validating: 100%|██████████████████████████| 150/150 [00:07<00:00, 20.84batch/s]


Epoch [29/30], Training Loss: 0.0147, Validation Loss: 0.0203, Validation Accuracy: 99.36%


Training Epoch 30: 100%|███████████████████| 600/600 [00:31<00:00, 19.03batch/s]
Validating: 100%|██████████████████████████| 150/150 [00:06<00:00, 21.64batch/s]

Epoch [30/30], Training Loss: 0.0144, Validation Loss: 0.0212, Validation Accuracy: 99.42%
Training complete.





In [7]:
import torch
import torch.nn as nn
from torch.quantization import QuantStub, DeQuantStub, fuse_modules
import torch.nn.utils.prune as prune
from tqdm import tqdm

class DCNN(nn.Module):
    def __init__(self):
        super(DCNN, self).__init__()
        self.quant = QuantStub()
        self.conv1 = nn.Conv2d(3, 96, kernel_size=11, stride=4)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.bn1 = nn.BatchNorm2d(96)

        self.conv2 = nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.bn2 = nn.BatchNorm2d(256)

        self.conv3 = nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1)
        self.relu3 = nn.ReLU()

        self.conv4 = nn.Conv2d(384, 384, kernel_size=3, stride=1, padding=1)
        self.relu4 = nn.ReLU()

        self.conv5 = nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1)
        self.relu5 = nn.ReLU()
        self.pool5 = nn.MaxPool2d(kernel_size=3, stride=2)

        self.fc6 = nn.Linear(256 * 6 * 6, 4096)
        self.relu6 = nn.ReLU()
        self.d6 = nn.Dropout()

        self.fc7 = nn.Linear(4096, 4096)
        self.relu7 = nn.ReLU()
        self.d7 = nn.Dropout()

        self.fc8 = nn.Linear(4096, 10)
        self.dequant = DeQuantStub()
    
    def forward(self, x):
        x = self.quant(x)
        x = self.bn1(self.pool1(self.relu1(self.conv1(x))))
        x = self.bn2(self.pool2(self.relu2(self.conv2(x))))
        x = self.relu3(self.conv3(x))
        x = self.relu4(self.conv4(x))
        x = self.pool5(self.relu5(self.conv5(x)))
        
        x = x.view(x.size(0), -1)
        
        x = self.d6(self.relu6(self.fc6(x)))
        x = self.d7(self.relu7(self.fc7(x)))
        x = self.fc8(x)
        x = self.dequant(x)
        return x  

device = "cuda" if torch.cuda.is_available() else "cpu"
model = DCNN().to(device)

# Step 1: Define quantization configuration
model.qconfig = torch.quantization.get_default_qconfig('fbgemm')

# Step 2: Fuse Conv and ReLU layers
model_fp32_fused = fuse_modules(model, [['conv1', 'relu1'],
                                          ['conv2', 'relu2'],
                                          ['conv3', 'relu3'],
                                          ['conv4', 'relu4'],
                                          ['conv5', 'relu5']])

# Step 3: Prepare the model for quantization
model_fp32_prepared = torch.quantization.prepare_qat(model_fp32_fused)

# Step 4: Calibrate the model (run a few batches through it to collect statistics)
model_fp32_prepared.eval()
with torch.no_grad():
    for inputs, _ in tqdm(train_loader):
        inputs = inputs.to(device)
        model_fp32_prepared(inputs)

# Step 5: Convert to quantized model
model_quantized = torch.quantization.convert(model_fp32_prepared)

# Step 6: Apply pruning
# Example: Prune 20% of weights from all conv layers
for name, module in model_quantized.named_modules():
    if isinstance(module, nn.Conv2d):
        prune.ln_structured(module, name='weight', amount=0.2, n=2, dim=0)

# Optionally, fine-tune the model after pruning
# Define optimizer and loss function as before
loss_fn = torch.nn.CrossEntropyLoss()
initial_lr = 0.001
optimizer = torch.optim.SGD(model_quantized.parameters(), lr=initial_lr, momentum=0.9, weight_decay=0.0005)
scaler = torch.cuda.amp.GradScaler()

num_epochs = 30

for epoch in range(num_epochs):
    model_quantized.train()
    running_loss = 0.0

    # Adjust learning rate if epoch is greater than or equal to 20
    if epoch >= 20:
        for param_group in optimizer.param_groups:
            param_group['lr'] = 0.0001

    for inputs, labels in tqdm(train_loader, desc=f"Training Epoch {epoch + 1}", unit="batch"):
        inputs, labels = inputs.to(device), labels.to(device)  
        optimizer.zero_grad()

        with torch.cuda.amp.autocast():
            outputs = model_quantized(inputs)
            loss = loss_fn(outputs, labels)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        running_loss += loss.item()   
    
    avg_loss = running_loss / len(train_loader)

    # Validation phase
    model_quantized.eval()
    running_val_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in tqdm(val_loader, desc="Validating", unit="batch"):
            inputs, labels = inputs.to(device), labels.to(device)

            with torch.cuda.amp.autocast():
                outputs = model_quantized(inputs)
                loss = loss_fn(outputs, labels)
                
            running_val_loss += loss.item()

            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    avg_val_loss = running_val_loss / len(val_loader)
    accuracy = correct / total * 100

    print(f"Epoch [{epoch + 1}/{num_epochs}], "
          f"Training Loss: {avg_loss:.4f}, "
          f"Validation Loss: {avg_val_loss:.4f}, "
          f"Validation Accuracy: {accuracy:.2f}%")

print("Training complete.")

NameError: name 'model_quantized' is not defined