In [2]:
pip install torchsummary

Collecting torchsummary
  Downloading torchsummary-1.5.1-py3-none-any.whl.metadata (296 bytes)
Downloading torchsummary-1.5.1-py3-none-any.whl (2.8 kB)
Installing collected packages: torchsummary
Successfully installed torchsummary-1.5.1
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.transforms import ToTensor
from torchsummary import summary

# Define a simplified AlexNet
class SimplifiedAlexNet(nn.Module):
    """AlexNet-style CNN scaled down for small 3-channel images.

    Five 3x3 convolutions interleaved with three 2x2 max-pools feed a
    three-layer fully connected head. The shape notes below assume a
    3x32x32 input, which is what the 256 * 2 * 2 flatten size requires.

    Args:
        num_classes: Output width of the final linear layer.
        dropout_prob: Drop probability shared by both dropout layers in the head.
    """

    def __init__(self, num_classes=10, dropout_prob=0):
        super().__init__()

        conv_layers = [
            # Stage 1: stride-2 conv then pool -> 64x16x16 -> 64x8x8
            nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # Stage 2: conv then pool -> 192x8x8 -> 192x4x4
            nn.Conv2d(64, 192, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # Stage 3: three size-preserving convs -> 384x4x4 -> 256x4x4 -> 256x4x4
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            # Final pool -> 256x2x2
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]
        self.features = nn.Sequential(*conv_layers)

        head_layers = [
            nn.Dropout(p=dropout_prob),
            nn.Linear(256 * 2 * 2, 4096),  # flattened 256x2x2 feature map
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout_prob),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Run the conv stack, flatten all but the batch dim, and apply the head."""
        feature_maps = self.features(x)
        flattened = feature_maps.flatten(start_dim=1)
        return self.classifier(flattened)

# Load CIFAR-10 dataset
# Convert each image to a tensor, then normalise every channel with mean 0.5 / std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Both splits are stored under ./data (downloaded if missing) and share the same transform.
train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Batches of 128; only the training split is shuffled.
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)

# Initialize the model, loss function, and optimizer
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# This run disables dropout (dropout_prob=0); num_classes keeps its default of 10.
model = SimplifiedAlexNet(dropout_prob=0).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training function
def train(model, train_loader, criterion, optimizer, device):
    """Run one optimisation pass over ``train_loader`` and return the mean loss.

    Args:
        model: Network to optimise; it is switched to training mode here.
        train_loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
            Assumed to return the mean loss over the batch (the default
            reduction of ``nn.CrossEntropyLoss``) -- confirm before passing
            a criterion with a different reduction.
        optimizer: Optimiser whose gradients are zeroed and stepped once per batch.
        device: Device the batch tensors are moved to before the forward pass.

    Returns:
        float: Loss averaged over every sample seen during the pass.

    Raises:
        ZeroDivisionError: If ``train_loader`` yields no samples.
    """
    model.train()
    loss_sum = 0.0
    sample_count = 0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # Weight each batch mean by its size so a short final batch does not
        # count as much as a full one in the epoch average.
        batch_size = labels.size(0)
        loss_sum += loss.item() * batch_size
        sample_count += batch_size
    return loss_sum / sample_count

# Validation function
def validate(model, test_loader, criterion, device):
    """Evaluate ``model`` on ``test_loader`` without tracking gradients.

    Args:
        model: Network to evaluate; it is switched to eval mode here.
        test_loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
            Assumed to return the mean loss over the batch (the default
            reduction of ``nn.CrossEntropyLoss``) -- confirm before passing
            a criterion with a different reduction.
        device: Device the batch tensors are moved to before the forward pass.

    Returns:
        tuple[float, float]: ``(mean loss per sample, fraction of samples whose
        highest-scoring class equals the label)``.

    Raises:
        ZeroDivisionError: If ``test_loader`` yields no samples.
    """
    model.eval()
    loss_sum = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            batch_size = labels.size(0)
            # Weight the batch mean by its size so loss and accuracy are both
            # per-sample averages, even when the last batch is short.
            loss_sum += criterion(outputs, labels).item() * batch_size
            predicted = outputs.argmax(dim=1)
            correct += (predicted == labels).sum().item()
            total += batch_size
    return loss_sum / total, correct / total

# Training loop
# Each epoch runs one training pass and then one evaluation pass.
# NOTE: the "Val" metrics are computed on test_loader (the train=False split);
# no separate validation split is held out in this cell.
epochs = 20
for epoch in range(epochs):
    train_loss = train(model, train_loader, criterion, optimizer, device)
    val_loss, val_acc = validate(model, test_loader, criterion, device)
    # epoch is zero-based, so it is shown as epoch+1.
    print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")

# Print the number of parameters
def count_parameters(model):
    """Return the total element count of the model's trainable parameters.

    Parameters with ``requires_grad`` set to False are left out of the total.
    """
    trainable_total = 0
    for param in model.parameters():
        if not param.requires_grad:
            continue
        trainable_total += param.numel()
    return trainable_total

# Trainable-parameter total of the model built in this cell.
print(f"Number of parameters in the simplified AlexNet: {count_parameters(model)}")

# Compare with the original AlexNet (approx. 60 million parameters)
# The reference figure is a hard-coded string; it is not computed here.
print("Number of parameters in the original AlexNet: ~60 million")

Files already downloaded and verified
Files already downloaded and verified
Epoch 1/20, Train Loss: 1.6742, Val Loss: 1.3472, Val Acc: 0.5031
Epoch 2/20, Train Loss: 1.2274, Val Loss: 1.1424, Val Acc: 0.5939
Epoch 3/20, Train Loss: 1.0246, Val Loss: 1.0440, Val Acc: 0.6286
Epoch 4/20, Train Loss: 0.8921, Val Loss: 0.9598, Val Acc: 0.6653
Epoch 5/20, Train Loss: 0.7655, Val Loss: 0.9447, Val Acc: 0.6872
Epoch 6/20, Train Loss: 0.6612, Val Loss: 0.9155, Val Acc: 0.6939
Epoch 7/20, Train Loss: 0.5648, Val Loss: 0.9116, Val Acc: 0.7094
Epoch 8/20, Train Loss: 0.4844, Val Loss: 0.9343, Val Acc: 0.7172
Epoch 9/20, Train Loss: 0.3977, Val Loss: 0.9880, Val Acc: 0.7052
Epoch 10/20, Train Loss: 0.3367, Val Loss: 1.0480, Val Acc: 0.7101
Epoch 11/20, Train Loss: 0.2903, Val Loss: 1.2598, Val Acc: 0.6914
Epoch 12/20, Train Loss: 0.2354, Val Loss: 1.2129, Val Acc: 0.7118
Epoch 13/20, Train Loss: 0.2043, Val Loss: 1.2040, Val Acc: 0.7075
Epoch 14/20, Train Loss: 0.1765, Val Loss: 1.3387, Val Acc: 0.

In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.transforms import ToTensor
from torchsummary import summary

# Define a simplified AlexNet
class SimplifiedAlexNet(nn.Module):
    """AlexNet-style CNN scaled down for small 3-channel images.

    Five 3x3 convolutions interleaved with three 2x2 max-pools feed a
    three-layer fully connected head. The shape notes below assume a
    3x32x32 input, which is what the 256 * 2 * 2 flatten size requires.

    Args:
        num_classes: Output width of the final linear layer.
        dropout_prob: Drop probability shared by both dropout layers in the head.
    """

    def __init__(self, num_classes=10, dropout_prob=0.5):
        super().__init__()

        conv_layers = [
            # Stage 1: stride-2 conv then pool -> 64x16x16 -> 64x8x8
            nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # Stage 2: conv then pool -> 192x8x8 -> 192x4x4
            nn.Conv2d(64, 192, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # Stage 3: three size-preserving convs -> 384x4x4 -> 256x4x4 -> 256x4x4
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            # Final pool -> 256x2x2
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]
        self.features = nn.Sequential(*conv_layers)

        head_layers = [
            nn.Dropout(p=dropout_prob),
            nn.Linear(256 * 2 * 2, 4096),  # flattened 256x2x2 feature map
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout_prob),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Run the conv stack, flatten all but the batch dim, and apply the head."""
        feature_maps = self.features(x)
        flattened = feature_maps.flatten(start_dim=1)
        return self.classifier(flattened)

# Load CIFAR-10 dataset
# Convert each image to a tensor, then normalise every channel with mean 0.5 / std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Both splits are stored under ./data (downloaded if missing) and share the same transform.
train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Batches of 128; only the training split is shuffled.
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)

# Initialize the model, loss function, and optimizer
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# This run enables dropout with probability 0.5; num_classes keeps its default of 10.
model = SimplifiedAlexNet(dropout_prob=0.5).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training function
def train(model, train_loader, criterion, optimizer, device):
    """Run one optimisation pass over ``train_loader`` and return the mean loss.

    Args:
        model: Network to optimise; it is switched to training mode here.
        train_loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
            Assumed to return the mean loss over the batch (the default
            reduction of ``nn.CrossEntropyLoss``) -- confirm before passing
            a criterion with a different reduction.
        optimizer: Optimiser whose gradients are zeroed and stepped once per batch.
        device: Device the batch tensors are moved to before the forward pass.

    Returns:
        float: Loss averaged over every sample seen during the pass.

    Raises:
        ZeroDivisionError: If ``train_loader`` yields no samples.
    """
    model.train()
    loss_sum = 0.0
    sample_count = 0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # Weight each batch mean by its size so a short final batch does not
        # count as much as a full one in the epoch average.
        batch_size = labels.size(0)
        loss_sum += loss.item() * batch_size
        sample_count += batch_size
    return loss_sum / sample_count

# Validation function
def validate(model, test_loader, criterion, device):
    """Evaluate ``model`` on ``test_loader`` without tracking gradients.

    Args:
        model: Network to evaluate; it is switched to eval mode here.
        test_loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
            Assumed to return the mean loss over the batch (the default
            reduction of ``nn.CrossEntropyLoss``) -- confirm before passing
            a criterion with a different reduction.
        device: Device the batch tensors are moved to before the forward pass.

    Returns:
        tuple[float, float]: ``(mean loss per sample, fraction of samples whose
        highest-scoring class equals the label)``.

    Raises:
        ZeroDivisionError: If ``test_loader`` yields no samples.
    """
    model.eval()
    loss_sum = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            batch_size = labels.size(0)
            # Weight the batch mean by its size so loss and accuracy are both
            # per-sample averages, even when the last batch is short.
            loss_sum += criterion(outputs, labels).item() * batch_size
            predicted = outputs.argmax(dim=1)
            correct += (predicted == labels).sum().item()
            total += batch_size
    return loss_sum / total, correct / total

# Training loop
# Each epoch runs one training pass and then one evaluation pass.
# NOTE: the "Val" metrics are computed on test_loader (the train=False split);
# no separate validation split is held out in this cell.
epochs = 20
for epoch in range(epochs):
    train_loss = train(model, train_loader, criterion, optimizer, device)
    val_loss, val_acc = validate(model, test_loader, criterion, device)
    # epoch is zero-based, so it is shown as epoch+1.
    print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")

# Print the number of parameters
def count_parameters(model):
    """Return the total element count of the model's trainable parameters.

    Parameters with ``requires_grad`` set to False are left out of the total.
    """
    trainable_total = 0
    for param in model.parameters():
        if not param.requires_grad:
            continue
        trainable_total += param.numel()
    return trainable_total

# Trainable-parameter total of the model built in this cell.
print(f"Number of parameters in the simplified AlexNet: {count_parameters(model)}")

# Compare with the original AlexNet (approx. 60 million parameters)
# The reference figure is a hard-coded string; it is not computed here.
print("Number of parameters in the original AlexNet: ~60 million")

Files already downloaded and verified
Files already downloaded and verified
Epoch 1/20, Train Loss: 1.6734, Val Loss: 1.3377, Val Acc: 0.5076
Epoch 2/20, Train Loss: 1.2570, Val Loss: 1.1095, Val Acc: 0.6009
Epoch 3/20, Train Loss: 1.0750, Val Loss: 1.0124, Val Acc: 0.6439
Epoch 4/20, Train Loss: 0.9493, Val Loss: 0.9938, Val Acc: 0.6527
Epoch 5/20, Train Loss: 0.8212, Val Loss: 0.9345, Val Acc: 0.6828
Epoch 6/20, Train Loss: 0.7326, Val Loss: 0.9199, Val Acc: 0.6842
Epoch 7/20, Train Loss: 0.6514, Val Loss: 0.8967, Val Acc: 0.7056
Epoch 8/20, Train Loss: 0.5760, Val Loss: 0.9122, Val Acc: 0.7077
Epoch 9/20, Train Loss: 0.5092, Val Loss: 0.8924, Val Acc: 0.7227
Epoch 10/20, Train Loss: 0.4406, Val Loss: 1.0018, Val Acc: 0.7050
Epoch 11/20, Train Loss: 0.3950, Val Loss: 1.0031, Val Acc: 0.7112
Epoch 12/20, Train Loss: 0.3519, Val Loss: 1.0602, Val Acc: 0.7165
Epoch 13/20, Train Loss: 0.3135, Val Loss: 1.1135, Val Acc: 0.7197
Epoch 14/20, Train Loss: 0.2766, Val Loss: 1.1037, Val Acc: 0.

In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.transforms import ToTensor
from torchsummary import summary

# Define a simplified AlexNet
class SimplifiedAlexNet(nn.Module):
    """AlexNet-style CNN scaled down for small 3-channel images.

    Five 3x3 convolutions interleaved with three 2x2 max-pools feed a
    three-layer fully connected head. The shape notes below assume a
    3x32x32 input, which is what the 256 * 2 * 2 flatten size requires.

    Args:
        num_classes: Output width of the final linear layer.
        dropout_prob: Drop probability shared by both dropout layers in the head.
    """

    def __init__(self, num_classes=100, dropout_prob=0):
        super().__init__()

        conv_layers = [
            # Stage 1: stride-2 conv then pool -> 64x16x16 -> 64x8x8
            nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # Stage 2: conv then pool -> 192x8x8 -> 192x4x4
            nn.Conv2d(64, 192, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # Stage 3: three size-preserving convs -> 384x4x4 -> 256x4x4 -> 256x4x4
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            # Final pool -> 256x2x2
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]
        self.features = nn.Sequential(*conv_layers)

        head_layers = [
            nn.Dropout(p=dropout_prob),
            nn.Linear(256 * 2 * 2, 4096),  # flattened 256x2x2 feature map
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout_prob),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Run the conv stack, flatten all but the batch dim, and apply the head."""
        feature_maps = self.features(x)
        flattened = feature_maps.flatten(start_dim=1)
        return self.classifier(flattened)

# Load CIFAR-100 dataset
# Convert each image to a tensor, then normalise every channel with mean 0.5 / std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Both splits are stored under ./data (downloaded if missing) and share the same transform.
train_dataset = datasets.CIFAR100(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR100(root='./data', train=False, download=True, transform=transform)

# Batches of 128; only the training split is shuffled.
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)

# Initialize the model, loss function, and optimizer
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# 100 output classes to match CIFAR-100; this run disables dropout (dropout_prob=0).
model = SimplifiedAlexNet(num_classes=100, dropout_prob=0).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training function
def train(model, train_loader, criterion, optimizer, device):
    """Run one optimisation pass over ``train_loader`` and return the mean loss.

    Args:
        model: Network to optimise; it is switched to training mode here.
        train_loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
            Assumed to return the mean loss over the batch (the default
            reduction of ``nn.CrossEntropyLoss``) -- confirm before passing
            a criterion with a different reduction.
        optimizer: Optimiser whose gradients are zeroed and stepped once per batch.
        device: Device the batch tensors are moved to before the forward pass.

    Returns:
        float: Loss averaged over every sample seen during the pass.

    Raises:
        ZeroDivisionError: If ``train_loader`` yields no samples.
    """
    model.train()
    loss_sum = 0.0
    sample_count = 0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # Weight each batch mean by its size so a short final batch does not
        # count as much as a full one in the epoch average.
        batch_size = labels.size(0)
        loss_sum += loss.item() * batch_size
        sample_count += batch_size
    return loss_sum / sample_count

# Validation function
def validate(model, test_loader, criterion, device):
    """Evaluate ``model`` on ``test_loader`` without tracking gradients.

    Args:
        model: Network to evaluate; it is switched to eval mode here.
        test_loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
            Assumed to return the mean loss over the batch (the default
            reduction of ``nn.CrossEntropyLoss``) -- confirm before passing
            a criterion with a different reduction.
        device: Device the batch tensors are moved to before the forward pass.

    Returns:
        tuple[float, float]: ``(mean loss per sample, fraction of samples whose
        highest-scoring class equals the label)``.

    Raises:
        ZeroDivisionError: If ``test_loader`` yields no samples.
    """
    model.eval()
    loss_sum = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            batch_size = labels.size(0)
            # Weight the batch mean by its size so loss and accuracy are both
            # per-sample averages, even when the last batch is short.
            loss_sum += criterion(outputs, labels).item() * batch_size
            predicted = outputs.argmax(dim=1)
            correct += (predicted == labels).sum().item()
            total += batch_size
    return loss_sum / total, correct / total

# Training loop
# Each epoch runs one training pass and then one evaluation pass.
# NOTE: the "Val" metrics are computed on test_loader (the train=False split);
# no separate validation split is held out in this cell.
epochs = 20
for epoch in range(epochs):
    train_loss = train(model, train_loader, criterion, optimizer, device)
    val_loss, val_acc = validate(model, test_loader, criterion, device)
    # epoch is zero-based, so it is shown as epoch+1.
    print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")

# Print the number of parameters
def count_parameters(model):
    """Return the total element count of the model's trainable parameters.

    Parameters with ``requires_grad`` set to False are left out of the total.
    """
    trainable_total = 0
    for param in model.parameters():
        if not param.requires_grad:
            continue
        trainable_total += param.numel()
    return trainable_total

# Trainable-parameter total of the model built in this cell.
print(f"Number of parameters in the simplified AlexNet: {count_parameters(model)}")

# Compare with the original AlexNet (approx. 60 million parameters)
# The reference figure is a hard-coded string; it is not computed here.
print("Number of parameters in the original AlexNet: ~60 million")

Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ./data\cifar-100-python.tar.gz


100%|██████████████████████████████████████████████████████████████| 169001437/169001437 [00:02<00:00, 75369816.68it/s]


Extracting ./data\cifar-100-python.tar.gz to ./data
Files already downloaded and verified
Epoch 1/20, Train Loss: 4.1415, Val Loss: 3.8418, Val Acc: 0.0993
Epoch 2/20, Train Loss: 3.6573, Val Loss: 3.4834, Val Acc: 0.1563
Epoch 3/20, Train Loss: 3.2712, Val Loss: 3.1389, Val Acc: 0.2194
Epoch 4/20, Train Loss: 2.9324, Val Loss: 2.8924, Val Acc: 0.2751
Epoch 5/20, Train Loss: 2.6572, Val Loss: 2.7564, Val Acc: 0.3025
Epoch 6/20, Train Loss: 2.4252, Val Loss: 2.6240, Val Acc: 0.3273
Epoch 7/20, Train Loss: 2.2095, Val Loss: 2.5485, Val Acc: 0.3475
Epoch 8/20, Train Loss: 1.9996, Val Loss: 2.5082, Val Acc: 0.3664
Epoch 9/20, Train Loss: 1.8043, Val Loss: 2.5307, Val Acc: 0.3724
Epoch 10/20, Train Loss: 1.6019, Val Loss: 2.6603, Val Acc: 0.3672
Epoch 11/20, Train Loss: 1.4115, Val Loss: 2.7537, Val Acc: 0.3745
Epoch 12/20, Train Loss: 1.2173, Val Loss: 2.9618, Val Acc: 0.3746
Epoch 13/20, Train Loss: 1.0312, Val Loss: 3.2201, Val Acc: 0.3682
Epoch 14/20, Train Loss: 0.8687, Val Loss: 3.378

In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.transforms import ToTensor
from torchsummary import summary

# Define a simplified AlexNet
class SimplifiedAlexNet(nn.Module):
    """AlexNet-style CNN scaled down for small 3-channel images.

    Five 3x3 convolutions interleaved with three 2x2 max-pools feed a
    three-layer fully connected head. The shape notes below assume a
    3x32x32 input, which is what the 256 * 2 * 2 flatten size requires.

    Args:
        num_classes: Output width of the final linear layer.
        dropout_prob: Drop probability shared by both dropout layers in the head.
    """

    def __init__(self, num_classes=100, dropout_prob=0.5):
        super().__init__()

        conv_layers = [
            # Stage 1: stride-2 conv then pool -> 64x16x16 -> 64x8x8
            nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # Stage 2: conv then pool -> 192x8x8 -> 192x4x4
            nn.Conv2d(64, 192, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # Stage 3: three size-preserving convs -> 384x4x4 -> 256x4x4 -> 256x4x4
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            # Final pool -> 256x2x2
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]
        self.features = nn.Sequential(*conv_layers)

        head_layers = [
            nn.Dropout(p=dropout_prob),
            nn.Linear(256 * 2 * 2, 4096),  # flattened 256x2x2 feature map
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout_prob),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Run the conv stack, flatten all but the batch dim, and apply the head."""
        feature_maps = self.features(x)
        flattened = feature_maps.flatten(start_dim=1)
        return self.classifier(flattened)

# Load CIFAR-100 dataset
# Convert each image to a tensor, then normalise every channel with mean 0.5 / std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Both splits are stored under ./data (downloaded if missing) and share the same transform.
train_dataset = datasets.CIFAR100(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR100(root='./data', train=False, download=True, transform=transform)

# Batches of 128; only the training split is shuffled.
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)

# Initialize the model, loss function, and optimizer
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# 100 output classes to match CIFAR-100; this run enables dropout with probability 0.5.
model = SimplifiedAlexNet(num_classes=100, dropout_prob=0.5).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training function
def train(model, train_loader, criterion, optimizer, device):
    """Run one optimisation pass over ``train_loader`` and return the mean loss.

    Args:
        model: Network to optimise; it is switched to training mode here.
        train_loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
            Assumed to return the mean loss over the batch (the default
            reduction of ``nn.CrossEntropyLoss``) -- confirm before passing
            a criterion with a different reduction.
        optimizer: Optimiser whose gradients are zeroed and stepped once per batch.
        device: Device the batch tensors are moved to before the forward pass.

    Returns:
        float: Loss averaged over every sample seen during the pass.

    Raises:
        ZeroDivisionError: If ``train_loader`` yields no samples.
    """
    model.train()
    loss_sum = 0.0
    sample_count = 0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # Weight each batch mean by its size so a short final batch does not
        # count as much as a full one in the epoch average.
        batch_size = labels.size(0)
        loss_sum += loss.item() * batch_size
        sample_count += batch_size
    return loss_sum / sample_count

# Validation function
def validate(model, test_loader, criterion, device):
    """Evaluate ``model`` on ``test_loader`` without tracking gradients.

    Args:
        model: Network to evaluate; it is switched to eval mode here.
        test_loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
            Assumed to return the mean loss over the batch (the default
            reduction of ``nn.CrossEntropyLoss``) -- confirm before passing
            a criterion with a different reduction.
        device: Device the batch tensors are moved to before the forward pass.

    Returns:
        tuple[float, float]: ``(mean loss per sample, fraction of samples whose
        highest-scoring class equals the label)``.

    Raises:
        ZeroDivisionError: If ``test_loader`` yields no samples.
    """
    model.eval()
    loss_sum = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            batch_size = labels.size(0)
            # Weight the batch mean by its size so loss and accuracy are both
            # per-sample averages, even when the last batch is short.
            loss_sum += criterion(outputs, labels).item() * batch_size
            predicted = outputs.argmax(dim=1)
            correct += (predicted == labels).sum().item()
            total += batch_size
    return loss_sum / total, correct / total

# Training loop
# Each epoch runs one training pass and then one evaluation pass.
# NOTE: the "Val" metrics are computed on test_loader (the train=False split);
# no separate validation split is held out in this cell.
epochs = 20
for epoch in range(epochs):
    train_loss = train(model, train_loader, criterion, optimizer, device)
    val_loss, val_acc = validate(model, test_loader, criterion, device)
    # epoch is zero-based, so it is shown as epoch+1.
    print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")

# Print the number of parameters
def count_parameters(model):
    """Return the total element count of the model's trainable parameters.

    Parameters with ``requires_grad`` set to False are left out of the total.
    """
    trainable_total = 0
    for param in model.parameters():
        if not param.requires_grad:
            continue
        trainable_total += param.numel()
    return trainable_total

# Trainable-parameter total of the model built in this cell.
print(f"Number of parameters in the simplified AlexNet: {count_parameters(model)}")

# Compare with the original AlexNet (approx. 60 million parameters)
# The reference figure is a hard-coded string; it is not computed here.
print("Number of parameters in the original AlexNet: ~60 million")

Files already downloaded and verified
Files already downloaded and verified
Epoch 1/20, Train Loss: 4.3477, Val Loss: 3.9295, Val Acc: 0.0703
Epoch 2/20, Train Loss: 3.7956, Val Loss: 3.6155, Val Acc: 0.1182
Epoch 3/20, Train Loss: 3.4723, Val Loss: 3.3484, Val Acc: 0.1760
Epoch 4/20, Train Loss: 3.2075, Val Loss: 3.1222, Val Acc: 0.2216
Epoch 5/20, Train Loss: 2.9997, Val Loss: 2.9972, Val Acc: 0.2516
Epoch 6/20, Train Loss: 2.8263, Val Loss: 2.8310, Val Acc: 0.2804
Epoch 7/20, Train Loss: 2.6730, Val Loss: 2.7366, Val Acc: 0.3046
Epoch 8/20, Train Loss: 2.5426, Val Loss: 2.7374, Val Acc: 0.3136
Epoch 9/20, Train Loss: 2.4219, Val Loss: 2.6665, Val Acc: 0.3186
Epoch 10/20, Train Loss: 2.3098, Val Loss: 2.6831, Val Acc: 0.3268
Epoch 11/20, Train Loss: 2.2080, Val Loss: 2.6399, Val Acc: 0.3389
Epoch 12/20, Train Loss: 2.1160, Val Loss: 2.6638, Val Acc: 0.3381
Epoch 13/20, Train Loss: 2.0319, Val Loss: 2.6212, Val Acc: 0.3533
Epoch 14/20, Train Loss: 1.9508, Val Loss: 2.6574, Val Acc: 0.