In [22]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F

In [27]:
# Hyperparameters
batch_size = 64
learning_rate = 0.001
num_epochs = 5

# Transformations
transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Load MNIST dataset
# train_dataset = torchvision.datasets.MNIST(root='./data', train=True, transform=transform)
# test_dataset = torchvision.datasets.MNIST(root='./data', train=False, transform=transform)

train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, transform=transform_train, download=True)
test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, transform=transform_test, download=True)

train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

Files already downloaded and verified
Files already downloaded and verified


In [23]:
# Define CNN Model
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(64 * 8 * 8, 1000)
        self.fc2 = nn.Linear(1000, 10)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = x.view(-1, 64 * 8 * 8)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Initialize model, loss function, and optimizer
model = CNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {loss.item():.4f}')

# Save the model
# torch.save(model.state_dict(), 'base_cnn.pth')

# Evaluate the model
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f'Accuracy of the model on the 10000 test images: {100 * correct / total} %')

Files already downloaded and verified
Files already downloaded and verified
Epoch [1/5], Step [100/782], Loss: 1.7628
Epoch [1/5], Step [200/782], Loss: 1.4399
Epoch [1/5], Step [300/782], Loss: 1.4755
Epoch [1/5], Step [400/782], Loss: 1.3536
Epoch [1/5], Step [500/782], Loss: 1.6167
Epoch [1/5], Step [600/782], Loss: 1.3590
Epoch [1/5], Step [700/782], Loss: 1.3426
Epoch [2/5], Step [100/782], Loss: 1.1341
Epoch [2/5], Step [200/782], Loss: 1.2550
Epoch [2/5], Step [300/782], Loss: 1.1822
Epoch [2/5], Step [400/782], Loss: 0.8934
Epoch [2/5], Step [500/782], Loss: 1.3029
Epoch [2/5], Step [600/782], Loss: 1.0129
Epoch [2/5], Step [700/782], Loss: 1.0653
Epoch [3/5], Step [100/782], Loss: 1.3633
Epoch [3/5], Step [200/782], Loss: 1.2100
Epoch [3/5], Step [300/782], Loss: 0.8252
Epoch [3/5], Step [400/782], Loss: 1.0216
Epoch [3/5], Step [500/782], Loss: 0.7675
Epoch [3/5], Step [600/782], Loss: 1.0715
Epoch [3/5], Step [700/782], Loss: 0.6869
Epoch [4/5], Step [100/782], Loss: 0.9244


In [24]:
class ImprovedCNN(nn.Module):
    def __init__(self):
        super(ImprovedCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.conv4 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)
        self.bn4 = nn.BatchNorm2d(256)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.dropout = nn.Dropout(p=0.5)
        self.fc1 = nn.Linear(256 * 2 * 2, 256)
        self.fc2 = nn.Linear(256, 10)
        
    def forward(self, x):
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        x = self.pool(F.relu(self.bn2(self.conv2(x))))
        x = self.pool(F.relu(self.bn3(self.conv3(x))))
        x = self.pool(F.relu(self.bn4(self.conv4(x))))
        x = x.view(-1, 256 * 2 * 2)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

# Initialize model, loss function, and optimizer
model = ImprovedCNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)

# Training loop
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {loss.item():.4f}')

# Save the model
# torch.save(model.state_dict(), 'improved_cnn.pth')

# Evaluate the model
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f'Accuracy of the model on the 10000 test images: {100 * correct / total} %')

Epoch [1/5], Step [100/782], Loss: 1.6767
Epoch [1/5], Step [200/782], Loss: 1.3820
Epoch [1/5], Step [300/782], Loss: 1.5726
Epoch [1/5], Step [400/782], Loss: 1.3531
Epoch [1/5], Step [500/782], Loss: 1.2981
Epoch [1/5], Step [600/782], Loss: 1.5202
Epoch [1/5], Step [700/782], Loss: 1.4114
Epoch [2/5], Step [100/782], Loss: 1.2239
Epoch [2/5], Step [200/782], Loss: 1.1385
Epoch [2/5], Step [300/782], Loss: 1.3204
Epoch [2/5], Step [400/782], Loss: 1.3601
Epoch [2/5], Step [500/782], Loss: 1.1384
Epoch [2/5], Step [600/782], Loss: 1.0992
Epoch [2/5], Step [700/782], Loss: 1.0440
Epoch [3/5], Step [100/782], Loss: 1.3043
Epoch [3/5], Step [200/782], Loss: 1.0034
Epoch [3/5], Step [300/782], Loss: 0.8537
Epoch [3/5], Step [400/782], Loss: 1.1092
Epoch [3/5], Step [500/782], Loss: 0.8045
Epoch [3/5], Step [600/782], Loss: 0.8867
Epoch [3/5], Step [700/782], Loss: 0.9102
Epoch [4/5], Step [100/782], Loss: 1.0762
Epoch [4/5], Step [200/782], Loss: 0.8118
Epoch [4/5], Step [300/782], Loss:

In [29]:
class VisionTransformer(nn.Module):
    def __init__(self, img_size=32, patch_size=4, num_classes=10, dim=256, depth=6, heads=8, mlp_dim=512, dropout=0.1, emb_dropout=0.1):
        super(VisionTransformer, self).__init__()
        
        assert img_size % patch_size == 0, 'Image dimensions must be divisible by the patch size.'
        num_patches = (img_size // patch_size) ** 2
        patch_dim = 3 * patch_size * patch_size
        
        self.patch_size = patch_size
        self.dim = dim
        
        self.to_patch_embedding = nn.Conv2d(3, dim, kernel_size=patch_size, stride=patch_size)
        
        self.pos_embedding = nn.Parameter(torch.randn(1, num_patches + 1, dim))
        self.cls_token = nn.Parameter(torch.randn(1, 1, dim))
        self.dropout = nn.Dropout(emb_dropout)
        
        encoder_layer = nn.TransformerEncoderLayer(d_model=dim, nhead=heads, dim_feedforward=mlp_dim, dropout=dropout)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=depth)
        
        self.to_cls_token = nn.Identity()
        self.mlp_head = nn.Sequential(
            nn.LayerNorm(dim),
            nn.Linear(dim, num_classes)
        )
        
    def forward(self, x):
        x = self.to_patch_embedding(x)
        b, c, h, w = x.shape
        x = x.view(b, c, -1).permute(0, 2, 1)  # Reshape and permute to (batch_size, num_patches, dim)
        
        cls_tokens = self.cls_token.expand(b, -1, -1)
        x = torch.cat((cls_tokens, x), dim=1)
        x += self.pos_embedding[:, :(x.size(1))]
        x = self.dropout(x)
        
        x = self.transformer(x)
        
        x = self.to_cls_token(x[:, 0])
        x = self.mlp_head(x)
        
        return x

# Initialize model, loss function, and optimizer
model = VisionTransformer()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {loss.item():.4f}')

# Save the model
# torch.save(model.state_dict(), 'vision_transformer.pth')

# Evaluate the model
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f'Accuracy of the model on the 10000 test images: {100 * correct / total} %')

Epoch [1/5], Step [100/782], Loss: 2.3125
Epoch [1/5], Step [200/782], Loss: 2.3031
Epoch [1/5], Step [300/782], Loss: 2.3581
Epoch [1/5], Step [400/782], Loss: 2.3232
Epoch [1/5], Step [500/782], Loss: 2.3334
Epoch [1/5], Step [600/782], Loss: 2.3165
Epoch [1/5], Step [700/782], Loss: 2.3309
Epoch [2/5], Step [100/782], Loss: 2.3216
Epoch [2/5], Step [200/782], Loss: 2.2952
Epoch [2/5], Step [300/782], Loss: 2.3121
Epoch [2/5], Step [400/782], Loss: 2.3234
Epoch [2/5], Step [500/782], Loss: 2.3115
Epoch [2/5], Step [600/782], Loss: 2.2975
Epoch [2/5], Step [700/782], Loss: 2.3009
Epoch [3/5], Step [100/782], Loss: 2.2978
Epoch [3/5], Step [200/782], Loss: 2.3083
Epoch [3/5], Step [300/782], Loss: 2.3032
Epoch [3/5], Step [400/782], Loss: 2.3010
Epoch [3/5], Step [500/782], Loss: 2.2942
Epoch [3/5], Step [600/782], Loss: 2.2980
Epoch [3/5], Step [700/782], Loss: 2.3180
Epoch [4/5], Step [100/782], Loss: 2.3092
Epoch [4/5], Step [200/782], Loss: 2.3008
Epoch [4/5], Step [300/782], Loss: