In [50]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import random_split
import time 

In [51]:
# Pick the best available accelerator instead of assuming Apple-silicon MPS,
# so the notebook also runs on CUDA machines and on CPU-only machines.
if torch.backends.mps.is_available():
    device = torch.device('mps')
elif torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device

device(type='mps')

The input size of **VGG-13** is **3x224x224**. We transform every image to 224x224 by resizing it to 256 and then center-cropping to 224.

In [52]:
# Preprocessing applied to every image, in order:
#   1. resize to 256,
#   2. crop the central 224x224 region (the network's input size),
#   3. convert the image to a tensor,
#   4. normalise each channel with the given per-channel mean and std
#      (these values match the commonly used ImageNet statistics).
transformation = transforms.Compose(
    [
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [53]:
# Build the dataset from the `cnn_dataset` folder; ImageFolder derives each
# sample's label from the sub-directory it lives in, and every image is passed
# through `transformation` when it is loaded.
dataset = torchvision.datasets.ImageFolder(
    root='cnn_dataset',
    transform=transformation,
)

In [54]:
# 80/20 train/test split. The test size is computed as the remainder so the two
# parts always add up to len(dataset), whatever the rounding of the 80% share.
train_size = int(0.8 * len(dataset))
testing_size = len(dataset) - train_size
# A seeded generator makes the split identical on every run; without it the
# membership of the test set changes each time the kernel is restarted, so
# accuracy numbers from different runs are not comparable.
train_dataset, testing_dataset = random_split(
    dataset,
    [train_size, testing_size],
    generator=torch.Generator().manual_seed(42),
)

In [55]:
# Mini-batch loaders (64 images per batch, 4 worker processes each).
# Training batches are reshuffled every epoch; evaluation keeps a fixed order.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True,
    num_workers=4,
)
testing_loader = torch.utils.data.DataLoader(
    testing_dataset,
    batch_size=64,
    shuffle=False,
    num_workers=4,
)

In [67]:
class VGG13(nn.Module):
    """VGG-13 style convolutional classifier for 3x224x224 images.

    Ten 3x3 convolutions (stride 1, 'same' padding) are arranged in five
    blocks; each block ends with a 2x2, stride-2 max-pool that halves the
    spatial size (224 -> 112 -> 56 -> 28 -> 14 -> 7). The resulting
    512 x 7 x 7 feature map is flattened and fed to a fully connected head.

    Args:
        num_classes: Number of output classes (defaults to 3, the value that
            was previously hard-coded).

    Notes:
        ``forward`` returns raw class scores (logits). ``nn.CrossEntropyLoss``
        applies log-softmax itself, so feeding it softmax outputs would
        normalise twice and flatten the gradients. ``self.softmax`` is kept as
        an attribute for callers that need probabilities:
        ``model.softmax(model(x))``.
    """

    def __init__(self, num_classes=3):
        super(VGG13, self).__init__()
        self.features = nn.Sequential(
            # Block 1: 224x224
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding='same'),  # 224
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding='same'),  # 224
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),  # 112
            # Block 2: 112x112
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding='same'),  # 112
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),  # 112
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),  # 56
            # Block 3: 56x56
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding='same'),  # 56
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding='same'),  # 56
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),  # 28
            # Block 4: 28x28
            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding='same'),  # 28
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding='same'),  # 28
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),  # 14
            # Block 5: 14x14
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding='same'),  # 14
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding='same'),  # 14
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),  # 7
            # Classifier head
            nn.Flatten(),  # 512 channels * 7 * 7 = 25088 features
            # NOTE(review): p=0.8 is an unusually aggressive dropout rate; kept
            # as in the original, confirm it is intentional.
            nn.Dropout(p=0.8),
            # in_features must match the flattened 512x7x7 map; the previous
            # value of 8192 (= 512*4*4) raises a shape error for 224x224 input.
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 1000),
            nn.ReLU(),
            nn.Linear(1000, num_classes))
        # Not used in forward(); available for turning logits into probabilities.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return unnormalised class scores of shape (batch, num_classes)."""
        return self.features(x)

PyTorch only supports `padding='same'` for stride 1, so it is used on the convolution layers only. The max-pool layers (kernel size 2, stride 2) use no padding, so each one halves the spatial size: 224 → 112 → 56 → 28 → 14 → 7.

In [68]:
# Build the network, then move its parameters to the selected device.
model = VGG13()
model = model.to(device)

In [69]:
# Show the module's repr to inspect the layer stack.
model

VGG13(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (13): ReLU(inplace=True)
    (14): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (15): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (16): ReLU(inplace

In [65]:
# Cross-entropy loss for multi-class classification, placed on the same device
# as the model.
criterion = nn.CrossEntropyLoss()
criterion = criterion.to(device)
# Plain SGD with momentum over all model parameters.
optimizer = torch.optim.SGD(
    params=model.parameters(),
    momentum=0.9,
    lr=0.01,
)
def train_one_epoch(epoch_index):
    """Run one optimisation pass over ``train_loader``.

    Uses the module-level ``model``, ``optimizer``, ``criterion`` and
    ``device``. Every 100 batches the loss summed over those batches is
    printed and the running total is reset.

    Args:
        epoch_index: Epoch number; only used in the progress message.

    Returns:
        Number of training samples whose predicted class matched the label,
        counted over the whole epoch.
    """
    training_correct_ones = 0
    running_loss = 0.0
    for step, (batch_inputs, batch_labels) in enumerate(train_loader):
        optimizer.zero_grad()
        batch_outputs = model(batch_inputs.to(device))
        targets = batch_labels.to(device)

        batch_loss = criterion(batch_outputs, targets)
        batch_loss.backward()
        optimizer.step()
        running_loss += batch_loss.item()

        # Index of the largest score along the class dimension is the prediction.
        predicted = batch_outputs.max(dim=1).indices
        training_correct_ones += (predicted == targets).sum().item()

        # Report after every 100th batch (steps 99, 199, ...).
        if (step + 1) % 100 == 0:
            print(f'current epoch:{epoch_index},running_loss is {running_loss}')
            running_loss = 0.0
    return training_correct_ones

In [66]:
epochs=2
# Per-epoch counts of correctly classified samples (train / test).
training_epochs_VGG=[]
testing_epochs_VGG=[]
for epoch in range(epochs):
    # Training phase: dropout active.
    model.train(True)
    training_correct_ones=train_one_epoch(epoch)

    # Evaluation phase: dropout disabled.
    model.train(False)
    correct_ones=0
    # Autograd is not needed for evaluation. Disabling it stops PyTorch from
    # keeping every layer's activations for a backward pass that never happens,
    # which noticeably lowers peak memory on a network of this size.
    with torch.no_grad():
        for test_inputs,test_labels in testing_loader:
            test_inputs,test_labels=test_inputs.to(device),test_labels.to(device)
            test_outputs=model(test_inputs)
            predictions=test_outputs.argmax(dim=1)
            # Accumulate as a plain int so the total is well-defined even when
            # the test loader yields no batches.
            correct_ones+=(predictions==test_labels).sum().item()
    training_epochs_VGG.append(training_correct_ones)
    testing_epochs_VGG.append(correct_ones)
    print(correct_ones)

Error: command buffer exited with error status.
	The Metal Performance Shaders operations encoded on it may not have completed.
	Error: 
	(null)
	Ignored (for causing prior/excessive GPU errors) (00000004:kIOGPUCommandBufferCallbackErrorSubmissionsIgnored)
	<AGXG13XFamilyCommandBuffer: 0x15ca7fb30>
    label = <none> 
    device = <AGXG13XDevice: 0x108388a00>
        name = Apple M1 Pro 
    commandQueue = <AGXG13XFamilyCommandQueue: 0x108389200>
        label = <none> 
        device = <AGXG13XDevice: 0x108388a00>
            name = Apple M1 Pro 
    retainedReferences = 1


RuntimeError: MPS backend out of memory (MPS allocated: 12.23 GB, other allocations: 5.82 GB, max allowed: 18.13 GB). Tried to allocate 98.00 MB on private pool. Use PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0 to disable upper limit for memory allocations (may cause system failure).

The network has four 512-channel convolutional layers, and the first fully connected layer after flattening the convolutional output has 25,088 input features. Together these consumed too much memory, which caused the MPS backend to run out of memory.