In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import os

# Use the GPU when one is available, otherwise fall back to the CPU so the
# notebook still runs end-to-end on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Build the CIFAR-10 transforms, datasets and data loaders.
#
# T is the training transform: random flip/rotation for augmentation, then
# conversion to a tensor and per-channel normalisation with mean 0.5 and
# std 0.5. The geometric augmentations run first, on the PIL image, so the
# rotation's default fill value (0) is applied in pixel space rather than
# in normalised space.
T = torchvision.transforms.Compose([transforms.RandomHorizontalFlip(),
                                    transforms.RandomRotation(30),
                                    transforms.ToTensor(),
                                    transforms.Normalize(mean=[.5]*3, std=[.5]*3)])
# evalT is the deterministic transform used for validation and test data:
# evaluating on randomly augmented images would make the reported metrics
# noisy and not comparable between runs.
evalT = torchvision.transforms.Compose([transforms.ToTensor(),
                                        transforms.Normalize(mean=[.5]*3, std=[.5]*3)])

dataPath = os.getcwd()
dataset = torchvision.datasets.CIFAR10(dataPath,transform=T,download=True)
# Second view of the same training images, without augmentation; it is only
# used to back the validation subset below.
evalDataset = torchvision.datasets.CIFAR10(dataPath,transform=evalT)

# 40k/10k train/validation split, seeded so the split is reproducible.
split = [40000,10000]
train,validation = torch.utils.data.random_split(dataset,
                                                 split,
                                                 torch.Generator().manual_seed(42))
# Keep the exact same validation indices but read them through the
# non-augmented dataset.
validation = torch.utils.data.Subset(evalDataset,validation.indices)
test = torchvision.datasets.CIFAR10(dataPath,
                                    transform=evalT,
                                    train=False)

batchSize = 100
# Only the training loader shuffles; evaluation order does not matter.
trainL = torch.utils.data.DataLoader(train,batch_size=batchSize,shuffle=True,pin_memory=True)
validL = torch.utils.data.DataLoader(validation,batch_size=batchSize,pin_memory=True)
testL = torch.utils.data.DataLoader(test,batch_size=batchSize,pin_memory=True)

In [None]:
# define the model architectures for each problem

# linear model for part a
class NetA(nn.Module):
    """Linear classifier: one fully-connected layer from pixels to 10 logits."""

    def __init__(self):
        # The original called super(Net, self), but no class named Net exists,
        # so constructing the model raised NameError.
        super().__init__()

        self.linear = nn.Linear(3*32*32,10)

    def forward(self, x):
        """Flatten each image to a 3*32*32 vector and return the logits.

        Returns a tensor with one row of 10 logits per input image.
        """
        # Keep the batch dimension: -1 lets the batch size be inferred, so
        # this works for a whole mini-batch as well as for a single image.
        x = x.reshape(-1, 3*32*32)
        x = self.linear(x)
        return x
    
# model w/ one hidden layer of size M for part b
class NetB(nn.Module):
    """Fully-connected network with a single ReLU hidden layer of width M."""

    def __init__(self, M=256):
        """Build the two linear layers.

        Args:
            M: number of units in the hidden layer (defaults to 256).
        """
        # M belongs to this constructor, not to nn.Module.__init__, and the
        # original super(Net, self) referenced a class that does not exist.
        super().__init__()

        self.hidden = nn.Linear(3*32*32,M)
        self.linear = nn.Linear(M,10)

    def forward(self, x):
        """Flatten each image, apply hidden layer + ReLU, return 10 logits."""
        # Keep the batch dimension so mini-batches of any size work.
        x = x.reshape(-1, 3*32*32)
        # The functional ReLU is F.relu (F.ReLU does not exist).
        x = F.relu(self.hidden(x))
        x = self.linear(x)
        return x

# CNN defined in part c
class NetC(nn.Module):
    """Small CNN: one convolution, ReLU, max-pool, then a linear classifier."""

    def __init__(self,M,k,N):
        """Build the layers.

        Args:
            M: number of convolution filters (output channels).
            k: side length of the square convolution kernel.
            N: side length (and stride) of the square max-pool window.
        """
        super().__init__()

        # Stride 1 (the default) with no padding maps a 32x32 input to a
        # (33-k)x(33-k) feature map, which is the size the linear layer
        # below is dimensioned for. The original stride=k contradicted that.
        self.conv = nn.Conv2d(in_channels=3,
                              out_channels=M,
                              kernel_size=k)
        # nn.MaxPool2d is the module; torch.nn.functional has no MaxPool2d.
        self.pool = nn.MaxPool2d(kernel_size=N,
                                 stride=N)
        # Side of the pooled map; integer floor division keeps the layer
        # size an int (torch.floor only accepts tensors).
        pooledSide = (33-k)//N
        self.linear = nn.Linear(M*pooledSide**2,10)

    def forward(self, x):
        """Return 10 logits per image; assumes a batched (B, 3, 32, 32) input."""
        x = F.relu(self.conv(x))
        x = self.pool(x)
        # Flatten channels and spatial dims before the fully-connected layer.
        x = x.reshape(x.size(0), -1)
        x = self.linear(x)
        return x

# network architecture defined in "Training a Classifier" tutorial
class NetTut(nn.Module):
    """Two conv/pool stages followed by three fully-connected layers."""

    def __init__(self):
        # The original called super(Net, self), but no class named Net exists,
        # so constructing the model raised NameError.
        super().__init__()

        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # After two 5x5 convolutions and two 2x2 pools a 32x32 image is
        # reduced to 16 channels of 5x5.
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return 10 logits per image for a batch of 3x32x32 inputs."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten the feature maps, keeping the batch dimension.
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [None]:
# train the tutorial model
net = NetTut()
net = net.to(device)
net.train()  # make sure the model is in training mode

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

# Report the average loss every `logEvery` mini-batches. The interval is
# capped at the number of batches per epoch so that at least one line is
# printed per epoch even when the loader is shorter than 2000 batches.
logEvery = min(2000, len(trainL))

for epoch in range(2):  # loop over the dataset multiple times

    running_loss = 0.0
    # trainL is the training DataLoader defined in the data-loading cell
    # (the original iterated over an undefined name, `trainloader`).
    for i, data in enumerate(trainL, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data[0].to(device),data[1].to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % logEvery == logEvery - 1:    # print every logEvery mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / logEvery))
            running_loss = 0.0

print('Finished Training')

In [None]:
# Persist the trained network: only its parameters (the state dict) are
# written, not the module object itself.
trainedWeights = net.state_dict()
torch.save(trainedWeights, './cifar_net.pck')