In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision.datasets import CIFAR10
import torchvision.transforms as transforms
import task_complexity
import matplotlib.pyplot as plt
import numpy as np

In [2]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

target_transform_cifar10 = transforms.Compose([
    lambda x: torch.tensor(x),
    lambda x: F.one_hot(x, num_classes = 10)
    ])

batch_size = 128

trainset_cifar10 = CIFAR10(root = '../example_data/', train = True, transform = transform, target_transform = target_transform_cifar10)
trainloader_cifar10 = torch.utils.data.DataLoader(trainset_cifar10, batch_size = batch_size, shuffle = True, num_workers = 2)

In [3]:
class CNN(nn.Module):
        def __init__(self, layers = 5):
            super().__init__()
            self.block1 = nn.Sequential(*[nn.Conv2d(3, 16, 4, 2, 1), nn.ReLU(inplace = True), nn.BatchNorm2d(16)])
            self.block2 = nn.Sequential(*[nn.Conv2d(16, 32, 4, 2, 1), nn.ReLU(inplace = True), nn.BatchNorm2d(32)])
            self.block3 = nn.Sequential(*[nn.Conv2d(32, 64, 4, 2, 1), nn.ReLU(inplace = True), nn.BatchNorm2d(64)])
            
            self.pool = nn.AdaptiveAvgPool2d((1, 1))
            self.flat = nn.Flatten()
            self.linear = nn.Linear(64, 10)

        def forward(self, x):
            x = self.block1(x)
            x = self.block2(x)
            x = self.block3(x)
            x = self.pool(x)
            x = self.flat(x)
            x = self.linear(x)
            x = nn.Softmax()(x)
            return x

In [4]:
device = torch.device("cuda")

In [5]:
net = CNN().to(device)

In [6]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters())

In [7]:
for epoch in range(10):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader_cifar10, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.double().to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 20 == 0:    # print every 2000 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
            running_loss = 0.0

print('Finished Training')

  return self._call_impl(*args, **kwargs)


[1,     1] loss: 0.001
[1,    21] loss: 0.022
[1,    41] loss: 0.022
[1,    61] loss: 0.022
[1,    81] loss: 0.021
[1,   101] loss: 0.021
[1,   121] loss: 0.021
[1,   141] loss: 0.021
[1,   161] loss: 0.021
[1,   181] loss: 0.021
[1,   201] loss: 0.021
[1,   221] loss: 0.021
[1,   241] loss: 0.020
[1,   261] loss: 0.020
[1,   281] loss: 0.020
[1,   301] loss: 0.020
[1,   321] loss: 0.020
[1,   341] loss: 0.020
[1,   361] loss: 0.020
[1,   381] loss: 0.020
[2,     1] loss: 0.001
[2,    21] loss: 0.020
[2,    41] loss: 0.020
[2,    61] loss: 0.020
[2,    81] loss: 0.020
[2,   101] loss: 0.020
[2,   121] loss: 0.020
[2,   141] loss: 0.020
[2,   161] loss: 0.020
[2,   181] loss: 0.020
[2,   201] loss: 0.020
[2,   221] loss: 0.020
[2,   241] loss: 0.020
[2,   261] loss: 0.020
[2,   281] loss: 0.020
[2,   301] loss: 0.019
[2,   321] loss: 0.020
[2,   341] loss: 0.020
[2,   361] loss: 0.020
[2,   381] loss: 0.019
[3,     1] loss: 0.001
[3,    21] loss: 0.019
[3,    41] loss: 0.019
[3,    61] 