

```
# This is formatted as code
```

# Part 1 - Linear Classifier



In [None]:
from torch import nn

class Classifier(nn.Module):
    """Fully connected image classifier with two hidden layers.

    Each sample is flattened and passed through
    ``Linear -> ReLU -> Linear -> ReLU -> Linear``. The last layer returns
    raw, unbounded class scores (logits), which is the input that
    ``nn.CrossEntropyLoss`` expects.

    Args:
        ninputs: Number of values per flattened sample
            (channels * height * width).
        nhidden: Number of units in each of the two hidden layers.
        nout: Number of output classes.
    """

    def __init__(self,
                 ninputs=3*32*32, # Size of input (channels * size of image)
                 nhidden=512, # Number of units in each hidden layer
                 nout=10, # Number of classes
                ):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(ninputs, nhidden),
            nn.ReLU(),
            nn.Linear(nhidden, nhidden),
            nn.ReLU(),
            # No activation after the output layer: a ReLU here would clamp
            # every logit to >= 0 and block the gradient of negative logits.
            nn.Linear(nhidden, nout),
        )

    def forward(self, x):
        """Return class logits of shape ``(batch, nout)`` for the batch ``x``.

        Every dimension after the first is flattened, so each sample must
        hold ``ninputs`` values (e.g. ``x`` of shape ``(batch, 3, 32, 32)``).
        """
        x = x.view(x.shape[0], -1) # Flatten image -- 3 * 32 * 32
        return self.net(x)

: 

In [None]:
class resnet(nn.Module):
    """Minimal residual network operating on 10-feature inputs.

    Two ``Linear(10, 10) -> ReLU`` stages. The input is added back onto the
    output of the first stage (a skip connection) before the second stage
    is applied.
    """

    def __init__(self):
        super().__init__()
        self.l1 = nn.Sequential(nn.Linear(10, 10), nn.ReLU())
        self.l2 = nn.Sequential(nn.Linear(10, 10), nn.ReLU())

    def forward(self, x):
        """Apply both stages to ``x``, whose last dimension must be 10."""
        skip = x
        x = self.l1(x)
        # Skip connection: add the untouched input to the first stage output
        x = skip + x
        x = self.l2(x)
        return x

: 

: 

# Setup the training program
Now we get to the meat of the subject: putting together the parts we need to train the network.

In [None]:
import torch
from torch import nn
from torch import optim
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms as T # T.ToTensor() converts images to tensors
from tqdm import tqdm, notebook # This is optional but useful

# Let's get the right torch device (preference of GPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Let's set up some parameters
learning_rate=5e-1
nepochs = 10
ninputs=3*32*32
nhidden=1024
nout=10

model = Classifier(ninputs=ninputs,
                  nhidden=nhidden,
                  nout=nout).to(device)
print(model)
# We need an optimizer that tells us what form of gradient descent to do
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

# We also need a loss function
LossFunction = nn.CrossEntropyLoss()

# We're going to reload the data here so we have added clarity

batch_size = 2048

cifar10_dataset = datasets.CIFAR10(root='./', # here
                               train=True, # train split
                               download=True, # we want to get the data
                               transform=T.ToTensor(), # put it into tensor format
                              )

train_data = DataLoader(cifar10_dataset,
                            batch_size=batch_size,
                            shuffle=True, # new sample order every epoch
                            )

# This is default on but let's just be pedantic
model.train()
loss_history = [] # one entry per iteration (mini-batch)
# Placeholder so the per-epoch print below still works if no batch was seen
loss = torch.Tensor([0])
for epoch in notebook.trange(nepochs,
                  desc="Epoch",
                  unit="epoch",
                  disable=False):
    for (data, label) in tqdm(train_data,
                              desc="iteration",
                              unit="%",
                              disable=True):
        optimizer.zero_grad(set_to_none=True) # Here we clear the gradients

        # We need to make sure the tensors are on the same device as our model
        data = data.to(device)
        label = label.to(device)
        out = model(data)

        loss = LossFunction(out, label)

        # PyTorch is Magic!
        loss.backward() # This function calculates all our gradients
        optimizer.step() # This function does our gradient descent with those gradients
        loss_history.append(loss.item())
    # Note: this is the loss of the last mini-batch of the epoch
    print(f"Epoch {epoch}: loss: {loss.item()}")


: 

In [None]:
# The x axis counts optimizer steps (iterations), not epochs:
# loss_history holds one value per mini-batch.
# NOTE(review): plt (presumably matplotlib.pyplot) is not imported in the
# visible cells -- confirm it is defined before this cell runs.
plt.plot(loss_history)
plt.ylabel("Loss")
plt.xlabel("Number of iterations")
plt.title("Neural Network Loss")
plt.show()

: 

# Testing
Now let's see how well our network learned.



In [None]:
# We need to do the test data now
# NOTE: the variable is called "mnist_..." but it holds the CIFAR-10 test
# split; the name is kept because the later evaluation cell reads it.
mnist_test_dataset = datasets.CIFAR10(root='./', # here
                               train=False, # test split
                               download=True, # we want to get the data
                               transform=T.ToTensor(), # put it into tensor format
                              )
test_data = DataLoader(mnist_test_dataset,
                       batch_size=batch_size,
                        )

model.eval()
accuracy = 0 # running count of correctly classified test images
# Evaluation needs no gradients: no_grad() skips building the autograd graph
with torch.no_grad():
    for (data, label) in test_data:
        data = data.to(device)
        label = label.to(device)
        out = model(data)
        # max() returns (values, indices); the index is the predicted class
        answers = out.max(dim=1)[1]
        # .item() keeps the counter a plain Python int
        accuracy += (answers == label).sum().item()
print(f"Total accuracy = {accuracy / len(mnist_test_dataset)*100:.2f}%")

: 

# Summary
You should play around with the above network and test different hyperparameters. Does the accuracy change?

Try to answer the following questions:
- How does Learning Rate change the accuracy?
- How does batch size change things?
- How many hidden layers do we need?
- What happens if we use a different loss function?
- What about a different optimizer?
- Do we need to train longer?

# Getting more advanced
Let's get a bit more advanced with our network. This time we will design our network to take a variable number of hidden layers. We'll be able to add more depth to the network and define how many hidden units each layer has.

In [None]:
class LinearNet(nn.Module):
    """Fully connected classifier with a configurable stack of hidden layers.

    Every hidden layer is a ``Linear`` followed by a ``ReLU``. The output
    layer is a plain ``Linear`` that returns raw class scores (logits),
    which is the input that ``nn.CrossEntropyLoss`` expects.

    Args:
        ninputs: Number of values per flattened sample
            (channels * height * width).
        hidden: Sequence with the width of each hidden layer, in order.
            An empty sequence gives a single ``Linear(ninputs, nout)``.
        nout: Number of output classes.
    """

    def __init__(self,
                 ninputs=3*32*32,
                 hidden=(512,),
                 nout=10,
                ):
        super().__init__()
        # Layer widths from the input up to the last hidden layer
        sizes = [ninputs, *hidden]

        # Input + hidden layers: one Linear/ReLU pair per consecutive widths
        net = []
        for fan_in, fan_out in zip(sizes, sizes[1:]):
            net.append(nn.Linear(fan_in, fan_out))
            net.append(nn.ReLU())

        # Output layer: no activation, a ReLU here would clamp the logits
        # to >= 0 and block the gradient of negative logits.
        net.append(nn.Linear(sizes[-1], nout))
        self.net = nn.Sequential(*net)

    def forward(self, x):
        """Return class logits of shape ``(batch, nout)`` for the batch ``x``.

        Every dimension after the first is flattened, so each sample must
        hold ``ninputs`` values (e.g. ``x`` of shape ``(batch, 3, 32, 32)``).
        """
        x = x.view(x.shape[0], -1) # Flatten image
        return self.net(x)

: 

In [None]:
# Let's set up some parameters
learning_rate=5e-1
nepochs = 10
ninputs=3*32*32
hidden=[5000, 10000] # width of each hidden layer, in order
nout=10

model = LinearNet(ninputs=ninputs,
                  hidden=hidden,
                  nout=nout).to(device)
print(model)
# We need an optimizer that tells us what form of gradient descent to do
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

# We also need a loss function
LossFunction = nn.CrossEntropyLoss()

# We're going to reload the data here so we have added clarity

batch_size = 2048

cifar10_dataset = datasets.CIFAR10(root='./', # here
                               train=True, # train split
                               download=True, # we want to get the data
                               transform=T.ToTensor(), # put it into tensor format
                              )

train_data = DataLoader(cifar10_dataset,
                        batch_size=batch_size,
                        shuffle=True, # new sample order every epoch
                        )

# This is default on but let's just be pedantic
model.train()
loss_history = [] # one entry per iteration (mini-batch)
for epoch in notebook.trange(nepochs,
                  desc="Epoch",
                  unit="epoch",
                  disable=False):
    for (data, label) in tqdm(train_data,
                              desc="iteration",
                              unit="%",
                              disable=True):
        optimizer.zero_grad(set_to_none=True) # clear the old gradients
        # Tensors must live on the same device as the model
        data = data.to(device)
        label = label.to(device)
        out = model(data)
        loss = LossFunction(out, label)
        loss.backward() # compute the gradients
        optimizer.step() # gradient descent step
        loss_history.append(loss.item())
    # Note: this is the loss of the last mini-batch of the epoch
    print(f"Epoch {epoch}: loss: {loss.item()}")

# Note that we are not plotting loss per epoch but per iteration
plt.plot(loss_history)
plt.title("Neural Network Loss")
plt.xlabel("Number of iterations")
plt.ylabel("Loss")
plt.show()

: 

In [None]:
# Measure accuracy on the test loader built in the earlier testing cell
model.eval()
accuracy = 0 # running count of correctly classified test images
# Evaluation needs no gradients: no_grad() skips building the autograd graph
with torch.no_grad():
    for (data, label) in test_data:
        data = data.to(device)
        label = label.to(device)
        out = model(data)
        # max() returns (values, indices); the index is the predicted class
        answers = out.max(dim=1)[1]
        # .item() keeps the counter a plain Python int
        accuracy += (answers == label).sum().item()
print(f"Total accuracy = {accuracy / len(mnist_test_dataset)*100:.2f}%")

: 

# Getting More Out of PyTorch

In [None]:
# Let's see what datasets are offered (https://pytorch.org/vision/stable/datasets.html)
# dir() is the built-in way to list a module's names (returned sorted)
print(dir(datasets))

: 

In [None]:
# What about optimizers?
# dir() is the built-in way to list a module's names (returned sorted)
print(dir(optim))

: 

: 