In [21]:
import torch
import numpy as np

In [2]:
torch.tensor([[1, 2], [3, 4]])

tensor([[1, 2],
        [3, 4]])

In [7]:
torch.rand(2, 2), torch.rand((2, 2))

(tensor([[0.6882, 0.6830],
         [0.7553, 0.3041]]),
 tensor([[0.8068, 0.1210],
         [0.8462, 0.7624]]))

In [6]:
torch.rand((3, 5)).shape, torch.rand(3, 5).shape

(torch.Size([3, 5]), torch.Size([3, 5]))

In [10]:
# Two random 2x2 operands; `a` is drawn before `b`.
a = torch.rand(2, 2)
b = torch.rand(2, 2)

# Matrix product of the two operands (see np.dot for the NumPy counterpart).
a @ b

tensor([[0.0593, 0.2055],
        [0.1028, 0.2299]])

In [12]:
a * b  # element wise see np.multiply

tensor([[0.0543, 0.6722],
        [0.0013, 0.0536]])

In [13]:
torch.zeros(2, 2)

tensor([[0., 0.],
        [0., 0.]])

In [14]:
torch.ones(2, 2)

tensor([[1., 1.],
        [1., 1.]])

In [18]:
a = torch.eye(2)
a

tensor([[1., 0.],
        [0., 1.]])

In [24]:
# Build the NumPy array [0, 1, 2] first, then wrap it as a tensor.
counting_array = np.array(range(3))
converted = torch.from_numpy(counting_array)
converted

tensor([0, 1, 2])

In [25]:
converted.numpy()

array([0, 1, 2])

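`matmul`: matrix multiplication

`*`: element-wise multiplication

`eye`: identity tensor

`zeros`: tensor filled with zeros

`ones`: tensor filled with ones

`rand`: tensor of uniform random values in [0, 1)

`tensor`: tensor built from existing data (e.g. nested lists)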

## Forward propagation

Computational graph

In [26]:
# Leaf values of the computational graph.
# torch.tensor is the factory the rest of this notebook uses for building
# tensors from data; the float literals keep these tensors float32, so the
# printed results below still read tensor([-2.]) etc.
a = torch.tensor([2.])
b = torch.tensor([-4.])
c = torch.tensor([-2.])
d = torch.tensor([2.])

In [27]:
e = a + b
f = c * d

In [28]:
g = e * f

In [29]:
print(e, f, g)

tensor([-2.]) tensor([-4.]) tensor([8.])


## Backprop

With many variables, we use the term *gradient* in place of "derivative".

In [30]:
# Scalar leaves that autograd should track.
x = torch.tensor(-3.0, requires_grad=True)
y = torch.tensor(5.0, requires_grad=True)
z = torch.tensor(-2.0, requires_grad=True)

# Forward pass: f = (x + y) * z, with the sum kept as the intermediate q.
q = x + y
f = q * z

# Backward pass: populates .grad on each leaf with df/d(leaf).
f.backward()

# df/dz = q = 2, df/dy = df/dx = z = -2
print(*(leaf.grad for leaf in (z, y, x)))

tensor(2.) tensor(-2.) tensor(-2.)


## Fully Connected Neural Networks

In [34]:
# One random input vector with 10 features.
input_layer = torch.rand(10)

# Random weights for a 10 -> 20 -> 20 -> 4 stack (no biases, no activations).
w1 = torch.rand(10, 20)
w2 = torch.rand(20, 20)
w3 = torch.rand(20, 4)

# Each layer is a plain matrix product with the previous layer's output.
h1 = input_layer @ w1
h2 = h1 @ w2
out = h2 @ w3
out

tensor([281.6978, 262.7382, 322.6375, 237.2466])

In [35]:
# OO network

import torch.nn as nn

class Net(nn.Module):
    """Three stacked linear layers (10 -> 20 -> 20 -> 4) with no activations."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(10, 20)
        self.fc2 = nn.Linear(20, 20)
        self.output = nn.Linear(20, 4)

    def forward(self, x):
        """Pass ``x`` through fc1, fc2 and output, in that order."""
        hidden = self.fc1(x)
        hidden = self.fc2(hidden)
        return self.output(hidden)

In [36]:
# Random 10-element input, matching the 10 input features of Net.fc1.
input_layer = torch.rand(10)
net = Net()
# Calling the module runs Net.forward on the input.
res = net(input_layer)
res

tensor([ 0.2473, -0.1382, -0.0740,  0.0430], grad_fn=<AddBackward0>)

In [39]:
# matmul is linear transformation

in_layer = torch.tensor([2.0, 1.0])
w1 = torch.tensor([[0.45, 0.32],
                   [-0.12, 0.29]])
w2 = torch.tensor([[0.48, -0.12],
                   [0.64, 0.91]])

# Two successive linear maps collapse into one equivalent weight matrix.
w = w1 @ w2
out = in_layer @ w
out, w

(tensor([0.9696, 0.7527]),
 tensor([[0.4208, 0.2372],
         [0.1280, 0.2783]]))

## non-linear activation functions

- sigmoid
- tanh
- ReLU max(0, x)
- Leaky ReLU max(0.1x, x)
- Maxout
- ELU

### ReLU

In [40]:
relu = nn.ReLU()

In [42]:
t1 = torch.tensor([2., -4.])
relu(t1)

tensor([2., 0.])

In [43]:
t2 = torch.tensor([[2, -4], [1.2, 0]])
relu(t2)

tensor([[2.0000, 0.0000],
        [1.2000, 0.0000]])

Accuracy is not differentiable, but softmax cross-entropy loss is.

$$P(Y = k \mid X = x_i) = \frac{e^{s_k}}{\sum_j e^{s_j}}$$

softmax transforms numbers into probabilities...

Loss = -ln(0.13), where 0.13 is the softmax probability assigned to the correct class.

In [45]:
# Raw scores for one sample over three classes, plus the index of its true class.
logits = torch.tensor([[3.2, 5.1, -1.7]])
ground_truth = torch.tensor([0])

# The criterion is given the raw scores and the class index directly.
criterion = nn.CrossEntropyLoss()
loss = criterion(input=logits, target=ground_truth)
loss

tensor(2.0404)

Cross-entropy loss measures the performance of a classification model whose output is a probability value between 0 and 1. The loss increases as the predicted probability diverges from the actual label.
```
def ce(y_hat, y):
    """Binary cross-entropy for a single prediction.

    y_hat: predicted probability of the positive class; must lie strictly
        between 0 and 1, otherwise the logarithm is undefined.
    y: true label; 1 selects -log(y_hat), any other value selects
        -log(1 - y_hat).
    """
    from math import log  # natural logarithm; local import keeps the snippet self-contained
    return -log(y_hat) if y == 1 else -log(1 - y_hat)
```

##  Datasets and Dataloaders

In [47]:
import torch
import torchvision
import torch.utils.data
import torchvision.transforms as transforms

In [66]:
# Preprocessing pipeline: convert to a tensor, then normalise with one
# mean and one std value per channel.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(
        mean=(.4914, .48216, .44653),
        std=(.24703, .2435, .2616),
    ),
])

In [50]:
# Both splits share the storage root, the download flag and the preprocessing.
cifar_kwargs = dict(root='./data', download=True, transform=transform)
train = torchvision.datasets.CIFAR10(train=True, **cifar_kwargs)
test = torchvision.datasets.CIFAR10(train=False, **cifar_kwargs)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100.0%


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [53]:
from torch.utils.data import DataLoader
# Batches of 32 training samples, drawn in shuffled order by 12 worker processes.
trainloader = DataLoader(
    dataset=train,
    batch_size=32,
    shuffle=True,
    num_workers=12,
)

In [54]:
# Batches of 32 test samples in their stored order (no shuffling).
testloader = DataLoader(
    dataset=test,
    batch_size=32,
    shuffle=False,
    num_workers=12,
)

In [62]:
testloader.dataset.data.shape

(10000, 32, 32, 3)

In [65]:
testloader.batch_size

32

In [70]:
import torch.nn.functional as F
import torch.optim as optim

class Net2(nn.Module):
    """Two-layer classifier: 32*32*3 inputs -> 500 hidden units (ReLU) -> 10 scores."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(32 * 32 * 3, 500)
        self.fc2 = nn.Linear(500, 10)

    def forward(self, x):
        """Return the 10 raw scores for ``x``; no softmax is applied here."""
        hidden = self.fc1(x)
        activated = F.relu(hidden)
        return self.fc2(activated)


In [72]:
# Model, loss and optimiser for the fully connected classifier.
net = Net2()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=3e-4)

for epoch in range(1):  # a single pass over the training loader
    for images, labels in trainloader:
        # Flatten each item of the batch into one row of 32*32*3 values.
        flat_images = images.view(-1, 32 * 32 * 3)

        # Clear the parameter gradients left over from the previous batch.
        optimizer.zero_grad()

        # Forward pass and loss, then backward pass and parameter update.
        loss = criterion(net(flat_images), labels)
        loss.backward()  # stores gradients on the tensors as tensor.grad
        optimizer.step()
        

In [73]:
correct, total = 0, 0
predictions = []
net.eval()
# switch to have network run in inference mode
# - normalization layers use running stats
# - de-activates Dropout layers

# model.train()
# - normalization layers use per-batch stats
# - activates Dropout layers

# Evaluation needs no gradients. Without no_grad() every forward pass builds an
# autograd graph, and each tensor stored in `predictions` would keep its
# batch's graph alive for as long as the list exists.
with torch.no_grad():
    for data in testloader:
        inputs, labels = data
        inputs = inputs.view(-1, 32*32*3)  # flatten each item to one row
        outputs = net(inputs)
        # torch.max over dim 1 returns (values, indices); the index of the
        # largest score in each row is the predicted class.
        _, predicted = torch.max(outputs, 1)
        predictions.append(outputs)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'test accuracy {100*correct/total}')

test accuracy 48.31
