# Beginner Tutorials: 
# Deep Learning with PyTorch: A 60 Minute Blitz

https://pytorch.org/tutorials/beginner/deep_learning_60min_blitz.html

# What is PyTorch?

https://pytorch.org/tutorials/beginner/blitz/tensor_tutorial.html#sphx-glr-beginner-blitz-tensor-tutorial-py

## Getting Started

In [None]:
from __future__ import print_function
import torch

In [None]:
x = torch.empty(5,3)
print(x)

In [None]:
x = torch.rand(5,3)
print(x)

In [None]:
# Build a tensor straight from Python data, stored as 64-bit floats.
x = torch.tensor([5.5, 3], dtype=torch.float64)
print(x, '\n')

# Create a 5x3 tensor of ones from the existing tensor via new_ones().
x = x.new_ones((5, 3))
print(x, '\n')

# Random values with the same shape as `x`, with the dtype overridden to
# 32-bit float.
x = torch.randn_like(x, dtype=torch.float32)
print(x)

In [None]:
print(x.size())

In [None]:
y = torch.rand(5,3)
print(x + y)

In [None]:
result = torch.empty(5,3)
torch.add(x, y, out=result)
print(result)

In [None]:
y.add_(x)
print(y)

In [None]:
print(x[:, 1])

In [None]:
# Look at one 4x4 tensor through two differently shaped views.
x = torch.randn(4, 4)
y = x.view(16)      # all 16 elements in a single dimension
z = x.view(-1, 8)   # -1: this dimension is inferred from the others
print(x.shape, y.shape, z.shape)

In [None]:
x = torch.randn(1)
print(x)
print(x.item())

## NumPy Bridge

In [None]:
a = torch.ones(5)
print(a)

In [None]:
b = a.numpy()
print(b)

In [None]:
a.add_(1)
print(a)
print(b)

In [None]:
import numpy as np
# Wrap a NumPy array in a tensor, then increment the array in place and
# print both objects to compare them.
a = np.ones(5)
b = torch.from_numpy(a)
a += 1
print(a)
print(b)

## CUDA Tensors

In [None]:
if torch.cuda.is_available():
    device = torch.device("cuda")
    y = torch.ones_like(x, device=device)
    x = x.to(device)
    z = x + y
    print(z)
    print(z.to("cpu", torch.double))

# Autograd: automatic differentiation

https://pytorch.org/tutorials/beginner/blitz/autograd_tutorial.html

## Tensor

In [None]:
import torch
x = torch.ones(2, 2, requires_grad=True)
print(x)

In [None]:
y = x + 2
print(y)

In [None]:
print(y.grad_fn)

In [None]:
z = y * y * 3
out = z.mean()

print(z, out)

In [None]:
# A tensor created without requires_grad is not tracked by autograd.
a = torch.randn(2, 2)
a = a * 3 / (a - 1)
print(a.requires_grad)
print(a.grad_fn)

# requires_grad_() switches tracking on in place.
a.requires_grad_(True)
print(a.requires_grad)

# Operations on the tracked tensor now record a grad_fn.
b = torch.sum(a * a)
print(b.grad_fn)

## Gradients

In [None]:
out.backward()
print(x.grad)

In [None]:
x = torch.randn(3, requires_grad=True)

# Keep doubling until the L2 norm reaches 1000. The norm is measured on a
# detached copy so the loop test itself is not recorded by autograd.
y = x * 2
while y.detach().norm() < 1000:
    y = y * 2

print(y)

In [None]:
gradients = torch.tensor([0.1, 1.0, 0.0001], dtype=torch.float)
y.backward(gradients)

print(x.grad)

In [None]:
print(x.requires_grad)
print((x ** 2).requires_grad)

with torch.no_grad():
    print((x ** 2).requires_grad)

# Neural Networks
https://pytorch.org/tutorials/beginner/blitz/neural_networks_tutorial.html

## Define the network

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    """Small convolutional network: two conv/pool stages and three linear layers.

    With a 32x32 single-channel input the spatial size goes
    32 -> 28 (conv 5x5) -> 14 (pool) -> 10 (conv 5x5) -> 5 (pool), which is
    where the 16 * 5 * 5 input width of ``fc1`` comes from.
    """

    def __init__(self):
        super(Net, self).__init__()

        # Feature extractor: 1 input channel -> 6 -> 16 channels, 5x5 kernels.
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)

        # Linear head ending in 10 outputs.
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run the network on a batch `x` and return the 10 raw outputs per row."""
        stage1 = F.relu(self.conv1(x))
        x = F.max_pool2d(stage1, kernel_size=(2, 2))

        stage2 = F.relu(self.conv2(x))
        x = F.max_pool2d(stage2, kernel_size=2)

        # Collapse everything but the leading dimension before the linear layers.
        flat_width = self.num_flat_features(x)
        x = x.view(-1, flat_width)

        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

    def num_flat_features(self, x):
        """Return the product of all dimensions of `x` except the first."""
        count = 1
        for dim in x.shape[1:]:
            count = count * dim
        return count
    
net = Net()

print(net)

In [None]:
# Materialise the learnable parameters so they can be counted and inspected.
params = list(net.parameters())
print(len(params))

# One line per parameter tensor, in registration order.
for param in params:
    print(param.size())

In [None]:
input = torch.rand(1, 1, 32, 32)
out = net(input)
print(out)

In [None]:
net.zero_grad()
out.backward(torch.randn(1,10))

## Loss Function

In [None]:
output = net(input)

# Dummy regression target. torch.arange over integer bounds produces an int64
# tensor on current PyTorch, while MSELoss needs a floating-point target that
# matches the network output, so the dtype is requested explicitly.
target = torch.arange(1, 11, dtype=torch.float)
print(target)

# Add a leading batch dimension so the target lines up with `output`.
target = target.view(1, -1)
print(target)

criterion = nn.MSELoss()
print(criterion)

loss = criterion(output, target)
print(loss)

In [None]:
print(loss.grad_fn)  # MSELoss
print(loss.grad_fn.next_functions[0][0])  # Linear
print(loss.grad_fn.next_functions[0][0].next_functions[0][0])  # ReLU

## Backprop

In [None]:
net.zero_grad()

print('conv1.bias.grad before backward')
print(net.conv1.bias.grad)

loss.backward()

print('conv1.bias.grad after backward')
print(net.conv1.bias.grad)

## Update the weights

In [None]:
learning_rate = 0.01

# Show the second parameter tensor before the update for comparison.
print(list(net.parameters())[1])

# Hand-written SGD step: weight = weight - learning_rate * gradient.
# Working on .data keeps the update itself out of the autograd graph.
for param in net.parameters():
    step = param.grad.data * learning_rate
    param.data.sub_(step)

print(list(net.parameters())[1])

In [None]:
import torch.optim as optim

optimizer = optim.SGD(net.parameters(), lr=0.01)

optimizer.zero_grad()
output = net(input)
loss = criterion(output, target)
loss.backward()

print(list(net.parameters())[1])
optimizer.step()
print(list(net.parameters())[1])

# Training a classifier
https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html

## What about data?

## Training an image classifier
### 1. Loading and normalizing CIFAR10

In [1]:
import torch
import torchvision
import torchvision.transforms as transforms

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [2]:
# Convert images to tensors, then normalise every channel with mean 0.5 and
# std 0.5.
channel_mean = (0.5, 0.5, 0.5)
channel_std = (0.5, 0.5, 0.5)
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(channel_mean, channel_std),
])

# Training split: shuffled mini-batches of 4.
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=4, shuffle=True, num_workers=2)

# Test split: same batch size, fixed order.
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=4, shuffle=False, num_workers=2)

# Label names in index order.
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Files already downloaded and verified
Files already downloaded and verified


In [3]:
import matplotlib.pyplot as plt
import numpy as np

def imshow(img):
    """Show an image tensor with matplotlib.

    The tensor is first mapped back with ``img / 2 + 0.5`` (the inverse of a
    mean-0.5 / std-0.5 normalisation), converted to a NumPy array, and its
    axes reordered from (0, 1, 2) to (1, 2, 0) before being handed to
    ``plt.imshow``.
    """
    restored = img / 2 + 0.5
    pixels = restored.numpy().transpose((1, 2, 0))
    plt.imshow(pixels)
    
# Pull one mini-batch from the training loader. The built-in next() is used
# because DataLoader iterators on current PyTorch have no .next() method.
dataiter = iter(trainloader)
images, labels = next(dataiter)

imshow(torchvision.utils.make_grid(images))
# One label per image in the batch, space-separated so that five-letter
# names such as 'plane' do not run into their neighbours.
print(' '.join('%5s' % classes[label] for label in labels))

 deerplane deer bird


### 2. Define a Convolutional Neural Network

In [4]:
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    """CNN for 3-channel 32x32 images producing 10 class scores.

    Two conv(5x5) + ReLU + 2x2 max-pool stages (3 -> 60 -> 160 channels)
    followed by three linear layers (4000 -> 120 -> 84 -> 10). For a 32x32
    input the spatial size goes 32 -> 28 -> 14 -> 10 -> 5, giving the
    160 * 5 * 5 features that ``fc1`` expects.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 60, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(60, 160, 5)
        self.fc1 = nn.Linear(160 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return raw class scores of shape (batch, 10) for the batch `x`."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten per sample, keeping the batch dimension fixed. Reshaping with
        # view(-1, 160 * 5 * 5) would silently spread one sample over several
        # rows when the input is not 32x32; this way fc1 raises a shape error.
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net()

net.to(device)

Net(
  (conv1): Conv2d(3, 60, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(60, 160, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=4000, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

In [5]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [6]:
# Number of mini-batches between two progress reports.
log_every = 2000

for epoch in range(2):
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(trainloader):
        # Put the batch on the same device as the network.
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Gradients accumulate across backward() calls, so clear them first.
        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Report the mean loss over the last `log_every` mini-batches.
        running_loss += loss.item()
        if (i + 1) % log_every == 0:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / log_every))
            running_loss = 0.0

print('Finished Training')

[1,  2000] loss: 2.046
[1,  4000] loss: 1.664
[1,  6000] loss: 1.512
[1,  8000] loss: 1.405
[1, 10000] loss: 1.317
[1, 12000] loss: 1.236
[2,  2000] loss: 1.142
[2,  4000] loss: 1.098
[2,  6000] loss: 1.048
[2,  8000] loss: 1.023
[2, 10000] loss: 0.979
[2, 12000] loss: 0.961
Finished Training


In [None]:
# Take the first mini-batch of the test set. The built-in next() is used
# because DataLoader iterators on current PyTorch have no .next() method.
dataiter = iter(testloader)
images, labels = next(dataiter)

imshow(torchvision.utils.make_grid(images))
print('GroundTruth: ', ' '.join('%5s' % classes[labels[j]] for j in range(4)))

In [None]:
images = images.to(device)
outputs = net(images)
print(outputs)

In [None]:
_, predicted = torch.max(outputs, 1)

print('Predicted: ', ' '.join('%5s' % classes[predicted[j]] for j in range(4)))

In [None]:
# Count correct top-1 predictions over the whole test loader.
correct = 0
total = 0
with torch.no_grad():  # evaluation only: no autograd bookkeeping needed
    for images, labels in testloader:
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        # The index of the largest score along dim 1 is the predicted class.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Report the real number of evaluated images instead of a hard-coded count.
print('Accuracy of the network on the %d test images: %d %%' % (total, 100 * correct / total))
        
        

In [None]:
# Per-class tallies, indexed by class id.
class_correct = [0.0] * 10
class_total = [0.0] * 10

with torch.no_grad():
    for images, labels in testloader:
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        # Pair every label with its hit/miss flag. Iterating over the actual
        # batch contents (instead of range(4) on a squeezed tensor) also works
        # for other batch sizes, including a final batch of one sample.
        hits = (predicted == labels).tolist()
        for label, hit in zip(labels.tolist(), hits):
            class_correct[label] += hit
            class_total[label] += 1

for i in range(10):
    print('Accuracy of %5s : %2d %%' % (classes[i], 100 * class_correct[i] / class_total[i]))

## Training on GPU

In [None]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

In [None]:
net.to(device)

In [None]:
inputs, labels = inputs.to(device), labels.to(device)

# Optional: Data Parallelism
https://pytorch.org/tutorials/beginner/blitz/data_parallel_tutorial.html#imports-and-parameters


In [None]:
# Use the first GPU when one is present and fall back to the CPU otherwise,
# so that moving the network does not fail on CPU-only machines.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net.to(device)

In [None]:
mytensor = torch.randn(3, 4)
mytensor_gpu = mytensor.to(device)

print(mytensor)
print(mytensor_gpu)

In [None]:
net = nn.DataParallel(net)

## Imports and parameters

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

input_size = 5
output_size = 2

batch_size = 30
data_size = 100


In [None]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:
class RandomDataset(Dataset):
    """In-memory dataset of `length` random vectors with `size` entries each."""

    def __init__(self, size, length):
        # All samples are drawn once up front as a (length, size) tensor.
        self.data = torch.randn(length, size)
        self.len = length

    def __len__(self):
        """Return the number of samples."""
        return self.len

    def __getitem__(self, index):
        """Return the sample stored at position `index`."""
        return self.data[index]
    
# Size the dataset from the `data_size` parameter rather than repeating the
# literal 100, so changing the parameter actually takes effect.
rand_loader = DataLoader(dataset=RandomDataset(input_size, data_size),
                         batch_size=batch_size, shuffle=True)

In [None]:
class Model(nn.Module):
    """Single linear layer that prints the batch sizes it sees in forward()."""

    def __init__(self, input_size, output_size):
        super(Model, self).__init__()
        self.fc = nn.Linear(input_size, output_size)

    def forward(self, input):
        """Apply the linear layer, print input/output sizes, return the output."""
        result = self.fc(input)
        in_shape = input.size()
        out_shape = result.size()
        print("\tIn Model: input size", in_shape,
              "output size", out_shape)
        return result

In [None]:
model = Model(input_size, output_size)
if torch.cuda.device_count() > 1:
    print("Let's use", torch.cuda.device_count(), "GPUs!")
    model = nn.DataParallel(model)
    
model.to(device)

In [None]:
for data in rand_loader:
    input = data.to(device)
    output = model(input)
    print("Outside: input size", input.size(),
          "output_size", output.size())