In [None]:
import torch

### Tensor operation

In [None]:
# Draw a 4x4 tensor of standard-normal samples.
x = torch.randn(4,4)
# view() returns a tensor with a different shape over the same 16 elements.
y = x.view(16)  # flatten to a 1-D tensor of 16 elements
z = x.view(-1, 2)  # -1 lets torch infer that dimension: 16 / 2 = 8 rows
display(x, y, z)

In [None]:
# Convert the torch tensor to a NumPy array.
x_1 = x.numpy()

# Careful: for a tensor that lives on the CPU, the tensor and the NumPy array
# share the same memory, so changing one in place also changes the other.
x.add_(1)  # in-place add (trailing underscore); the change shows up in x_1 too
display(x, x_1)

In [None]:
# torch.from_numpy() also shares memory with the source array, whereas torch.tensor() always creates a new copy

In [None]:
# GPU support: use CUDA when it is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Create the tensor directly on the selected device.
x = torch.randn(4, 4, device=device)
display(device, x)

### Autograd

In [None]:
# requires_grad=True makes autograd record the operations applied to x.
x = torch.randn(4, 4, requires_grad=True)
y = x+2
z = y*y*3
z = z.mean()  # reduce to a scalar so backward() can be called without arguments
display(x, y, z)
display(z.grad_fn)  # the backward function recorded for the last operation (the mean)
display(x.grad)  # None here: backward() has not been called yet
z.backward()
display(x.grad)  # dz/dx

# Careful: backward() accumulates (adds) gradients into the .grad attribute of the
# leaf tensors, so they must be reset between steps (e.g. with optimizer.zero_grad()).

### Linear regression via tensor operation

In [None]:
# Training data sampled from the target function f(x) = 2 * x.
x = torch.arange(1, 6, dtype=torch.float32)       # 1, 2, 3, 4, 5
y = torch.arange(2, 11, 2, dtype=torch.float32)   # 2, 4, 6, 8, 10
# Single scalar weight, starting at zero; requires_grad=True so autograd fills w.grad.
w = torch.tensor(0.0, requires_grad=True, dtype=torch.float32)

def forward(x):
    """Return the model prediction ``w * x`` using the module-level weight ``w``."""
    prediction = w * x
    return prediction

def loss(y, y_pred):
    """Return the mean squared error between targets ``y`` and predictions ``y_pred``."""
    residual = y_pred - y
    squared_error = residual ** 2
    return squared_error.mean()

# Training loop: plain gradient descent on the single weight w.
lr = 0.01
n_epochs = 100
# range() drives the epoch counter; no manual initialisation or increment of i is needed.
for i in range(n_epochs):
    # Forward pass and loss for the current weight.
    y_pred = forward(x)
    l = loss(y, y_pred)
    # Backward pass: fills w.grad with dl/dw.
    l.backward()
    # Apply the update with autograd disabled so the step itself is not recorded.
    with torch.no_grad():
        w -= lr * w.grad
    # Zero the gradient after updating; backward() would otherwise add to it next epoch.
    w.grad.zero_()
    if i % 10 == 0:
        # The weight shown is the updated one; the loss was computed before the update.
        display(f'epoch - {i}, weight - {w}, loss - {l}')

In [None]:
# Query the trained weight at x = 5; the target function f(x) = 2 * x gives 10 there.
x_test = torch.tensor([5], dtype=torch.float32)
y_pred = forward(x_test)
display(f'y_pred is {y_pred}')

### Model, Loss & Optimizer in a "torch way"

In [None]:
# typical training module in a torch way
import torch.nn as nn

class LinearRegression(nn.Module):
    """Linear model made of a single ``nn.Linear`` layer.

    Args:
        input_dim: number of input features per sample.
        output_dim: number of output values per sample.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        # The only trainable layer: holds the weight matrix and the bias vector.
        self.lin = nn.Linear(in_features=input_dim, out_features=output_dim)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        prediction = self.lin(x)
        return prediction

In [None]:
# Same f(x) = 2 * x data, shaped as column vectors: one sample per row, one feature per column.
x = torch.tensor([1, 2, 3, 4, 5], dtype=torch.float32).view(-1, 1)
y = torch.tensor([2, 4, 6, 8, 10], dtype=torch.float32).view(-1, 1)
input_size = 1
output_size = 1
model = LinearRegression(input_dim=input_size, output_dim=output_size)
# Optional check of the untrained model (left disabled):
# y_pred = model(x_test)
# display(f'Prediction before training is {y_pred}')

In [None]:
lr = 0.01
n_epochs = 150
loss = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

# model.parameters() yields the same Parameter objects on every call and
# optimizer.step() updates them in place, so unpack them once rather than
# on every epoch.
# NOTE: this rebinds `w` (and `loss` above), names that the manual
# linear-regression cell also uses for its scalar weight and loss function.
w, b = model.parameters()

# training loop
for i in range(n_epochs):
    y_pred = model(x)  # calling the module runs forward()
    l = loss(y_pred, y)
    l.backward()           # compute gradients of the loss w.r.t. the parameters
    optimizer.step()       # update the parameters from their gradients
    optimizer.zero_grad()  # reset gradients so they do not accumulate across epochs
    if i % 10 == 0:
        display(f'epoch - {i}, weight, bias - {w}, {b}, loss - {l}')

### First NN model

In [None]:
# Topics covered: GPU usage, Dataset, DataLoader, transforms, a neural network, training & evaluation

In [None]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

# Device configuration: CUDA when available, otherwise CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Hyper-parameters
input_size = 28 * 28  # each 28x28 image is flattened to 784 features
hidden_size = 500
num_classes = 10
num_epochs = 2
batch_size = 100
learning_rate = 1e-3

# MNIST dataset, stored under ./data; the training split requests a download when needed.
train_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)

test_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    transform=transforms.ToTensor(),
)

# Data loaders: shuffle the training batches, keep the test set in order.
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True
)

test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False
)

# Take the first test batch and preview six of its images in a 2x3 grid.
examples = iter(test_loader)
example_data, example_targets = next(examples)

n_rows, n_cols = 2, 3
for slot in range(n_rows * n_cols):
    plt.subplot(n_rows, n_cols, slot + 1)
    image = example_data[slot][0]  # index 0 selects the first (only) channel
    plt.imshow(image, cmap='gray')
plt.show()

In [None]:
# Fetch one batch from a fresh iterator; as the cell's last expression it is displayed.
test_batch_iter = iter(test_loader)
next(test_batch_iter)

In [None]:
# Fully-connected with one hidden layer NN
class NeuralNet(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_classes):
        super(NeuralNet, self).__init__()
        self.l1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.l2 = nn.Linear(hidden_dim, num_classes)
    def forward(self, x):
        out = self.l1(x)
        out = self.relu(out)
        out = self.l2(out)
        # no activation and no softmax at the end
        return out

In [None]:
# Build the network, then move its parameters to the selected device.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model = NeuralNet(input_dim=input_size, hidden_dim=hidden_size, num_classes=num_classes)
model = model.to(device)

In [None]:
# loss & optimizer
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# training loop
n_total_steps = len(train_loader)  # number of batches per epoch
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # original shape: [100, 1, 28, 28]
        # flattened to:   [100, 784]
        images = images.reshape(-1, 28*28).to(device)
        labels = labels.to(device)
        y_pred = model(images)  # calling the module runs forward()
        l = loss(y_pred, labels)
        l.backward()           # compute gradients of the loss w.r.t. the parameters
        optimizer.step()       # update the weights from those gradients
        optimizer.zero_grad()  # reset gradients so they do not accumulate across steps
        if i % 100 == 0:
            # Log the scalar loss only: interpolating model.parameters() would print
            # just a generator object's repr, which carries no information.
            display(f'epoch - {epoch}, steps - {i} / {n_total_steps}, loss - {l.item():.4f}')

In [None]:
# Inference on the test set; gradients are not needed, so autograd is switched off.
n_samples = len(test_loader.dataset)
n_correct = 0
with torch.no_grad():
    for images, labels in test_loader:
        # Flatten every image and move the batch to the device the model uses.
        images = images.reshape(-1, 28*28).to(device)
        labels = labels.to(device)
        y_pred = model(images)  # calling the module runs forward()

        # torch.max along dim 1 returns (values, indices); the index of the
        # largest score is taken as the predicted class.
        predicted = torch.max(y_pred, 1).indices
        n_correct += int((predicted == labels).sum())

acc = n_correct / n_samples
print(f'Accuracy of the network on the {n_samples} test images: {100*acc} %')

### CNN model

In [None]:
# Topics covered: convolution layers, max pooling, saving & loading a model

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np

# Device configuration: CUDA when available, otherwise CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Hyper-parameters
num_epochs, batch_size = 10, 32
learning_rate = 1e-3

# The dataset yields PIL images; ToTensor() turns them into tensors in [0, 1],
# and Normalize with mean 0.5 / std 0.5 per channel rescales them to [-1, 1].
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# CIFAR10: 60000 32x32 color images in 10 classes, with 6000 images per class
train_dataset = torchvision.datasets.CIFAR10(
    root='./data', train=True, transform=transform, download=True
)

test_dataset = torchvision.datasets.CIFAR10(
    root='./data', train=False, transform=transform, download=True
)

# Shuffle only the training batches; the test set is read in order.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset, batch_size=batch_size, shuffle=True
)

test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset, batch_size=batch_size, shuffle=False
)

# Class names, in the order listed in the original cell.
classes = tuple('plane car bird cat deer dog frog horse ship truck'.split())

def imshow(imgs):
    """Show a normalized image tensor with matplotlib.

    ``imgs`` must be a 3-D tensor convertible with ``.numpy()``; it is presumably
    channel-first (C, H, W), as its axes are reordered to (1, 2, 0) for plotting.
    """
    unnormalized = imgs / 2 + 0.5  # unnormalize: maps [-1, 1] back to [0, 1]
    channels_last = unnormalized.numpy().transpose(1, 2, 0)
    plt.imshow(channels_last)
    plt.show()

# Take one batch from the shuffled training loader and show its first 25 images
# as a grid with 5 images per row.
dataiter = iter(train_loader)
images, labels = next(dataiter)
img_grid = torchvision.utils.make_grid(images[0:25], nrow=5)
imshow(img_grid)

In [None]:
class ConvNet(nn.Module):
    """Small CNN: three 3x3 convolutions (two followed by 2x2 max pooling) and two linear layers.

    The first linear layer takes 64*4*4 features, which matches N x 3 x 32 x 32
    inputs; the network returns 10 raw scores per sample.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3)
        self.fc1 = nn.Linear(in_features=64 * 4 * 4, out_features=64)
        self.fc2 = nn.Linear(in_features=64, out_features=10)

    def forward(self, x):
        """Run the network on ``x`` (N, 3, 32, 32) and return scores of shape (N, 10)."""
        # conv1 -> N, 32, 30, 30; pool -> N, 32, 15, 15
        features = self.pool(F.relu(self.conv1(x)))
        # conv2 -> N, 64, 13, 13; pool -> N, 64, 6, 6
        features = self.pool(F.relu(self.conv2(features)))
        # conv3 -> N, 64, 4, 4
        features = F.relu(self.conv3(features))
        # keep the batch axis, flatten the rest -> N, 1024
        flat = torch.flatten(features, 1)
        hidden = F.relu(self.fc1(flat))  # -> N, 64
        return self.fc2(hidden)          # -> N, 10