# Lab VI

In [88]:
import numpy as np
import torch

# Tensors

*Tensor is a n-dimensional array, it means that it can have any number of dimensions.NumPy `ndarray` can not be loaded into GPUs. However, `tensor`s can do so.*

In [None]:
x = torch.tensor(4.)
x

In [None]:
y = torch.tensor([1, 2, 3, 4, 5.0])
y

In [None]:
z = torch.tensor([
    [1, 2, 3],
    [3, 4, 5],
    [6, 7, 8]
])
z

*We can use both NumPy stype `shape` or PyTorch specific `size()` function to get the size of the tensor.**

In [None]:
print(x.shape)
print(y.size())
print(z.shape)

In [None]:
print(x.dtype)
print(y.dtype)
print(z.dtype)

*PyTorch is perfectly compatible with NumPy. We use `from_numpy()` to create `tensor` from `ndarray`, and use `numpy()` to convert a `tensor` into a `ndarray`. However, converting between them does not change the memory location of them. So be careful.*

In [None]:
n = np.arange(16).reshape(4, 4)
n

In [None]:
tn = torch.from_numpy(n)
tn

In [None]:
tn.numpy()

In [None]:
print(n.dtype)
print(tn.dtype)

*PyTorch even has NumPy like interface for creating tensors. Common utility functions for creating a `tensor` include `empty`, `ones`, `zeros`, `rand`, `randint`, etc., and their corresponding `empty_like`, `rand_like`, `randint_like`, etc.*

In [None]:
x = torch.empty(2, 2)
x

In [None]:
y = torch.zeros(3, 6)
y

In [None]:
o = torch.ones(2, 2)
o

In [None]:
z = torch.eye(3, 4)
z

In [None]:
torch.ones_like(z, dtype=torch.int64)

In [None]:
torch.rand(2, 2)

*Slicing and indexing is similar to NumPy. However, we can use `item` to get the actual value if the tensor has only one item.*

In [39]:
x = torch.tensor(5.)
x

tensor(5.)

In [40]:
x.item()

5.0

*We can use `view` function to reshape a tensor.*

In [48]:
x = torch.randint(0, 5, (5, 4))
x

tensor([[4, 0, 1, 1],
        [2, 0, 3, 3],
        [4, 3, 4, 3],
        [2, 1, 1, 0],
        [3, 3, 2, 1]])

In [49]:
y = x.view(2, 10)
y

tensor([[4, 0, 1, 1, 2, 0, 3, 3, 4, 3],
        [4, 3, 2, 1, 1, 0, 3, 3, 2, 1]])

In [51]:
y.view(4, 5)

tensor([[4, 0, 1, 1, 2],
        [0, 3, 3, 4, 3],
        [4, 3, 2, 1, 1],
        [0, 3, 3, 2, 1]])

In [52]:
y.view(-1, 4)

tensor([[4, 0, 1, 1],
        [2, 0, 3, 3],
        [4, 3, 4, 3],
        [2, 1, 1, 0],
        [3, 3, 2, 1]])

*`unsqueeze` can add a new dimension at the specified position.*

In [62]:
torch.unsqueeze(x, 0).shape

torch.Size([1, 5, 4])

# Gradients

In [75]:
x = torch.tensor(5.)
w = torch.tensor(2., requires_grad=True)
b = torch.tensor(3., requires_grad=True)

In [76]:
y = w*x + b
y

tensor(13., grad_fn=<AddBackward0>)

*We can automatically calculate the derivatives of `y` with respect to its parameters which have `requires_grad=True` set.*

In [77]:
y.backward()

In [78]:
print('dy/dw: ', w.grad)
print('dy/db: ', b.grad)

dy/dw:  tensor(5.)
dy/db:  tensor(1.)


*If you do not want to calculate gradients for some operation, you can use `with torch.no_grad()`.*

In [79]:
with torch.no_grad():
    w -= w.grad * 0.001
    b -= b.grad * 0.001

In [81]:
print('W: ', w)
print('B: ', b)

W:  tensor(1.9950, requires_grad=True)
B:  tensor(2.9990, requires_grad=True)


In [80]:
print('dy/dw: ', w.grad)
print('dy/db: ', b.grad)

dy/dw:  tensor(5.)
dy/db:  tensor(1.)


*We can clear the gradients using `zero_` function. Any function with a `_` in the end usually  means the operation is performed in-place.*

In [83]:
w.grad.zero_()
b.grad.zero_()

tensor(0.)

In [84]:
print('dy/dw: ', w.grad)
print('dy/db: ', b.grad)

dy/dw:  tensor(0.)
dy/db:  tensor(0.)


# A Simple Linear Regression

*Preparing `iris` data.*

In [103]:
from sklearn.datasets import load_iris

In [104]:
iris = load_iris()

In [105]:
iris.data.shape

(150, 4)

*We will be taking first `3` columns, which are sepal lenght, sepal width, and petal length, as our features, and the last column, which is petal width, as target.**

In [106]:
x = iris.data[:, :3]
y = iris.data[:, 3]

In [107]:
print(x.shape)
print(y.shape)

(150, 3)
(150,)


*Next, we create tensors from the `ndarray`. Note that we have changed the shape of the label array. At first, `y` was `0` dimensional. To make it work with matrices, we changed it to `-1x1` dimensional, where `-1` means whatever numebr necessary or left after setting `1` column.* 

In [108]:
x = torch.from_numpy(x)
y = torch.from_numpy(y.reshape(-1, 1))

In [109]:
print(x.shape)
print(x.dtype)
print(y.shape)
print(y.dtype)

torch.Size([150, 3])
torch.float64
torch.Size([150, 1])
torch.float64


*Weight initialization. We initialize a `3x1` matrix as there are `3` features, and a `1x1` bias.*

In [110]:
w = torch.rand(3, 1, requires_grad=True, dtype=torch.float64)
b = torch.rand(1, 1, requires_grad=True, dtype=torch.float64)

In [114]:
print(w)

tensor([[0.4247],
        [0.9717],
        [0.6651]], dtype=torch.float64, requires_grad=True)


In [115]:
print(b)

tensor([[0.0009]], dtype=torch.float64, requires_grad=True)


In [116]:
print(w.shape)
print(w.dtype)
print(b.shape)
print(b.dtype)

torch.Size([3, 1])
torch.float64
torch.Size([1, 1])
torch.float64


*Creating model. Our simple linear model has equation of $w_{1}x_{1} + w_{2}x_{2} + w_{3}x_{3} + b$, which is simply achieved by computing $x\times W + b$.*

In [136]:
def model(x):
    return x @ w + b

In [137]:
def mse(pred, label):
    ae = pred - label
    return torch.sum(ae * ae) / ae.numel()

In [138]:
epochs = 10
lr = 0.01

for e in range(epochs):
    predictions = model(x)
    loss = mse(predictions, y)
    
    print('Epoch ', e, ': Loss ', loss)
    
    with torch.no_grad():
        loss.backward()
        
        w -= w.grad * lr
        b -= b.grad * lr
        
        w.grad.zero_()
        b.grad.zero_()

Epoch  0 : Loss  tensor(0.2404, dtype=torch.float64, grad_fn=<DivBackward0>)
Epoch  1 : Loss  tensor(0.2255, dtype=torch.float64, grad_fn=<DivBackward0>)
Epoch  2 : Loss  tensor(0.2117, dtype=torch.float64, grad_fn=<DivBackward0>)
Epoch  3 : Loss  tensor(0.1990, dtype=torch.float64, grad_fn=<DivBackward0>)
Epoch  4 : Loss  tensor(0.1871, dtype=torch.float64, grad_fn=<DivBackward0>)
Epoch  5 : Loss  tensor(0.1762, dtype=torch.float64, grad_fn=<DivBackward0>)
Epoch  6 : Loss  tensor(0.1660, dtype=torch.float64, grad_fn=<DivBackward0>)
Epoch  7 : Loss  tensor(0.1566, dtype=torch.float64, grad_fn=<DivBackward0>)
Epoch  8 : Loss  tensor(0.1479, dtype=torch.float64, grad_fn=<DivBackward0>)
Epoch  9 : Loss  tensor(0.1398, dtype=torch.float64, grad_fn=<DivBackward0>)


# Regression Using PyTorch

In [282]:
import torch.nn as nn
from torch.optim import SGD, Adam
from torch.nn.functional import mse_loss
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import Dataset, DataLoader

In [247]:
class MakeTensor:
    def __call__(self, sample):
        x, y = sample
        return torch.from_numpy(x.astype(np.float32)), torch.from_numpy(y.astype(np.float32))

In [248]:
class IRISDataset(Dataset):
    def __init__(self, transforms=None):
        data = load_iris()
        self.x = data.data[:, 0:3]
        self.y = data.data[:, [3]]
        self.n = data.data.shape[0]
        self.transforms = transforms
    
    def __getitem__(self, index):
        sample = self.x[index], self.y[index]
        if self.transforms:
            sample = self.transforms(sample)
        return sample
    
    def __len__(self):
        return self.n

In [249]:
dataset = IRISDataset(transforms=MakeTensor())

In [250]:
dataset[0]

(tensor([5.1000, 3.5000, 1.4000]), tensor([0.2000]))

In [251]:
datasampler = SubsetRandomSampler(np.arange(len(dataset)))
dataloader = DataLoader(dataset, 32, sampler=datasampler)

In [252]:
for xa, ya in dataloader:
    print(xa.shape)
    print(ya.shape)

torch.Size([32, 3])
torch.Size([32, 1])
torch.Size([32, 3])
torch.Size([32, 1])
torch.Size([32, 3])
torch.Size([32, 1])
torch.Size([32, 3])
torch.Size([32, 1])
torch.Size([22, 3])
torch.Size([22, 1])


In [253]:
model = nn.Linear(in_features=3, out_features=1, bias=True)

In [254]:
model.weight

Parameter containing:
tensor([[ 0.5267, -0.2628,  0.2328]], requires_grad=True)

In [255]:
model.bias

Parameter containing:
tensor([0.5640], requires_grad=True)

In [256]:
list(model.parameters())

[Parameter containing:
 tensor([[ 0.5267, -0.2628,  0.2328]], requires_grad=True),
 Parameter containing:
 tensor([0.5640], requires_grad=True)]

In [257]:
optimizer = SGD(model.parameters(), lr=1e-3)

Suppose your `epochs=10`, your training data has `10,000` observations, and batch size is `1,000`. Then, then number of weight updates that will take place is `10 * (10,000/1,000)`. It means in each epoch, there will be `10000/1000` or `10` steps. Therefore, in `10` epochs, there will be `100` steps.

In [258]:
epochs = 25

for e in range(epochs):
    for xb, yb in dataloader:
        predictions = model(xb)
        loss = mse_loss(predictions, yb)
    
    print('Epoch ', e)
    print(loss)
    
    with torch.no_grad():
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

Epoch  0
tensor(6.2764, grad_fn=<MseLossBackward>)
Epoch  1
tensor(5.3186, grad_fn=<MseLossBackward>)
Epoch  2
tensor(3.7363, grad_fn=<MseLossBackward>)
Epoch  3
tensor(3.2597, grad_fn=<MseLossBackward>)
Epoch  4
tensor(2.3804, grad_fn=<MseLossBackward>)
Epoch  5
tensor(1.7077, grad_fn=<MseLossBackward>)
Epoch  6
tensor(1.6132, grad_fn=<MseLossBackward>)
Epoch  7
tensor(1.1950, grad_fn=<MseLossBackward>)
Epoch  8
tensor(0.8286, grad_fn=<MseLossBackward>)
Epoch  9
tensor(0.8251, grad_fn=<MseLossBackward>)
Epoch  10
tensor(0.6117, grad_fn=<MseLossBackward>)
Epoch  11
tensor(0.5613, grad_fn=<MseLossBackward>)
Epoch  12
tensor(0.4942, grad_fn=<MseLossBackward>)
Epoch  13
tensor(0.3717, grad_fn=<MseLossBackward>)
Epoch  14
tensor(0.3754, grad_fn=<MseLossBackward>)
Epoch  15
tensor(0.2714, grad_fn=<MseLossBackward>)
Epoch  16
tensor(0.2096, grad_fn=<MseLossBackward>)
Epoch  17
tensor(0.2350, grad_fn=<MseLossBackward>)
Epoch  18
tensor(0.2689, grad_fn=<MseLossBackward>)
Epoch  19
tensor(0.247

# Logistic Regression

In [287]:
import matplotlib.pyplot as plt
import torchvision
from torch.nn.functional import relu, softmax, cross_entropy
from torchvision.datasets import MNIST
from torchvision.transforms import transforms
from torch.utils.data.sampler import SubsetRandomSampler

In [264]:
mnist = MNIST('./data/', download=True, transform=transforms.ToTensor())

In [266]:
mnist

Dataset MNIST
    Number of datapoints: 60000
    Root location: ./data/
    Split: Train
    StandardTransform
Transform: ToTensor()

In [268]:
def train_val_split(n, val_fraction=0.2):
    nval = int(n * val_fraction)
    idx = np.random.permutation(n)
    return idx[nval:], idx[:nval]

In [269]:
train_idx, val_idx = train_val_split(len(mnist))

In [270]:
print(len(train_idx))
print(len(val_idx))

48000
12000


In [271]:
train_sampler = SubsetRandomSampler(train_idx)
val_sampler = SubsetRandomSampler(val_idx)

train_loader = DataLoader(mnist, 32, sampler=train_sampler)
val_loader = DataLoader(mnist, 32, sampler=val_sampler)

In [272]:
class MNISTClassifier(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(28*28, 10)
    
    def forward(self, xb):
        xb = xb.view(-1, 784)
        output = self.linear(xb)
        return output

In [273]:
model = MNISTClassifier()

In [274]:
print(model.linear.weight.shape)
print(model.linear.bias.shape)

torch.Size([10, 784])
torch.Size([10])


In [275]:
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)

In [276]:
def accuracy(pred, label):
    _, predictions = torch.max(pred, dim=1)
    return torch.sum(predictions == label).item() / len(pred)

In [277]:
def train_batch(model, loss_func, bx, by, optimizer, metric=accuracy):
    output = model(bx)       
    loss = cross_entropy(output, by)

    with torch.no_grad():
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
    
    result = metric(output, by)
    return loss.item(), len(bx), result

In [278]:
def evaluate(model, loss_func, validation_data, optimizer, metric=accuracy):
    results = [train_batch(model, loss_func, bx, by, optimizer, metric) for bx, by in validation_data]
    losses, lens, results = zip(*results)
    total_data = np.sum(lens)
    avg_loss = np.sum(np.multiply(losses, lens)) / total_data
    avg_result = np.sum(np.multiply(results, lens)) / total_data
    
    return avg_loss, total_data, avg_result

In [279]:
def fit(model, epochs, loss_func, optimizer, train, validation, metric=accuracy):
    for e in (range(epochs)):
        for bx, by in train:
            loss, _, _ = train_batch(model, loss_func, bx, by, optimizer, metric)

        val_loss, _, val_result = evaluate(model, loss_func, validation, optimizer, metric)
        print('Epoch {}/{}: Training Loss: {:.2f}, Validation Loss: {:.2f}, Validation Metric: {:.2f}'.format(e+1, epochs, loss, val_loss, val_result))

In [280]:
fit(model, 10, cross_entropy, optimizer, train_loader, val_loader)

Epoch 1/10: Training Loss: 1.43, Validation Loss: 1.29, Validation Metric: 0.79
Epoch 2/10: Training Loss: 0.94, Validation Loss: 0.93, Validation Metric: 0.82
Epoch 3/10: Training Loss: 0.84, Validation Loss: 0.78, Validation Metric: 0.84
Epoch 4/10: Training Loss: 0.63, Validation Loss: 0.69, Validation Metric: 0.85
Epoch 5/10: Training Loss: 0.75, Validation Loss: 0.63, Validation Metric: 0.85
Epoch 6/10: Training Loss: 0.63, Validation Loss: 0.59, Validation Metric: 0.86
Epoch 7/10: Training Loss: 0.57, Validation Loss: 0.56, Validation Metric: 0.86
Epoch 8/10: Training Loss: 0.67, Validation Loss: 0.54, Validation Metric: 0.87
Epoch 9/10: Training Loss: 0.43, Validation Loss: 0.52, Validation Metric: 0.87
Epoch 10/10: Training Loss: 0.42, Validation Loss: 0.50, Validation Metric: 0.87


# A Simple Neural Network

In [288]:
class MNISTNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear1 = nn.Linear(28*28, 128)
        self.linear2 = nn.Linear(128, 10)
    
    def forward(self, bx):
        bx = bx.view(bx.size(0), -1)
        output = self.linear1(bx)
        output = relu(output)
        output = self.linear2(output)
        
        return output

In [289]:
model = MNISTNetwork()

In [290]:
optimizer = Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999))

In [291]:
fit(model, 10, cross_entropy, optimizer, train_loader, val_loader)

Epoch 1/10: Training Loss: 0.13, Validation Loss: 0.18, Validation Metric: 0.95
Epoch 2/10: Training Loss: 0.27, Validation Loss: 0.11, Validation Metric: 0.97
Epoch 3/10: Training Loss: 0.01, Validation Loss: 0.08, Validation Metric: 0.98
Epoch 4/10: Training Loss: 0.02, Validation Loss: 0.06, Validation Metric: 0.98
Epoch 5/10: Training Loss: 0.10, Validation Loss: 0.04, Validation Metric: 0.99
Epoch 6/10: Training Loss: 0.15, Validation Loss: 0.04, Validation Metric: 0.99
Epoch 7/10: Training Loss: 0.01, Validation Loss: 0.03, Validation Metric: 0.99
Epoch 8/10: Training Loss: 0.01, Validation Loss: 0.03, Validation Metric: 0.99
Epoch 9/10: Training Loss: 0.05, Validation Loss: 0.02, Validation Metric: 0.99
Epoch 10/10: Training Loss: 0.01, Validation Loss: 0.02, Validation Metric: 0.99
