# Pytorch Basics

## Part 1: Basic Methods

In [1]:
##### basics ######
import torch
import torch.nn as nn

In [2]:
# use gpu if available
device = torch.device('cpu')
if torch.cuda.is_available():
    device = torch.device("cuda")
print(device)

cpu


In [3]:
# creating tensors
x = torch.empty(4,5)
x = torch.zeros(4,5)
x = torch.ones(4,1, dtype = torch.float32)
x = torch.eye(4) #identity matrix

x = torch.randint(low=0, high=100, size=(4,1)) # randomints
x = torch.rand(4, 5, requires_grad=True) # random uniform
x = torch.normal(0, 1, size =(4,5)) # random normal
x

tensor([[-0.3948, -1.7226, -1.7597,  1.8367,  1.2553],
        [ 2.8181, -0.1345,  0.1668,  1.5465,  0.1209],
        [ 0.2747,  0.4224, -0.0367, -0.6486, -1.6042],
        [-0.1773, -2.2585,  1.2980, -1.6444,  0.5410]])

In [4]:
# tensort attributes
print(f'shape: {x.shape}, dtype: {x.dtype}, requires_grad: {x.requires_grad}, device: {x.device}')

shape: torch.Size([4, 5]), dtype: torch.float32, requires_grad: False, device: cpu


In [5]:
# methods
x = torch.rand(4,5)
x_numpy = x.numpy() # return a copy of x as numpy
x = x.to(device) # return a copy of x on the device
x_cpu = x.to('cpu') # return a copy of x on the cpu

In [6]:
# some math methods
x.T # transpose
x @ x.T # matrix multiplication
x.add(x) # add
x.add_(x) # method ending with "_" is in place

max_vals, max_args = x.max(dim=1) # return max vals and indices by row
print('max_vals:', max_vals, 'indices:', max_args)

max_vals: tensor([1.6570, 1.6633, 1.9301, 1.9928]) indices: tensor([0, 0, 1, 3])


In [7]:
# getting values
print('x[0,0] :', x[0,0]) # this is a one element tensor
print('x[0,0].item() :', x[0,0].item()) # this a numpy float


x[0,0] : tensor(1.6570)
x[0,0].item() : 1.6570290327072144


In [8]:
# gradients:
# lets calc dz/dx where z is scalar z(y(x))
# here z = sum(y**2) where y = x + 2
x = torch.rand(3, requires_grad=True) # gradient tracked: we want dz/dx later
print('x:', x, 'x.grad:', x.grad) # shows required grad = true
y = x + 2  
print('y:', y, 'x.grad:', x.grad) # y now has grad_fn attribute <AddBackward>
z = sum(y**2)
print('z:', z, 'x.grad:', x.grad)  
print('calling z.backward()')
z.backward() # dz/dx is calculated and x.grad is now populated
print('(dz/dx) x_grad:', x.grad)
print('check gradient:', x.grad == 2*(x+2)) # xgrad is dz/dx i.e. 2*(x+2)

x: tensor([0.6613, 0.1218, 0.5651], requires_grad=True) x.grad: None
y: tensor([2.6613, 2.1218, 2.5651], grad_fn=<AddBackward0>) x.grad: None
z: tensor(18.1647, grad_fn=<AddBackward0>) x.grad: None
calling z.backward()
(dz/dx) x_grad: tensor([5.3226, 4.2437, 5.1302])
check gradient: tensor([True, True, True])


In [9]:
# stop tracking gradients (3 ways)
x.requires_grad_(False)
x.detach()
with torch.no_grad(): # wrap code in this
    x+x

# reset gradients, this is important as otherwise looping will add to the gradient
x.grad.zero_() # do this after each training loop

tensor([0., 0., 0.])

## Part 2: Full Examples

In [10]:
##### linear regression using autograd #####
# true parameters
w_true = torch.tensor([1,2,3,4,5], dtype = torch.float32).view(-1,1)
b_true = torch.tensor(10, dtype = torch.float32)

# X is input, y is target
X = torch.normal(mean=0, std=1, size=(100,5)) # n = 100, dim = 5
y = X @ w_true + b_true

# define the model parameters
w_est = torch.normal(mean=0, std=0.1, size = (5,1), dtype= torch.float32, requires_grad = True)
b_est = torch.normal(mean=0, std=0.1, size = (1,1), dtype= torch.float32, requires_grad = True)

# define the model
def forward(X):
    return X @ w_est + b_est

# define the loss function
def loss(y, y_pred):
    return ((y-y_pred)**2).mean() # MSE

# define the learning parameters
learning_rate, epochs = 0.03, 150

# training loop
for epoch in range(epochs):
    #forward pass
    y_pred = forward(X)
    l = loss(y, y_pred)

    #backward pass
    l.backward() # this calculate dl/dw and dl/db

    #updates
    with torch.no_grad(): # this calc should not recalculate the gradients
        w_est -= learning_rate * w_est.grad
        b_est -= learning_rate * b_est.grad
    
    #zeroise grads (otherwise the gradient will be incremented each loop)
    w_est.grad.zero_(), b_est.grad.zero_() 

    #print output
    if((epoch + 1) %25 == 0): # console update every 20 iterations
        param_string = ' '.join([f'w_{i}: {w.item():.2f}' for i, w in enumerate(w_est)]) + f' b: {b_est.item():.2f}'
        print(f'epoch: {epoch + 1}, loss: {l:.2f}', 'estimated params:', param_string)


epoch: 25, loss: 6.58 estimated params: w_0: 1.38 w_1: 1.64 w_2: 2.76 w_3: 3.38 w_4: 3.92 b: 8.04
epoch: 50, loss: 0.30 estimated params: w_0: 1.17 w_1: 1.92 w_2: 3.03 w_3: 3.90 w_4: 4.72 b: 9.60
epoch: 75, loss: 0.02 estimated params: w_0: 1.05 w_1: 1.97 w_2: 3.02 w_3: 3.99 w_4: 4.92 b: 9.92
epoch: 100, loss: 0.00 estimated params: w_0: 1.02 w_1: 1.99 w_2: 3.01 w_3: 4.00 w_4: 4.98 b: 9.98
epoch: 125, loss: 0.00 estimated params: w_0: 1.00 w_1: 2.00 w_2: 3.00 w_3: 4.00 w_4: 4.99 b: 10.00
epoch: 150, loss: 0.00 estimated params: w_0: 1.00 w_1: 2.00 w_2: 3.00 w_3: 4.00 w_4: 5.00 b: 10.00


In [11]:
##### logistic regression using pytorch class structure #####
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


# 0) prepare data
dataset = load_breast_cancer()
X, y = dataset.data, dataset.target
n_samples, n_features = X.shape 

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size = 0.2, stratify = y, random_state = 123)

# standardise data and convert to tensors
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).view(-1,1)
y_test = torch.tensor(y_test, dtype=torch.float32).view(-1,1)

def calc_accuracy(y_prob, y_true):
    y_pred = y_prob.round() # round to 0 or 1
    acc = (y_pred == y_true).sum()/len(y_true)
    return acc

# 1) model
class LogisticRegression(nn.Module):
    def __init__(self, n_input_features):
        # initialze superclass
        super(LogisticRegression, self).__init__()
        # initialize layer objects
        self.linear = nn.Linear(n_input_features, 1)
    
    def forward(self, X):
        out = torch.sigmoid(self.linear(X))
        return out

model = LogisticRegression(n_features).to(device)

# 2) loss and optimizer
criterion = nn.BCELoss() # binary cross entropy loss
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)

# 3) training loop
num_epochs = 100
learning_rate = 0.01
for epoch in range(num_epochs):
    #forward pass
    X_train, y_train = X_train.to(device), y_train.to(device)
    y_prob = model(X_train)
    loss = criterion(y_prob, y_train)
    
    #backward pass
    loss.backward()
    
    #updates
    optimizer.step()
    optimizer.zero_grad()
    
    if((epoch +1) % 20 == 0):
        with torch.no_grad(): # we don't want to add to the gradients, having just zeroised them
            X_test.to(device), y_test.to(device)
            train_acc = calc_accuracy(y_prob, y_train)
            test_acc = calc_accuracy(model(X_test), y_test)
            print(f'epoch: {epoch +1}, loss: {loss.item():.2f}, train_acc:{train_acc:.2f}, test_acc:{test_acc:.2f}')

epoch: 20, loss: 0.11, train_acc:0.97, test_acc:0.96
epoch: 40, loss: 0.09, train_acc:0.98, test_acc:0.97
epoch: 60, loss: 0.08, train_acc:0.98, test_acc:0.97
epoch: 80, loss: 0.07, train_acc:0.98, test_acc:0.97
epoch: 100, loss: 0.07, train_acc:0.98, test_acc:0.97
