# Imports

In [1]:
import torch
from torch import nn #neural network modules
import torch.nn.functional as F #layers, actions, other functions
from torch import optim # Gradient (SGD,Adam,...) and newton

from torch.utils.data import Dataset, DataLoader #datasets

from torchvision import datasets #famous image datasets
from torchvision import models #famous pretrained conv models
from torchvision import transforms #image transformations

import numpy as np
from PIL import Image

# Device

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

x = torch.randn(2, 3)

x.to(device, dtype=torch.float) #move tensor to cpu/gpu

net = nn.Sequential(nn.Linear(4, 6), nn.ReLU())
net.to(device) #copy all params / buffers of module to cpu/gpu

x.cuda() #new tensor in GPU
x.cpu() #new tensor in CPU
None

# Tensors Creation

In [3]:
#from N(0, 1)
x = torch.randn(2,3, dtype=torch.float, device=device, requires_grad=True)

#with all 1 or 0
y = torch.zeros(2,4, dtype=torch.float, device=device, requires_grad=True)
z = torch.ones(2,5, dtype=torch.float, device=device, requires_grad=True)

x2 = x.clone() #create a clone of x

#from nested list
a = torch.tensor([[1.,2.,3.]], device=device, requires_grad=True)

torch.cat((x,y,z), dim=1) #concatenate into (2, 12)
a = torch.cat((x, a), dim=0) #concatenate into (3, 3)

torch.randint(0, 4, (3, 5)) #tensor (3,5) from 0 included to 4 excluded

nx = np.random.randn(4, 6)
x = torch.from_numpy(nx) #same tensor data than numpy: tensor (4, 6)
x.numpy() #same tensor data than torch: numpy tensor (4,6)

None

# Dimensions

In [4]:
x = torch.randn(4, 8, 6)
x.size() #tuple (4, 8, 6)
x.shape #(tuple 4, 8, 6)

x.view(4, 8, 3, 2) #same tensor (4, 8, 3, 2)
x.view(4, -1) #same tensor (r, 48)

x.permute(0, 2, 1) #tranpose dims: same tensor (4, 6, 8)

x.unsqueeze(1) #add dim of size one: same tensor (4, 1, 8, 6)

x = torch.randn(3, 1, 5, 1, 7)
x.squeeze() #remove all dims of size one: same tensor (3, 5, 7)
x.squeeze(1) #remove dim of size one: same tensor (3, 5, 1, 7)

x = torch.randn(1, 1, 1)
x.item() #get python value of tensor of full length 1
None

# Operations

In [5]:
x = torch.randn(4, 7)
y = torch.randn(4, 7)

#element-wise opeartions
x + y
x - y
x * y
x / y
x % y

x = torch.randn(4, 6)
y = torch.randn(6, 7)
x @ y #matrix-matrix, matrix-vec, or vec-vec dot product
x.t() #matrix transpose
None

# Automatic differentiation

In [6]:
x = torch.randn(4, 7, requires_grad=True)
y = torch.randn(4, 7, requires_grad=True)
a = torch.randn(4, 7)
E = torch.sum(a+x*y)

E.backward() #compute all gradients (add to previous grad)
x.grad #dE/dx
y.grad #dE/dy

x.grad.zero_() #reset gradient

x.requires_grad #True
(y**2).requires_grad #True
a.requires_grad #False

with torch.no_grad():
    # no tracking computation history
    (y**2).requires_grad #False
    
#start/stop tracking gradient (inplace)
a.requires_grad_(True)
a.requires_grad_(False)
None

# Functional operations

In [7]:
x = torch.randn(5, 6)
F.relu(x, inplace=False)
F.leaky_relu(x, 0.2, inplace=False)
torch.sigmoid(x)
torch.tanh(x)
F.logsigmoid(x)
F.log_softmax(x, dim=1)
F.softmax(x, dim=1)
vals, idxs = torch.max(x, dim=1)
torch.argmax(x, dim=1)
torch.sum(x, dim=1)
torch.mean(x, dim=1)

F.dropout(x, p=0.5, training=True, inplace=False) #dropout operation
None

# Neural Net modules (operations)

In [8]:
nn.ReLU(inplace=False)
nn.LeakyReLU(inplace=False)
nn.Dropout(p=0.5, inplace=False)
nn.LogSoftmax(dim=1)
nn.Softmax(dim=1)
nn.Sigmoid()

#batch normalization of a 4d tensors with 16 channels (N,16,w,h)
nn.BatchNorm2d(num_features=16)

nn.Sequential(nn.Sigmoid(), nn.ReLU()) #sequentil list of operations

None

In [9]:
#Fully connected Layer

fc = nn.Linear(in_features=5, out_features=3, bias=True)
x = torch.randn(32, 5)
y = fc(x) #new tensor (32, 3)
None

In [10]:
#Embedding layer

VOC_SIZE = 4
VEC_LEN = 7
embed = nn.Embedding(num_embeddings=VOC_SIZE, embedding_dim=VEC_LEN)

x = torch.randint(0, VOC_SIZE, (32,))
y = embed(x) #new tensor (32,7)
None

In [11]:
#Conv2D layer

conv = nn.Conv2d(3, 64, kernel_size=(5,5), stride=(2,2), padding=(0,0),
                bias=True)
#kenel_size, stride, padding can take 1 number as value

# compute ouput size
def conv2d_size(size, kernel_size, stride = 1, pad = 0):
    return (size - kernel_size + 2 * pad) // stride + 1

x = torch.randn(32, 3, 120, 120)
y = conv(x) #new tensor (32, 64, 58, 58)
conv2d_size(120, 5, stride=2, pad=0) #58
None

In [12]:
# Max Pooling layer
#out_w = floor(in_w / kernel_w)
#out_h = floor(in_h / kernel_h)

max_pool = nn.MaxPool2d(kernel_size=(2,2))
#kernel_size can take 1 number as value

x = torch.randn(32, 3, 120, 120)
y = max_pool(x) #new tensor (32, 3, 60, 60)
None

In [13]:
#Conv tranpose 2D layer

deconv = nn.ConvTranspose2d(16, 8, kernel_size=(5,5), stride=(2,2),
                           padding=(0,0), bias=True)
#kenel_size, stride, padding can take 1 number as value

x = torch.randn(32, 16, 64, 64)
y = deconv(x) #new tensor (32, 8, 131, 131)
None

## Custom module

In [14]:
class Net(nn.Module):
    
    def __init__(self, in_size, hidden_size, out_size):
        super().__init__()
        self.fc1 = nn.Linear(in_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, out_size)
    
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

net = Net(120, 50, 4)

x = torch.randn(32, 120)
y = net(x) #new tensor (32, 4)
None

## Module methods

In [15]:
net = Net(120, 50, 4)

y = net(x) #compute output value of module

net.parameters() #weights of the whole module (and its components)

net.to(device) #send all params and buffers of net to cpu/gpu

net.zero_grad() #reset the gradient of all parameters

net.eval() #set module (and its components) to evaluation mode
net.train() #set module (and its components) to evaluation mode

#get / state state dict (serialization)
sd_net = net.state_dict()
net.load_state_dict(sd_net)

None

# Load / Save checkpoint

In [16]:
#Build model
model = Net(15, 7, 2)
optimizer = optim.SGD(model.parameters(), lr=0.2)
epoch = 4
loss = [0.4, 0.2, 0.3, 0.1]

#train model...

#Save model
torch.save({
    'model_sd': model.state_dict(),
    'optimizer_sd': optimizer.state_dict(),
    'epoch': epoch,
    'loss': loss
}, '/tmp/model.pth')

#Load model
model = Net(15, 7, 2)
optimizer = optim.SGD(model.parameters(), lr=0.2)
checkpoint = torch.load('/tmp/model.pth')
model.load_state_dict(checkpoint['model_sd'])
optimizer.load_state_dict(checkpoint['optimizer_sd'])
epoch = checkpoint['epoch']
loss = checkpoint['loss']

# Loss functions

In [17]:
#mean squared error
criterion = torch.nn.MSELoss(reduction='mean')
#reduction=sum takes the sum, reduction=mean divides by total items

pred = torch.randn(32, 7)
target = torch.randn(32, 7)
loss = criterion(pred, target) #sum((pred-target)**2) / (32*7)

In [18]:
#negative log-likelihood
#classification with K classes

criterion = torch.nn.NLLLoss(reduction='mean')
#reduction=sum takes the sum, reduction=mean divides by N

# pred matrix (N, K). 
# Each line contains the log-probabilities for the k classes
x = torch.randn(32, 10)
pred = F.log_softmax(x, dim=1)

# target vector (N) dtype=long
# Each item contains the true class (0->K-1)
target = torch.randint(0, 10, (32,))

loss = criterion(pred, target)

# Optimizers

In [19]:
#stochastig gradient descent
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)

#RMSProp
optimizer = optim.RMSprop(net.parameters(), lr=0.01, momentum=0.9,
                          alpha=0.99, eps=1e-8)

#Adam
optimizer = optim.Adam(net.parameters(), lr=0.001, 
                       betas=(0.9, 0.999), eps=1e-08)


optimizer.step() #update weights

optimizer.zero_grad() #reset gradient of all parameters

# Datasets

## Custom Dataset

In [20]:

class MyDataset(Dataset):
    
    def __init__(self, X, y):
        self.X = X
        self.y = y
        
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        x = torch.from_numpy(self.X[idx])
        y = torch.tensor(self.y[idx]).view(1)
        return x, y


#dataset of 135 observations with 7 features + one real target
X = np.random.randn(135, 7)
y = np.random.randn(135)

train_ds = MyDataset(X, y)
len(train_ds) #135
train_ds[0]

(tensor([ 0.2873,  1.4085, -0.4436,  0.3722,  1.2029, -1.2058, -0.3320],
        dtype=torch.float64), tensor([-0.3838]))

## Dataloader

In [21]:
train_dl = DataLoader(train_ds, batch_size=8,
                     shuffle=True, num_workers=4)

#Iterate over the dataset by batch
for i, (X, y) in enumerate(train_dl):
    #i current index of batch
    #X tensor of shape (8,7)
    #y tensor of shape (8,1)
    pass

# Torchvision transforms

In [22]:
transforms.CenterCrop(64) #crop the image at the center to size 64
transforms.Grayscale(1) #convert image to grayscale with 1 (or 3) chanels
transforms.RandomCrop(64) #crop the image to size 64 at random location
transforms.RandomHorizontalFlip(0.5) #flip image with proba 0.5
transforms.Resize(64) #resize image to size 64
transforms.ToTensor() #convert to torch tensor

#set image mean and std for n channels
transforms.Normalize(mean=[.485, .456, .406], std=[.229, .224, .225])

#apply a user-defined transform
transforms.Lambda(lambda x: torch.clamp(x, 0, 1))

#apply a sequence of transformations
trfs = transforms.Compose([
    transforms.Resize((64,64)), 
    transforms.ToTensor()
])

x = Image.fromarray(np.random.randn(3, 120, 120).astype('uint8'), 'RGB')
y = trfs(x) #new tensor (3, 64, 64)

## Custom transformation

In [23]:
class MyResize(object):
    
    def __init__(self, size):
        self.size = size
        
    def __call__(self, x):
        return x.resize((self.size, self.size), Image.BICUBIC)

trfs = transforms.Compose([
    MyResize(64), 
    transforms.ToTensor()
])

x = Image.fromarray(np.random.randn(3, 120, 120).astype('uint8'), 'RGB')
y = trfs(x) #new tensor (3, 64, 64)

# Torchvision Datasets

In [24]:
#ImageFolder
# Dataset to load images and labels for K classes
# labels are integer from 0 to K-1
# each item is a tuple (X, y) (PIL Image, n in [0,K-1])

if False:
    train_ds = datasets.ImageFolder('./root', transform=trfs)

'''
Directory structure:
root/dog/xxx.png
root/dog/xxx.png
root/dog/xxx.png

root/cat/xxx.png
root/cat/xxx.png
'''
None

In [25]:
'''
CIFAR 10 dataset
X: image of size (3, 32, 32)
y: label between 0 and 9
Train set: 50.000 observations
 Test set: 10.000 observations
'''
if False:
    datasets.CIFAR10(root='/tmp/data', train=True,
                     download=True, transform=None)
    

'''
MNIST dataset
X: image of size (1, 28, 28)
y: label between 0 and 9
Train set: 60.000 observations
 Test set: 10.000 observations
'''
if False:
    datasets.MNIST(root='/tmp/data', train=True,
                     download=True, transform=None)

# Train a model

In [26]:
### Parameters ###
LR = 0.2
BATCH_SIZE = 16
N_EPOCHS = 50

### Prepare dataset ###
X = np.random.randn(70, 13).astype(np.float32)
y = np.random.randn(70, 1).astype(np.float32)
train_ds = MyDataset(X, y)
train_dl = DataLoader(train_ds, batch_size=BATCH_SIZE,
                     shuffle=True, num_workers=4)

### Build model ###
model = Net(13, 3, 1)
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=LR)

X, y = next(iter(train_dl))
print('loss (before training):', criterion(model(X), y).item())

### Training ###
for epoch in range(1, N_EPOCHS+1):
    
    total_loss = 0
    
    for batch_i, (X, y) in enumerate(train_dl):
        
        pred = model(X)
        loss = criterion(pred, y)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        
        total_loss += len(X) * loss.item()
    
    if epoch % 10 == 0:
        print('epoch: {}, loss = {}'.format(
            epoch, total_loss / len(train_dl.dataset)
        ))
        
X, y = next(iter(train_dl))
print('loss (after training):', criterion(model(X), y).item())

loss (before training): 1.2406035661697388
epoch: 10, loss = 0.7083002788679941
epoch: 20, loss = 0.6155309609004429
epoch: 30, loss = 0.5778408186776297
epoch: 40, loss = 0.5664470876966204
epoch: 50, loss = 0.5275125205516815
loss (after training): 0.3419869542121887
