# Imports

### General

In [None]:
import torch
from torch.utils.data import Dataset, dataloader, sampler, dataset

### Neural Network API

In [None]:
import torch.autograd as autograd  # computation path
from torch import Tensor           # tensor node
import torch.nn as nn              # neural networks
import torch.nn.functional as F    # layers, activations
import torch.optim as optim        # optimisers

### Vision

In [None]:
from torchvision import datasets, models, transforms
import torchvision.transforms as transforms

### Distributed Training

In [None]:
import torch.distributed as dist
from multiprocessing import Process

# Tensors

### Creation

In [None]:
# Samples from a standard normal distribution, arranged as 1 x 2 x 2.
torch.randn(1, 2, 2)

In [None]:
# All-zero tensor of shape 2 x 1 x 1.
torch.zeros(2, 1, 1)

In [None]:
# All-one tensor of shape 1 x 2 x 1.
torch.ones(1, 2, 1)

In [None]:
# Build a tensor from a nested Python list, then duplicate it.
L = [[1, 2],
     [2, 1]]
torch.clone(torch.Tensor(L))    # deep copy of a tensor

In [None]:
# Gradient tracking can be switched off for a region of code and
# switched back on for an individual tensor afterwards.
requires_grad = True                 # tracking state wanted afterwards
base = torch.ones(2, 2, requires_grad=requires_grad)

with torch.no_grad():                # stop tracking tensor history
    detached = base * 2              # computed without recording a graph

# Assigning to a bare name does not change any tensor; the in-place
# method below is what actually flips the tensor's flag.
detached.requires_grad_(requires_grad)    # set to track for future

### Dimensionality

In [None]:
# Random 4-dimensional tensor; the last expression reports its shape.
x = torch.randn(2, 2, 2, 2)
x.shape

In [None]:
# Second operand: same sizes as x except along dimension 1.
y = torch.randn(2, 1, 2, 2)
torch.cat((x, y), 1)    # concatenate tensors along dim

In [None]:
# reshape tensors into given size
x.view((2, 8))

In [None]:
# reshape into some missing size: -1 marks the dimension to be inferred
x.view((-1, 2))

In [None]:
# swap two dimensions (here dimensions 1 and 2)
torch.transpose(x, 1, 2)

In [None]:
# add a new dimension with size 1 at position 4
torch.unsqueeze(x, 4)

### Algebra

In [None]:
# Small operands for the linear-algebra calls below.
A = torch.Tensor([[1, 2],
                  [3, 3]])
B = torch.Tensor([[4, 4],
                  [1, 5]])
C = torch.Tensor([1, 2])

torch.mm(A, B)    # matrix multiplication
torch.mv(A, C)    # matrix-vector multiplication
torch.t(A)        # matrix transpose

### GPU Usage

In [None]:
# Ask PyTorch whether CUDA can be used in this environment.
torch.cuda.is_available()

In [None]:
# Moving a tensor between devices. The .cuda() call is left commented
# out, presumably so the cell also runs on machines without a GPU.
# x.cuda()    # move x to GPU and return a new object
x.cpu()       # move back to CPU

In [None]:
# Device-agnostic set-up: use CUDA when it is both allowed and
# available, otherwise fall back to the CPU.
# NOTE: `args` (options object with a `disable_cuda` flag) and `net`
# (presumably an nn.Module) are not created in this cell and must
# already exist when it runs.

if not args.disable_cuda and torch.cuda.is_available():
    args.device = torch.device('cuda')
else:
    args.device = torch.device('cpu')

# The chosen device lives on `args`; no bare `device` name is defined here.
net.to(args.device)    # convert parameters to device specific
x.to('cpu')            # copy tensor to a device

# Deep Learning

### Layers

In [None]:
# Sizes shared by the layer examples below.
m, n, s = 64, 4, 16

# Fully connected layer mapping m input features to n outputs.
nn.Linear(in_features=m, out_features=n)

# 2-d convolution with an s x s kernel (1-d and 3-d variants also exist).
nn.Conv2d(in_channels=m, out_channels=n, kernel_size=s)

# Max pooling over s x s windows.
nn.MaxPool2d(kernel_size=s)

# Element-wise dropout, not in-place.
nn.Dropout(0.5, False)

# 2d channel-wise dropout.
nn.Dropout2d(0.5)

# Batch normalisation over s*m channels.
nn.BatchNorm2d(num_features=s * m)

# Tensor-wise mapping from indices to embedding vectors
nn.Embedding(m, 8)

# Recurrent layers
nn.RNN(input_size=m, hidden_size=s)     # RNN with tanh/ReLU non-linearity

nn.LSTM(input_size=n, hidden_size=s)    # long short-term memory layer

mod = nn.GRU(input_size=m, hidden_size=s)    # gated recurrent unit

### Loss Functions

In [None]:
# Print every name in torch.nn that ends with 'Loss', one per line.
for loss_name in (name for name in dir(nn) if name.endswith('Loss')):
    print(loss_name)

### Activation Functions

ReLU, ReLU6, ELU, SELU, PReLU, LeakyReLU, 
Threshold, Hardtanh, Sigmoid, Tanh, LogSigmoid, 
Softplus, Softshrink, Softsign, Tanhshrink, 
Softmin, Softmax, Softmax2d or LogSoftmax

### Optimisers

SGD, Adadelta, Adagrad, Adam, SparseAdam, Adamax, ASGD, LBFGS, RMSprop or Rprop

In [None]:
# create optimiser over the parameters of `mod`
opt = optim.Adagrad(params=mod.parameters())

# update weights from the gradients currently stored on the parameters
opt.step()

### Learning Rate

LambdaLR, StepLR, MultiStepLR, ExponentialLR or ReduceLROnPlateau

In [None]:
# create lr scheduler: step decay with a period of 2 scheduler steps
scheduler = optim.lr_scheduler.StepLR(optimizer=opt, step_size=2)

# advance the schedule; call once per epoch, after opt.step()
scheduler.step()

# Data Utilities

In [None]:
# Show the names exposed by `dataset` (the torch.utils.data.dataset
# module imported at the top of the file).
print(dir(dataset))

In [None]:
# Instantiated directly here only to show the constructor call; real
# datasets presumably subclass it — see the torch.utils.data docs.
Dataset()  # abstract class representing a dataset

### Dataloaders and DataSamplers

In [None]:
# load data batches
# `dataloader` is the torch.utils.data.dataloader *module*, so it cannot
# be called itself; the loader class inside it takes a dataset instance.
samples = dataset.TensorDataset(torch.arange(4.0))    # tiny in-memory dataset
loader = dataloader.DataLoader(samples, batch_size=1)

In [None]:
# abstract class for sampling from dataset
# NOTE(review): `dataset` here is the module imported from
# torch.utils.data, not a dataset instance — presumably a placeholder
# for real data; confirm before reusing this line.
sampler.Sampler(dataset)  