# PyTorch

In [None]:
import torch

# Tensor: like numpy, can run on GPU
# Pick the GPU tensor type when CUDA is available and fall back to the CPU type
# otherwise, so the cell also runs on machines without a GPU.
dtype = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor

# N: batch size, D_in: input width, H: hidden width, D_out: output width
N, D_in, H, D_out = 64, 1000, 100, 10
# create random tensors(data, weight)
x = torch.randn(N, D_in).type(dtype)
y = torch.randn(N, D_out).type(dtype)
w1 = torch.randn(D_in, H).type(dtype)
# w2 maps the hidden layer to the output, so it must be (H, D_out);
# otherwise y_pred would not have the same shape as y.
w2 = torch.randn(H, D_out).type(dtype)

learning_rate = 1e-6
for t in range(500):
    # forward pass: compute pred, loss
    h = x.mm(w1)
    h_relu = h.clamp(min=0)          # ReLU
    y_pred = h_relu.mm(w2)
    loss = (y_pred - y).pow(2).sum() # sum of squared errors

    # backward pass: compute grad of the loss w.r.t. w1 and w2 by hand
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.t().mm(grad_y_pred)
    grad_h_relu = grad_y_pred.mm(w2.t())
    grad_h = grad_h_relu.clone()     # clone so grad_h_relu is left untouched
    grad_h[h < 0] = 0                # ReLU passes no gradient where its input was negative
    grad_w1 = x.t().mm(grad_h)

    # gradient descent step on W
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2


### Autograd

In [None]:
'''V1'''
import torch
from torch.autograd import Variable  # Variable is used below but was never imported in this cell

# N: batch size, D_in: input width, H: hidden width, D_out: output width
N, D_in, H, D_out = 64, 1000, 100, 10
''''''
# create Variables(nodes in graph)
# x.data: Tensor
# x.grad: Variable of gradients
# x.grad.data: Tensor of gradients
x = Variable(torch.randn(N, D_in), requires_grad=False) # requires_grad=False: no grad
y = Variable(torch.randn(N, D_out), requires_grad=False)
w1 = Variable(torch.randn(D_in, H), requires_grad=True) # requires_grad=True: want grad
# w2 maps the hidden layer to the output, so it must be (H, D_out);
# otherwise y_pred would not have the same shape as y.
w2 = Variable(torch.randn(H, D_out), requires_grad=True)

learning_rate = 1e-6
for t in range(500):
    # forward pass: compute pred, loss
    y_pred = x.mm(w1).clamp(min=0).mm(w2)
    loss = (y_pred - y).pow(2).sum()

    # backward pass: compute grad
    # backward() accumulates into .grad, so clear the previous step's values first.
    # .grad is None until the first backward(); compare against None explicitly
    # because truth-testing a multi-element tensor raises an error.
    if w1.grad is not None: w1.grad.data.zero_()
    if w2.grad is not None: w2.grad.data.zero_()
    loss.backward()

    # gradient descent step on W
    # Update through .data so the step itself is not tracked by autograd, and
    # read the gradients autograd just stored on the weights.
    w1.data -= learning_rate * w1.grad.data
    w2.data -= learning_rate * w2.grad.data


In [None]:
'''V2'''
import torch
from torch.autograd import Variable  # Variable is used below but was never imported in this cell

''''''
# define own autograd function
class ReLU(torch.autograd.Function):
    """ReLU written as a custom autograd Function with an explicit backward.

    forward/backward are static methods that receive a context object; the
    function is invoked through ``ReLU.apply(...)`` rather than by
    instantiating the class.
    """

    @staticmethod
    def forward(ctx, x):
        """Return x with negative entries clamped to 0; keep x for backward."""
        ctx.save_for_backward(x)
        return x.clamp(min=0)

    @staticmethod
    def backward(ctx, grad_y):
        """Return grad_y with entries zeroed wherever the forward input was negative."""
        x, = ctx.saved_tensors
        grad_input = grad_y.clone()  # clone so the incoming gradient is not modified
        grad_input[x < 0] = 0
        return grad_input

# N: batch size, D_in: input width, H: hidden width, D_out: output width
N, D_in, H, D_out = 64, 1000, 100, 10

x = Variable(torch.randn(N, D_in), requires_grad=False) # requires_grad=False: no grad
y = Variable(torch.randn(N, D_out), requires_grad=False)
w1 = Variable(torch.randn(D_in, H), requires_grad=True) # requires_grad=True: want grad
# w2 maps the hidden layer to the output, so it must be (H, D_out);
# otherwise y_pred would not have the same shape as y.
w2 = Variable(torch.randn(H, D_out), requires_grad=True)

learning_rate = 1e-6
for t in range(500):
    ''''''
    # use own autograd func in forward pass
    y_pred = ReLU.apply(x.mm(w1)).mm(w2)
    loss = (y_pred - y).pow(2).sum()

    # backward pass: compute grad
    # backward() accumulates into .grad, so clear the previous step's values first.
    # .grad is None until the first backward(); compare against None explicitly
    # because truth-testing a multi-element tensor raises an error.
    if w1.grad is not None: w1.grad.data.zero_()
    if w2.grad is not None: w2.grad.data.zero_()
    loss.backward()

    # gradient descent step on W
    # Update through .data so the step itself is not tracked by autograd, and
    # read the gradients autograd just stored on the weights.
    w1.data -= learning_rate * w1.grad.data
    w2.data -= learning_rate * w2.grad.data


### nn

In [None]:
import torch
from torch.autograd import Variable

# Problem sizes: batch, input features, hidden units, output features.
N, D_in, H, D_out = 64, 1000, 100, 10

# Random inputs and targets wrapped as Variables.
x = Variable(torch.randn(N, D_in))
y = Variable(torch.randn(N, D_out), requires_grad=False)

''''''
# The model is an ordered stack of layers: Linear -> ReLU -> Linear.
model = torch.nn.Sequential(
    torch.nn.Linear(in_features=D_in, out_features=H),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=H, out_features=D_out),
)
# size_average=False: the squared errors are summed instead of averaged.
loss_fn = torch.nn.MSELoss(size_average=False)

learning_rate = 1e-6
for step in range(500):
    # Forward: run the inputs through the model, then score the predictions.
    y_pred = model(x)
    loss = loss_fn(y_pred, y)

    # Backward: reset the parameters' gradients, then backpropagate the loss.
    model.zero_grad()
    loss.backward()

    # Manual gradient-descent update on every parameter's underlying tensor.
    for param in model.parameters():
        param.data.sub_(learning_rate * param.grad.data)


### optim

In [None]:
import torch
from torch.autograd import Variable

# Problem sizes: batch, input features, hidden units, output features.
N, D_in, H, D_out = 64, 1000, 100, 10

# Random inputs and targets wrapped as Variables.
x = Variable(torch.randn(N, D_in))
y = Variable(torch.randn(N, D_out), requires_grad=False)

# Same layer stack as before: Linear -> ReLU -> Linear.
model = torch.nn.Sequential(
    torch.nn.Linear(in_features=D_in, out_features=H),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=H, out_features=D_out),
)
# size_average=False: the squared errors are summed instead of averaged.
loss_fn = torch.nn.MSELoss(size_average=False)

''''''
# The optimizer owns the update rule (Adam here) for the model's parameters.
learning_rate = 1e-4
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

for step in range(500):
    # Forward: run the inputs through the model, then score the predictions.
    y_pred = model(x)
    loss = loss_fn(y_pred, y)

    # Backward: reset the parameters' gradients, then backpropagate the loss.
    model.zero_grad()
    loss.backward()

    ''''''
    # Let the optimizer apply its update to every parameter.
    optimizer.step()


### Modules

In [None]:
import torch
from torch.autograd import Variable

''''''
# define model as a single module
class TwoLayerNet(torch.nn.Module):
    """Two linear layers with a ReLU in between, packaged as one module.

    Args:
        D_in: number of input features.
        H: number of hidden units.
        D_out: number of output features.
    """

    def __init__(self, D_in, H, D_out):
        super(TwoLayerNet, self).__init__()
        # The two child modules that hold the learnable weights.
        self.linear1 = torch.nn.Linear(in_features=D_in, out_features=H)
        self.linear2 = torch.nn.Linear(in_features=H, out_features=D_out)

    def forward(self, x):
        """Compute predictions for ``x``.

        Only the forward computation is written out; no backward method is
        defined on this class.
        """
        hidden = self.linear1(x)
        activated = hidden.clamp(min=0)  # ReLU: negatives become 0
        return self.linear2(activated)

# Problem sizes: batch, input features, hidden units, output features.
N, D_in, H, D_out = 64, 1000, 100, 10

# Random inputs and targets wrapped as Variables.
x = Variable(torch.randn(N, D_in))
y = Variable(torch.randn(N, D_out), requires_grad=False)

# Instantiate the custom module defined earlier in this cell.
model = TwoLayerNet(D_in, H, D_out)

# size_average=False: the squared errors are summed instead of averaged.
criterion = torch.nn.MSELoss(size_average=False)
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-4)

# Training loop.
for step in range(500):
    # Forward: run the inputs through the model, then score the predictions.
    y_pred = model(x)
    loss = criterion(y_pred, y)

    # Backward: reset the parameters' gradients, then backpropagate the loss.
    model.zero_grad()
    loss.backward()

    # Let the optimizer apply the SGD update to every parameter.
    optimizer.step()


### DataLoaders

In [None]:
import torch
from torch.autograd import Variable
from torch.utils.data import TensorDataset, DataLoader

# N: number of samples, D_in: input width, H: hidden width, D_out: output width
N, D_in, H, D_out = 64, 1000, 100, 10

x = Variable(torch.randn(N, D_in))
y = Variable(torch.randn(N, D_out), requires_grad=False)

''''''
# DataLoader wraps Dataset, provides minibatching/shuffling/multithreading
# can load custom data(with own Dataset class)
loader = DataLoader(TensorDataset(x, y), batch_size=8)

# construct model (TwoLayerNet is defined in the Modules cell; run that cell first)
model = TwoLayerNet(D_in, H, D_out)

criterion = torch.nn.MSELoss(size_average=False)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4)

# iterate over the data in minibatches, several passes (epochs) over the full set
for epoch in range(10):
    for x_batch, y_batch in loader:
        # loader returns Tensor => need to wrap with Variable
        # Train on the current minibatch, not on the full x / y.
        x_var, y_var = Variable(x_batch), Variable(y_batch)
        y_pred = model(x_var)
        loss = criterion(y_pred, y_var)

        # backward pass: compute grad
        model.zero_grad()
        loss.backward()

        # gradient descent step on W
        optimizer.step()


### Pretrained Models

In [None]:
import torch
import torchvision

# Build three torchvision architectures, asking for pretrained weights
# (pretrained=True) instead of a fresh random initialisation.
# NOTE(review): presumably the weights are downloaded on first use — confirm
# network access is available when running this cell.
model_zoo = torchvision.models
alexnet = model_zoo.alexnet(pretrained=True)
vgg16 = model_zoo.vgg16(pretrained=True)
resnet101 = model_zoo.resnet101(pretrained=True)