In [None]:
import torch

In [None]:
# Problem dimensions used throughout the notebook.
N = 64        # number of samples (rows of x and y)
D_in = 1000   # input feature dimension
H = 100       # hidden layer width
D_out = 10    # output dimension

In [None]:
# Seed PyTorch's global RNG so the random data and the initial weights are
# identical on every "Restart Kernel -> Run All".
torch.manual_seed(0)

# Random inputs of shape (N, D_in) and random regression targets of shape (N, D_out).
x = torch.randn(N,D_in)
y = torch.randn(N,D_out)

# Weight matrices of the two-layer network. requires_grad=True tells autograd
# to record operations on them so that loss.backward() fills in .grad.
w1 = torch.randn(D_in,H,requires_grad=True)
w2 = torch.randn(H,D_out,requires_grad=True)

In [None]:
learning_rate = 1e-6
for t in range(500):
    # Forward pass. Tensor.mm is a plain 2-D matrix product: it behaves like
    # matmul for matrices but does not support broadcasting.
    h_relu = x.mm(w1).clamp(min=0)
    y_pred = h_relu.mm(w2)

    # Sum of squared errors over every element of the prediction.
    loss = ((y_pred - y) ** 2).sum()

    if not t % 100:
        print(loss)

    # Autograd populates w1.grad and w2.grad.
    loss.backward()

    # The parameter update itself must not be recorded by autograd.
    with torch.no_grad():
        for weight in (w1, w2):
            # Gradient-descent step, applied in place.
            weight.sub_(learning_rate*weight.grad)
            # backward() accumulates into .grad, so reset it for the next step.
            weight.grad.zero_()

## PyTorch nn

In [None]:
# Two-layer network expressed with torch.nn modules: Linear -> ReLU -> Linear.
model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out),
)

learning_rate = 1e-4
# Adam updates every parameter registered on the model.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Mean-squared-error criterion (default reduction: mean over all elements).
loss_fn = torch.nn.functional.mse_loss

for t in range(500):
    y_pred = model(x)
    loss = loss_fn(y_pred, y)

    # Report progress every 100 iterations.
    if not t % 100:
        print(loss)

    loss.backward()

    # Apply the update, then clear the gradients for the next iteration.
    optimizer.step()
    optimizer.zero_grad()

## PyTorch DataLoaders

In [1]:
import torch
from torch.utils.data import DataLoader,TensorDataset
import numpy as np

In [3]:
N, D_in, H, D_out = 64,1000,100,10

# Seed NumPy's global RNG so the synthetic data is identical on every run.
np.random.seed(0)

# The data is generated with NumPy here (instead of torch.randn), so x and y
# are float64 ndarrays of shape (N, D_in) and (N, D_out).
x = np.random.randn(N,D_in)
y = np.random.randn(N,D_out)

In [4]:
# Convert the data to float32 tensors. torch.as_tensor with an explicit dtype
# replaces the legacy torch.Tensor(...) constructor: the target dtype is
# spelled out instead of being implied, and re-running this cell on values
# that are already float32 tensors is a harmless no-op.
x = torch.as_tensor(x, dtype=torch.float32)
y = torch.as_tensor(y, dtype=torch.float32)

In [5]:
# Pair each row of x with the matching row of y, then serve the pairs in
# mini-batches of 8 samples.
dataset = TensorDataset(x, y)
loader = DataLoader(dataset, batch_size=8)

In [6]:
class TwoLayerNet(torch.nn.Module):
    """Fully connected network: Linear(D_in, H) -> ReLU -> Linear(H, D_out)."""

    def __init__(self, D_in, H, D_out):
        """Create the two linear layers.

        Args:
            D_in: number of input features.
            H: number of hidden units.
            D_out: number of output features.
        """
        super().__init__()
        self.linear1 = torch.nn.Linear(D_in, H)
        self.linear2 = torch.nn.Linear(H, D_out)

    def forward(self, x):
        """Return the network output for the input batch ``x``."""
        hidden = self.linear1(x)
        # Clamping at zero computes the same forward values as ReLU.
        activated = torch.clamp(hidden, min=0)
        return self.linear2(activated)

In [7]:
# Instantiate the network and a plain SGD optimizer over all of its parameters.
model = TwoLayerNet(D_in=D_in, H=H, D_out=D_out)
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.01)

In [8]:
for epoch in range(20):
    # Running sum of the per-batch mean losses for this epoch, kept as a
    # plain Python float.
    epoch_loss = 0.0
    for x_batch,y_batch in loader:
        y_pred = model(x_batch)
        loss = torch.nn.functional.mse_loss(y_pred,y_batch)

        # .item() extracts the scalar value. Adding the tensor itself would
        # make epoch_loss part of the autograd graph and keep the history of
        # every batch referenced until the end of the epoch.
        epoch_loss += loss.item()

        loss.backward()
        optimizer.step()
        # Gradients accumulate across backward() calls, so clear them here.
        optimizer.zero_grad()
    print('epoch {}, loss:{}'.format(epoch,epoch_loss))

epoch 0, loss:8.96192455291748
epoch 1, loss:8.122140884399414
epoch 2, loss:7.422724723815918
epoch 3, loss:6.831385612487793
epoch 4, loss:6.316688060760498
epoch 5, loss:5.8617024421691895
epoch 6, loss:5.453439235687256
epoch 7, loss:5.080478191375732
epoch 8, loss:4.73899507522583
epoch 9, loss:4.425826549530029
epoch 10, loss:4.133258819580078
epoch 11, loss:3.8617002964019775
epoch 12, loss:3.6074259281158447
epoch 13, loss:3.369208335876465
epoch 14, loss:3.1448025703430176
epoch 15, loss:2.9338207244873047
epoch 16, loss:2.7353134155273438
epoch 17, loss:2.5480527877807617
epoch 18, loss:2.3729209899902344
epoch 19, loss:2.207909345626831
