In [None]:
import torch
import math

# Seed the global RNG so that Restart Kernel -> Run All draws the same random
# initial weights every time (the cells below call torch.randn / build
# torch.nn layers without passing their own generator).
SEED = 0
torch.manual_seed(SEED)

# Every tensor created explicitly in this notebook uses this dtype and device.
dtype = torch.float
device = torch.device("cpu")
# device = torch.device("cuda:0") # Uncomment this to run on GPU

* 利用 y = sin(x) 產生訓練資料, 目標是訓練一個模型: 給定 x, 可以預測 y
* 我們的模型是 y_pred = a + b x + c x^2 + d x^3, 其中 a, b, c, d 為訓練參數
* 使用梯度下降, 學 a, b, c, d
* 目標函數為誤差平方和 $L = \sum_i (y_{pred,i} - y_i)^2$
![image](https://img-blog.csdnimg.cn/20181201204534330.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxcWlucnVp,size_16,color_FFFFFF,t_70)

In [None]:
# Training data: 2000 evenly spaced points on [-pi, pi] (deterministic, not
# random) together with the value of sin at each point.
x = torch.linspace(start=-math.pi, end=math.pi, steps=2000, device=device, dtype=dtype)
y = torch.sin(x)

# Show the shape of the inputs and of the targets, one per line.
print(x.shape, y.shape, sep='\n')

In [None]:
# Four random 0-dim coefficients, drawn in the order a, b, c, d.
a, b, c, d = (torch.randn((), device=device, dtype=dtype) for _ in range(4))

# Gradient descent with hand-derived gradients (no autograd involved).
learning_rate = 1e-6
print('iteration\tloss')
for t in range(2000):
    # Forward pass: evaluate the cubic at every sample point.
    y_pred = a + b * x + c * x ** 2 + d * x ** 3

    # Sum-of-squares loss as a plain Python float; reported every 100 steps.
    loss = (y_pred - y).pow(2).sum().item()
    if t % 100 == 0:
        print(f'{t}\t{loss}')

    # Backward pass by hand. For L = sum((y_pred - y)^2):
    #   dL/dy_pred = 2 * (y_pred - y)
    #   dL/da = sum(dL/dy_pred),      dL/db = sum(dL/dy_pred * x),
    #   dL/dc = sum(dL/dy_pred * x^2), dL/dd = sum(dL/dy_pred * x^3)
    grad_y_pred = 2.0 * (y_pred - y)
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x ** 2).sum()
    grad_d = (grad_y_pred * x ** 3).sum()

    # Gradient step. `-=` on a tensor is an in-place update, so this modifies
    # the tensors bound to a, b, c and d themselves.
    for coeff, grad in ((a, grad_a), (b, grad_b), (c, grad_c), (d, grad_d)):
        coeff -= learning_rate * grad

print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')

In [None]:
def sin_pred(x):
    """Evaluate the fitted cubic a + b*x + c*x^2 + d*x^3 at ``x``.

    The coefficients ``a``, ``b``, ``c`` and ``d`` are looked up in the
    notebook's global namespace at call time, so the result reflects whichever
    cell assigned them most recently.
    """
    linear = b * x
    quadratic = c * x ** 2
    cubic = d * x ** 3
    return a + linear + quadratic + cubic

# Spot-check the approximation; the true values are 0, 0 and 1 respectively.
print(f'Sin(0) = {sin_pred(0)}')
print(f'Sin(pi) = {sin_pred(math.pi)}')
print(f'Sin(pi / 2) = {sin_pred(math.pi / 2)}')

# Autograd
* PyTorch 可以透過 autograd 自動計算梯度, 不需要手動推導

In [None]:
# Random 0-dim coefficients, drawn in the order a, b, c, d.
# requires_grad=True asks autograd to record operations on them so that
# backward() can fill in their .grad attributes.
a, b, c, d = (
    torch.randn((), device=device, dtype=dtype, requires_grad=True)
    for _ in range(4)
)

# Gradient descent, with the gradients supplied by autograd.
learning_rate = 1e-6
print('iteration\tloss')
for t in range(2000):
    y_pred = a + b * x + c * x ** 2 + d * x ** 3

    # The loss stays a tensor here (no .item()) because backward() is called on it.
    loss = (y_pred - y).pow(2).sum()
    if t % 100 == 0:
        print(f'{t}\t{loss}')

    # Populate a.grad, b.grad, c.grad and d.grad with dLoss/d(coefficient).
    loss.backward()

    # The update itself must not be recorded by autograd, hence no_grad().
    with torch.no_grad():
        for coeff in (a, b, c, d):
            # In-place step on the leaf tensor.
            coeff -= learning_rate * coeff.grad
            # Drop the gradient so the next backward() starts from scratch
            # instead of accumulating into the old value.
            coeff.grad = None

print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')

# Using torch.nn to define custom module
* 一般會把模型包成類別, 繼承 torch.nn.Module

In [None]:
class Polynomial3(torch.nn.Module):
    """Cubic polynomial ``a + b*x + c*x^2 + d*x^3`` with four learnable scalars."""

    def __init__(self):
        """Create the coefficients a, b, c, d as randomly initialised 0-dim parameters."""
        super().__init__()
        # Assigning a Parameter as an attribute (here via setattr) registers it
        # with the module; the order a, b, c, d is also the order in which the
        # random values are drawn and in which parameters() yields them.
        for coeff_name in ('a', 'b', 'c', 'd'):
            setattr(self, coeff_name, torch.nn.Parameter(torch.randn(())))

    def forward(self, x):
        """Evaluate the polynomial element-wise on ``x``."""
        linear = self.b * x
        quadratic = self.c * x ** 2
        cubic = self.d * x ** 3
        return self.a + linear + quadratic + cubic

    def string(self):
        """Return the current polynomial as a human-readable equation."""
        return f'y = {self.a.item()} + {self.b.item()} x + {self.c.item()} x^2 + {self.d.item()} x^3'

In [None]:
model = Polynomial3()
# reduction='sum' makes the loss the sum (not the mean) of the squared errors.
loss_fn = torch.nn.MSELoss(reduction='sum')

# Gradient descent over the module's parameters.
learning_rate = 1e-6
print('iteration\tloss')
for t in range(2000):
    # Clear the gradients left over from the previous iteration so that the
    # backward() below does not accumulate into them.
    model.zero_grad()

    y_pred = model(x)
    loss = loss_fn(y_pred, y)
    if t % 100 == 0:
        print(f'{t}\t{loss}')

    loss.backward()

    # Each parameter is a tensor carrying its own .grad; step them in place
    # without recording the update in the autograd graph.
    with torch.no_grad():
        for param in model.parameters():
            param -= learning_rate * param.grad

print(f'Result: {model.string()}')

# The model is applied element-wise, so it accepts one value ...
xx = torch.tensor([math.pi / 2], dtype=torch.float32)
print(f'Sin(pi / 2) = {model(xx).item()}')

# ... or several values at once.
xx = torch.tensor([0, math.pi / 2], dtype=torch.float32)
print(f'Sin(0, pi / 2) = {model(xx)}')

# Neural network style
* 等價的類神經網路風格的模型

In [None]:
class Polynomial3_nn(torch.nn.Module):
    """Cubic polynomial written as one linear layer over the features (x, x^2, x^3).

    ``fc.bias`` is the constant term and the three entries of ``fc.weight``
    are the coefficients of x, x^2 and x^3.
    """

    def __init__(self):
        """Create the single 3-input, 1-output linear layer."""
        super().__init__()
        # y = wx + b
        self.fc = torch.nn.Linear(3, 1)

    def forward(self, x):
        """Evaluate the polynomial element-wise on ``x``.

        Args:
            x: tensor of any shape, including 0-dim.

        Returns:
            Tensor with the same shape as ``x``.
        """
        x = x.unsqueeze(-1)
        # Stack the powers along the new last (feature) axis. Using -1 instead
        # of a hard-coded 1 keeps this valid for inputs of any rank.
        x = torch.cat((x, x ** 2, x ** 3), -1)
        y = self.fc(x)
        # Remove only the trailing feature axis of size 1. A bare squeeze()
        # would also collapse a batch of size 1 into a 0-dim tensor, so the
        # output would no longer match the shape of the input/targets.
        return y.squeeze(-1)

    def string(self):
        """Return the current polynomial as a human-readable equation."""
        return f'y = {self.fc.bias.item()} + {self.fc.weight[:, 0].item()} x + {self.fc.weight[:, 1].item()} x^2 + {self.fc.weight[:, 2].item()} x^3'


In [None]:
model = Polynomial3_nn()
# reduction='sum' makes the loss the sum (not the mean) of the squared errors.
loss_fn = torch.nn.MSELoss(reduction='sum')

# Gradient descent with a hand-written parameter update.
learning_rate = 1e-6
print('iteration\tloss')
for t in range(2000):
    # Start every iteration with cleared gradients so that backward() does
    # not add to the values from the previous step.
    model.zero_grad()

    y_pred = model(x)
    loss = loss_fn(y_pred, y)
    if t % 100 == 0:
        print(f'{t}\t{loss}')

    loss.backward()

    # In-place step on the linear layer's weight and bias, kept out of the
    # autograd graph.
    with torch.no_grad():
        for param in model.parameters():
            param -= learning_rate * param.grad

print(f'Result: {model.string()}')

# Evaluate the fitted model at 0 and pi / 2 in one call.
xx = torch.tensor([0, math.pi / 2], dtype=torch.float32)
print(f'Sin(0, pi / 2) = {model(xx)}')

# Using optimizer in PyTorch
* 前面是手動實作梯度下降: 每次更新都使用全部 2000 筆資料 (full-batch), 並非隨機抽樣的 stochastic gradient descent (SGD)
* PyTorch 內建許多 optimizer, 主要差別在於如何調整步長 (learning rate)
* 例如: AdaGrad, RMSProp, Adam 等

In [None]:
model = Polynomial3_nn()
# reduction='sum' makes the loss the sum (not the mean) of the squared errors.
loss_fn = torch.nn.MSELoss(reduction='sum')

# Let a built-in optimizer perform the parameter update. Adam is used here;
# the commented-out line shows the plain SGD alternative.
learning_rate = 1e-2
# optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
print('iteration\tloss')
for t in range(2000):
    # Start every iteration with cleared gradients so that backward() does
    # not add to the values from the previous step.
    model.zero_grad()

    y_pred = model(x)
    loss = loss_fn(y_pred, y)
    if t % 100 == 0:
        print(f'{t}\t{loss}')

    loss.backward()

    # The optimizer reads each parameter's .grad and updates the parameter.
    optimizer.step()

print(f'Result: {model.string()}')

# Evaluate the fitted model at 0 and pi / 2 in one call.
xx = torch.tensor([0, math.pi / 2], dtype=torch.float32)
print(f'Sin(0, pi / 2) = {model(xx)}')

# Convert PyTorch (dynamic graph) to ONNX (static graph)

In [None]:
import torch.onnx

# Export whichever `model` was assigned last to the file "model.onnx" in the
# current working directory. The exporter is given an example input of
# shape (1,); it is passed as a 1-tuple of positional arguments for the model.
dummy_input = torch.randn(1)
torch.onnx.export(model, (dummy_input,), f="model.onnx")