In [7]:
import torch
import math
# Fit a cubic polynomial a + b*x + c*x^2 + d*x^3 to sin(x) on [-pi, pi] with
# hand-written gradient descent, running on the Apple MPS backend if present.

# Seed the RNG so the random initial weights (and therefore the printed
# losses) are reproducible on a fresh kernel.
torch.manual_seed(0)

dtype = torch.float
# Prefer the Metal (MPS) backend, but fall back to the CPU so this cell still
# runs on machines where MPS is not available.
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

# Input: 2000 evenly spaced points in [-pi, pi]; target: sin(x).
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

# The powers of x never change during training, so compute them only once.
x_sq = x ** 2
x_cu = x ** 3

# Randomly initialize the four polynomial coefficients (0-dim tensors).
a = torch.randn((), device=device, dtype=dtype)
b = torch.randn((), device=device, dtype=dtype)
c = torch.randn((), device=device, dtype=dtype)
d = torch.randn((), device=device, dtype=dtype)

learning_rate = 1e-6
for t in range(2000):
    # Forward pass: compute predicted y and the residual against the target.
    y_pred = a + b * x + c * x_sq + d * x_cu
    residual = y_pred - y

    # Report the sum-of-squares loss every 100 steps. .item() copies the value
    # back to the host, so it is only called when the loss is actually printed.
    if t % 100 == 99:
        loss = residual.pow(2).sum().item()
        print(t, loss)

    # Backprop by hand: d(loss)/d(y_pred) = 2 * (y_pred - y); the chain rule
    # then gives the gradient of the loss with respect to a, b, c and d.
    grad_y_pred = 2.0 * residual
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x_sq).sum()
    grad_d = (grad_y_pred * x_cu).sum()

    # Update weights using gradient descent (in place).
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d


print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')

99 1083.049072265625
199 758.2042236328125
299 531.9873657226562
399 374.3208923339844
499 264.3468017578125
599 187.580322265625
699 133.9552001953125
799 96.46939849853516
899 70.2479476928711
999 51.894378662109375
1099 39.04008865356445
1199 30.032033920288086
1299 23.715879440307617
1399 19.284881591796875
1499 16.174793243408203
1599 13.990854263305664
1699 12.456541061401367
1799 11.378170013427734
1899 10.619937896728516
1999 10.08659839630127
Result: y = -0.036353595554828644 + 0.8659560084342957 x + 0.006271598860621452 x^2 + -0.09464116394519806 x^3


In [1]:
import torch
import math
# Fit a cubic polynomial a + b*x + c*x^2 + d*x^3 to sin(x) on [-pi, pi] with
# hand-written gradient descent, running on the CPU.

# Seed the RNG so the random initial weights (and therefore the printed
# losses) are reproducible on a fresh kernel.
torch.manual_seed(0)

dtype = torch.float
device = torch.device("cpu")

# Input: 2000 evenly spaced points in [-pi, pi]; target: sin(x).
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

# The powers of x never change during training, so compute them only once.
x_sq = x ** 2
x_cu = x ** 3

# Randomly initialize the four polynomial coefficients (0-dim tensors).
a = torch.randn((), device=device, dtype=dtype)
b = torch.randn((), device=device, dtype=dtype)
c = torch.randn((), device=device, dtype=dtype)
d = torch.randn((), device=device, dtype=dtype)

learning_rate = 1e-6
for t in range(2000):
    # Forward pass: compute predicted y and the residual against the target.
    y_pred = a + b * x + c * x_sq + d * x_cu
    residual = y_pred - y

    # Report the sum-of-squares loss every 100 steps; the Python float is only
    # extracted with .item() when it is actually printed.
    if t % 100 == 99:
        loss = residual.pow(2).sum().item()
        print(t, loss)

    # Backprop by hand. The factor 2 comes from differentiating the squared
    # error with respect to each individual y_pred element; the chain rule
    # then gives the gradient of the loss with respect to a, b, c and d.
    grad_y_pred = 2.0 * residual
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x_sq).sum()
    grad_d = (grad_y_pred * x_cu).sum()

    # Update weights using gradient descent (in place).
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d


print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')

99 1876.3504638671875
199 1253.8363037109375
299 839.2432861328125
399 563.0013427734375
499 378.85565185546875
599 256.0414733886719
699 174.08920288085938
799 119.37342071533203
899 82.82135009765625
999 58.38847732543945
1099 42.04633331298828
1199 31.1084041595459
1299 23.782678604125977
1399 18.87261390686035
1499 15.579191207885742
1599 13.36843204498291
1699 11.883232116699219
1799 10.884614944458008
1899 10.212570190429688
1999 9.75991439819336
Result: y = -0.018499277532100677 + 0.8322007060050964 x + 0.003191431052982807 x^2 + -0.08983977138996124 x^3


数据集较小时，GPU 的速度不一定比 CPU 快：每次运算的内核启动与设备同步开销会抵消并行计算带来的收益；只有当数据规模足够大时，才能体现出 GPU 的优势。