<a href="https://colab.research.google.com/github/shu-bee/Pytorch_tutorial/blob/main/Learning_PyTorch_with_Examples.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

numpy

In [3]:
import numpy as np
import math

x = np.linspace(-math.pi,math.pi,2000)
y = np.sin(x)

a0=np.random.randn()
b0=np.random.randn()
c0=np.random.randn()
d0=np.random.randn()

a,b,c,d=a0,b0,c0,d0
print(a,b,c,d)

learning_rate=1e-6
for t in range(2000):
  # y = a + b x + c x^2 + d x^3
  y_pred = a + b * x + c * x ** 2 + d * x ** 3

  loss = np.square(y_pred-y).sum()
  if t %100==99:
    print(t,loss)

  grad_y_pred=2.0*(y_pred-y)
  grad_a = grad_y_pred.sum()
  grad_b = (grad_y_pred * x).sum()
  grad_c = (grad_y_pred * x ** 2).sum()
  grad_d = (grad_y_pred * x ** 3).sum()

  a -= learning_rate*grad_a
  b -= learning_rate*grad_b
  c -= learning_rate*grad_c
  d -= learning_rate*grad_d

print(f'Result: y = {a} + {b} x+ {c} x^2+ {d} x^3')

[-3.14159265 -3.13844949 -3.13530633 ...  3.13530633  3.13844949
  3.14159265]
-0.11539367782933685 0.5703215804969494 -2.1952562072086987 0.8758641949461718
99 171.15088085974332
199 118.0690237648532
299 82.41625675076581
399 58.4443546724296
499 42.31234300421377
599 31.446488892979783
699 24.12093024744778
799 19.177470372297325
899 15.838242562099083
999 13.580384155349789
1099 12.052138368087386
1199 11.01665030805799
1299 10.314288104685115
1399 9.837364033911975
1499 9.513161472915115
1599 9.292529331800335
1699 9.142211228164928
1799 9.039681941971859
1899 8.96966847501264
1999 8.921803925061049
Result: y = 0.008053208578007459 + 0.8500940431620712 x+ -0.0013893124856754547 x^2+ -0.0923849396340005 x^3


pytorch

In [21]:
import torch
import math

dtype = torch.float
device = torch.device("cpu")

x=torch.linspace(-math.pi,math.pi,2000,device=device,dtype=dtype)
y=torch.sin(x)

a=torch.randn((),device=device,dtype=dtype)
b=torch.randn((),device=device,dtype=dtype)
c=torch.randn((),device=device,dtype=dtype)
d=torch.randn((),device=device,dtype=dtype)
print(a,b,c,d)

learning_rate=1e-6
for t in range(2000):
  y_pred = a + b * x + c * x ** 2 + d * x ** 3

  loss=(y_pred-y).pow(2).sum().item()
  if t%100==99:
    print(t,loss)

    grad_y_pred = 2.0 * (y_pred - y)
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x ** 2).sum()
    grad_d = (grad_y_pred * x ** 3).sum()

    # Update weights using gradient descent
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d


print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')

tensor(-0.7675) tensor(-1.6948) tensor(-0.9568) tensor(0.8470)
99 145740.46875
199 64340.75390625
299 44038.46875
399 36171.2734375
499 31297.67578125
599 27513.42578125
699 24373.91015625
799 21726.6796875
899 19485.634765625
999 17586.1953125
1099 15975.30859375
1199 14608.4130859375
1299 13447.87890625
1399 12461.8857421875
1499 11623.5302734375
1599 10910.0546875
1699 10302.2158203125
1799 9783.736328125
1899 9340.8505859375
1999 8961.9150390625
Result: y = -0.5993348360061646 + -1.668657660484314 x + -0.10055004060268402 x^2 + 0.2658855617046356 x^3


Autograd

we can use automatic differentiation to automate the computation of backward passes in neural networks. The autograd package in PyTorch provides exactly this functionality. When using autograd, the forward pass of your network will define a computational graph; nodes in the graph will be Tensors, and edges will be functions that produce output Tensors from input Tensors. Backpropagating through this graph then allows you to easily compute gradients.





In [2]:
import torch
import math

dtype=torch.float
device=torch.device("cpu")

x = torch.linspace(-math.pi,math.pi,2000,device=device,dtype=dtype)
y = torch.sin(x)

a = torch.randn((),device=device,dtype=dtype,requires_grad=True)
b = torch.randn((),device=device,dtype=dtype,requires_grad=True)
c = torch.randn((),device=device,dtype=dtype,requires_grad=True)
d = torch.randn((),device=device,dtype=dtype,requires_grad=True)
print(a,b,c,d)

learning_rate=1e-6
for t in range(2000):
  y_pred = a + b * x + c * x ** 2 + d * x ** 3

  loss = (y_pred-y).pow(2).sum()
  if t % 100==99:
    print(t,loss.item())

  loss.backward()
  with torch.no_grad():
    a -=learning_rate * a.grad
    b -=learning_rate * b.grad
    c -=learning_rate * c.grad
    d -=learning_rate * d.grad

    a.grad=None
    b.grad=None
    c.grad=None
    d.grad=None

print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')



tensor(-0.2547, requires_grad=True) tensor(-0.6643, requires_grad=True) tensor(0.8711, requires_grad=True) tensor(-1.7299, requires_grad=True)
99 1222.2607421875
199 815.7386474609375
299 545.6017456054688
399 366.0360107421875
499 246.63446044921875
599 167.210693359375
699 114.35916900634766
799 79.1761474609375
899 55.744850158691406
999 40.13320541381836
1099 29.726648330688477
1199 22.786357879638672
1299 18.155330657958984
1399 15.063515663146973
1499 12.99813461303711
1599 11.617648124694824
1699 10.694300651550293
1799 10.076350212097168
1899 9.662487030029297
1999 9.38510513305664
Result: y = -0.012384394183754921 + 0.8365586996078491 x + 0.0021365159191191196 x^2 + -0.09045965224504471 x^3


nn module

The nn package defines a set of Modules, which are roughly equivalent to neural network layers. A Module receives input Tensors and computes output Tensors, but may also hold internal state such as Tensors containing learnable parameters. The nn package also defines a set of useful loss functions that are commonly used when training neural networks.

In [7]:
import torch
import math

x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

# tensor (x, x^2, x^3).
p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)
print(xx)

# Use the nn package to define our model as a sequence of layers. nn.Sequential
# is a Module which contains other Modules, and applies them in sequence to
# produce its output. The Linear Module computes output from input using a
# linear function, and holds internal Tensors for its weight and bias.
# The Flatten layer flatens the output of the linear layer to a 1D tensor,
# to match the shape of `y`.
model=torch.nn.Sequential(
    torch.nn.Linear(3,1),
    torch.nn.Flatten(0,1)
)
print("model",model)

loss_fn=torch.nn.MSELoss(reduction='sum')

learning_rate=1e-6
for t in range(2000):
  y_pred=model(xx)

  loss=loss_fn(y_pred,y)
  if t % 100 == 99:
    print(t,loss.item())

  model.zero_grad()

  loss.backward()

  with torch.no_grad():
    for param in model.parameters():
      param -= learning_rate*param.grad

linear_layer=model[0]

print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')


tensor([[ -3.1416,   9.8696, -31.0063],
        [ -3.1384,   9.8499, -30.9133],
        [ -3.1353,   9.8301, -30.8205],
        ...,
        [  3.1353,   9.8301,  30.8205],
        [  3.1384,   9.8499,  30.9133],
        [  3.1416,   9.8696,  31.0063]])
model Sequential(
  (0): Linear(in_features=3, out_features=1, bias=True)
  (1): Flatten(start_dim=0, end_dim=1)
)
99 677.791259765625
199 459.5148010253906
299 312.7415771484375
399 213.96286010742188
499 147.42469787597656
599 102.56243896484375
699 72.28584289550781
799 51.83306121826172
899 38.002479553222656
999 28.64029884338379
1099 22.296289443969727
1199 17.992839813232422
1299 15.070423126220703
1399 13.083646774291992
1499 11.731431007385254
1599 10.810088157653809
1699 10.181598663330078
1799 9.752374649047852
1899 9.45890998840332
1999 9.258031845092773
Result: y = 0.016797447577118874 + 0.8433757424354553 x + -0.002897839527577162 x^2 + -0.09142931550741196 x^3


nn with optimizer

In [8]:
import torch
import math

x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

# tensor (x, x^2, x^3).
p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)
print(xx)

model=torch.nn.Sequential(
    torch.nn.Linear(3,1),
    torch.nn.Flatten(0,1)
)
print("model",model)

loss_fn=torch.nn.MSELoss(reduction='sum')

learning_rate=1e-3
# Use the optim package to define an Optimizer that will update the weights of
# the model for us. Here we will use RMSprop; the optim package contains many other
# optimization algorithms. The first argument to the RMSprop constructor tells the
# optimizer which Tensors it should update.
optimizer=torch.optim.RMSprop(model.parameters(),lr=learning_rate)
for t in range(2000):
  y_pred=model(xx)

  loss=loss_fn(y_pred,y)
  if t % 100 == 99:
    print(t,loss.item())

  # Before the backward pass, use the optimizer object to zero all of the
  # gradients for the variables it will update (which are the learnable
  # weights of the model). This is because by default, gradients are
  # accumulated in buffers( i.e, not overwritten) whenever .backward()
  # is called. Checkout docs of torch.autograd.backward for more details.
  optimizer.zero_grad()

  loss.backward()

  # Calling the step function on an Optimizer makes an update to its parameters
  optimizer.step()

linear_layer=model[0]

print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')


tensor([[ -3.1416,   9.8696, -31.0063],
        [ -3.1384,   9.8499, -30.9133],
        [ -3.1353,   9.8301, -30.8205],
        ...,
        [  3.1353,   9.8301,  30.8205],
        [  3.1384,   9.8499,  30.9133],
        [  3.1416,   9.8696,  31.0063]])
model Sequential(
  (0): Linear(in_features=3, out_features=1, bias=True)
  (1): Flatten(start_dim=0, end_dim=1)
)
99 1959.2606201171875
199 1195.62548828125
299 1020.6521606445312
399 853.280517578125
499 692.38916015625
599 547.0382080078125
699 420.3686828613281
799 312.79595947265625
899 223.8001251220703
999 152.2157745361328
1099 97.26840209960938
1199 57.58738327026367
1299 31.659496307373047
1399 17.07789421081543
1499 10.783099174499512
1599 9.151917457580566
1699 8.966609954833984
1799 8.906667709350586
1899 8.904130935668945
1999 8.9237060546875
Result: y = 0.00049802684225142 + 0.8562657237052917 x + 0.0004980287048965693 x^2 + -0.09384801238775253 x^3


Custum nn Modules

Sometimes you will want to specify models that are more complex than a sequence of existing Modules; for these cases you can define your own Modules by subclassing nn.Module and defining a forward which receives input Tensors and produces output Tensors using other modules or other autograd operations on Tensors.

In [12]:
import torch
import math

class Polynomial3(torch.nn.Module):
  def __init__(self):
    """
    In the constructor we instantiate four parameters and assign them as
    member parameters.
    """
    super().__init__()
    self.a=torch.nn.Parameter(torch.randn(()))
    self.b=torch.nn.Parameter(torch.randn(()))
    self.c=torch.nn.Parameter(torch.randn(()))
    self.d=torch.nn.Parameter(torch.randn(()))
  
  def forward(self,x):
    """
    In the forward function we accept a Tensor of input data and we must return
    a Tensor of output data. We can use Modules defined in the constructor as
    well as arbitrary operators on Tensors.
    """
    return self.a+self.b*x+self.c*x**2+self.d*x**3

  def string(self):
    """
    Just like any class in Python, you can also define custom method on PyTorch modules
    """
    return f'y = {self.a.item()} + {self.b.item()} x + {self.c.item()} x^2 + {self.d.item()} x^3'

#Create Tensors to hold input and outputs.
x=torch.linspace(-math.pi,math.pi,2000)
y=torch.sin(x)

#Construct our model by instantiating the class defined above 
model = Polynomial3()

# Construct our loss function and an Optimizer. The call to model.parameters()
# in the SGD constructor will contain the learnable parameters of the nn.Linear
# module which is members of the model.
criterion=torch.nn.MSELoss(reduction='sum')
optimizer=torch.optim.SGD(model.parameters(),lr=1e-6)
for t in range(2000):
  y_pred=model(x)

  loss=criterion(y_pred,y)
  if t % 100 ==99:
    print(t,loss.item())
    
  optimizer.zero_grad()
  loss.backward()
  optimizer.step()
  
print(f'Result:{model.string()}')



99 1556.3314208984375
199 1101.218505859375
299 780.07177734375
399 553.4161987304688
499 393.4232482910156
599 280.4683837890625
699 200.71055603027344
799 144.3852996826172
899 104.60313415527344
999 76.50167083740234
1099 56.649078369140625
1199 42.62240219116211
1299 32.71099853515625
1399 25.70684242248535
1499 20.756732940673828
1599 17.257986068725586
1699 14.784891128540039
1799 13.036649703979492
1899 11.800713539123535
1999 10.92690658569336
Result:y=-0.048358283936977386+0.8521280884742737+0.008342606946825981+-0.09267426282167435


In [13]:
# -*- coding: utf-8 -*-
import torch
import math


class Polynomial3(torch.nn.Module):
    def __init__(self):
        """
        In the constructor we instantiate four parameters and assign them as
        member parameters.
        """
        super().__init__()
        self.a = torch.nn.Parameter(torch.randn(()))
        self.b = torch.nn.Parameter(torch.randn(()))
        self.c = torch.nn.Parameter(torch.randn(()))
        self.d = torch.nn.Parameter(torch.randn(()))

    def forward(self, x):
        """
        In the forward function we accept a Tensor of input data and we must return
        a Tensor of output data. We can use Modules defined in the constructor as
        well as arbitrary operators on Tensors.
        """
        return self.a + self.b * x + self.c * x ** 2 + self.d * x ** 3

    def string(self):
        """
        Just like any class in Python, you can also define custom method on PyTorch modules
        """
        return f'y = {self.a.item()} + {self.b.item()} x + {self.c.item()} x^2 + {self.d.item()} x^3'


# Create Tensors to hold input and outputs.
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

# Construct our model by instantiating the class defined above
model = Polynomial3()

# Construct our loss function and an Optimizer. The call to model.parameters()
# in the SGD constructor will contain the learnable parameters of the nn.Linear
# module which is members of the model.
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-6)
for t in range(2000):
    # Forward pass: Compute predicted y by passing x to the model
    y_pred = model(x)

    # Compute and print loss
    loss = criterion(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())

    # Zero gradients, perform a backward pass, and update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

print(f'Result: {model.string()}')

99 1010.9580688476562
199 678.1954956054688
299 456.196044921875
399 308.0096740722656
499 209.03799438476562
599 142.8973846435547
699 98.66962432861328
799 69.07551574707031
899 49.259788513183594
999 35.982154846191406
1099 27.078868865966797
1199 21.104143142700195
1299 17.09164047241211
1399 14.394590377807617
1499 12.580179214477539
1599 11.35848617553711
1699 10.535148620605469
1799 9.979721069335938
1899 9.604706764221191
1999 9.351224899291992
Result: y = -0.01507451944053173 + 0.8390489220619202 x + 0.0026006055995821953 x^2 + -0.09081386774778366 x^3


Control Flow + Weight Sharing

As an example of dynamic graphs and weight sharing, we implement a very strange model: a third-fifth order polynomial that on each forward pass chooses a random number between 3 and 5 and uses that many orders, reusing the same weights multiple times to compute the fourth and fifth order.

For this model we can use normal Python flow control to implement the loop, and we can implement weight sharing by simply reusing the same parameter multiple times when defining the forward pass.

In [11]:
# -*- coding: utf-8 -*-
import random
import torch
import math


class DynamicNet(torch.nn.Module):
    def __init__(self):
        """
        In the constructor we instantiate four parameters and assign them as
        member parameters.
        """
        super().__init__()
        self.a = torch.nn.Parameter(torch.randn(()))
        self.b = torch.nn.Parameter(torch.randn(()))
        self.c = torch.nn.Parameter(torch.randn(()))
        self.d = torch.nn.Parameter(torch.randn(()))
        self.e = torch.nn.Parameter(torch.randn(()))

    def forward(self, x):
        """
        For the forward pass of the model, we randomly choose either 4, 5
        and reuse the e parameter to compute the contribution of these orders.

        Since each forward pass builds a dynamic computation graph, we can use normal
        Python control-flow operators like loops or conditional statements when
        defining the forward pass of the model.

        Here we also see that it is perfectly safe to reuse the same parameter many
        times when defining a computational graph.
        """
        return self.a + self.b * x + self.c * x ** 2 + self.d * x ** 3
        for exp in range(4,random.randint(4,6)):
          y=y+self.e*x**exp
        return y

    def string(self):
        """
        Just like any class in Python, you can also define custom method on PyTorch modules
        """
        return f'y = {self.a.item()} + {self.b.item()} x + {self.c.item()} x^2 + {self.d.item()} x^3 + {self.e.item()} x^4 ? + {self.e.item()} x^5 ?'


# Create Tensors to hold input and outputs.
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

# Construct our model by instantiating the class defined above
model = DynamicNet()

# Construct our loss function and an Optimizer. The call to model.parameters()
# in the SGD constructor will contain the learnable parameters of the nn.Linear
# module which is members of the model.
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-8, momentum=0.9)
for t in range(30000):
    # Forward pass: Compute predicted y by passing x to the model
    y_pred = model(x)

    # Compute and print loss
    loss = criterion(y_pred, y)
    if t % 2000 == 1999:
        print(t, loss.item())

    # Zero gradients, perform a backward pass, and update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

print(f'Result: {model.string()}')

1999 1905.299072265625
3999 846.3731079101562
5999 379.2395324707031
7999 172.90354919433594
9999 81.63180541992188
11999 41.1929931640625
13999 23.243913650512695
15999 15.261205673217773
17999 11.703105926513672
19999 10.113356590270996
21999 9.40114974975586
23999 9.0811767578125
25999 8.936957359313965
27999 8.871721267700195
29999 8.842118263244629
Result: y = -0.003130276221781969 + 0.8528169393539429 x + 0.0005400248919613659 x^2 + -0.09277224540710449 x^3 + 1.4095135927200317 x^4 ? + 1.4095135927200317 x^5 ?
