# Python与Pytorch教程

## 1. Python基础

- Python基础数据类型：整型、浮点型、字符串、布尔型、列表、元组、字典、集合
- Python基础语法：条件语句、循环语句、函数、类
- Numpy基础：数组、矩阵、线性代数、随机数
- Matplotlib基础：绘图、子图、图例、坐标轴、文本、注释


### 1.1 Python基础数据类型
#### 整数和浮点数

In [None]:
# Integers: bind a literal to a name and show the value together with its type.
x = 3
print(x, type(x)) # Prints "3 <class 'int'>"

In [None]:
# Basic integer arithmetic (x is 3 when the cells are run in order).
print(x + 2) # Addition; prints "5"
print(x - 2) # Subtraction; prints "1"
print(x * 4) # Multiplication; prints "12"
print(x ** 4) # Exponentiation; prints "81"

In [None]:
# Augmented assignment rebinds x to the result (x starts at 3 when the cells are run in order).
x += 2
print(x) # Prints "5"
x *= 2
print(x) # Prints "10"

In [None]:
y = 3.5
print(type(y)) # Prints "<class 'float'>"
print(y, y + 1, y * 2, y ** 2) # Prints "3.5 4.5 7.0 12.25"

#### 布尔型

In [None]:
t, f = True, False
print(type(t)) # Prints "<class 'bool'>"

In [None]:
print(t and f) # Logical AND; prints "False"
print(t or f) # Logical OR; prints "True"
print(not t) # Logical NOT; prints "False"
print(t != f) # Logical XOR; prints "True"

#### 字符串

In [None]:
first_word = '2023'
second_word = 'AI训练营'
print(first_word, len(first_word)) # len() gives the number of characters; prints "2023 4"

In [None]:
two_words = first_word + ' ' + second_word # String concatenation with +
print(two_words) # Prints "2023 AI训练营"

In [None]:
# str.format fills each {} placeholder with the corresponding argument, in order.
two_words_format = '{} {} {}'.format(first_word, second_word, 2)
print(two_words_format) # Prints "2023 AI训练营 2"

#### 字符串方法

In [None]:
w = 'hello'
print(w.capitalize()) # Capitalize a string; prints "Hello"
print(w.upper()) # Convert a string to uppercase; prints "HELLO"
print(w.rjust(7)) # Right-justify a string, padding with spaces; prints "  hello"
print(w.center(7)) # Center a string, padding with spaces; prints " hello "
print(w.replace('l', '(ell)')) # Replace all instances of one substring with another;
                               # prints "he(ell)(ell)o"
print(' world '.strip()) # Strip leading and trailing whitespace; prints "world"


#### 列表

In [None]:
ls = [5,2,3,4,1] # Create a list
print(ls, ls[2]) # Prints "[5, 2, 3, 4, 1] 3"
print(ls[-1]) # Negative indices count from the end of the list; prints "1"

In [None]:
ls[2] = 'university' # Lists can contain elements of different types
print(ls) # Prints "[5, 2, 'university', 4, 1]"

In [None]:
ls.append('AI') # Add a new element to the end of the list
print(ls) # Prints "[5, 2, 'university', 4, 1, 'AI']"

In [None]:
a = ls.pop() # Remove and return the last element of the list
print(a, ls) # Prints "AI [5, 2, 'university', 4, 1]"

#### 列表切片

In [None]:
nums = list(range(7)) # range(7) yields the integers 0..6; list() collects them into a list
print(nums) # Prints "[0, 1, 2, 3, 4, 5, 6]"
print(nums[2:4]) # Get a slice from index 2 to 4 (exclusive); prints "[2, 3]"
print(nums[2:]) # Get a slice from index 2 to the end; prints "[2, 3, 4, 5, 6]"
print(nums[:2]) # Get a slice from the start to index 2 (exclusive); prints "[0, 1]"
print(nums[:]) # Get a slice of the whole list; prints "[0, 1, 2, 3, 4, 5, 6]"
print(nums[:-1]) # Slice indices can be negative; prints "[0, 1, 2, 3, 4, 5]"
nums[2:4] = [8, 9] # Assign a new sublist to a slice
print(nums) # Prints "[0, 1, 8, 9, 4, 5, 6]"

#### 循环

In [None]:
# Loop over the elements of a list; prints each concept on its own line.
AI_concepts = ['AI', 'Deep Learning', 'Machine Learning', 'Neural Network']
for concept in AI_concepts:
    print(concept)

In [None]:
# enumerate yields (index, element) pairs, giving access to the position inside the loop.
AI_concepts = ['AI', 'Deep Learning', 'Machine Learning', 'Neural Network']
for i, concept in enumerate(AI_concepts):
    print('#%d: %s' % (i + 1, concept)) # Prints "#1: AI", "#2: Deep Learning", ... one per line

#### 列表推导式

In [None]:
nums = [0, 1, 2, 3, 4]
squares = []
for x in nums:
    squares.append(x ** 2)
print(squares) # Prints "[0, 1, 4, 9, 16]"

In [None]:
nums = [0, 1, 2, 3, 4]
squares = [x ** 2 for x in nums]
print(squares) # Prints "[0, 1, 4, 9, 16]"

In [None]:
nums = [0, 1, 2, 3, 4]
even_squares = [x ** 2 for x in nums if x % 2 == 0]
print(even_squares) # Prints "[0, 4, 16]"

#### 字典

In [None]:
d = {'AI': 'Artificial Intelligence', 'DL': 'Deep Learning', 'ML': 'Machine Learning'}
print(d['AI']) # Get an entry from a dictionary; prints "Artificial Intelligence"
print('AI' in d) # Check if a dictionary has a given key; prints "True"

In [None]:
d['NN'] = 'Neural Network' # Set an entry in a dictionary
print(d['NN']) # Prints "Neural Network"

In [None]:
# Looking up a missing key with [] raises KeyError; this cell is expected to fail.
# The key must be the string 'RNN' -- a bare RNN would raise NameError instead.
print(d['RNN']) # KeyError: 'RNN' not a key of d

In [None]:
print(d.get('RNN', 'Not Found')) # Get an element with a default; prints "Not Found"
print(d.get('AI', 'Not Found')) # Get an element with a default; prints "Artificial Intelligence"

In [None]:
del d['AI'] # Remove an element from a dictionary
print(d.get('AI', 'Not Found')) # "AI" is no longer a key; prints "Not Found"

In [None]:
d = {'AI': 'Artificial Intelligence', 'DL': 'Deep Learning', 'ML': 'Machine Learning'}
for abbreviation, concept in d.items():
    print('%s stands for %s' % (abbreviation, concept))

In [None]:
nums = [0, 1, 2, 3, 4]
even_num_to_square = {x: x ** 2 for x in nums if x % 2 == 0}
print(even_num_to_square) # Prints "{0: 0, 2: 4, 4: 16}"

#### 集合

In [None]:
AI_concepts = {'AI', 'Deep Learning', 'Machine Learning', 'Neural Network'}
print('AI' in AI_concepts) # Check if an element is in a set; prints "True"
print('RNN' in AI_concepts) # prints "False"

In [None]:
# Add the exact string that is tested below, so the membership check really is True.
AI_concepts.add('RNN') # Add an element to a set
print('RNN' in AI_concepts) # Prints "True"
print(len(AI_concepts)) # Number of elements in a set; prints "5"

In [None]:
AI_concepts.add('AI') # Adding an element that is already in the set does nothing
print(len(AI_concepts)) # Prints "5"
AI_concepts.remove('AI') # Remove an element from a set
print(len(AI_concepts)) # Prints "4"

In [None]:
AI_concepts = {'AI', 'Deep Learning', 'Machine Learning', 'Neural Network'}
# Iterating over a set works like iterating over a list, but sets are unordered,
# so the order in which the concepts are numbered is not guaranteed.
for i, concept in enumerate(AI_concepts):
    print('#%d: %s' % (i + 1, concept))

In [None]:
from math import sqrt

# Set comprehension: duplicates collapse, so the 30 inputs leave only six values.
# The assignment must be its own statement; inside print(...) it would be parsed
# as an (invalid) keyword argument.
nums = {int(sqrt(x)) for x in range(30)}
print(nums) # Prints "{0, 1, 2, 3, 4, 5}"

#### 元组

In [None]:
d = {(x, x + 1): x for x in range(10)} # Create a dictionary with tuple keys
t = (5, 6) # Create a tuple
print(type(t)) # Prints "<class 'tuple'>"
print(d[t]) # Prints "5"
print(d[(1, 2)]) # Prints "1"

In [None]:
t[0] = 1 # Tuples are immutable; item assignment raises TypeError

#### 函数

In [None]:
def sign(x):
    """Return 'positive', 'negative' or 'zero' according to the sign of ``x``."""
    # Guard clauses: each comparison that succeeds returns immediately.
    if x > 0:
        return 'positive'
    if x < 0:
        return 'negative'
    return 'zero'


# Exercise every branch once; prints "negative", "zero", "positive".
for x in (-1, 0, 1):
    print(sign(x))

In [None]:
def hello(name, loud=False):
    """Print a greeting for ``name``.

    With ``loud`` set, the greeting is upper-cased and ends with an
    exclamation mark; otherwise a plain greeting is printed.
    """
    if not loud:
        print('Hello, %s' % name)
        return
    print('HELLO, %s!' % name.upper())


hello('Bob') # Prints "Hello, Bob"
hello('Fred', loud=True) # Prints "HELLO, FRED!"

#### 类

In [None]:
class Greeter:
    """Greets one person, whose name is fixed at construction time."""

    def __init__(self, name):
        # Instance variable read by greet().
        self.name = name

    def greet(self, loud=False):
        """Print the greeting; upper-cased with an exclamation mark when ``loud``."""
        if not loud:
            print('Hello, %s' % self.name)
            return
        print('HELLO, %s!' % self.name.upper())


g = Greeter('Fred') # Construct an instance of the Greeter class
g.greet() # Call an instance method; prints "Hello, Fred"
g.greet(loud=True) # Call an instance method; prints "HELLO, FRED!"

### 1.2 Numpy基础

In [None]:
import numpy as np

In [None]:
a = np.array([1, 2, 3]) # Create a rank 1 array from a Python list
print(type(a), a.shape, a[0], a[1], a[2]) # Prints "<class 'numpy.ndarray'> (3,) 1 2 3"
a[0] = 5 # Change an element of the array
print(a) # Prints "[5 2 3]"

In [None]:
b = np.array([[1,2,3],[4,5,6]]) # Create a rank 2 array from nested lists
print(b) # Prints "[[1 2 3]
         #          [4 5 6]]"

In [None]:
# b is the 2x3 array from the previous cell (when the cells are run in order).
print(b.shape) # Prints "(2, 3)"
print(b[0, 0], b[0, 1], b[1, 0]) # Index with [row, column]; prints "1 2 4"

In [None]:
a = np.zeros((2,2)) # Create an array of all zeros
print(a)

In [None]:
b = np.ones((1,2)) # Create an array of all ones
print(b)

In [None]:
c = np.full((2,2), 7) # Create a constant array
print(c)

In [None]:
d = np.eye(2) # Create a 2x2 identity matrix
print(d)

In [None]:
e = np.random.random((2,2)) # Create an array filled with random values
print(e)

#### Array索引

In [None]:
import numpy as np

# Create a rank 2 array with shape (3, 4)
a = np.array([[1,2,3,4],[5,6,7,8],[9,10,11,12]])


# Use slicing to pull out the first 2 rows and columns 1 and 2; b has shape (2, 2)
b = a[:2, 1:3]
print(b) # Prints "[[2 3]
         #          [6 7]]"


In [None]:
print(a[0, 1]) # Prints "2"
b[0, 0] = 77 # b[0, 0] is the same piece of data as a[0, 1]
print(a[0, 1]) # Prints "77"

In [None]:
# Mixing an integer index with a slice drops that axis; using only slices keeps the rank.
row_r1 = a[1, :] # Rank 1 view of the second row of a
row_r2 = a[1:2, :] # Rank 2 view of the second row of a
row_r3 = a[[1], :] # Rank 2 copy of the second row of a (integer-array indexing copies)
print(row_r1, row_r1.shape) # Prints "[5 6 7 8] (4,)"
print(row_r2, row_r2.shape) # Prints "[[5 6 7 8]] (1, 4)"
print(row_r3, row_r3.shape) # Prints "[[5 6 7 8]] (1, 4)"

In [None]:
# The same distinction holds for columns: an integer index drops the axis, a slice keeps it.
col_r1 = a[:, 1]
col_r2 = a[:, 1:2]
# When the cells are run in order, a[0, 1] has already been overwritten with 77
# through the slice b, so column 1 starts with 77 (a freshly created a would show 2).
print(col_r1, col_r1.shape) # Prints "[77  6 10] (3,)"
print(col_r2, col_r2.shape) # Prints "[[77]
                            #          [ 6]
                            #          [10]] (3, 1)"

使用整数索引访问数组中的元素

In [None]:
a = np.array([[1,2], [3, 4], [5, 6]])
# Integer array indexing: the two lists are paired up, selecting a[0, 0], a[1, 1]
# and a[2, 0]; the result has shape (3,)
print(a[[0, 1, 2], [0, 1, 0]]) # Prints "[1 4 5]"

# The integer array indexing above is equivalent to this:
print(np.array([a[0, 0], a[1, 1], a[2, 0]])) # Prints "[1 4 5]"

In [None]:
print(a[[0, 0], [1, 1]]) # Prints "[2 2]"
print(np.array([a[0, 1], a[0, 1]])) # Prints "[2 2]"

使用整数数组索引从矩阵的每一行中选择或修改一个元素

In [None]:
a = np.array([[1,2,3], [4,5,6], [7,8,9], [10,11,12]])
print(a) 

In [None]:
b = np.array([0, 2, 0, 1])
print(a[np.arange(4), b]) # Prints "[ 1  6  7 11]"

In [None]:
# Mutate one element from each row of a, using the column indices in b
# (a and b come from the two previous cells).
a[np.arange(4), b] += 10
print(a) # Prints "[[11  2  3]
         #          [ 4  5 16]
         #          [17  8  9]
         #          [10 21 12]]"

布尔值索引元素

In [None]:
import numpy as np
a = np.array([[1,2], [3, 4], [5, 6]])
bool_idx = (a > 2) # Find the elements of a that are bigger than 2;
                   # this gives a boolean array with the same shape as a

print(bool_idx) # Prints "[[False False]
                #          [ True  True]
                #          [ True  True]]"


In [None]:
print(a[bool_idx]) # Prints "[3 4 5 6]"
print(a[a > 2]) # Prints "[3 4 5 6]"

#### Numpy数据类型

In [None]:
a = np.array([1, 2]) # Let numpy choose the datatype (an integer type)
b = np.array([1.0, 2.0]) # Let numpy choose the datatype (a floating-point type)
c = np.array([1, 2], dtype=np.int64) # Force a particular datatype

# Typically prints "int64 float64 int64"; the default integer type is platform dependent.
print(a.dtype, b.dtype, c.dtype)

#### 数组运算

In [None]:
a = np.array([[1,2],[3,4]], dtype=np.float64)
b = np.array([[5,6],[7,8]], dtype=np.float64)

print(a + b)
print(np.add(a, b))

In [None]:
print(a - b)
print(np.subtract(a, b))

In [None]:
print(a * b)
print(np.multiply(a, b))

In [None]:
print(a / b)
print(np.divide(a, b))

In [None]:
print(np.sqrt(a))


In [None]:
a = np.array([[1,2],[3,4]])
b = np.array([[5,6],[7,8]])

x = np.array([9,10])
y = np.array([11, 12])

# Inner product of vectors: 9*11 + 10*12; both lines print "219"
print(x.dot(y))
print(np.dot(x, y))

In [None]:
print(x @ y)

In [None]:
# Matrix / vector product (a and x come from the cell above); all three lines print "[29 67]"
print(a.dot(x))
print(np.dot(a, x))
print(a @ x)

In [None]:
# Matrix / matrix product (a and b come from the cell above); all three lines print
# "[[19 22]
#   [43 50]]"
print(a.dot(b))
print(np.dot(a, b))
print(a @ b)

#### Numpy函数运算

In [None]:
x = np.array([[1,2],[3,4]])

print(np.sum(x)) # Compute sum of all elements; prints "10"
print(np.sum(x, axis=0)) # Compute sum of each column; prints "[4 6]"
print(np.sum(x, axis=1)) # Compute sum of each row; prints "[3 7]"

In [None]:
print(x)
print("transpose\n", x.T)

In [None]:
y = np.array([[1,2,3]])
print(y)
print("transpose\n", y.T)

#### 广播
广播可以让我们处理不同形状的数组，对他们进行运算

In [None]:
# Add the vector v to each row of the matrix x, storing the result in the matrix y
x = np.array([[1,2,3],[4,5,6],[7,8,9],[10,11,12]])
v = np.array([1, 0, 1])
y = np.empty_like(x) # Create an empty matrix with the same shape as x

# Explicit Python loop over the 4 rows
for i in range(4):
    y[i, :] = x[i, :] + v

print(y) # Prints "[[ 2  2  4]
         #          [ 5  5  7]
         #          [ 8  8 10]
         #          [11 11 13]]"

如果x非常大的话，这样操作就会十分耗时

In [None]:
vv = np.tile(v, (4, 1)) # Stack 4 copies of v on top of each other
print(vv)

In [None]:
y = x + vv # Add x and vv elementwise
print(y)

另一种方法：

In [None]:
import numpy as np

x = np.array([[1,2,3],[4,5,6],[7,8,9],[10,11,12]])
v = np.array([1, 0, 1])
y = x + v # Add v to each row of x using broadcasting
print(y)

y=x+v 尽管x的形状是(4,3)，v的形状是(3,)，但是v会被广播到x的每一行，因此输出的形状是(4,3)



In [None]:
# Outer product of two vectors via broadcasting
v = np.array([1,2,3]) # v has shape (3,)
w = np.array([4,5]) # w has shape (2,)
# Reshaping v to a (3, 1) column lets it broadcast against w, giving a (3, 2) result:
# "[[ 4  5]
#   [ 8 10]
#   [12 15]]"
print(np.reshape(v, (3, 1)) * w)

In [None]:
# Add a vector to each row of a matrix: x is (2, 3) and v (from the cell above) is (3,)
x = np.array([[1,2,3],[4,5,6]])
print(x + v) # Prints "[[2 4 6]
             #          [5 7 9]]"

In [None]:
# Add a vector to each column of a matrix: x.T is (3, 2), so w of shape (2,) broadcasts
# across its rows; transposing back gives the (2, 3) result
# "[[ 5  6  7]
#   [ 9 10 11]]"
print((x.T + w).T)

In [None]:
# Alternative without transposing: reshape w to a (2, 1) column so it broadcasts
# directly against x; the output matches the transpose-based version
print(x + np.reshape(w, (2, 1)))


In [None]:
print(x * 2)

### 1.3 Matplotlib基础

In [None]:
import matplotlib.pyplot as plt

In [None]:
%matplotlib inline

In [None]:
# Compute the x and y coordinates for points on a sine curve
x = np.arange(0, 3 * np.pi, 0.1)
y = np.sin(x)

# Plot the points using matplotlib
plt.plot(x, y)

In [None]:
# Compute sine and cosine of the x values defined in the previous cell
y_sin = np.sin(x)
y_cos = np.cos(x)

# Draw both curves on the same axes, then label the axes and the figure
plt.plot(x, y_sin)
plt.plot(x, y_cos)
plt.xlabel('x axis label')
plt.ylabel('y axis label')
plt.title('Sine and Cosine')
plt.legend(['Sine', 'Cosine']) # Legend entries follow the order of the plot calls

In [None]:
# Compute the x and y coordinates for points on sine and cosine curves
x = np.arange(0, 3 * np.pi, 0.1)
y_sin = np.sin(x)
y_cos = np.cos(x)

# Set up a subplot grid that has height 2 and width 1,
# and set the first such subplot as active
plt.subplot(2, 1, 1)

# Make the first plot
plt.plot(x, y_sin)
plt.title('Sine')

# Set the second subplot as active, and make the second plot
plt.subplot(2, 1, 2)
plt.plot(x, y_cos)
plt.title('Cosine')

# Show the figure
plt.show()

## 2. Pytorch基础
- Pytorch基础数据类型：张量、变量、自动求导
- 神经网络模块
- 神经网络训练流程

In [None]:
import torch
import torch.nn as nn

# Import pprint, module we use for making our print statements prettier
import pprint
pp = pprint.PrettyPrinter()

### 2.1 Pytorch基础数据类型

In [None]:
list_of_lists = [
  [1, 2, 3],
  [4, 5, 6],
]
print(list_of_lists)

In [None]:
# Initializing a tensor
data = torch.tensor([
                     [0, 1],    
                     [2, 3],
                     [4, 5]
                    ])
print(data)

每一个张量都有一种数据类型（dtype），最常用的两种是torch.float32和torch.int64（整数张量的默认类型）

In [None]:
# Initializing a tensor with an explicit data type
# The literals are integers, but dtype=torch.float32 converts them to floats;
# notice the dots after the numbers in the printed tensor
data = torch.tensor([
                     [0, 1],    
                     [2, 3],
                     [4, 5]
                    ], dtype=torch.float32)
print(data)

In [None]:
# Initializing a tensor with an explicit data type
# Integer and float literals can be mixed: every element is stored as float32.
# The default print precision is 4 digits, so 0.11111111 is shown as 0.1111
data = torch.tensor([
                     [0.11111111, 1],    
                     [2, 3],
                     [4, 5]
                    ], dtype=torch.float32)
print(data)

In [None]:
# Initializing a tensor without an explicit data type
# A single float literal is enough for torch.tensor to infer a floating-point dtype
# (torch.float32 unless the default dtype has been changed)
data = torch.tensor([
                     [0.11111111, 1],    
                     [2, 3],
                     [4, 5]
                    ])
print(data)

#### 常用函数

In [None]:
zeros = torch.zeros(2, 5)  # a tensor of all zeros
print(zeros) 

In [None]:
ones = torch.ones(3, 4)   # a tensor of all ones
print(ones)

In [None]:
rr = torch.arange(1, 10) # range from [1, 10) 
print(rr)

In [None]:
rr + 2

In [None]:
rr * 2

In [None]:
a = torch.tensor([[1, 2], [2, 3], [4, 5]])      # shape (3, 2)
b = torch.tensor([[1, 2, 3, 4], [5, 6, 7, 8]])  # shape (2, 4); a @ b has shape (3, 4)

print("A is", a)
print("B is", b)
print("The product is", a.matmul(b))
print("The other product is", a @ b) # @ is matrix multiplication; +, -, * are elementwise

In [None]:
v = torch.tensor([1, 2, 3])

In [None]:
v.shape

In [None]:
torch.tensor([[1, 2, 3], [4, 5, 6]]) @ v  #(2, 3) @ (3)  -> (2)

#### 矩阵运算

In [None]:
matr_2d = torch.tensor([[1, 2, 3], [4, 5, 6]])
print(matr_2d.shape)
print(matr_2d)

In [None]:
matr_3d = torch.tensor([[[1, 2, 3, 4], [-2, 5, 6, 9]], [[5, 6, 7, 2], [8, 9, 10, 4]], [[-3, 2, 2, 1], [4, 6, 5, 9]]])
print(matr_3d)
print(matr_3d.shape)

In [None]:
rr = torch.arange(1, 16) # The 15 integers 1..15 as a 1-D tensor
print("The shape is currently", rr.shape)
print("The contents are currently", rr)
print()
rr = rr.view(5, 3) # Same 15 elements, arranged as 5 rows x 3 columns
print("After reshaping, the shape is currently", rr.shape)
print("The contents are currently", rr)

#### numpy与tensor的转换

In [None]:
import numpy as np

# numpy.ndarray --> torch.Tensor:
arr = np.array([[1, 0, 5]])
data = torch.tensor(arr) # torch.tensor copies the array's data
print("This is a torch.tensor", data)

# torch.Tensor --> numpy.ndarray:
new_arr = data.numpy() # The returned array shares memory with the (CPU) tensor
print("This is a np.ndarray", new_arr)

#### 并行操作

In [None]:
# A 5x7 float tensor holding 1..35, used to demonstrate reductions along one dimension.
data = torch.arange(1, 36, dtype=torch.float32).reshape(5, 7)
print("Data is:", data)

# dim=0 collapses the rows, leaving one sum per column (shape (7,)).
print("Taking the sum over columns:")
print(data.sum(dim=0))

# dim=1 collapses the columns, leaving one sum per row (shape (5,)).
print("Taking the sum over rows:")
print(data.sum(dim=1))

# Other reductions accept the same `dim` argument; here, one stdev per row.
print("Taking the stdev over rows:")
print(data.std(dim=1))

In [None]:
data.sum()

In [None]:
data = torch.tensor([[1, 2.2, 9.6], [4, -7.2, 6.3]]) # shape (2, 3)

row_avg = data.mean(dim=1) # Mean of each row: one value per row
col_avg = data.mean(dim=0) # Mean of each column: one value per column

print(row_avg.shape) # Prints "torch.Size([2])"
print(row_avg)

print(col_avg.shape) # Prints "torch.Size([3])"
print(col_avg)

#### 索引

In [None]:
# Initialize an example tensor
# Build the (3, 2, 2) float tensor used by the indexing examples.
# torch.tensor with an explicit dtype replaces the legacy torch.Tensor(...)
# constructor, which left the float dtype implicit.
x = torch.tensor([
                  [[1, 2], [3, 4]],
                  [[5, 6], [7, 8]],
                  [[9, 10], [11, 12]]
                 ], dtype=torch.float32)
x

In [None]:
x.shape

In [None]:
# Access the 0th element, which is the first row
x[0] # Equivalent to x[0, :]

In [None]:
x[:, 0]

In [None]:
matr = torch.arange(1, 16).view(5, 3)
print(matr)

In [None]:
matr[0]

In [None]:
matr[0, :]

In [None]:
matr[:, 0]

In [None]:
matr[0:3]

In [None]:
matr[:, 0:2]

In [None]:
matr[0:3, 0:2]

In [None]:
matr[0][2]

In [None]:
matr[0:3, 2]

In [None]:
# Chained indexing: first take rows 0-2, then element 2 of that result, which is a
# whole row -- unlike matr[0:3, 2], which picks column 2 of rows 0-2.
matr[0:3][2]

In [None]:
matr[0:3]

In [None]:
# Select rows 0, 2 and 4 by passing a list of indices. Three comma-separated
# indices (matr[0, 2, 4]) would address three dimensions and raise IndexError
# on this 2-D tensor.
matr[[0, 2, 4]]

In [None]:
# Get the top left element of each element in our tensor
x[:, 0, 0]

In [None]:
x[:, :, :]

In [None]:
# Print x again to see our tensor
x

In [None]:
# Let's access the 0th and 1st elements, each twice
i = torch.tensor([0, 0, 1, 1])
x[i]

In [None]:
# Let's access the 0th elements of the 1st and 2nd elements
i = torch.tensor([1, 2])
j = torch.tensor([0]) # Broadcast against i, so this selects x[1, 0] and x[2, 0]
x[i, j]

In [None]:
x[0, 0, 0]

In [None]:
x[0, 0, 0].item()

#### 自动求导

In [None]:
# Create an example tensor
# requires_grad parameter tells PyTorch to store gradients
x = torch.tensor([2.], requires_grad=True)

# Print the gradient if it is calculated
# Currently None because backward() has not yet been called on anything computed from x
pp.pprint(x.grad)

In [None]:
# Calculating the gradient of y with respect to x
y = x * x * 3 # 3x^2
y.backward()
pp.pprint(x.grad) # d(y)/d(x) = d(3x^2)/d(x) = 6x = 12

In [None]:
# Calling backward() again adds to the stored gradient instead of replacing it:
# when the previous cell has been run once, x.grad becomes 12 + 12 = 24.
# Gradients must be reset explicitly (e.g. x.grad.zero_() or an optimizer's zero_grad()).
z = x * x * 3 # 3x^2
z.backward()
pp.pprint(x.grad)

#### 神经网络模块

In [None]:
import torch.nn as nn

#### 线性层

我们使用nn.Linear(H_in, H_out)来创建一个线性层，其中H_in是输入的维度，H_out是输出的维度. 另一种表示方法(N, *, H_in) -> (N, *, H_out)，其中N是batch size，*表示任意数量的附加维度。线性层执行以下操作：y=Ax+b，其中A是权重矩阵，b是偏置向量。如果不需要偏置，则可以设置bias=False。

In [None]:
# Create the inputs
# NOTE: the name `input` shadows Python's built-in input() for the rest of the session
input = torch.ones(2,3,4)
# (N, *, H_in) = (2, 3, 4)  ->  (N, *, H_out) = (2, 3, 2)


# Make a linear layer transforming N,*,H_in dimensional inputs to N,*,H_out
# dimensional outputs
linear = nn.Linear(4, 2)
linear_output = linear(input)
linear_output

In [None]:
# The learnable parameters of y = Ax + b: the weight matrix A (shape (2, 4)) and the bias b (shape (2,))
list(linear.parameters()) # Ax + b

#### 激活函数

激活函数是神经网络中非常重要的一部分，它们可以将线性层的输出转换为非线性的输出，从而使神经网络具有强大的表达能力。PyTorch中提供了很多常用的激活函数，如ReLU、Sigmoid、Tanh、Softmax等。

In [None]:
linear_output

In [None]:
# Activation functions are modules too: create one, then call it on a tensor.
sigmoid = nn.Sigmoid()
output = sigmoid(linear_output) # Applied elementwise; every value ends up in (0, 1)
output

#### 整合不同的层

In [None]:
# nn.Sequential chains modules: each module's output becomes the next module's input
block = nn.Sequential(
    nn.Linear(4, 2),
    nn.Sigmoid()
)

input = torch.ones(2,3,4)
output = block(input) # Shape (2, 3, 2); values lie in (0, 1) after the sigmoid
output

#### 自定义模块

除了使用PyTorch提供的模块，我们还可以自定义模块。自定义模块需要继承nn.Module类，并实现它的forward函数，forward函数定义了模块的前向传播过程。

要自定义模块，首先要继承nn.Module类，然后在`__init__`中初始化所需要的参数和层，最后在forward函数中定义前向传播的过程。

In [None]:
class MultilayerPerceptron(nn.Module):
    """Two-layer perceptron whose output has the same width as its input.

    The layers are Linear(input_size -> hidden_size), ReLU,
    Linear(hidden_size -> input_size) and Sigmoid, bundled into a single
    ``nn.Sequential`` stored on ``self.model``.
    """

    def __init__(self, input_size, hidden_size):
        # nn.Module must be initialized before sub-modules are assigned.
        super().__init__()

        # Keep the constructor arguments for later inspection.
        self.input_size = input_size
        self.hidden_size = hidden_size

        # The attribute name `model` is arbitrary; any name would register the layers.
        layers = [
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, input_size),
            nn.Sigmoid(),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the stacked layers and return the result."""
        return self.model(x)

我们可以不使用nn.Sequential来定义，而是使用单个的函数来定义不同的层

In [None]:
class MultilayerPerceptron(nn.Module):
    """Two-layer perceptron built from individually named layers.

    Computes Sigmoid(linear2(ReLU(linear(x)))), where ``linear`` maps
    ``input_size -> hidden_size`` and ``linear2`` maps back to ``input_size``.
    """

    def __init__(self, input_size, hidden_size):
        # nn.Module must be initialized before sub-modules are assigned.
        super().__init__()

        # Keep the constructor arguments for later inspection.
        self.input_size = input_size
        self.hidden_size = hidden_size

        # Each layer is its own attribute, so parameters are named after it
        # (e.g. "linear.weight", "linear2.bias").
        self.linear = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, input_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Apply the four layers in order and return the final activation."""
        hidden = self.relu(self.linear(x))
        return self.sigmoid(self.linear2(hidden))

In [None]:
# Make a sample input: 2 rows of 5 random values
input = torch.randn(2, 5)

# Create our model with input_size=5 and hidden_size=3
model = MultilayerPerceptron(5, 3)

# Pass our input through our model; calling the module runs its forward()
model(input)

使用named_parameters()函数可以获取模块中所有参数的名称和值

In [None]:
list(model.named_parameters())

#### 优化

我们使用backward()函数来计算梯度，但是这还不够，我们还需要知道如何更新参数，这就需要使用优化器了。常见的优化器有SGD、Adam、RMSProp等。初始化优化器时，我们将模型的参数传递给它，然后使用优化器的step()函数来更新参数。

In [None]:
import torch.optim as optim

在有了优化器之后，我们能够定义一种损失函数，然后使用优化器来更新参数，这样就能够训练模型了。常见的损失函数有均方误差、交叉熵等。

In [None]:
# Create the y data: the target is a 10x5 tensor of ones
y = torch.ones(10, 5)

# Add some noise to our goal y to generate our x
# We want our model to predict our original data, despite the noise
x = y + torch.randn_like(y)
x

In [None]:
# Instantiate the model
model = MultilayerPerceptron(5, 3)

# Define the optimizer; it is given the parameters it will update and the learning rate
adam = optim.Adam(model.parameters(), lr=1e-1)

# Define loss using a predefined loss function (binary cross entropy)
loss_function = nn.BCELoss()

# Calculate how our model is doing now, before any training step;
# .item() extracts the loss as a plain Python number
y_pred = model(x)
loss_function(y_pred, y).item()

In [None]:
# Set the number of epochs, which determines the number of training iterations
n_epoch = 10

for epoch in range(n_epoch):
    # Set the gradients to 0; backward() accumulates, so stale gradients from the
    # previous iteration must be cleared first
    adam.zero_grad()

    # Get the model predictions
    y_pred = model(x)

    # Get the loss
    loss = loss_function(y_pred, y)

    # Print stats; .item() extracts the scalar so the line shows a plain number
    # rather than the tensor's repr with its grad_fn
    print(f"Epoch {epoch}: training loss: {loss.item()}")

    # Compute the gradients
    loss.backward()

    # Take a step to optimize the weights
    adam.step()

In [None]:
list(model.parameters())

In [None]:
# See how our model performs on the training data
y_pred = model(x)
y_pred

In [None]:
# Create test data and check how our model performs on it:
# x2 is the same target y with freshly sampled noise, so the model has not seen it
x2 = y + torch.randn_like(y)
y_pred = model(x2)
y_pred