### 1. 测试安装

In [1]:
import torch

In [2]:
torch.__version__

'2.7.0+cu128'

In [3]:
! nvidia-smi

Sun Apr 27 15:47:44 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 576.02                 Driver Version: 576.02         CUDA Version: 12.9     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 3080 Ti   WDDM  |   00000000:0A:00.0 Off |                  N/A |
|  0%   43C    P8             47W /  400W |    2599MiB /  12288MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [4]:
# cuda == GPU
torch.cuda.is_available()

True

In [5]:
torch.cuda.get_device_name()

'NVIDIA GeForce RTX 3080 Ti'

### 2. 张量 基本计算

In [6]:
import random
import numpy as np
import torch

In [7]:
# Build 30 independent 2x3 integer matrices with entries drawn from [0, 100]
# (randint's upper bound is exclusive, hence the 101).
np_score = [np.random.randint(0, 101, size=(2, 3)) for _ in range(30)]

# Print a single uniform float sample from [0, 1).
print(np.random.random())

0.8484005514665999


In [8]:
# Draw 30 random integers from the closed interval [0, 100].
scores = [random.randint(0, 100) for _ in range(30)]

In [9]:
scores

[78,
 67,
 28,
 50,
 54,
 74,
 72,
 83,
 13,
 21,
 89,
 79,
 44,
 38,
 26,
 8,
 87,
 27,
 54,
 95,
 88,
 100,
 2,
 60,
 30,
 62,
 24,
 23,
 77,
 93]

In [10]:
# 转为numpy
arr = np.array(scores)

In [11]:
type(arr)

numpy.ndarray

In [12]:
# 转为张量，也就是tensor
t = torch.tensor(data=scores, dtype=torch.float32)

In [13]:
print(f'类型：{type(t)}\n数据:{t}')

类型：<class 'torch.Tensor'>
数据:tensor([ 78.,  67.,  28.,  50.,  54.,  74.,  72.,  83.,  13.,  21.,  89.,  79.,
         44.,  38.,  26.,   8.,  87.,  27.,  54.,  95.,  88., 100.,   2.,  60.,
         30.,  62.,  24.,  23.,  77.,  93.])


In [14]:
print(f'类型：{type(arr)}\n数据:{arr}')

类型：<class 'numpy.ndarray'>
数据:[ 78  67  28  50  54  74  72  83  13  21  89  79  44  38  26   8  87  27
  54  95  88 100   2  60  30  62  24  23  77  93]


In [15]:
print(f'类型：{type(scores)}\n数据:{scores}')

类型：<class 'list'>
数据:[78, 67, 28, 50, 54, 74, 72, 83, 13, 21, 89, 79, 44, 38, 26, 8, 87, 27, 54, 95, 88, 100, 2, 60, 30, 62, 24, 23, 77, 93]


In [16]:
arr.min()

np.int64(2)

In [17]:
t.min()

tensor(2.)

In [18]:
arr.max()

np.int64(100)

In [19]:
t.max()

tensor(100.)

In [20]:
arr.mean()

np.float64(54.86666666666667)

In [21]:
t.mean()

tensor(54.8667)

In [22]:
arr.sum()

np.int64(1646)

In [23]:
t.sum()

tensor(1646.)

In [24]:
# NumPy's var() uses ddof=0 by default: the population variance (divide by N)
print(arr.var())
# Passing ddof=1 gives the sample variance (divide by N - 1)
print(arr.var(ddof=1))

830.5822222222222
859.2229885057471


In [25]:
# PyTorch's var() applies Bessel's correction by default (correction=1): the sample variance (divide by N - 1)
print(t.var())
# Passing correction=0 gives the population variance (divide by N)
print(t.var(correction=0))

tensor(859.2230)
tensor(830.5822)


In [26]:
# 元素数量
arr.size

30

In [27]:
# 元素数量
t.numel()

30

In [28]:
# 查看维度
arr.ndim

1

In [29]:
# 查看维度
t.ndim

1

In [30]:
np.arange(12).reshape(3, 4)

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [31]:
torch.arange(12).reshape(3, 4)

tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])

In [32]:
np.ones(shape=(2, 3))

array([[1., 1., 1.],
       [1., 1., 1.]])

In [33]:
torch.ones(2, 3)

tensor([[1., 1., 1.],
        [1., 1., 1.]])

In [34]:
np.linalg.norm(arr)

np.float64(339.45250035903405)

In [35]:
torch.linalg.norm(t)

tensor(339.4525)

In [36]:
np.linspace(start=-5, stop=5, num=100)

array([-5.        , -4.8989899 , -4.7979798 , -4.6969697 , -4.5959596 ,
       -4.49494949, -4.39393939, -4.29292929, -4.19191919, -4.09090909,
       -3.98989899, -3.88888889, -3.78787879, -3.68686869, -3.58585859,
       -3.48484848, -3.38383838, -3.28282828, -3.18181818, -3.08080808,
       -2.97979798, -2.87878788, -2.77777778, -2.67676768, -2.57575758,
       -2.47474747, -2.37373737, -2.27272727, -2.17171717, -2.07070707,
       -1.96969697, -1.86868687, -1.76767677, -1.66666667, -1.56565657,
       -1.46464646, -1.36363636, -1.26262626, -1.16161616, -1.06060606,
       -0.95959596, -0.85858586, -0.75757576, -0.65656566, -0.55555556,
       -0.45454545, -0.35353535, -0.25252525, -0.15151515, -0.05050505,
        0.05050505,  0.15151515,  0.25252525,  0.35353535,  0.45454545,
        0.55555556,  0.65656566,  0.75757576,  0.85858586,  0.95959596,
        1.06060606,  1.16161616,  1.26262626,  1.36363636,  1.46464646,
        1.56565657,  1.66666667,  1.76767677,  1.86868687,  1.96

In [37]:
torch.linspace(start=-5, end=5, steps=100)

tensor([-5.0000, -4.8990, -4.7980, -4.6970, -4.5960, -4.4949, -4.3939, -4.2929,
        -4.1919, -4.0909, -3.9899, -3.8889, -3.7879, -3.6869, -3.5859, -3.4848,
        -3.3838, -3.2828, -3.1818, -3.0808, -2.9798, -2.8788, -2.7778, -2.6768,
        -2.5758, -2.4747, -2.3737, -2.2727, -2.1717, -2.0707, -1.9697, -1.8687,
        -1.7677, -1.6667, -1.5657, -1.4646, -1.3636, -1.2626, -1.1616, -1.0606,
        -0.9596, -0.8586, -0.7576, -0.6566, -0.5556, -0.4545, -0.3535, -0.2525,
        -0.1515, -0.0505,  0.0505,  0.1515,  0.2525,  0.3535,  0.4545,  0.5556,
         0.6566,  0.7576,  0.8586,  0.9596,  1.0606,  1.1616,  1.2626,  1.3636,
         1.4646,  1.5657,  1.6667,  1.7677,  1.8687,  1.9697,  2.0707,  2.1717,
         2.2727,  2.3737,  2.4747,  2.5758,  2.6768,  2.7778,  2.8788,  2.9798,
         3.0808,  3.1818,  3.2828,  3.3838,  3.4848,  3.5859,  3.6869,  3.7879,
         3.8889,  3.9899,  4.0909,  4.1919,  4.2929,  4.3939,  4.4949,  4.5960,
         4.6970,  4.7980,  4.8990,  5.00

### 3. GPU的使用

In [38]:
device = "cuda" if torch.cuda.is_available() else "cpu"

In [39]:
device

'cuda'

In [40]:
# 此时使用的是CPU，而不是cuda
t1 = torch.randn(2, 3)

In [41]:
# 若想使用cuda的话可以这么写
t2 = torch.randn(2, 3, device=device)

In [42]:
t1

tensor([[ 1.9038, -0.0402, -0.5958],
        [-0.5826,  0.5806,  0.4772]])

In [43]:
t2

tensor([[ 0.3096,  1.9067,  0.9746],
        [-0.5728,  0.1495, -1.0826]], device='cuda:0')

In [44]:
# 如果想做计算， 计算所需的数据必须放在同一个设备中，下面这种就由于数据不在一处所以无法运算
# t1 + t2

In [45]:
# 把t1放到cuda里再进行运算
t1.to(device=device) + t2

tensor([[ 2.2133,  1.8664,  0.3789],
        [-1.1554,  0.7301, -0.6054]], device='cuda:0')

In [46]:
# 把t2放到cpu里再进行运算
t2.cpu() + t1

tensor([[ 2.2133,  1.8664,  0.3789],
        [-1.1554,  0.7301, -0.6054]])

In [47]:
# 也可以这样把数据放到cuda里，但要确保设备上一定有显卡，所以这种方式有风险
t1.cuda() + t2

tensor([[ 2.2133,  1.8664,  0.3789],
        [-1.1554,  0.7301, -0.6054]], device='cuda:0')

### 4. 【重点】自动求导

$y = x^2$

In [48]:
def fn(x):
    """
        Original function: return x raised to the power of 2.
    """
    return pow(x, 2)

In [49]:
def dfn(x):
    """
        Derivative function: return 2 times x (the derivative of x ** 2).
    """
    slope = 2 * x
    return slope

In [50]:
# x1数学意义上是个常量
x1 = torch.randn(1)

In [51]:
x1

tensor([0.1014])

In [52]:
# Mathematically, x2 is a variable: passing requires_grad=True is how a constant tensor becomes one
# When printed, the number shown first is the variable's current value, followed by requires_grad=True
x2 = torch.randn(1, requires_grad=True)

In [53]:
x2

tensor([1.8850], requires_grad=True)

In [54]:
# A variable has both a current value and a gradient value; both are printed below
# The gradient is its derivative value; nothing has been differentiated yet, so it is still None
print(f'x2的当前值是：{x2.data}， x2的梯度值是：{x2.grad}')

x2的当前值是：tensor([1.8850])， x2的梯度值是：None


In [55]:
# x2是自变量，在上面已经被标记为需要求导；那么y是因变量
y = x2 ** 2

In [56]:
# The grad_fn in the output below marks y as a dependent variable
# It also records which operation produced y; here PowBackward0 means it came from a power operation
y

tensor([3.5530], grad_fn=<PowBackward0>)

In [57]:
# 求导
y.backward()

In [58]:
# 求导后梯度就不再是None了
# 梯度，这里的机制：梯度会累加，所以如果需要清空就用下面的函数
x2.grad

tensor([3.7699])

In [59]:
# 清空梯度
x2.grad.zero_()

tensor([0.])

In [60]:
x2.grad

tensor([0.])

In [61]:
x3 = torch.randn(2, requires_grad=True)
y = x3 ** 3 / 2
print(y)

tensor([-9.0146, -3.9253], grad_fn=<DivBackward0>)


### 5. 使用PyTorch的自动求导实现梯度下降法

In [62]:
def model(x):
    """
        Forward computation of the model: return the square of x.
    """
    squared = x ** 2
    return squared
    
steps = 10000
# 1e-3 is $1 \times 10^{-3}$, i.e. 0.001
learning_rate = 1e-3

In [63]:
# rough start is the best start!
# Randomly generate a variable (in the mathematical sense):
# one integer value from [-1000, 1000] (high is exclusive), stored as float32 with gradient tracking enabled
x = torch.randint(low=-1000, high=1001, size=(1,), dtype=torch.float32, requires_grad=True)

In [64]:
print(f'x的初始值为：{x}')
for step in range(steps):
    # 1. Forward pass: backpropagation needs a forward computation first.
    y = model(x)
    # 2. Backward pass: compute dy/dx and store it in x.grad.
    y.backward()
    # 3. Gradient descent step: x <- x - learning_rate * grad.
    #    The in-place update runs under no_grad() so the step itself is not
    #    recorded by autograd; this replaces the discouraged `x.data` access.
    with torch.no_grad():
        x -= learning_rate * x.grad
    # 4. Reset the gradient, because backward() accumulates into x.grad.
    x.grad.zero_()
print(f"x最终值为：{x}")

x的初始值为：tensor([-506.], requires_grad=True)
x最终值为：tensor([-1.0223e-06], requires_grad=True)
