# Foundations

## Import Packages

In [1]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
print(torch.__version__)

2.3.1+cpu


In [2]:
# Optional proxy configuration.
# Set USE_PROXY = True to route HTTP(S) traffic through `proxy`;
# leave it False to guarantee that no proxy variable stays set.
import os

USE_PROXY = False
proxy = 'socks5://127.0.0.1:5353'
PROXY_ENV_VARS = ('http_proxy', 'HTTP_PROXY', 'https_proxy', 'HTTPS_PROXY')

for _env_name in PROXY_ENV_VARS:
    if USE_PROXY:
        os.environ[_env_name] = proxy
    else:
        # Environment variable names are case-sensitive on POSIX systems, so
        # both spellings have to be cleared; pop() tolerates names that are
        # not set (e.g. on Windows, where both spellings are one variable).
        os.environ.pop(_env_name, None)

## Tensor

### Creation

#### tensor function

PyTorch的tensors可以通过 `torch.tensor()`创建.

In [3]:
# Scalar
scalar = torch.tensor(7)
scalar

tensor(7)

In [4]:
scalar.ndim

0

In [5]:
scalar.shape

torch.Size([])

In [6]:
# Get tensor back as Python int
scalar.item()

7

In [7]:
# Vector
vector = torch.tensor([7, 7])
vector

tensor([7, 7])

In [8]:
vector.ndim

1

In [9]:
vector.shape

torch.Size([2])

In [10]:
# MATRIX
MATRIX = torch.tensor([[7, 8], [9, 10]])
MATRIX

tensor([[ 7,  8],
        [ 9, 10]])

In [11]:
MATRIX.ndim

2

In [12]:
MATRIX[0]

tensor([7, 8])

In [13]:
MATRIX[1]

tensor([ 9, 10])

In [14]:
MATRIX.shape

torch.Size([2, 2])

In [15]:
# Tensor

TENSOR = torch.tensor([[[1, 2, 3], [3, 6, 9], [2, 4, 5]]])
TENSOR

tensor([[[1, 2, 3],
         [3, 6, 9],
         [2, 4, 5]]])

In [16]:
TENSOR.ndim

3

In [17]:
TENSOR.shape

torch.Size([1, 3, 3])

In [18]:
TENSOR[0]

tensor([[1, 2, 3],
        [3, 6, 9],
        [2, 4, 5]])

#### rand function

为什么是随机张量? 随机张量之所以重要, 是因为许多神经网络的学习方式是从充满随机数的张量开始, 并调整这些随机数以更好地表示数据.

<div class="admonition tip">
    <p class="admonition-title">Tip</p>
    <p style="margin: 10px;">
        这个过程可以表达如下: 从随机数开始 -> 查看数据 -> 更新随机数 -> 查看数据 -> 更新随机数.
    </p>
</div>

In [19]:
# Create a random tensor of size (3, 4)
random_tensor = torch.rand(3, 4)
random_tensor

tensor([[0.7843, 0.9212, 0.9939, 0.2277],
        [0.3122, 0.2953, 0.8918, 0.2394],
        [0.4509, 0.3599, 0.9998, 0.7432]])

In [20]:
random_tensor.dtype

torch.float32

In [21]:
# Create a random tensor whose shape resembles that of an image tensor
random_image_size_tensor = torch.rand(size=(244, 244, 3)) # Height, width, color channel (R, G, B)
random_image_size_tensor.shape, random_image_size_tensor.ndim

(torch.Size([244, 244, 3]), 3)

In [22]:
torch.rand(size=(3, 3))

tensor([[0.9137, 0.7984, 0.6131],
        [0.7220, 0.3540, 0.2633],
        [0.7329, 0.4907, 0.0528]])

#### zeros/ones function

In [23]:
# Create a tensor of all zeros
zeros = torch.zeros(size=(3, 4))
zeros

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [24]:
zeros*random_tensor

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [25]:
# Create a tensor of all ones
ones = torch.ones(size=(3, 4))
ones

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [26]:
ones.dtype

torch.float32

#### arange function

In [27]:
# Use torch.arange(): `end` is exclusive, so this yields 0, 1, ..., 10 (11 values).
# NOTE(review): despite the name `one_to_ten`, the sequence starts at 0.
one_to_ten = torch.arange(start=0, end=11, step=1)
one_to_ten

tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

#### zeros_like function

用于创建和`input`相同形状的全0 tensor.

In [28]:
# Creating tensor zeros like
ten_zeros = torch.zeros_like(input=one_to_ten)
ten_zeros

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

#### ones_like function

用于创建和`input`相同形状的全1 tensor.

In [29]:
# Creating tensor ones like
ten_ones = torch.ones_like(input=one_to_ten)
ten_ones

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

### Datatype

<div class="admonition note">
    <p class="admonition-title">笔记</p>
    <p style="margin: 10px">
        Tensor的数据类型是学习PyTorch和深度学习中的三大难点之一:
            <ul>
                <li> Tensor的数据类型不正确
                <li> Tensor的形状不正确
                <li> Tensor的设备不正确
            </ul>
    </p>
</div>

In [30]:
# Float 32 tensor
float_32_tensor = torch.tensor([3.0, 6.0, 9.0], 
                               dtype=None, # data type of the tensor; None lets PyTorch infer it (float32 here, see the output)
                               device="cpu", # device the tensor is stored on
                               requires_grad=False # whether to track gradients for later operations on this tensor
                               )
float_32_tensor, float_32_tensor.dtype

(tensor([3., 6., 9.]), torch.float32)

In [31]:
float_16_tensor = float_32_tensor.type(torch.float16)
float_16_tensor

tensor([3., 6., 9.], dtype=torch.float16)

In [32]:
float_16_tensor*float_32_tensor

tensor([ 9., 36., 81.])

In [33]:
int_32_tensor = torch.tensor([3, 6, 9], dtype=torch.int32)
int_32_tensor

tensor([3, 6, 9], dtype=torch.int32)

In [34]:
float_32_tensor*int_32_tensor

tensor([ 9., 36., 81.])

### Information Acquisition

In [35]:
random_tensor = torch.rand(3, 4)
# Inspect the tensor's data type, shape, and device
random_tensor.dtype, random_tensor.shape, random_tensor.device

(torch.float32, torch.Size([3, 4]), device(type='cpu'))

### Manipulation

Tensor操作包括:

- 加法
- 减法
- 乘法
- 除法
- 点积

In [36]:
tensor = torch.tensor([1, 2, 3])

#### Addition

In [37]:
tensor + 10, torch.add(tensor, 10)

(tensor([11, 12, 13]), tensor([11, 12, 13]))

#### Subtraction

In [38]:
tensor - 10

tensor([-9, -8, -7])

#### Multiplication

In [39]:
tensor * 10, torch.mul(tensor, 10)

(tensor([10, 20, 30]), tensor([10, 20, 30]))

#### Division

In [40]:
tensor / 10

tensor([0.1000, 0.2000, 0.3000])

#### Dot Product

In [41]:
tensor_new = torch.tensor([2, 3, 4])
tensor_new @ tensor, torch.matmul(tensor, tensor_new)

(tensor(20), tensor(20))

PyTorch实现的点积通常比手动用for循环实现的快不少 (下面的例子只有3个元素, 两者的耗时都低于计时器的精度, 差异要在更大的Tensor上才能体现出来):

In [42]:
%%time
value = 0
for i in range(3): 
    value += tensor[i] * tensor_new[i]
print(value)

tensor(20)
CPU times: total: 0 ns
Wall time: 0 ns


In [43]:
%%time
tensor_new @ tensor

CPU times: total: 0 ns
Wall time: 0 ns


tensor(20)

In [44]:
tensor_A = torch.tensor([[1, 2], [3, 4], [5, 6]])
tensor_B = torch.tensor([[4, 5, 6], [7, 8, 9]])
tensor_A @ tensor_B

tensor([[18, 21, 24],
        [40, 47, 54],
        [62, 73, 84]])

In [45]:
# (3, 2) @ (3, 2) is invalid: the inner dimensions (2 and 3) do not match.
# The error is caught and printed so the demonstration stays visible and a
# full "Run All" is not interrupted.
tensor_A = torch.tensor([[1, 2], [3, 4], [5, 6]])
tensor_B = torch.tensor([[7, 10], [8, 11], [9, 12]])
try:
    tensor_A @ tensor_B
except RuntimeError as error:
    print(f"RuntimeError: {error}")

RuntimeError: mat1 and mat2 shapes cannot be multiplied (3x2 and 3x2)

In [46]:
tensor_A = torch.tensor([[1, 2], [3, 4], [5, 6]])
tensor_B = torch.tensor([[7, 10], [8, 11], [9, 12]]).T
tensor_A @ tensor_B

tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])

#### Aggregation

In [47]:
x = torch.arange(0, 100, 10)
x

tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

In [48]:
torch.min(x), x.min()

(tensor(0), tensor(0))

In [49]:
torch.max(x), x.max()

(tensor(90), tensor(90))

In [50]:
torch.mean(x.type(torch.float32)), x.type(torch.float32).mean()

(tensor(45.), tensor(45.))

In [51]:
torch.sum(x), x.sum()

(tensor(450), tensor(450))

In [52]:
torch.argmin(x)

tensor(0)

In [53]:
torch.argmax(x)

tensor(9)

#### Reshaping

In [54]:
x = torch.arange(1, 10)
x, x.shape

(tensor([1, 2, 3, 4, 5, 6, 7, 8, 9]), torch.Size([9]))

In [106]:
# A reshape must keep the number of elements: 9 values cannot fill a (1, 7) shape.
# The error is caught and printed so the demonstration stays visible and a
# full "Run All" is not interrupted.
try:
    x_reshaped = x.reshape(1, 7)
    print(x_reshaped, x_reshaped.shape)
except RuntimeError as error:
    print(f"RuntimeError: {error}")

RuntimeError: shape '[1, 7]' is invalid for input of size 9

In [56]:
x_reshaped_1 = x.reshape(1, 9)
x_reshaped_2 = x.reshape(9, 1)
x_reshaped_1, x_reshaped_2, x_reshaped_1.ndim, x_reshaped_2.ndim, x.ndim

(tensor([[1, 2, 3, 4, 5, 6, 7, 8, 9]]),
 tensor([[1],
         [2],
         [3],
         [4],
         [5],
         [6],
         [7],
         [8],
         [9]]),
 2,
 2,
 1)

In [57]:
x_reshaped_3 = x.reshape(3, 3)
x_reshaped_3, x_reshaped_3.ndim

(tensor([[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]),
 2)

#### Stacking

In [58]:
x = torch.arange(1, 10)
x_stacked = torch.stack([x, x, x, x, x])
x_stacked

tensor([[1, 2, 3, 4, 5, 6, 7, 8, 9],
        [1, 2, 3, 4, 5, 6, 7, 8, 9],
        [1, 2, 3, 4, 5, 6, 7, 8, 9],
        [1, 2, 3, 4, 5, 6, 7, 8, 9],
        [1, 2, 3, 4, 5, 6, 7, 8, 9]])

In [59]:
x_stacked_1 = torch.stack([x, x, x, x, x], dim=0)
x_stacked_1

tensor([[1, 2, 3, 4, 5, 6, 7, 8, 9],
        [1, 2, 3, 4, 5, 6, 7, 8, 9],
        [1, 2, 3, 4, 5, 6, 7, 8, 9],
        [1, 2, 3, 4, 5, 6, 7, 8, 9],
        [1, 2, 3, 4, 5, 6, 7, 8, 9]])

In [60]:
x_stacked_2 = torch.stack([x, x, x, x, x], dim=1)
x_stacked_2

tensor([[1, 1, 1, 1, 1],
        [2, 2, 2, 2, 2],
        [3, 3, 3, 3, 3],
        [4, 4, 4, 4, 4],
        [5, 5, 5, 5, 5],
        [6, 6, 6, 6, 6],
        [7, 7, 7, 7, 7],
        [8, 8, 8, 8, 8],
        [9, 9, 9, 9, 9]])

#### Squeezing

把所有大小为1的维度都移除掉.

In [61]:
x = torch.arange(1, 10)
x_reshaped = x.reshape(1, 9)
x_reshaped, x_reshaped.squeeze(), x_reshaped.squeeze().shape

(tensor([[1, 2, 3, 4, 5, 6, 7, 8, 9]]),
 tensor([1, 2, 3, 4, 5, 6, 7, 8, 9]),
 torch.Size([9]))

#### Unsqueezing

在原来的基础上在某一个位置添加一个维度.

In [62]:
x = torch.arange(1, 10)
x_reshaped = x.reshape(1, 9)
x_squeezed = x_reshaped.squeeze()
x_unsqueezed = x_squeezed.unsqueeze(dim=0)
x_squeezed, x_unsqueezed, x_squeezed.shape, x_unsqueezed.shape

(tensor([1, 2, 3, 4, 5, 6, 7, 8, 9]),
 tensor([[1, 2, 3, 4, 5, 6, 7, 8, 9]]),
 torch.Size([9]),
 torch.Size([1, 9]))

In [63]:
x = torch.arange(1, 10)
x_reshaped = x.reshape(1, 9)
x_squeezed = x_reshaped.squeeze()
x_unsqueezed = x_squeezed.unsqueeze(dim=1)
x_squeezed, x_unsqueezed, x_squeezed.shape, x_unsqueezed.shape

(tensor([1, 2, 3, 4, 5, 6, 7, 8, 9]),
 tensor([[1],
         [2],
         [3],
         [4],
         [5],
         [6],
         [7],
         [8],
         [9]]),
 torch.Size([9]),
 torch.Size([9, 1]))

#### Permutation

更改维度的排列以改变形状, 返回的是一个视图.

In [64]:
x = torch.randn(2, 3, 5)
torch.permute(x, (2, 0, 1)).shape # new shape = (original dim 2, original dim 0, original dim 1)

torch.Size([5, 2, 3])

In [65]:
x_original = torch.rand(size=(224, 224, 3))
x_permuted = x_original.permute(2, 0, 1)
x_permuted, x_permuted.shape

(tensor([[[0.8574, 0.5607, 0.4884,  ..., 0.2249, 0.5719, 0.6389],
          [0.7270, 0.4327, 0.4216,  ..., 0.8345, 0.6448, 0.5187],
          [0.4591, 0.3215, 0.0952,  ..., 0.4656, 0.0574, 0.6755],
          ...,
          [0.2701, 0.8881, 0.4006,  ..., 0.2123, 0.0742, 0.6088],
          [0.7969, 0.5231, 0.6343,  ..., 0.2638, 0.3222, 0.6112],
          [0.9593, 0.9938, 0.0082,  ..., 0.6594, 0.2939, 0.7064]],
 
         [[0.3805, 0.1643, 0.4555,  ..., 0.5069, 0.0914, 0.8792],
          [0.7642, 0.0624, 0.7394,  ..., 0.1545, 0.2779, 0.1723],
          [0.3881, 0.8794, 0.1924,  ..., 0.3050, 0.7912, 0.6361],
          ...,
          [0.6704, 0.7251, 0.9654,  ..., 0.8564, 0.3860, 0.7505],
          [0.9055, 0.9941, 0.5398,  ..., 0.5803, 0.6704, 0.8699],
          [0.5215, 0.9100, 0.7403,  ..., 0.3473, 0.7780, 0.3613]],
 
         [[0.1207, 0.9424, 0.1988,  ..., 0.9135, 0.6066, 0.5401],
          [0.5001, 0.3245, 0.4041,  ..., 0.2208, 0.5118, 0.8222],
          [0.4376, 0.8177, 0.3766,  ...,

### View

In [66]:
x = torch.arange(1, 10)
z = x.view(1, 9)
z, z.shape

(tensor([[1, 2, 3, 4, 5, 6, 7, 8, 9]]), torch.Size([1, 9]))

In [67]:
z[:, 0] = 10000000
z, x

(tensor([[10000000,        2,        3,        4,        5,        6,        7,
                 8,        9]]),
 tensor([10000000,        2,        3,        4,        5,        6,        7,
                8,        9]))

### Indexing

请参考NumPy的索引, 两者一模一样.

In [68]:
x = torch.arange(1, 10).reshape(1, 3, 3)
x, x.shape

(tensor([[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]),
 torch.Size([1, 3, 3]))

In [69]:
x[0]

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [70]:
x[:, 0]

tensor([[1, 2, 3]])

In [71]:
x[:, :, 0]

tensor([[1, 4, 7]])

In [72]:
x[0][0][0]

tensor(1)

In [73]:
x[0, :, :]

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [74]:
x[:, :, 2]

tensor([[3, 6, 9]])

In [75]:
x[0, 2, 2]

tensor(9)

In [76]:
x[:, 0:1]

tensor([[[1, 2, 3]]])

In [77]:
x[:, 0:2]

tensor([[[1, 2, 3],
         [4, 5, 6]]])

In [78]:
x[0:1, 0:2]

tensor([[[1, 2, 3],
         [4, 5, 6]]])

In [79]:
x[:, :, 1:3]

tensor([[[2, 3],
         [5, 6],
         [8, 9]]])

In [80]:
x[0:1]

tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]])

### Integration with Numpy

- 将NumPy中的数组转为Tensor: 使用`torch.from_numpy`函数
- 将Tensor转为NumPy中的数组: 使用`torch.Tensor.numpy()`方法

In [81]:
array = np.arange(1.0, 8.0)
tensor = torch.from_numpy(array)
array, tensor

(array([1., 2., 3., 4., 5., 6., 7.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [82]:
array.dtype, torch.arange(1.0, 8.0).dtype
# NumPy's default floating-point dtype is float64, while PyTorch's default is float32.
# torch.from_numpy keeps the array's dtype, which is why `tensor` above is float64.

(dtype('float64'), torch.float32)

In [83]:
tensor.type(torch.float32) # returns a float32 copy; `tensor` itself is not modified

tensor([1., 2., 3., 4., 5., 6., 7.])

In [84]:
# `array + 1` builds a NEW array and rebinds the name `array`; `tensor` still
# refers to the original buffer, so its values are unchanged.
# NOTE: torch.from_numpy shares memory with the source array, so an in-place
# update (e.g. `array += 1`) would be visible through `tensor` as well.
array = array + 1
array, tensor

(array([2., 3., 4., 5., 6., 7., 8.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [85]:
tensor = torch.ones(7)
numpy_tensor = tensor.numpy()
tensor, numpy_tensor

(tensor([1., 1., 1., 1., 1., 1., 1.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

In [86]:
# `tensor + 1` builds a NEW tensor and rebinds the name `tensor`; `numpy_tensor`
# still refers to the original buffer, so its values are unchanged.
# NOTE: Tensor.numpy() shares memory with the source CPU tensor, so an in-place
# update (e.g. `tensor += 1`) would be visible through `numpy_tensor` as well.
tensor = tensor + 1
tensor, numpy_tensor

(tensor([2., 2., 2., 2., 2., 2., 2.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

## Reproducibility

简单的来说, 一个神经网络的学习流程是: 从一堆随机数开始 -> Tensor操作 -> 更新随机数使他们能够更好的表示数据 -> 重复上述操作.

为了能够减少在生成随机数时候的随机性, 引入了一个新的概念: 随机种子, random seed. 


In [92]:
random_tensor_A = torch.rand(3, 4)
random_tensor_B = torch.rand(3, 4)
print(random_tensor_A)
print(random_tensor_B)
print(random_tensor_A == random_tensor_B)

tensor([[0.7529, 0.4276, 0.4808, 0.4665],
        [0.2115, 0.3266, 0.7887, 0.8988],
        [0.1275, 0.8763, 0.9504, 0.3734]])
tensor([[0.7608, 0.6505, 0.6025, 0.3354],
        [0.0447, 0.7154, 0.9483, 0.0435],
        [0.3311, 0.9979, 0.0835, 0.1783]])
tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])


In [94]:
# Use a random seed: re-seeding the global generator before each draw makes
# both draws produce exactly the same values.

RANDOM_SEED = 0

torch.manual_seed(RANDOM_SEED)
random_tensor_C = torch.rand(size=(3, 4))

torch.manual_seed(RANDOM_SEED)
random_tensor_D = torch.rand(size=(3, 4))

print(
    random_tensor_C,
    random_tensor_D,
    random_tensor_C == random_tensor_D,
    sep="\n",
)

tensor([[0.4963, 0.7682, 0.0885, 0.1320],
        [0.3074, 0.6341, 0.4901, 0.8964],
        [0.4556, 0.6323, 0.3489, 0.4017]])
tensor([[0.4963, 0.7682, 0.0885, 0.1320],
        [0.3074, 0.6341, 0.4901, 0.8964],
        [0.4556, 0.6323, 0.3489, 0.4017]])
tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])


<div class="admonition warning">
    <p class="admonition-title">注意</p>
    <p style="margin: 10px;">
        torch.manual_seed函数只是重置随机数生成器的状态; 之后的每一次随机调用都会推进该状态, 因此若要让多次调用得到相同的结果, 需要在每次调用之前重新设置种子.
    </p>
</div>

## GPUs

NVIDIA+CUDA能够加速Pytorch的运算. 拿到GPU的简单方法:

1. 最简单: 使用Google Colab, 提供免费的GPU算力
2. 使用自己的GPU, 需要金钱投资, 配置较费力
3. 使用云计算服务, 如aws, gcp, azure

<div class="admonition warning">
    <p class="admonition-title">注意</p>
    <p style="margin: 10px;">
        请在GPU环境下运行下列代码.
    </p>
</div>

### Check for GPU Access

In [95]:
!nvidia-smi

'nvidia-smi' 不是内部或外部命令，也不是可运行的程序
或批处理文件。


In [98]:
torch.cuda.is_available()

False

In [100]:
# Use CUDA when it is available, otherwise fall back to the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"

In [101]:
# Number of CUDA devices visible to PyTorch
torch.cuda.device_count()

0

### Putting Tensors (and models) on the GPU

In [103]:
# Move the tensor to the GPU when one is available; otherwise it stays on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

tensor = torch.tensor([1, 2, 3])
print(tensor, tensor.device)

tensor_on_gpu = tensor.to(device)
tensor_on_gpu

tensor([1, 2, 3]) cpu


tensor([1, 2, 3])

### Moving Tensors back to the CPU

In [107]:
# A tensor that lives on the GPU cannot be converted to a NumPy array directly,
# because NumPy only works with host (CPU) memory.
device = "cuda" if torch.cuda.is_available() else "cpu"
tensor = torch.tensor([1, 2, 3])
print(tensor, tensor.device)
tensor_on_gpu = tensor.to(device)
try:
    tensor_on_gpu.numpy()
except TypeError as error:
    # Raised only when the tensor really is on a CUDA device; on a CPU-only
    # machine the conversion succeeds and nothing is printed here.
    print(f"TypeError: {error}")

In [109]:
tensor_back_on_cpu = tensor_on_gpu.cpu().numpy()
tensor_back_on_cpu

array([1, 2, 3], dtype=int64)