# pytorch 基础


## 张量


In [1]:
import torch

torch.empty(2, 3)

tensor([[0., 0., 0.],
        [0., 0., 0.]])

In [2]:
torch.rand(2, 3)

tensor([[0.8299, 0.6094, 0.5877],
        [0.1292, 0.5377, 0.9977]])

In [3]:
torch.randn(2, 3)

tensor([[ 2.0942, -1.7385, -0.6006],
        [ 0.6626,  0.5604,  0.3006]])

In [4]:
torch.zeros(2, 3)

tensor([[0., 0., 0.],
        [0., 0., 0.]])

In [5]:
torch.zeros(2, 3, dtype=torch.double)

tensor([[0., 0., 0.],
        [0., 0., 0.]], dtype=torch.float64)

In [6]:
torch.zeros(2, 3, dtype=torch.long)

tensor([[0, 0, 0],
        [0, 0, 0]])

In [7]:
torch.arange(10)

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [9]:
array = [[1.0, 3.8, 2.1], [8.6, 4.0, 2.4]]
torch.tensor(array)

tensor([[1.0000, 3.8000, 2.1000],
        [8.6000, 4.0000, 2.4000]])

In [10]:
import numpy as np

array = np.array([[1.0, 3.8, 2.1], [8.6, 4.0, 2.4]])
torch.from_numpy(array)

tensor([[1.0000, 3.8000, 2.1000],
        [8.6000, 4.0000, 2.4000]], dtype=torch.float64)

**to gpu**


In [11]:
torch.rand(2, 3).cuda()
torch.rand(2, 3).to("cuda")
torch.rand(2, 3, device="cuda")

AssertionError: Torch not compiled with CUDA enabled

### 张量计算


In [12]:
x = torch.tensor([1, 2, 3], dtype=torch.double)
y = torch.tensor([4, 5, 6], dtype=torch.double)
print(x + y)

tensor([5., 7., 9.], dtype=torch.float64)


In [13]:
print(x - y)

tensor([-3., -3., -3.], dtype=torch.float64)


In [14]:
print(x * y)

tensor([ 4., 10., 18.], dtype=torch.float64)


In [15]:
print(x / y)

tensor([0.2500, 0.4000, 0.5000], dtype=torch.float64)


In [16]:
x.dot(y)

tensor(32., dtype=torch.float64)

In [17]:
x.sin()  # 在计算机中，正弦函数通常使用泰勒级数近似计算

tensor([0.8415, 0.9093, 0.1411], dtype=torch.float64)

In [18]:
x.exp()

tensor([ 2.7183,  7.3891, 20.0855], dtype=torch.float64)

In [19]:
x = torch.tensor([[1, 2, 3], [4, 5, 6]], dtype=torch.double)
print(x.mean())
print(x.mean(dim=0))
print(x.mean(dim=1))

tensor(3.5000, dtype=torch.float64)
tensor([2.5000, 3.5000, 4.5000], dtype=torch.float64)
tensor([2., 5.], dtype=torch.float64)


In [20]:
x.mean(dim=0, keepdim=True)
x.mean(dim=1, keepdim=True)

tensor([[2.],
        [5.]], dtype=torch.float64)

In [21]:
x = torch.tensor([[1, 2, 3], [4, 5, 6]], dtype=torch.double)
y = torch.tensor([[7, 8, 9], [10, 11, 12]], dtype=torch.double)

print(torch.cat((x, y), dim=0))
print(torch.cat((x, y), dim=1))

tensor([[ 1.,  2.,  3.],
        [ 4.,  5.,  6.],
        [ 7.,  8.,  9.],
        [10., 11., 12.]], dtype=torch.float64)
tensor([[ 1.,  2.,  3.,  7.,  8.,  9.],
        [ 4.,  5.,  6., 10., 11., 12.]], dtype=torch.float64)


### 自动微分


In [23]:
x = torch.tensor([2.0], requires_grad=True)
y = torch.tensor([3.0], requires_grad=True)
z = (x + y) * (y - 2)

z.backward()
print(x.grad, y.grad)

tensor([1.]) tensor([6.])


### 广播机制


In [27]:
x = torch.arange(1, 4).view(3, 1)
y = torch.arange(4, 6).view(1, 2)
print(x)
print(y)

tensor([[1],
        [2],
        [3]])
tensor([[4, 5]])


In [28]:
print(x + y)

tensor([[5, 6],
        [6, 7],
        [7, 8]])


### 索引与切片


In [31]:
x = torch.arange(12).view(3, 4)
x

tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])

In [32]:
x[1, 2:4]

tensor([6, 7])

In [33]:
x[:, 1:3]

tensor([[ 1,  2],
        [ 5,  6],
        [ 9, 10]])

### 降维与升维


In [34]:
a = torch.tensor([1, 2, 3, 4])
a.shape

torch.Size([4])

In [35]:
b = torch.unsqueeze(a, dim=0)
b

tensor([[1, 2, 3, 4]])

In [38]:
b = a.unsqueeze(dim=1)
b

tensor([[1],
        [2],
        [3],
        [4]])

In [39]:
b.squeeze()

tensor([1, 2, 3, 4])

# 2. 加载数据


### Dataset


In [43]:
# 迭代型
from torch.utils.data import IterableDataset, DataLoader


class MyIterableDataset(IterableDataset):
    def __init__(self, start, end):
        super(MyIterableDataset).__init__()
        self.start = start
        self.end = end

    def __iter__(self):
        return iter(range(self.start, self.end))


ds = MyIterableDataset(start=3, end=7)

print(list(DataLoader(ds, num_workers=0)))
print(list(DataLoader(ds, num_workers=2)))

[tensor([3]), tensor([4]), tensor([5]), tensor([6])]


Traceback (most recent call last):
Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/Users/liangzhu/anaconda3/envs/llm/lib/python3.12/multiprocessing/spawn.py", line 122, in spawn_main
  File "<string>", line 1, in <module>
  File "/Users/liangzhu/anaconda3/envs/llm/lib/python3.12/multiprocessing/spawn.py", line 122, in spawn_main
    exitcode = _main(fd, parent_sentinel)
    exitcode = _main(fd, parent_sentinel)
               ^^^^^^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^^^^^^^^^^^^
^^^^  File "/Users/liangzhu/anaconda3/envs/llm/lib/python3.12/multiprocessing/spawn.py", line 132, in _main
^^^^^^^^^^^^^^^^
  File "/Users/liangzhu/anaconda3/envs/llm/lib/python3.12/multiprocessing/spawn.py", line 132, in _main
    self = reduction.pickle.load(from_parent)
      self = reduction.pickle.load(from_parent) 
        ^^^^^^^^^^^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^
^^^^AttributeError^: ^Can't get attribute 'MyIterableDataset' on <module '__main__' (<class '_fr

RuntimeError: DataLoader worker (pid(s) 40005, 40006) exited unexpectedly

In [44]:
from torch.utils.data import get_worker_info
import math


def worker_init_fn(worker_id):
    worker_info = get_worker_info()
    dataset = worker_info.dataset
    overall_start = dataset.start
    overall_end = dataset.end
    per_worker = int(
        math.ceil((overall_end - overall_start) / float(worker_info.num_workers))
    )
    work_id = worker_info.id
    dataset.start = overall_start + work_id * per_worker
    dataset.end = min(dataset.start + per_worker, overall_end)


print(list(DataLoader(ds, num_workers=2, worker_init_fn=worker_init_fn)))
print(list(DataLoader(ds, num_workers=3, worker_init_fn=worker_init_fn)))

Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/Users/liangzhu/anaconda3/envs/llm/lib/python3.12/multiprocessing/spawn.py", line 122, in spawn_main
    exitcode = _main(fd, parent_sentinel)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/liangzhu/anaconda3/envs/llm/lib/python3.12/multiprocessing/spawn.py", line 132, in _main
    self = reduction.pickle.load(from_parent)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: Can't get attribute 'MyIterableDataset' on <module '__main__' (<class '_frozen_importlib.BuiltinImporter'>)>
Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/Users/liangzhu/anaconda3/envs/llm/lib/python3.12/multiprocessing/spawn.py", line 122, in spawn_main
    exitcode = _main(fd, parent_sentinel)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/liangzhu/anaconda3/envs/llm/lib/python3.12/multiprocessing/spawn.py", line 132, in _main
    self = reduction.pickle.load(from_par

RuntimeError: DataLoader worker (pid(s) 40424, 40425) exited unexpectedly

In [None]:
# 映射型数据集
import os
import pandas as pd
from torchvision.io import read_image
from torch.utils.data import Dataset


class CustomImageDataset(Dataset):
    def __init__(
        self, annotations_file, img_dir, transform=None, target_transform=None
    ):
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
        image = read_image(img_path)
        label = self.img_labels.iloc[idx, 1]
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label

### DataLoaders


In [3]:
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

tranining_data = datasets.FashionMNIST(
    root="data", train=True, download=True, transform=ToTensor()
)

test_data = datasets.FashionMNIST(
    root="data", train=False, download=True, transform=ToTensor()
)

train_data = DataLoader(tranining_data, batch_size=64, shuffle=True)
test_data = DataLoader(test_data, batch_size=64, shuffle=True)

train_features, train_labels = next(iter(train_data))
print(f"Feature batch shape: {train_features.size()}")
print(f"Labels batch shape: {train_labels.size()}")

img = train_features[0].squeeze()
label = train_labels[0]
print(img.shape)
print(f"Label:{label}")

Feature batch shape: torch.Size([64, 1, 28, 28])
Labels batch shape: torch.Size([64])
torch.Size([28, 28])
Label:5


In [5]:
from torch.utils.data import DataLoader
from torch.utils.data import SequentialSampler, RandomSampler
from torchvision import datasets
from torchvision.transforms import ToTensor

training_data = datasets.FashionMNIST(
    root="data", train=True, download=True, transform=ToTensor()
)

test_data = datasets.FashionMNIST(
    root="data", train=False, download=True, transform=ToTensor()
)

train_sampler = RandomSampler(training_data)
test_sampler = SequentialSampler(test_data)

train_dataloader = DataLoader(training_data, batch_size=64, sampler=train_sampler)
test_data = DataLoader(test_data, batch_size=64, sampler=test_sampler)

train_features, train_labels = next(iter(train_dataloader))
print(f"Feature batch shape: {train_features.size()}")
print(f"Labels batch shape: {train_labels.size()}")

test_features, test_labels = next(iter(test_data))
print(f"Feature batch shape: {test_features.size()}")
print(f"Labels batch shape: {test_labels.size()}")

Feature batch shape: torch.Size([64, 1, 28, 28])
Labels batch shape: torch.Size([64])
Feature batch shape: torch.Size([64, 1, 28, 28])
Labels batch shape: torch.Size([64])


# 3. 训练数据


### 构建模型


In [7]:
import torch
from torch import nn

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"using {device} device")


class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28 * 28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
            nn.Dropout(p=0.2),
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits


model = NeuralNetwork().to(device)

X = torch.rand(4, 28, 28, device=device)
logits = model(X)
pred_probab = nn.Softmax(dim=1)(logits)
print(pred_probab.size())

y_pred = pred_probab.argmax(-1)  # 用于找出最后一个轴上的最大值的索引。
print(f"Predicted class: {y_pred}")

using cpu device
torch.Size([4, 10])
Predicted class: tensor([5, 8, 5, 8])


### 优化模型参数


In [None]:
from tdqm.auto import tqdm
from transformers import AdamW, get_scheduler


def train_loop(dataloader, model, loss_fn, optimizer, lr_scheduler, epoch, total_loss):
    progress_bar = tqdm(range(len(dataloader)))
    progress_bar.set_description(f"Loss: {0:>7f}")
    finish_step_num = (epoch - 1) * len(dataloader)

    model.train()
    for step, (X, y) in enumerate(dataloader, start=1):
        X, y = X.to(device), y.to(device)
        optimizer.zero_grad()
        pred = model(X)
        loss = loss_fn(pred, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        lr_scheduler.step()

        total_loss += loss.item()
        progress_bar.set_description(
            f"Loss: {total_loss / (step + finish_step_num):>7f}"
        )
        progress_bar.update(1)

    return total_loss


def test_loop(dataloader, model, mode="Test"):
    assert mode in ["Valid", "Test"]
    size = len(dataloader.dataset)
    correct = 0

    model.eval()
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    correct /= size
    print(f"{mode} Accuracy: {(100*correct):>0.1f}%\n")


learning_rate = 1e-5
epoch_num = 3

loss_fn = nn.CrossEntropyLoss()
optimizer = AdamW(model.parameters(), lr=learning_rate)
lr_scheduler = get_scheduler(
    "linear",
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=epoch_num * len(train_dataloader),
)

total_loss = 0.0
for t in range(epoch_num):
    print(f"Epoch {t+1}/{epoch_num}\n-------------------------------")
    total_loss = train_loop(
        train_dataloader, model, loss_fn, optimizer, lr_scheduler, t + 1, total_loss
    )
    test_loop(valid_dataloader, model, mode="Valid")
print("Done!")

# 4. 保存及加载模型
