<a href="https://colab.research.google.com/github/snxly/colab/blob/master/pytorch/pytorch_quickstart.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch

In [None]:
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

# 1. Load Dataset

In [None]:
# Training split of FashionMNIST: stored under ./data, downloaded when absent,
# with every image passed through ToTensor().
training_data = datasets.FashionMNIST(
    root='data',
    train=True,
    download=True,
    transform=ToTensor(),
)

# Test split (train=False), using the same storage location and transform.
test_data = datasets.FashionMNIST(
    root='data',
    train=False,
    download=True,
    transform=ToTensor(),
)

100%|██████████| 26.4M/26.4M [00:00<00:00, 109MB/s]
100%|██████████| 29.5k/29.5k [00:00<00:00, 3.56MB/s]
100%|██████████| 4.42M/4.42M [00:00<00:00, 59.7MB/s]
100%|██████████| 5.15k/5.15k [00:00<00:00, 17.6MB/s]


# 2. Pass Dataset to DataLoader

In [None]:
# Number of samples served per mini-batch by both loaders.
batch_size = 64

# Wrap each dataset in a DataLoader so it can be iterated in mini-batches.
# NOTE(review): the training loader keeps the dataset order (no shuffle=True);
# confirm that this is intentional before relying on the training results.
train_loader = DataLoader(dataset=training_data, batch_size=batch_size)
test_loader = DataLoader(dataset=test_data, batch_size=batch_size)

# Peek at the first test batch only, to show the input shape and the
# shape/dtype of the labels.
for x, y in test_loader:
    print(x.shape)
    print(y.shape, y.dtype)
    break


torch.Size([64, 1, 28, 28])
torch.Size([64]) torch.int64


# 3. Create models

In [None]:
# Pick the compute device once; the rest of the notebook reuses `device`.
# The result is always a torch.device (never a bare string), so attribute
# access such as `device.type` works on every code path.
if hasattr(torch, 'accelerator') and torch.accelerator.is_available():
    # Preferred API on recent PyTorch releases: whichever accelerator is present.
    device = torch.accelerator.current_accelerator()
elif torch.cuda.is_available():
    # Fallback for PyTorch versions that do not ship torch.accelerator.
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'Using {device} device')

# Model definition
class NeuralNetwork(nn.Module):
    """Fully connected classifier: flatten, then Linear/ReLU x2, then Linear.

    The input is flattened from dimension 1 onwards and must yield
    28*28 = 784 features per sample; the output has 10 raw scores (logits)
    per sample.
    """

    def __init__(self):
        # The layers (the network structure) are declared here.
        super().__init__()
        self.flatten = nn.Flatten()

        in_features, hidden, n_classes = 28 * 28, 512, 10
        layers = [
            nn.Linear(in_features, hidden),
            nn.ReLU(),
            nn.Linear(hidden, hidden),
            nn.ReLU(),
            nn.Linear(hidden, n_classes),
        ]
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Describe how data flows through the network; returns the logits."""
        return self.linear_relu_stack(self.flatten(x))

# Build the network, move it to the selected device, and print its layer layout.
model = NeuralNetwork()
model = model.to(device)
print(model)

Using cuda device
NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


# 4. Optimizing model parameters - train the model

To train a model, we need:
- a loss function, `nn.CrossEntropyLoss`, and
- an optimizer, `torch.optim.SGD(model.parameters(), lr=1e-3)`.

In [None]:
# Training procedure: feed data, compute the loss, backpropagate,
# update the weights, reset the gradients, and print progress.
def train(dataloader, model, loss_fn, optimizer, device=None):
    """Run one training epoch over ``dataloader``.

    For every mini-batch this performs the forward pass, computes the loss,
    backpropagates, applies one optimizer step and clears the gradients.
    A progress line is printed on every 100th batch (batch index 0, 100, ...).

    Args:
        dataloader: iterable of ``(inputs, targets)`` batches that exposes a
            ``dataset`` attribute supporting ``len()``.
        model: the ``nn.Module`` being trained.
        loss_fn: callable ``loss_fn(pred, target)`` returning a scalar tensor.
        optimizer: optimizer constructed over the model's parameters.
        device: device each batch is moved to. When omitted, the device of the
            model's first parameter is used (CPU if the model has none), so
            the function does not depend on a notebook-level ``device``.

    Returns:
        None. Progress is reported through ``print``.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    # Switch the model to training mode.
    model.train()

    # Computed once, outside the loop, because it does not change per batch.
    data_size = len(dataloader.dataset)
    # Running count of samples actually processed. Derived from each batch's
    # real length, so it stays correct for any loader batch size and for a
    # smaller final batch.
    seen = 0

    for batch, (x, y) in enumerate(dataloader):
        # Move the batch to the device the model lives on.
        x = x.to(device)
        y = y.to(device)

        # Forward pass and loss.
        pred = model(x)
        loss = loss_fn(pred, y)

        # Backpropagation: fills the `.grad` attribute of every parameter.
        loss.backward()
        # The optimizer holds references to the same parameter tensors, so it
        # reads their `.grad` here and updates the weights.
        optimizer.step()
        # Gradients accumulate across backward() calls, so they must be cleared
        # once per iteration. Doing it here or right before backward() is
        # equivalent.
        optimizer.zero_grad()

        seen += len(x)
        # Progress report: samples trained so far and the current loss.
        if batch % 100 == 0:
            print(f'loss = {loss.item()}, trained data = {seen} / {data_size}')

In [None]:
# 训练完成以后， 在 test set上进行评估
def test(dataloader, model, loss_fn):
  # 设置评估模式
  model.eval()

  loss = 0
  correct = 0
  # 评估模式无需更新梯度，这个能放在 model.eval() 里做吗？
  with torch.no_grad():
    for x,y in dataloader:
      # 喂数据
      x = x.to(device)
      y = y.to(device)
      pred = model(x)
      # 统计loss
      loss += loss_fn(pred, y).item()
      # 计算准确数目
      correct += (pred.argmax(1) == y).type(torch.float).sum().item()

  batch_num = len(dataloader)
  loss_avg = loss / batch_num
  data_num = len(dataloader.dataset)
  accuracy = correct / data_num
  print(f'Average loss is {loss_avg}, accuracy is {accuracy} \n')

In [None]:
# Actual training run: every epoch trains on the full training loader and
# then reports loss/accuracy on the test loader.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-3)

epoch = 10  # total number of passes over the training data
for i in range(epoch):
    epoch_no = i + 1  # 1-based counter for display
    print(f'Epoch {epoch_no}\n')
    train(train_loader, model, loss_fn, optimizer)
    test(test_loader, model, loss_fn)

Epoch 1

loss = 1.1603496074676514, trained data = 64 / 60000
loss = 1.1513525247573853, trained data = 6464 / 60000
loss = 0.9771229028701782, trained data = 12864 / 60000
loss = 1.1207197904586792, trained data = 19264 / 60000
loss = 0.985601544380188, trained data = 25664 / 60000
loss = 1.0138530731201172, trained data = 32064 / 60000
loss = 1.0464586019515991, trained data = 38464 / 60000
loss = 0.9951626062393188, trained data = 44864 / 60000
loss = 1.0419878959655762, trained data = 51264 / 60000
loss = 0.9638720750808716, trained data = 57664 / 60000
Average loss is 0.9795228512423813, accuracy is 0.6586 

Epoch 2

loss = 1.0444252490997314, trained data = 64 / 60000
loss = 1.0539417266845703, trained data = 6464 / 60000
loss = 0.864147424697876, trained data = 12864 / 60000
loss = 1.03250253200531, trained data = 19264 / 60000
loss = 0.9007618427276611, trained data = 25664 / 60000
loss = 0.9209322929382324, trained data = 32064 / 60000
loss = 0.9709427356719971, trained data =

# 5. Saving models

In [None]:
# Persist only the model's state_dict (its parameters and buffers), not the
# module object itself.
model_path = 'model.pth'
torch.save(obj=model.state_dict(), f=model_path)
print('Save model')

Save model


# 6. Loading model


In [None]:
# Rebuild the architecture first; the checkpoint holds only the parameters.
model = NeuralNetwork().to(device)

# weights_only=True restricts unpickling to tensors and plain containers.
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU runtime also loads on a CPU-only runtime.
weights = torch.load(model_path, map_location=device, weights_only=True)
model.load_state_dict(weights)

<All keys matched successfully>

# 7. Make predictions

In [None]:
# Switch to evaluation mode.
model.eval()

# First test sample: an image tensor and its integer class label.
x, y = test_data[0]
# eval() and no_grad() are independent: eval() changes layer behaviour, while
# no_grad() disables gradient tracking, so both are used here.
with torch.no_grad():
    # Add an explicit leading batch dimension. The model expects batched input;
    # passing the bare image only worked because its single channel happened
    # to be treated as the batch axis by nn.Flatten.
    x = x.unsqueeze(0).to(device)
    pred = model(x)
    # Highest-scoring class for the single sample in the batch.
    pred_idx = pred[0].argmax()
    print(f'prediction = {pred_idx}, actually is {y}')

prediction = 9, actually is 9
