<a href="https://colab.research.google.com/github/q759729997/test_pytorch/blob/master/softmax_example.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# softmax回归简洁实现

In [0]:
import torch

读取数据集

In [0]:
import torchvision
import sys

In [0]:
def load_data_fashion_mnist(batch_size, resize=None, root='~/Datasets/FashionMNIST'):
  """Download the fashion mnist dataset and then load into memory."""
  trans = []
  if resize:
    trans.append(torchvision.transforms.Resize(size=resize))
  trans.append(torchvision.transforms.ToTensor())
  
  transform = torchvision.transforms.Compose(trans)
  mnist_train = torchvision.datasets.FashionMNIST(root=root, train=True, download=True, transform=transform)
  mnist_test = torchvision.datasets.FashionMNIST(root=root, train=False, download=True, transform=transform)
  if sys.platform.startswith('win'):
    num_workers = 0  # 0表示不用额外的进程来加速读取数据
  else:
    num_workers = 4
  train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
  test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=num_workers)

  return train_iter, test_iter

In [4]:
batch_size = 256
train_iter,test_iter = load_data_fashion_mnist(batch_size)

  0%|          | 16384/26421880 [00:00<02:57, 148654.33it/s]

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to /root/Datasets/FashionMNIST/FashionMNIST/raw/train-images-idx3-ubyte.gz


26427392it [00:00, 74248827.92it/s]                            


Extracting /root/Datasets/FashionMNIST/FashionMNIST/raw/train-images-idx3-ubyte.gz to /root/Datasets/FashionMNIST/FashionMNIST/raw


32768it [00:00, 472287.45it/s]
  2%|▏         | 98304/4422102 [00:00<00:04, 967795.90it/s]

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to /root/Datasets/FashionMNIST/FashionMNIST/raw/train-labels-idx1-ubyte.gz
Extracting /root/Datasets/FashionMNIST/FashionMNIST/raw/train-labels-idx1-ubyte.gz to /root/Datasets/FashionMNIST/FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to /root/Datasets/FashionMNIST/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


4423680it [00:00, 23970454.92it/s]                         
8192it [00:00, 149350.56it/s]


Extracting /root/Datasets/FashionMNIST/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to /root/Datasets/FashionMNIST/FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to /root/Datasets/FashionMNIST/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz
Extracting /root/Datasets/FashionMNIST/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to /root/Datasets/FashionMNIST/FashionMNIST/raw
Processing...
Done!


In [0]:
# 定义模型
class FlattenLayer(torch.nn.Module):
  def __init__(self):
    super(FlattenLayer, self).__init__()
  def forward(self, x): # x shape: (batch, *, *, ...)
    return x.view(x.shape[0], -1)

num_inputs = 784
num_outputs = 10

class SoftMaxNet(torch.nn.Module):
  def __init__(self, num_inputs, num_outputs):
    super(SoftMaxNet, self).__init__()
    self.flatten = FlattenLayer()
    self.linear = torch.nn.Linear(num_inputs, num_outputs)
  def forward(self, x): # x shape: (batch, 1, 28, 28)
    # y = self.linear(x.view(x.shape[0], -1))
    y = self.linear(self.flatten(x))
    return self.softmax(y)
  
  def softmax(self,X):
    X_exp = X.exp()
    partition = X_exp.sum(dim=1, keepdim=True)
    return X_exp / partition  # 这里应用了广播机制
    
net = SoftMaxNet(num_inputs, num_outputs)

In [6]:
print(net)

SoftMaxNet(
  (flatten): FlattenLayer()
  (linear): Linear(in_features=784, out_features=10, bias=True)
)


In [7]:
# 损失函数
loss = torch.nn.CrossEntropyLoss()
print(loss)

CrossEntropyLoss()


In [0]:
# 定义优化算法
optimizer = torch.optim.Adam(net.parameters())

In [9]:
print(optimizer)

Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.001
    weight_decay: 0
)


In [0]:
def evaluate_accuracy(data_iter, net):
  acc_sum, n = 0.0, 0
  for X, y in data_iter:
    acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
    n += y.shape[0]
  return acc_sum / n

In [0]:
# 训练模型
def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, optimizer=None):
  for epoch in range(num_epochs):
    train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
    for X, y in train_iter:
      y_hat = net(X)
      l = loss(y_hat, y).sum()
      # 梯度清零
      optimizer.zero_grad()
      l.backward()
      optimizer.step()
      train_l_sum += l.item()
      # 按指定的维度返回最大元素的坐标
      train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
      n += y.shape[0]
    test_acc = evaluate_accuracy(test_iter, net)
    print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
          % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))

In [12]:
num_epochs = 5
train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, optimizer)

epoch 1, loss 0.0072, train acc 0.689, test acc 0.753
epoch 2, loss 0.0067, train acc 0.795, test acc 0.801
epoch 3, loss 0.0066, train acc 0.819, test acc 0.816
epoch 4, loss 0.0065, train acc 0.828, test acc 0.823
epoch 5, loss 0.0065, train acc 0.835, test acc 0.824


In [13]:
# 测试
for X, y in train_iter:
  y_hat = net(X)
  print(y_hat)
  # 按指定的维度返回最大元素的坐标
  print(y_hat.argmax(dim=1))
  break

tensor([[7.0121e-13, 6.7845e-14, 1.6505e-06,  ..., 1.1991e-11, 9.9999e-01,
         1.2302e-06],
        [1.7142e-06, 2.9782e-06, 1.0952e-01,  ..., 2.1148e-09, 4.9318e-04,
         7.6909e-06],
        [1.5868e-10, 1.9363e-09, 1.3098e-08,  ..., 9.9980e-01, 3.4598e-05,
         1.2609e-05],
        ...,
        [1.7845e-09, 4.0784e-08, 5.4013e-07,  ..., 9.8444e-01, 1.9268e-04,
         5.6761e-04],
        [7.9065e-05, 1.1106e-04, 1.5078e-03,  ..., 8.7471e-08, 4.0056e-01,
         1.6939e-05],
        [2.5133e-07, 7.6533e-13, 1.3707e-06,  ..., 9.0330e-12, 9.9999e-01,
         3.2008e-06]], grad_fn=<DivBackward0>)
tensor([8, 6, 7, 0, 0, 4, 1, 7, 8, 4, 8, 4, 9, 7, 8, 9, 9, 8, 0, 1, 0, 7, 5, 0,
        0, 2, 2, 0, 8, 8, 0, 9, 4, 1, 3, 0, 2, 7, 1, 5, 2, 2, 0, 7, 0, 0, 9, 5,
        1, 2, 3, 0, 3, 0, 8, 4, 5, 1, 4, 7, 5, 2, 7, 5, 2, 5, 0, 4, 3, 0, 7, 8,
        1, 3, 2, 1, 3, 5, 7, 7, 9, 9, 5, 4, 8, 6, 0, 3, 4, 1, 0, 6, 8, 5, 9, 3,
        3, 4, 5, 5, 0, 1, 7, 0, 4, 3, 7, 9, 3, 2, 1, 0, 1, 9

In [14]:
def softmax(X):
  X_exp = X.exp()
  partition = X_exp.sum(dim=1, keepdim=True)
  return X_exp / partition  # 这里应用了广播机制

X = torch.rand((2, 5))
X_prob = softmax(X)
print(X)
print(X_prob, X_prob.sum(dim=1))

tensor([[0.0168, 0.7681, 0.8237, 0.8822, 0.7445],
        [0.8182, 0.6885, 0.2124, 0.1123, 0.5423]])
tensor([[0.1020, 0.2162, 0.2285, 0.2423, 0.2111],
        [0.2720, 0.2389, 0.1484, 0.1343, 0.2064]]) tensor([1., 1.])
