<a href="https://colab.research.google.com/github/quang-pham/DeeplearningTutorial/blob/main/CIFAR_10_Alexnet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Main
import numpy as np

# Visualize
import matplotlib.pyplot as plt

# Deep learning
import torch
import torchvision
from torchvision import models
import torchvision.transforms as transforms
from torch.utils.data import TensorDataset, DataLoader
from torch import nn
import torch.nn.functional as F
from torch import optim

# TPUを利用する
import torch_xla


In [2]:
# データセットの読み込み
train_dataset = torchvision.datasets.CIFAR10(
    root="./data/",
    train="true",
    download=True,
    transform=transforms.Compose(
        [
         transforms.Resize(224),
         transforms.ToTensor(),
         transforms.Normalize(
             [0.5, 0.5, 0.5],
             [0.5, 0.5, 0.5],
         )
        ]
    )
)

test_dataset = torchvision.datasets.CIFAR10(
    root="./data/",
    download=True,
    transform=transforms.Compose(
        [
         transforms.Resize(224),
         transforms.ToTensor(),
         transforms.Normalize(
             [0.5, 0.5, 0.5],
             [0.5, 0.5, 0.5],
         )
        ]
    )
)

image, label = train_dataset[0]
print(f"画像サイズ: {image.size()}")
print(f"ラベル: {label}")


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./data/cifar-10-python.tar.gz to ./data/
Files already downloaded and verified
画像サイズ: torch.Size([3, 224, 224])
ラベル: 6


In [3]:
# ミニバッチへ変換
train_batch = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=64,
    shuffle=True,
    num_workers=2
)

test_batch = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=64,
    shuffle=False,
    num_workers=2
)

# ミニバッチを確認
for images, labels in train_batch:
  print(f"バッチ画像サイズ: {images.size()}")
  print(f"画像サイズ: {images[0].size()}")
  print(f"バッチラベルサイズ: {labels.size()}")
  break

バッチ画像サイズ: torch.Size([64, 3, 224, 224])
画像サイズ: torch.Size([3, 224, 224])
バッチラベルサイズ: torch.Size([64])


In [4]:
# GPUの設定
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

# モデル取得
net = models.alexnet(pretrained=True)
net = net.to(device)
print(net)

cuda


Downloading: "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth" to /root/.cache/torch/hub/checkpoints/alexnet-owt-7be5be79.pth


  0%|          | 0.00/233M [00:00<?, ?B/s]

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [5]:
# すでに学習した重みについて学習させないようにする
for param in net.parameters():
  param.requires_grad = False
net = net.to(device)


In [6]:
# 出力層のクラスを1,000から10へ変更
num_features = net.classifier[6].in_features
num_classess = 10

net.classifier[6] = nn.Linear(in_features=num_features, out_features=num_classess).to(device)
print(net)

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [7]:
# 損失関数
criterion = nn.CrossEntropyLoss()

# 最適化関数
optimizer = optim.Adam(net.parameters())

In [8]:
# 学習
train_loss_list = [] # 学習損失
train_accuracy_list = [] #学習データの正答率
test_loss_list = [] #評価損失
test_accuracy_list = [] #テストデータの正答率

epoch = 10
for i in range(0, epoch, 1):
  print("----------------------------")
  print(f"Epoch: {i+1}/{epoch}")

  # 損失と正答率の初期化
  train_loss = 0
  train_accuracy = 0
  test_loss = 0
  test_accuracy = 0

  # 学習
  net.train()
  for images, labels in train_batch:
    images = images.to(device)
    labels = labels.to(device)

    
    optimizer.zero_grad() # 勾配初期化
    y_pred_prob = net(images)
    loss = criterion(y_pred_prob, labels)
    loss.backward() # 勾配計算
    optimizer.step() #重み更新

    train_loss += loss.item()
    y_pred_labels = torch.max(y_pred_prob, 1)[1]

    # 正解数のカウント
    train_accuracy += torch.sum(y_pred_labels==labels).item() / len(labels)
  
  epoch_train_loss = train_loss / len(train_batch)
  epoch_train_accuracy = train_accuracy / len(train_batch)

  # 評価
  net.eval()
  with torch.no_grad():
    for images, labels in test_batch:
      images = images.to(device)
      labels = labels.to(device)

      y_pred_prob = net(images)
      loss = criterion(y_pred_prob, labels)
      test_loss += loss.item()

      y_pred_labels = torch.max(y_pred_prob, 1)[1]
      test_accuracy += torch.sum(y_pred_labels == labels).item() / len(labels)
  epoch_test_loss = test_loss / len(test_batch)
  epoch_test_accuracy = test_accuracy / len(test_batch)
  


----------------------------
Epoch: 1/10


  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


----------------------------
Epoch: 2/10


KeyboardInterrupt: ignored