<a href="https://colab.research.google.com/github/miyuu157/LB_miyuu/blob/main/Fashion_1day.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 課題

CNNを用いて、FashionMNISTの高精度な分類器を実装してみましょう。</br>
モデルのレイヤーを変更してみるなどして精度の向上にチャンレンジして下さい。

### 目標値

Accuracy: 93%

### ルール

- 訓練データはx_train、 t_train、テストデータはx_test、t_testで与えられます。ご自身のマイドライブにコピーして作業を始めてください。
- 予測ラベルは one_hot表現ではなく0~9のクラスラベル で表してください。
- **下のセルで指定されているx_train、t_train以外の学習データは使わないでください。**
- torchvision等で既に実装されているモデルを使用しないで下さい。**ただし、torchvision.transformsのみ使用しても良いです。**
- Conv, Pool, Flatten, Denseは必ず下記のクラスを補完する形で作成してください．（nn.Conv2D, nn.Pool2d, nn.Flatten, nn.Dense等を使用しないでください．）
- n_epoch <= 20 （学習時間が伸び1dayに収まらない可能性があります）
- input size <= 32x32



### 評価方法

- 予測ラベルのx_testに対する精度 (Accuracy) で評価します。


### データの読み込み　※この部分は変更しないでください

In [62]:
# この部分は変更しないでください
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [63]:
# この部分は変更しないでください
import gzip
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.autograd as autograd
import torch.nn.functional as F
import inspect

from torchvision import transforms
from PIL import Image

#学習データ

with gzip.open('/content/drive/MyDrive/data/train-labels-idx1-ubyte.gz', 'rb') as lbpath:
    t_train = np.frombuffer(lbpath.read(), dtype=np.uint8, offset=8)
with gzip.open('/content/drive/MyDrive/data/train-images-idx3-ubyte.gz', 'rb') as imgpath:
    x_train = np.frombuffer(imgpath.read(), dtype=np.uint8, offset=16).reshape(len(t_train), 784)

val_size = 10000 

#テストデータ
with gzip.open('/content/drive/MyDrive/data/t10k-labels-idx1-ubyte.gz', 'rb') as lbpath:
    t_test = np.frombuffer(lbpath.read(), dtype=np.uint8, offset=8)
with gzip.open('/content/drive/MyDrive/data/t10k-images-idx3-ubyte.gz', 'rb') as imgpath:
    x_test = np.frombuffer(imgpath.read(), dtype=np.uint8, offset=16).reshape(len(t_test), 784)


### DatasetとDataloader定義

In [64]:
# 前処理 -> 必要があれば編集してください
data_transform_test = transforms.Compose([
    transforms.ToTensor()  
])

data_transform_train = transforms.Compose([
    transforms.ToTensor(),  
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip()    
])

# Dataset定義 -> 必要があれば編集してください
class train_dataset(torch.utils.data.Dataset):
    def __init__(self, x_train, t_train, transform):
        self.x_train = x_train
        self.t_train = t_train
        self.transform = transform

    def __len__(self):
        return self.x_train.shape[0]

    def __getitem__(self, idx):
        img = np.uint8(self.x_train[idx]).reshape(28, 28)
        img = self.transform(Image.fromarray(img, mode="L"))
        return img, torch.tensor(self.t_train[idx], dtype=torch.long)

class test_dataset(torch.utils.data.Dataset):
    def __init__(self, x_test, t_test, transform):
        self.x_test = x_test
        self.t_test = t_test
        self.transform = transform

    def __len__(self):
        return self.x_test.shape[0]

    def __getitem__(self, idx):
        img = np.uint8(self.x_test[idx]).reshape(28, 28)
        img = self.transform(Image.fromarray(img, mode="L"))
        return img, torch.tensor(self.t_test[idx], dtype=torch.long)

trainval_data = train_dataset(x_train, t_train, transform=data_transform_train)
test_data = test_dataset(x_test, t_test, transform=data_transform_test)

In [65]:
# Dataloader 定義 -> 必要があれば編集してください

batch_size = 128
#バッチサイズ変える？

train_size = len(trainval_data) - val_size
train_data, val_data = torch.utils.data.random_split(trainval_data, [train_size, val_size])

dataloader_train = torch.utils.data.DataLoader(
    train_data,
    batch_size=batch_size,
    shuffle=True
)

dataloader_valid = torch.utils.data.DataLoader(
    val_data,
    batch_size=batch_size,
    shuffle=True
)

dataloader_test = torch.utils.data.DataLoader(
    test_data,
    batch_size=batch_size,
    shuffle=False
)

### 畳み込みニューラルネットワーク(CNN)の実装

- 各レイヤーの定義 -> コードを作成してください．

In [66]:
class Conv(nn.Module):
    def __init__(self, filter_shape, function=lambda x: x, stride=(1, 1), padding=0):
        super().__init__()
        # Heの初期値
        fan_in = filter_shape[1] * filter_shape[2] * filter_shape[3]
        # filter_shape: (出力チャンネル数)x(入力チャンネル数)x(縦の次元数)x(横の次元数)
        fan_out = filter_shape[0] * filter_shape[2] * filter_shape[3]

        self.W = nn.Parameter(torch.tensor(rng.normal(
                        0,
                        np.sqrt(2/fan_in),
                        size=filter_shape
                    ).astype('float32')))

        # バイアスはフィルタごとなので, 出力フィルタ数と同じ次元数
        self.b = nn.Parameter(torch.tensor(np.zeros((filter_shape[0]), dtype='float32')))
        self.function = function
        self.stride = stride
        self.padding = padding
        
    def forward(self, x):
        u = F.conv2d(x, self.W, bias=self.b, stride=self.stride, padding=self.padding)
        return self.function(u)    


class Pooling(nn.Module):
    def __init__(self, ksize=(2, 2), stride=(2, 2), padding=0):
        super().__init__()
        self.ksize = ksize
        self.stride = stride
        self.padding = padding

    def forward(self, x):
        return F.avg_pool2d(x, kernel_size=self.ksize, stride=self.stride, padding=self.padding)
          

class Flatten(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, x):
        return x.view(x.size()[0], -1)


class Dense(nn.Module):
    def __init__(self, in_dim, out_dim, function=lambda x: x):
        super().__init__()
        # He Initialization
        # in_dim: 入力の次元数、out_dim: 出力の次元数              
        self.W = nn.Parameter(torch.tensor(rng.normal(
                        0,
                        np.sqrt(2/in_dim),
                        size=(in_dim, out_dim)
                    ).astype('float32')))

        self.b = nn.Parameter(torch.tensor(np.zeros([out_dim]).astype('float32')))
        self.function = function
    def forward(self, x):
        return self.function(torch.matmul(x, self.W) + self.b)

- 畳み込みニューラルネットワークモデル定義 -> コードを作成してください．

In [67]:
class ClassificationModel(nn.Module):
    # WRITE ME
    def __init__(self):
        super(ClassificationModel, self).__init__()
        self.conv = nn.Sequential(
            Conv((20, 1, 5, 5), F.relu),     # 28x28x 1 -> 24x24x20
            Pooling((2, 2)),                 # 24x24x20 -> 12x12x20
            Conv((50, 20, 5, 5), F.relu),    # 12x12x20 ->  8x 8x50
            Pooling((2, 2)),                 #  8x 8x50 ->  4x 4x50
            Flatten(),
            Dense(4*4*50, 10)
        )        

    def forward(self, x):
      x = self.conv(x) 
      return x




- モデルの引用

In [68]:
# モデルの引用
rng = np.random.RandomState(1234)
random_state = 42
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

conv_net = ClassificationModel()
conv_net.to(device)

ClassificationModel(
  (conv): Sequential(
    (0): Conv()
    (1): Pooling()
    (2): Conv()
    (3): Pooling()
    (4): Flatten()
    (5): Dense()
  )
)

### 最適な条件を見つける




In [71]:
def myfunc(train_data, batch_size, opt, layer, schedule, n_epochs=20, P = 0.5):

  # 前処理
  data_transform_test = transforms.Compose([
      transforms.ToTensor()  
  ])

  #augmentation
  data_transform_train = transforms.Compose([
      transforms.ToTensor(),  
      transforms.RandomHorizontalFlip(p=P),
      transforms.RandomVerticalFlip(p=P)    
  ])

  # Dataset定義
  trainval_data = train_dataset(x_train, t_train, transform=data_transform_train)
  test_data = test_dataset(x_test, t_test, transform=data_transform_test)

  #dataloader_trainの呼び出し
  train_size = len(trainval_data) - val_size
  train_data, val_data = torch.utils.data.random_split(trainval_data, [train_size, val_size])

  dataloader_train = torch.utils.data.DataLoader(
      train_data,
      batch_size=batch_size,
      shuffle=True
  )

 
  #optimizer
  if opt=="sgd":
    optimizer = optim.SGD(conv_net.parameters(), lr=0.01,momentum=0.9)# WRITE ME
  if opt=="adam":
    optimizer = optim.Adam(conv_net.parameters(), lr=0.001, eps=1e-08, weight_decay=0, amsgrad=False)  
  if opt =="rms":
    optimizer = optim.RMSprop(conv_net.parameters(), lr=0.001, alpha=0.99, eps=1e-08, weight_decay=0, momentum=0.5, centered=False)


  #layer architecture
  if layer=="low":
    #lenetに似せてる？
    conv_net = nn.Sequential(
        Conv((20, 1, 5, 5), F.relu, stride=(1, 1), padding=0),     # 28x28x 1 -> 24x24x20
        # filter_shape: (出力チャンネル数)x(入力チャンネル数)x(縦の次元数)x(横の次元数)

        #in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, 
        #bias=True, padding_mode='zeros'
        
        Pooling(ksize=(2, 2), stride=(2, 2), padding=0),          # 24x24x20 -> 12x12x20
        Conv((50, 20, 5, 5), F.relu, stride=(1, 1), padding=0),    # 12x12x20 ->  8x 8x50
        Pooling(ksize=(2, 2), stride=(2, 2), padding=0),          #  8x 8x50 ->  4x 4x50
        Flatten(),
        Dense(4*4*50, 10)
    )
  if layer=="alexnet":
    #alexnet
    conv_net = nn.Sequential(
        Conv((20, 1, 5, 5),F.relu, stride=(1, 1), padding=2),     # 28x28x 1 -> 24x24x20
        Pooling(ksize=(2, 2), stride=(2, 2), padding=0),            #24x24x20 -> 12x12x20
        Conv((64, 20, 5, 5), F.relu, stride=(1, 1), padding=2),    #12x12x20 -> 12x12x64
        Pooling(ksize=(2, 2), stride=(2, 2), padding=0),            #12x12x64 -> 6x6x64
        Conv((128, 64, 3, 3), F.relu, stride=(1, 1), padding=1),   #6x6x64 -> 6x6x128
        Conv((64, 128, 3, 3), F.relu, stride=(1, 1), padding=1),   #6x6x128 -> 6x6x64
        Conv((64, 64, 3, 3), F.relu, stride=(1, 1), padding=1),   #6x6x64 -> 6x6x64
        Pooling(ksize=(2, 2), stride=(2, 2), padding=0),            #3x3x64 -> 3x3x64
        Flatten(),
        Dense(3*3*64, 3*3*64),
        Dense(3*3*64, 3*3*64),
        Dense(3*3*64, 10)
    )
  if layer=="vgg":
    conv_net = nn.Sequential(
        Conv((20, 1, 3, 3),F.relu, stride=(1, 1), padding=1),     # 28x28x 1 -> 28x28x20
        Conv((20, 20, 3, 3),F.relu, stride=(1, 1), padding=1),     # 24x28x 20 -> 28x28x20
        Pooling(ksize=(2, 2), stride=(2, 2), padding=0),            #28x28x20 -> 14x14x20
        Conv((40, 20, 3, 3),F.relu, stride=(1, 1), padding=2),     # 16x16x 40 -> 16x16x40
        Conv((40, 40, 3, 3),F.relu, stride=(1, 1), padding=1),     # 16x16x 40 -> 16x16x40
        Pooling(ksize=(2, 2), stride=(2, 2), padding=0),            #16x16x40 -> 8x8x40
        Conv((80, 40, 3, 3),F.relu, stride=(1, 1), padding=1),     # 8x8x 40 -> 8x8x80
        Conv((80, 80, 3, 3),F.relu, stride=(1, 1), padding=1),     # 8x8x 80 -> 8x8x80
        Conv((80, 80, 3, 3),F.relu, stride=(1, 1), padding=1),     # 8x8x 80 -> 8x8x80
        Pooling(ksize=(2, 2), stride=(2, 2), padding=0),            #8x8x80 -> 4x4x80
        Conv((160, 80, 3, 3),F.relu, stride=(1, 1), padding=1),     # 4x4x 80 -> 4x4x160
        Conv((160, 160, 3, 3),F.relu, stride=(1, 1), padding=1),     # 4x4x 160 -> 4x4x160
        Conv((160, 160, 3, 3),F.relu, stride=(1, 1), padding=1),     # 4x4x 160 -> 4x4x160
        Pooling(ksize=(2, 2), stride=(2, 2), padding=0),            #4x4x160 -> 2x2x160
        Flatten(),
        Dense(2*2*160, 2*2*160),
        Dense(2*2*160, 160),
        Dense(160, 10)
    )
  if layer=="vgg16":
    conv_net = nn.Sequential(
        Conv((20, 1, 3, 3),F.relu, stride=(1, 1), padding=1),     # 28x28x 1 -> 28x28x20
        Conv((20, 20, 3, 3),F.relu, stride=(1, 1), padding=1),     # 24x28x 20 -> 28x28x20
        Pooling(ksize=(2, 2), stride=(2, 2), padding=0),            #28x28x20 -> 14x14x20
        Conv((40, 20, 3, 3),F.relu, stride=(1, 1), padding=2),     # 16x16x 40 -> 16x16x40
        Conv((40, 40, 3, 3),F.relu, stride=(1, 1), padding=1),     # 16x16x 40 -> 16x16x40
        Pooling(ksize=(2, 2), stride=(2, 2), padding=0),            #16x16x40 -> 8x8x40
        Conv((80, 40, 3, 3),F.relu, stride=(1, 1), padding=1),     # 8x8x 40 -> 8x8x80
        Conv((80, 80, 3, 3),F.relu, stride=(1, 1), padding=1),     # 8x8x 80 -> 8x8x80
        Conv((80, 80, 3, 3),F.relu, stride=(1, 1), padding=1),     # 8x8x 80 -> 8x8x80
        Pooling(ksize=(2, 2), stride=(2, 2), padding=0),            #8x8x80 -> 4x4x80
        Conv((160, 80, 3, 3),F.relu, stride=(1, 1), padding=1),     # 4x4x 80 -> 4x4x160
        Conv((160, 160, 3, 3),F.relu, stride=(1, 1), padding=1),     # 4x4x 160 -> 4x4x160
        Conv((160, 160, 3, 3),F.relu, stride=(1, 1), padding=1),     # 4x4x 160 -> 4x4x160
        Pooling(ksize=(2, 2), stride=(2, 2), padding=0),            #4x4x160 -> 2x2x160
        Conv((160, 160, 3, 3),F.relu, stride=(1, 1), padding=1),     # 2x2x 160 -> 2x2x160
        Conv((160, 160, 3, 3),F.relu, stride=(1, 1), padding=1),     # 2x2x 160 -> 2x2x160
        Conv((160, 160, 3, 3),F.relu, stride=(1, 1), padding=1),     # 2x2x 160 -> 2x2x160
        Pooling(ksize=(2, 2), stride=(2, 2), padding=0),            #2x2x160 -> 1x1x160
        Flatten(),
        Dense(1*1*160, 1*1*160),
        Dense(1*1*160, 40),
        Dense(40, 10)
    )
  if layer=="vgg19":
    conv_net = nn.Sequential(
        Conv((20, 1, 3, 3),F.relu, stride=(1, 1), padding=1),     # 28x28x 1 -> 28x28x20
        Conv((20, 20, 3, 3),F.relu, stride=(1, 1), padding=1),     # 24x28x 20 -> 28x28x20
        Pooling(ksize=(2, 2), stride=(2, 2), padding=0),            #28x28x20 -> 14x14x20
        Conv((40, 20, 3, 3),F.relu, stride=(1, 1), padding=2),     # 16x16x 40 -> 16x16x40
        Conv((40, 40, 3, 3),F.relu, stride=(1, 1), padding=1),     # 16x16x 40 -> 16x16x40
        Pooling(ksize=(2, 2), stride=(2, 2), padding=0),            #16x16x40 -> 8x8x40
        Conv((80, 40, 3, 3),F.relu, stride=(1, 1), padding=1),     # 8x8x 40 -> 8x8x80
        Conv((80, 80, 3, 3),F.relu, stride=(1, 1), padding=1),     # 8x8x 80 -> 8x8x80
        Conv((80, 80, 3, 3),F.relu, stride=(1, 1), padding=1),     # 8x8x 80 -> 8x8x80
        Conv((80, 80, 3, 3),F.relu, stride=(1, 1), padding=1),     # 8x8x 80 -> 8x8x80
        Pooling(ksize=(2, 2), stride=(2, 2), padding=0),            #8x8x80 -> 4x4x80
        Conv((160, 80, 3, 3),F.relu, stride=(1, 1), padding=1),     # 4x4x 80 -> 4x4x160
        Conv((160, 160, 3, 3),F.relu, stride=(1, 1), padding=1),     # 4x4x 160 -> 4x4x160
        Conv((160, 160, 3, 3),F.relu, stride=(1, 1), padding=1),     # 4x4x 160 -> 4x4x160
        Conv((160, 160, 3, 3),F.relu, stride=(1, 1), padding=1),     # 4x4x 160 -> 4x4x160
        Pooling(ksize=(2, 2), stride=(2, 2), padding=0),            #4x4x160 -> 2x2x160
        Conv((160, 160, 3, 3),F.relu, stride=(1, 1), padding=1),     # 2x2x 160 -> 2x2x160
        Conv((160, 160, 3, 3),F.relu, stride=(1, 1), padding=1),     # 2x2x 160 -> 2x2x160
        Conv((160, 160, 3, 3),F.relu, stride=(1, 1), padding=1),     # 2x2x 160 -> 2x2x160
        Conv((160, 160, 3, 3),F.relu, stride=(1, 1), padding=1),     # 2x2x 160 -> 2x2x160
        Pooling(ksize=(2, 2), stride=(2, 2), padding=0),            #2x2x160 -> 1x1x160
        Flatten(),
        Dense(1*1*160, 1*1*160),
        Dense(1*1*160, 40),
        Dense(40, 10)
    )



  #schedule
  if schedule=="y":
    if layer == "low": 
      scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
    if layer == "vgg16" or layer == "vgg19":
      scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
    else:
      if opt == "adam":
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)
      else:
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

  conv_net.to(device)


  #モデルトレーニング
  for epoch in range(n_epochs):
      losses_train = []
      losses_valid = []
      train_num = 0
      train_true_num = 0
      valid_num = 0
      valid_true_num = 0

      processbar = tqdm(total = (train_size // batch_size + 1))
      processbar.set_description("Epoch %02d" % (epoch + 1))
      conv_net.train()  # 訓練時には勾配を計算するtrainモードにする
      for x, t in dataloader_train:
          # WRITE ME 
          #train_num += t.size()[0]

          conv_net.zero_grad()  # 勾配の初期化

          x = x.to(device)  # テンソルをGPUに移動

          t_hot = torch.eye(10)[t]  # 正解ラベルをone-hot vector化

          t_hot = t_hot.to(device)  # 正解ラベルとone-hot vectorをそれぞれGPUに移動

          y = conv_net.forward(x)  # 順伝播

          loss = -(t_hot*torch.log_softmax(y, dim=-1)).sum(axis=1).mean()  # 誤差(クロスエントロピー誤差関数)の計算

          loss.backward()  # 誤差の逆伝播

          optimizer.step()  # パラメータの更新

          pred = y.argmax(1)  # 最大値を取るラベルを予測ラベルとする

          losses_train.append(loss.tolist())

          acc = torch.where(t - pred.to("cpu") == 0, torch.ones_like(t), torch.zeros_like(t))
          train_num += acc.size()[0]
          train_true_num += acc.sum().item()

          processbar.set_postfix(OrderedDict(loss=loss.tolist(), acc=(acc.sum().item()/acc.size()[0])))
          processbar.update(1)

      conv_net.eval()  # 評価時には勾配を計算しないevalモードにする
      for x, t in dataloader_valid:
          # WRITE ME
          #n_val += t.size()[0]

          x = x.to(device)  # テンソルをGPUに移動

          t_hot = torch.eye(10)[t]  # 正解ラベルをone-hot vector化

          t_hot = t_hot.to(device)  # 正解ラベルとone-hot vectorをそれぞれGPUに移動

          y = conv_net.forward(x)  # 順伝播

          loss = -(t_hot*torch.log_softmax(y, dim=-1)).sum(axis=1).mean()  # 誤差(クロスエントロピー誤差関数)の計算

          pred = y.argmax(1)  # 最大値を取るラベルを予測ラベルとする

          losses_valid.append(loss.tolist())

          acc = torch.where(t - pred.to("cpu") == 0, torch.ones_like(t), torch.zeros_like(t))
          valid_num += acc.size()[0]
          valid_true_num += acc.sum().item()

      print('EPOCH: {}, Train [Loss: {:.3f}, Accuracy: {:.3f}], Valid [Loss: {:.3f}, Accuracy: {:.3f}]'.format(
          epoch+1,
          np.mean(losses_train),
          train_true_num/train_num,
          np.mean(losses_valid),
          valid_true_num/valid_num
      ))
      scheduler.step() 
  
  #モデル分類精度テスト 
  conv_net.eval()

  t_pred = []
  test_num = 0
  test_true_num = 0
  for x, t in dataloader_test:
      # Write Me ?
      x = x.to(device)
      y = conv_net.forward(x)
      pred = y.argmax(1) 
      acc = torch.where(t - pred.to("cpu") == 0, torch.ones_like(t), torch.zeros_like(t))
      test_num += acc.size()[0]
      test_true_num += acc.sum().item()

  print("Accuracy on test set: {:.3f}".format(test_true_num/test_num))  

In [None]:
myfunc(train_data, batch_size, opt, layer, schedule, n_epochs=20, P = 0.5)

In [None]:
myfunc(train_data, batch_size, opt, layer, schedule, n_epochs=20, P = 0.5)

### モデルトレーニング
- モデルトレーニングのコードを作成してください．

In [69]:
from tqdm.notebook import tqdm
from collections import OrderedDict
#import torch.optim.lr_scheduler.StepLR as StepLR

n_epochs = 20
optimizer = optim.Adam(conv_net.parameters(), lr=0.0001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)
#学習率Irをスケジューリング？optによって最適な学習率が違う
#optimizerのアルゴリズムをいくつか試す？
#

for epoch in range(n_epochs):
    losses_train = []
    losses_valid = []
    train_num = 0
    train_true_num = 0
    valid_num = 0
    valid_true_num = 0

    processbar = tqdm(total = (train_size // batch_size + 1))
    processbar.set_description("Epoch %02d" % (epoch + 1))
    conv_net.train()  # 訓練時には勾配を計算するtrainモードにする
    for x, t in dataloader_train:
        # WRITE ME 
        #train_num += t.size()[0]

        conv_net.zero_grad()  # 勾配の初期化

        x = x.to(device)  # テンソルをGPUに移動

        t_hot = torch.eye(10)[t]  # 正解ラベルをone-hot vector化

        t_hot = t_hot.to(device)  # 正解ラベルとone-hot vectorをそれぞれGPUに移動

        y = conv_net.forward(x)  # 順伝播

        loss = -(t_hot*torch.log_softmax(y, dim=-1)).sum(axis=1).mean()  # 誤差(クロスエントロピー誤差関数)の計算

        loss.backward()  # 誤差の逆伝播

        optimizer.step()  # パラメータの更新

        pred = y.argmax(1)  # 最大値を取るラベルを予測ラベルとする

        #acc_train += (pred.to("cpu") == t).float().sum().item()
#ここまで確認

        losses_train.append(loss.tolist())

        acc = torch.where(t - pred.to("cpu") == 0, torch.ones_like(t), torch.zeros_like(t))
        train_num += acc.size()[0]
        train_true_num += acc.sum().item()

        processbar.set_postfix(OrderedDict(loss=loss.tolist(), acc=(acc.sum().item()/acc.size()[0])))
        processbar.update(1)

    conv_net.eval()  # 評価時には勾配を計算しないevalモードにする
    for x, t in dataloader_valid:
        # WRITE ME
        #n_val += t.size()[0]

        x = x.to(device)  # テンソルをGPUに移動

        t_hot = torch.eye(10)[t]  # 正解ラベルをone-hot vector化

        t_hot = t_hot.to(device)  # 正解ラベルとone-hot vectorをそれぞれGPUに移動

        y = conv_net.forward(x)  # 順伝播

        loss = -(t_hot*torch.log_softmax(y, dim=-1)).sum(axis=1).mean()  # 誤差(クロスエントロピー誤差関数)の計算

        pred = y.argmax(1)  # 最大値を取るラベルを予測ラベルとする

        #acc_val += (pred.to("cpu") == t).float().sum().item()
 #ここまで確認

        losses_valid.append(loss.tolist())

        acc = torch.where(t - pred.to("cpu") == 0, torch.ones_like(t), torch.zeros_like(t))
        valid_num += acc.size()[0]
        valid_true_num += acc.sum().item()

    print('EPOCH: {}, Train [Loss: {:.3f}, Accuracy: {:.3f}], Valid [Loss: {:.3f}, Accuracy: {:.3f}]'.format(
        epoch+1,
        np.mean(losses_train),
        train_true_num/train_num,
        np.mean(losses_valid),
        valid_true_num/valid_num
    ))
    scheduler.step()

  0%|          | 0/391 [00:00<?, ?it/s]

EPOCH: 1, Train [Loss: 1.369, Accuracy: 0.550], Valid [Loss: 0.959, Accuracy: 0.678]


  0%|          | 0/391 [00:00<?, ?it/s]

EPOCH: 2, Train [Loss: 0.847, Accuracy: 0.708], Valid [Loss: 0.814, Accuracy: 0.719]


  0%|          | 0/391 [00:00<?, ?it/s]

KeyboardInterrupt: ignored

### モデル分類精度テスト

In [None]:
conv_net.eval()

t_pred = []
test_num = 0
test_true_num = 0
for x, t in dataloader_test:
    # Write Me ?
    x = x.to(device)
    y = conv_net.forward(x)
    pred = y.argmax(1) 
    acc = torch.where(t - pred.to("cpu") == 0, torch.ones_like(t), torch.zeros_like(t))
    test_num += acc.size()[0]
    test_true_num += acc.sum().item()

print("Accuracy on test set: {:.3f}".format(test_true_num/test_num))