In [None]:
# Preparation for auto-grading
# `unit` selects which exercise set's grader tests are downloaded below.
unit = "AI7"
# Install the otter-grader package that provides `otter.Notebook`.
!pip install otter-grader 
# Download this unit's test archive, unpack it, then delete the archive file.
!wget https://github.com/keisuke-yanagisawa/isct-efdsai/raw/refs/heads/main/supplementary_materials/{unit}/tests.tar.gz -O {unit}.tar.gz
!tar -zxvf {unit}.tar.gz 
!rm {unit}.tar.gz
import otter
# `grader` is the object the `grader.check(...)` cells in this notebook call.
grader = otter.Notebook()

# 基盤人工知能演習 第7回 畳み込みニューラルネットワーク 補習資料

# Supplemental Material: Exercises in Fundamentals of Artificial Intelligence (7) - AI7: Convolutional Neural Network

## 1 | CNNの構成要素のおさらい

## 1 | Review of CNN Components

#### 練習問題 1-1 | 畳み込み計算の実装

#### Exercise 1-1 | Implementation of Convolution Calculation

2次元画像の畳み込み演算を行う `conv(f,g)` を実装せよ。ただし、`f` は入力行列、`g` はフィルタである。`f`および`g`は二次元のNumPy配列であり、`f`のサイズは`g`よりも大きいことを仮定して良い。

ヒント：`f`から、`g`と同じサイズの部分行列を取得するのは `f[i:i+g.shape[0], j:j+g.shape[1]]` で行うことができる（`i`および`j`の範囲には注意せよ）。

Implement `conv(f,g)` that performs convolution operations on 2D images. Here, `f` is the input matrix and `g` is the filter. It is assumed that both `f` and `g` are 2D NumPy arrays and that the size of `f` is larger than that of `g`.

Hint: You can obtain a submatrix of the same size as `g` from `f` using `f[i:i+g.shape[0], j:j+g.shape[1]]` (be careful about the ranges of `i` and `j`).

In [None]:
# answer column
import numpy as np
import numpy.typing as npt

def conv(f: npt.NDArray[np.float64], g: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
  """Exercise 1-1: apply the 2D filter `g` to the 2D input matrix `f`.

  The body is intentionally a placeholder (`...`) for the student's answer.

  Args:
    f: Input matrix (2D NumPy array). It may be assumed to be larger than `g`.
    g: Filter (2D NumPy array).

  Returns:
    The convolution result as a NumPy array.
    NOTE(review): the expected output size is defined by the grader tests,
    which are not visible here -- confirm before documenting it further.
  """
  ...


In [None]:
grader.check("Exercise 1-1")

#### 練習問題 1-2 | paddingの実装

#### Exercise 1-2 | Implementation of Padding

2次元画像にパディングを追加する `padding(f, p)` を実装せよ。`f`は入力行列、`p`はパディングの幅（正の偶数）であり、例えば `p = 2`とは、行列fの周囲1マスずつに0を追加することを意味する。（授業資料も参照せよ）

Implement `padding(f, p)` that adds padding to a 2D image. Here, `f` is the input matrix, and `p` is the width of the padding (a positive even number). For example, `p = 2` means adding 0s around the matrix `f` by one unit on each side. (Refer to the class materials as well.)

In [None]:
# answer column
import numpy as np
import numpy.typing as npt

def padding(f: npt.NDArray[np.float64], p: int) -> npt.NDArray[np.float64]:
  """Exercise 1-2: add zero padding around the 2D matrix `f`.

  The body is intentionally a placeholder (`...`) for the student's answer.

  Args:
    f: Input matrix (2D NumPy array).
    p: Padding width, a positive even number. For example, `p = 2` adds one
      row/column of zeros on each side of `f`.

  Returns:
    The padded matrix as a NumPy array.
  """
  ...


In [None]:
grader.check("Exercise 1-2")

#### 練習問題 1-3 | flattenの実装

#### Exercise 1-3 | Implementation of Flatten

複数チャネルを持つ2次元画像 `f` を1次元配列に変換する `flatten(f)` を**NumPy配列の関数（メソッド） `f.flatten()` を利用せずに**実装せよ。例えば `f` は `f.shape=(1,28,28)` の形状を持つものとする。

Implement `flatten(f)` that converts a multi-channel 2D image `f` into a 1D array **without using the NumPy array method `f.flatten()`**. For example, assume that `f` has a shape such as `f.shape=(1,28,28)`.

In [None]:
# answer column
import numpy.typing as npt

def flatten(f: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
  """Exercise 1-3: convert a multi-channel 2D image `f` into a 1D array.

  The body is intentionally a placeholder (`...`) for the student's answer,
  which must not use the NumPy array method `f.flatten()`.

  Args:
    f: Image array, e.g. with `f.shape == (1, 28, 28)`.

  Returns:
    A 1D NumPy array holding the elements of `f`.
    NOTE(review): the element order is presumably the same as `f.flatten()`;
    confirm against the grader tests.
  """
  ...


In [None]:
grader.check("Exercise 1-3")

## 2 | CNNを用いた手書き数字の分類予測

## 2 | Classifying Handwritten Digits with a CNN

#### 事前準備

#### Preparation

In [None]:
# Install livelossplot, which provides the `PlotLosses` class imported for the `train` function.
!pip install livelossplot

In [None]:
# Dataset preparation (1)
# Load the scikit-learn digits images, split them into sub-train / validation /
# test sets, and convert every split into PyTorch tensors.

from sklearn import datasets
from sklearn.model_selection import train_test_split
import torch

digits = datasets.load_digits()
q2_X = digits.images
q2_y = digits.target
# Hold out 20% of the data as the test set, then 10% of the remainder as the
# validation set. `random_state` is fixed so the splits are reproducible.
q2_X_train, q2_X_test, q2_y_train, q2_y_test = train_test_split(q2_X, q2_y, test_size=0.2, random_state=42)
q2_X_subtrain, q2_X_val, q2_y_subtrain, q2_y_val = train_test_split(q2_X_train, q2_y_train, test_size=0.1, random_state=42)
# Reshape the images to (N, 1, 8, 8) and scale the pixel values.
# NOTE(review): scikit-learn documents the digits pixel values as 0..16, so
# dividing by 15 would give a maximum slightly above 1 -- confirm whether 16
# was intended. Left unchanged because the grader tests may depend on it.
q2_X_subtrain_torch = torch.tensor(q2_X_subtrain.reshape((-1,1,8,8)), dtype=torch.float32) / 15 # Normalize to the 0-1 range
q2_X_val_torch = torch.tensor(q2_X_val.reshape((-1,1,8,8)), dtype=torch.float32) / 15 # Normalize to the 0-1 range
q2_X_test_torch = torch.tensor(q2_X_test.reshape((-1,1,8,8)), dtype=torch.float32) / 15   # Normalize to the 0-1 range
# Class labels are stored as integer (long) tensors.
q2_y_subtrain_torch = torch.tensor(q2_y_subtrain, dtype=torch.long)
q2_y_val_torch = torch.tensor(q2_y_val, dtype=torch.long)
q2_y_test_torch = torch.tensor(q2_y_test, dtype=torch.long)

In [None]:
# Dataset preparation (2)
# Pair the inputs and labels of each split in a TensorDataset, then wrap each
# dataset in a DataLoader that yields mini-batches of `batch_size` samples.
batch_size=32
q2_subtrain_dataset = torch.utils.data.TensorDataset(q2_X_subtrain_torch, q2_y_subtrain_torch)
q2_val_dataset = torch.utils.data.TensorDataset(q2_X_val_torch, q2_y_val_torch)
q2_test_dataset = torch.utils.data.TensorDataset(q2_X_test_torch, q2_y_test_torch)
# No `shuffle` argument is passed to any loader (DataLoader does not shuffle by default).
q2_subtrain_loader = torch.utils.data.DataLoader(q2_subtrain_dataset, batch_size=batch_size)
q2_val_loader = torch.utils.data.DataLoader(q2_val_dataset, batch_size=batch_size)
q2_test_loader = torch.utils.data.DataLoader(q2_test_dataset, batch_size=batch_size)

In [None]:
def update_model(model, loss_fn, opt, train_loader):
  """Train `model` for one epoch and report the epoch's mean loss and accuracy.

  Args:
    model: Network to train. It is called as `model(X)` and its output is
      reduced over dimension 1 to obtain the predicted class of each sample.
    loss_fn: Callable `loss_fn(y_pred, y)` returning the mean loss of a mini-batch.
    opt: Optimizer that updates the parameters of `model`.
    train_loader: DataLoader yielding `(X, y)` mini-batches. The number of
      samples is taken from `len(train_loader.dataset)`.

  Returns:
    A tuple `(mean_train_loss, train_accuracy)` averaged over all samples of
    the epoch. The accuracy of each mini-batch is measured with the weights
    as they were before that mini-batch's update.
  """
  # Make sure the model is in the "train" state even if the caller left it in
  # the "evaluate" state (this matters for layers such as Dropout / BatchNorm).
  model.train()

  train_loss = 0
  train_correct = 0
  train_count = len(train_loader.dataset)

  for X, y in train_loader:
    y_pred = model(X) # Predict y from X

    # Pick the class with the largest score. `detach()` keeps this bookkeeping
    # out of the autograd graph (it replaces the legacy `.data` attribute).
    _, predicted = torch.max(y_pred.detach(), 1)
    train_correct += (predicted == y).sum().item() # Count the number of successful predictions (for accuracy calculation)

    loss = loss_fn(y_pred, y)        # Mean training loss of this mini-batch
    train_loss += loss.item()*len(y) # Accumulate the total training loss over the epoch

    # Update weights
    opt.zero_grad()
    loss.backward()
    opt.step()

  # Calculate the average training loss and prediction accuracy within an epoch
  mean_train_loss = train_loss / train_count
  train_accuracy = train_correct / train_count

  return mean_train_loss, train_accuracy

In [None]:
def evaluate_model(model, loss_fn, dataloader):
  """Evaluate `model` on `dataloader` without updating its weights.

  Args:
    model: Network to evaluate. It is called as `model(X)` and its output is
      reduced over dimension 1 to obtain the predicted class of each sample.
    loss_fn: Callable `loss_fn(y_pred, y)` returning the mean loss of a mini-batch.
    dataloader: DataLoader yielding `(X, y)` mini-batches. The number of
      samples is taken from `len(dataloader.dataset)`.

  Returns:
    A tuple `(mean_valid_loss, valid_accuracy)` averaged over all samples.

  The model is switched to the "evaluate" state during the computation and
  is always put back into the "train" state before returning.
  """
  model.eval() # Set to "evaluate" state when training is not performed (Supplementary Material S1)

  valid_loss = 0
  valid_correct = 0
  valid_count = len(dataloader.dataset)

  # No weights are updated here, so gradient tracking is switched off to avoid
  # building autograd graphs that would never be used.
  with torch.no_grad():
    for X, y in dataloader:
      y_pred = model(X) # Predict y from X

      _, predicted = torch.max(y_pred, 1) # Pick the class with the largest score for each sample
      valid_correct += (predicted == y).sum().item() # Count the number of successful predictions (for accuracy calculation)

      loss = loss_fn(y_pred, y)        # Mean evaluation loss of this mini-batch
      valid_loss += loss.item()*len(y) # Accumulate the total evaluation loss over the dataset

  mean_valid_loss = valid_loss / valid_count
  valid_accuracy = valid_correct / valid_count

  model.train() # Return from the "evaluate" state to the "train" state
  return mean_valid_loss, valid_accuracy

In [None]:
from livelossplot import PlotLosses

def train(model, loss_fn, opt, train_loader, valid_loader, epoch=50):
  """Train `model` for `epoch` epochs while plotting the loss and accuracy.

  In every epoch the model is updated once over `train_loader` with
  `update_model`, evaluated on `valid_loader` with `evaluate_model`, and the
  four resulting values are sent to a livelossplot `PlotLosses` chart.

  Args:
    model: Network to train.
    loss_fn: Loss function passed on to `update_model` / `evaluate_model`.
    opt: Optimizer passed on to `update_model`.
    train_loader: DataLoader with the training data.
    valid_loader: DataLoader with the validation data.
    epoch: Number of epochs to run.

  Returns:
    The same `model` object after training.
  """
  liveloss = PlotLosses()
  # Stay `None` when no epoch is run, so the summary below can be skipped
  # instead of failing on undefined variables.
  train_accuracy = None
  valid_accuracy = None
  for _ in range(epoch):
    train_loss, train_accuracy = update_model(model, loss_fn, opt, train_loader)
    valid_loss, valid_accuracy = evaluate_model(model, loss_fn, valid_loader)

    # Visualize the loss and accuracy values.
    liveloss.update({
        'loss': train_loss,
        'val_loss': valid_loss,
        'accuracy': train_accuracy,
        'val_accuracy': valid_accuracy,
    })
    liveloss.draw()
  # Report the accuracies of the last epoch (nothing to report if epoch <= 0).
  if valid_accuracy is not None:
    print('Accuracy: {:.4f} (valid), {:.4f} (train)'.format(valid_accuracy, train_accuracy))
  return model # Return the trained model

#### 練習問題 2-1 | digitsデータを入力できるネットワークの作成(1)：flattenまでの実装

#### Exercise 2-1 | Create a Network That Can Input Digits Data (1): Implementation Up to Flatten

事前準備で用意したデータは1チャネル・縦横8ピクセルずつの画像データである。このデータを入力できる1層の畳み込み層を作成せよ。カーネルサイズ（フィルタサイズ）や出力チャネル数などは問わない。

The data prepared in advance consists of 1-channel images, each 8 pixels wide and 8 pixels tall. Create a single convolutional layer that can accept this data. The kernel size (filter size), the number of output channels, and other settings are up to you.

In [None]:
# answer column
# Exercise 2-1: replace the `...` in Conv2d with arguments that let the layer
# accept the 1-channel 8x8 digits images. Its output then passes through ReLU
# and is flattened.
q21_partial = torch.nn.Sequential(
    torch.nn.Conv2d( ... ),
    torch.nn.ReLU(),
    torch.nn.Flatten(),
)

In [None]:
grader.check("Exercise 2-1")

#### 練習問題 2-2 | digitsデータを入力できるネットワークの作成(2)：全体の作成

#### Exercise 2-2 | Create a Network That Can Input Digits Data (2): Overall Creation

畳み込み層を1層以上用いて `["0","1",...,"9"]` の10クラス分類を行うことができるニューラルネットワーク `q22_cnn` を構築せよ。

Construct a neural network `q22_cnn` that can perform 10-class classification of the digits ["0","1",...,"9"] using one or more convolutional layers.

In [None]:
# answer column
import torch
torch.manual_seed(0) # Ensure reproducibility of training results

# Exercise 2-2: replace `...` with the layers of a network that uses at least
# one convolutional layer and classifies the digits into 10 classes.
q22_cnn = torch.nn.Sequential(
    ...
)

In [None]:
grader.check("Exercise 2-2")

#### 練習問題 2-3 | digitsデータを用いた学習の実施

#### Exercise 2-3 | Implement Training Using Digits Data

練習問題 2-2 で作成した `q22_cnn` の学習を実施せよ。ただし、損失関数は交差エントロピー誤差 `torch.nn.CrossEntropyLoss()`とし、確率的勾配降下法による重み更新（学習率 `lr=0.1`）、学習は50エポック行え。

`q22_cnn` については、検証データの予測正解率が90%以上になるように適宜修正すること。

Conduct training of `q22_cnn` created in Exercise 2-2. Use the cross-entropy loss function `torch.nn.CrossEntropyLoss()` and update weights using stochastic gradient descent (learning rate `lr=0.1`). Train for 50 epochs.

Adjust `q22_cnn` as necessary so that the prediction accuracy on the validation data is at least 90%.

In [None]:
# answer column
import torch
torch.manual_seed(0) # Ensure reproducibility of training results

# Exercise 2-3: prepare the loss function and the optimizer, then train `q22_cnn`.
# Per the exercise statement: cross-entropy loss, SGD with lr=0.1, 50 epochs.
q23_loss_fn = ...
q23_optimizer = ...
q23_trained_model = ...

In [None]:
grader.check("Exercise 2-3")

#### 練習問題 2-4 | テストデータによる汎化性能評価

#### Exercise 2-4 | Generalization Performance Evaluation Using Test Data

練習問題 2-3 で訓練したモデル `q23_trained_model` を使ってテストデータを予測した時の予測正解率を `q24_test_accuracy` に記録せよ。なお、このテストデータに対する予測正解率をもとに `q23_trained_model` を更新することを禁ずる。（テストデータは本来答えを知らないデータであり、最良モデルの探索には使えないことに留意せよ）

Record in `q24_test_accuracy` the prediction accuracy obtained when predicting the test data with the model `q23_trained_model` trained in Exercise 2-3. Note that updating `q23_trained_model` based on this test accuracy is prohibited (test data stands for data whose answers are not known in advance, so it must not be used to search for the best model).

In [None]:
# answer column
# Exercise 2-4: replace `...` with code that stores the test-data prediction
# accuracy of `q23_trained_model` in `q24_test_accuracy`.
...
q24_test_accuracy # show the test accuracy

In [None]:
grader.check("Exercise 2-4")

#### 練習問題 2-5 | 単層パーセプトロンとの比較

#### Exercise 2-5 | Comparison with Single-Layer Perceptron

以下に実装した単層パーセプトロン `q25_slp` の訓練を行い、テストデータに対する予測正解率 `q25_test_accuracy` を算出、CNNの予測正解率と比較せよ。ただし、単層パーセプトロンの訓練において、損失関数は交差エントロピー誤差 `torch.nn.CrossEntropyLoss()`とし、確率的勾配降下法による重み更新（学習率 `lr=0.1`）、学習は50エポック行え。

Train the single-layer perceptron `q25_slp` implemented below, calculate its prediction accuracy `q25_test_accuracy` on the test data, and compare it with the CNN's prediction accuracy. When training the single-layer perceptron, use the cross-entropy loss `torch.nn.CrossEntropyLoss()` as the loss function, update the weights using stochastic gradient descent (learning rate `lr=0.1`), and train for 50 epochs.

In [None]:
# answer column
# prepare the single layer perceptron model
import torch
torch.manual_seed(0) # Ensure reproducibility of training results

# Single-layer perceptron: flatten each 1x8x8 image into 64 values and map
# them to 10 class scores with a single linear layer.
q25_slp = torch.nn.Sequential(
    torch.nn.Flatten(),
    torch.nn.Linear(1*8*8, 10),
)

In [None]:
# answer column
# Exercise 2-5: prepare the loss function and the optimizer, then train `q25_slp`.
# Per the exercise statement: cross-entropy loss, SGD with lr=0.1, 50 epochs.
q25_loss_fn = ...
q25_optimizer = ...
q25_trained_model = ...

In [None]:
# answer column
# Calculate the test accuracy `q25_test_accuracy`:
# replace `...` with code that stores the test-data prediction accuracy of
# `q25_trained_model` in `q25_test_accuracy`.

...
q25_test_accuracy # show the test accuracy

In [None]:
grader.check("Exercise 2-5")