# Optimizer

## スクラッチ実装

In [1]:
import torch
import numpy as np
from torch import nn
from torch.nn import functional as F
from sklearn import datasets
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [2]:
# ================MLP自作クラス=================
class MLP(nn.Module):
    def __init__(self, num_in, num_hidden, num_out):
        super().__init__()  # 親クラスであるnn.Moduleのインスタンス変数を呼ぶ
        self.l1 = nn.Linear(in_features=num_in, out_features=num_hidden)  # 隠れ層のLinear層
        self.l2 = nn.Linear(in_features=num_hidden, out_features=num_out)  # 出力層のLinear層
        # モデルにはReLUも必要だが、ReLUはパラメータを持たない関数なので、F.functionで定義する

    # 順伝播の関数
    def forward(self, x):  
        x = self.l2(F.relu(self.l1(x)))
        return x

# モデルの初期化
num_in = 64
num_hidden = 30
num_out = 10
model = MLP(num_in, num_hidden, num_out)

# model.parametes()はGeneratorなのでlistで返せるようにしておく
model.parameters()

<generator object Module.parameters at 0x7f99c0249ba0>

In [3]:
# ================Optimizer自作クラス=================
class Optimizer():

    def __init__(self, parameters, lr=0.03):
        self.parameters = list(parameters)
        self.lr = lr

    def step(self):
        with torch.no_grad():
            for param in self.parameters:
                param -= self.lr * param.grad

    def zero_grad(self):
        for param in self.parameters:
            if param.grad is not None:
                param.grad.zero_()

In [4]:
learning_rate = 0.01
opt = Optimizer(parameters=model.parameters(), lr=learning_rate)

In [5]:
## Refactoring後の学習ループ(OptimizerやDataset, Dataloaderは後ほどRefactoring)
# ===データの準備====
dataset = datasets.load_digits()
data = dataset['data']
target = dataset['target']
images = dataset['images']
X_train, X_val, y_train, y_val = train_test_split(images, target, test_size=0.2, random_state=42)
X_train = (X_train - X_train.mean()) / X_train.std()
X_val = (X_val - X_train.mean()) / X_train.std()
X_train = torch.tensor(X_train.reshape(-1, 64), dtype=torch.float32)
X_val = torch.tensor(X_val.reshape(-1, 64), dtype=torch.float32)
y_train = torch.tensor(y_train)
y_val =torch.tensor(y_val)
batch_size = 30
num_batches = np.ceil(len(y_train)/batch_size).astype(int)

# ログ
train_losses = []
val_losses = []
val_accuracies = []
for epoch in range(100):
    # エポック毎にデータをシャッフル
    shuffled_indices = np.random.permutation(len(y_train))
    
    running_loss = 0.0
    
    for i in range(num_batches):
        
        # mini batch作成
        start = i * batch_size
        end = start + batch_size

        batch_indices = shuffled_indices[start:end]
        y = y_train[batch_indices] # batch_size x 10
        
        X = X_train[batch_indices] # batch_size x 64
        # 順伝播と逆伝播の計算
        preds = model.forward(X) 
        loss = F.cross_entropy(preds, y)
        loss.backward()
        running_loss += loss.item()

        # パラメータ更新  # ============ここの部分をOptimzerクラスに書き換える============
        # with torch.no_grad():
        #     for param in model.parameters(): 
        #         param -= learning_rate * param.grad

        # model.zero_grad()
        opt.step()
        opt.zero_grad()

    # validation
    with torch.no_grad():
        preds_val = model(X_val)  
        val_loss = F.cross_entropy(preds_val, y_val)
        val_accuracy = torch.sum(torch.argmax(preds_val, dim=-1) == y_val) / y_val.shape[0] 

    train_losses.append(running_loss/num_batches)
    val_losses.append(val_loss.item())
    val_accuracies.append(val_accuracy)
    print(f'epoch: {epoch}: train error: {running_loss/num_batches}, validation error: {val_loss.item()}, validation accuracy: {val_accuracy}')

epoch: 0: train error: 2.2959280759096146, validation error: 2.3013250827789307, validation accuracy: 0.19166666269302368
epoch: 1: train error: 2.185075889031092, validation error: 1.8151521682739258, validation accuracy: 0.3194444477558136
epoch: 2: train error: 2.0629258677363396, validation error: 1.4268486499786377, validation accuracy: 0.46388888359069824
epoch: 3: train error: 1.9155480787158012, validation error: 1.1287775039672852, validation accuracy: 0.6027777791023254
epoch: 4: train error: 1.74362313747406, validation error: 0.9431319236755371, validation accuracy: 0.6472222208976746
epoch: 5: train error: 1.564373475809892, validation error: 0.8413791060447693, validation accuracy: 0.6694444417953491
epoch: 6: train error: 1.3898871317505836, validation error: 0.7255154848098755, validation accuracy: 0.7416666746139526
epoch: 7: train error: 1.2285650844375293, validation error: 0.6491394639015198, validation accuracy: 0.7888888716697693
epoch: 8: train error: 1.082624978

## torch.optim

In [6]:
from torch import optim
opt = optim.SGD(model.parameters(), lr=learning_rate)

In [7]:
## Refactoring後の学習ループ(OptimizerやDataset, Dataloaderは後ほどRefactoring)
# ===データの準備====
dataset = datasets.load_digits()
data = dataset['data']
target = dataset['target']
images = dataset['images']
X_train, X_val, y_train, y_val = train_test_split(images, target, test_size=0.2, random_state=42)
X_train = (X_train - X_train.mean()) / X_train.std()
X_val = (X_val - X_train.mean()) / X_train.std()
X_train = torch.tensor(X_train.reshape(-1, 64), dtype=torch.float32)
X_val = torch.tensor(X_val.reshape(-1, 64), dtype=torch.float32)
y_train = torch.tensor(y_train)
y_val =torch.tensor(y_val)
batch_size = 30
num_batches = np.ceil(len(y_train)/batch_size).astype(int)



# ログ
train_losses = []
val_losses = []
val_accuracies = []
for epoch in range(100):
    # エポック毎にデータをシャッフル
    shuffled_indices = np.random.permutation(len(y_train))
    
    running_loss = 0.0
    
    for i in range(num_batches):
        
        # mini batch作成
        start = i * batch_size
        end = start + batch_size

        batch_indices = shuffled_indices[start:end]
        y = y_train[batch_indices] # batch_size x 10
        
        X = X_train[batch_indices] # batch_size x 64
        # 順伝播と逆伝播の計算
        preds = model.forward(X) 
        loss = F.cross_entropy(preds, y)
        loss.backward()
        running_loss += loss.item()

        # パラメータ更新  # ============ここの部分をOptimzerクラスに書き換える============
        # with torch.no_grad():
        #     for param in model.parameters(): 
        #         param -= learning_rate * param.grad

        # model.zero_grad()
        opt.step()
        opt.zero_grad()

    # validation
    with torch.no_grad():
        preds_val = model(X_val)  
        val_loss = F.cross_entropy(preds_val, y_val)
        val_accuracy = torch.sum(torch.argmax(preds_val, dim=-1) == y_val) / y_val.shape[0] 

    train_losses.append(running_loss/num_batches)
    val_losses.append(val_loss.item())
    val_accuracies.append(val_accuracy)
    print(f'epoch: {epoch}: train error: {running_loss/num_batches}, validation error: {val_loss.item()}, validation accuracy: {val_accuracy}')

epoch: 0: train error: 0.08290219120681286, validation error: 0.522064745426178, validation accuracy: 0.9361110925674438
epoch: 1: train error: 0.08210546263338377, validation error: 0.5342123508453369, validation accuracy: 0.9361110925674438
epoch: 2: train error: 0.08127990520248811, validation error: 0.5317471027374268, validation accuracy: 0.9361110925674438
epoch: 3: train error: 0.08050674840342253, validation error: 0.5266197323799133, validation accuracy: 0.9361110925674438
epoch: 4: train error: 0.07995347950297098, validation error: 0.5186716914176941, validation accuracy: 0.9361110925674438
epoch: 5: train error: 0.07918641312668721, validation error: 0.5206379294395447, validation accuracy: 0.9388889074325562
epoch: 6: train error: 0.07863732206169516, validation error: 0.5513700842857361, validation accuracy: 0.9361110925674438
epoch: 7: train error: 0.07801462104544044, validation error: 0.5447068810462952, validation accuracy: 0.9388889074325562
epoch: 8: train error: 0.