## 重み初期化の比較
ランダム (一様分布による初期化)とxavierによる初期化を比較

### モデルの定義

In [20]:
import random

import torch
import torch.utils.data
from torch import nn
from torch import optim

class LinearLayer(nn.Module):
    """
    単層モデルクラス
    """
    def __init__(self, init_val, xavier=False):
        super(LinearLayer, self).__init__()
        self.linear = torch.nn.Linear(10, 1) # wx+b 初期パラメータはランダム
        self.linear.weight = torch.nn.Parameter(torch.reshape(torch.tensor(init_val), (1, 10)))
        if xavier: # フラグでxaviarありなしを設定
            nn.init.xavier_uniform_(self.linear.weight)

        # 損失関数
        self.loss_func = nn.MSELoss()

        
    def forward(self, input_vector):
        """
        順伝搬
        :param input_vector: 入力ベクトル
        :return tanh_w3x: モデルの出力
        """
        wx = self.linear(input_vector)
        return wx

    
    def forward_loss(self, input_vector, target_values):
        """
        順伝搬 + 損失計算
        :param input_vector: 入力ベクトル
        :param target_values: 正解の値
        :return loss:損失
        """
        wx = self.forward(input_vector)
        loss = self.loss_func(wx, target_values)
        return loss

### 学習
xavierと一様分布でそれぞれ初期化したモデルで同様に学習をして損失を比較<br />
複数回学習を実行して最終的な損失の平均を比較

In [22]:
def train(train_data, minibatch_size, linear_model_xavier, linear_model):
    # 損失の最小化のための最適化手法
    op_xavier = optim.SGD(linear_model_xavier.parameters(), lr=0.1) # lr:learning rate (学習率)
    op = optim.SGD(linear_model.parameters(), lr=0.1) # lr:learning rate (学習率)

    train_data_size = len(train_data)
    max_batch_no = train_data_size // minibatch_size
    # 学習
    # エポック
    for epoch in range(100):
        #print("epoch: ", epoch)

        # ミニバッチ学習のためのデータローダー
        train_data_loader = torch.utils.data.DataLoader(train_data, batch_size=minibatch_size, shuffle=True)
        
        # ミニバッチごとに逆伝搬とパラメータ更新
        # イテレーション
        for batch_no, (batch_input_vector, batch_target_values) in enumerate(train_data_loader):            
            # 順伝搬と損失計算
            loss_xavier = linear_model_xavier.forward_loss(batch_input_vector, batch_target_values)
            loss = linear_model.forward_loss(batch_input_vector, batch_target_values)
            #print("batch_no: {}/{} loss_xavier: {}, loss: {}".format(batch_no+1, max_batch_no, loss_xavier, los))

            # 逆伝搬 (勾配の設定)
            loss_xavier.backward()
            loss.backward()

            # パラメータの更新
            op_xavier.step()
            op.step()

            # 勾配の消去
            linear_model_xavier.zero_grad()
            linear_model.zero_grad()

        #print("="*10)
    
    return loss_xavier, loss


In [23]:
init_val = [random.random() for _ in range(10)]

# モデル
linear_model_xavier = LinearLayer(init_val, xavier=True)
linear_model = LinearLayer(init_val)

# 入力とラベル
input_vector = torch.rand(50, 10)
target_values = torch.reshape(torch.tensor([[i[0]+1 for i in input_vector]]), (50, 1))
train_data = torch.utils.data.TensorDataset(input_vector, target_values)

# ミニバッチサイズ
minibatch_size = 10

# 30回試行して後で平均をとる
loss_xavier_list = []
loss_list = []
for i in range(30):
    loss_xavier, loss = train(train_data, minibatch_size, linear_model_xavier, linear_model)
    loss_xavier_list.append(float(loss_xavier))
    loss_list.append(float(loss))
    print("test {}: loss_xavier {}, loss {}".format(i, float(loss_xavier), float(loss)))


test 0: loss_xavier 1.669778794166632e-05, loss 8.861310925567523e-05
test 1: loss_xavier 4.0351494590140646e-08, loss 5.493638468578865e-07
test 2: loss_xavier 1.7073205349493747e-10, loss 2.4228157258221472e-09
test 3: loss_xavier 1.7621459304749398e-13, loss 2.3291590010854346e-12
test 4: loss_xavier 9.5212723881348e-14, loss 1.4637180627708607e-13
test 5: loss_xavier 8.668621240747951e-14, loss 8.526512829121202e-14
test 6: loss_xavier 1.3500311979441904e-13, loss 1.2789769243681803e-13
test 7: loss_xavier 6.252776210214153e-14, loss 4.831690738693953e-14
test 8: loss_xavier 1.0089706712268151e-13, loss 9.805489889014654e-14
test 9: loss_xavier 4.6895819882540254e-14, loss 4.6895819882540254e-14
test 10: loss_xavier 6.252776210214153e-14, loss 7.531753270107605e-14
test 11: loss_xavier 5.4001247240141256e-14, loss 5.684341886080802e-14
test 12: loss_xavier 4.6895819882540254e-14, loss 5.684341886080802e-14
test 13: loss_xavier 1.7053026335868762e-14, loss 1.9895196262469626e-14
tes

In [24]:
# 平均を出力
print("loss_xavier_ave: {}, loss_ave: {}".format(sum(loss_xavier_list)/len(loss_xavier_list), 
                                                 sum(loss_list)/len(loss_list)))

loss_xavier_ave: 5.579437306362417e-07, loss_ave: 2.9721633281572666e-06
