In [0]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.autograd as autograd
import torch.nn.functional as F

# PyTorchのネットワーク構築

## モジュール化

In [0]:
class MLP(nn.Module):
    """Two-layer perceptron: Linear -> ReLU -> Linear -> sigmoid.

    Args:
        in_dim: number of input features.
        hid_dim: width of the hidden layer.
        out_dim: number of output units.
    """

    def __init__(self, in_dim, hid_dim, out_dim):
        super().__init__()
        # The attribute names become the parameter-name prefixes
        # (linear1.weight, ...), so they are kept as-is.
        self.linear1 = nn.Linear(in_dim, hid_dim)
        self.linear2 = nn.Linear(hid_dim, out_dim)

    def forward(self, x):
        """Run the forward pass and return the sigmoid-activated outputs."""
        hidden = self.linear1(x).relu()
        logits = self.linear2(hidden)
        return logits.sigmoid()

In [0]:
# Build a network with 2 inputs, 3 hidden units and 1 output;
# the bare name on the last line makes the cell display its layer summary.
mlp = MLP(in_dim=2, hid_dim=3, out_dim=1)
mlp

MLP(
  (linear1): Linear(in_features=2, out_features=3, bias=True)
  (linear2): Linear(in_features=3, out_features=1, bias=True)
)

In [0]:
# Every combination of two binary inputs, one pair per row: (0,0), (0,1), (1,0), (1,1).
x = torch.Tensor([[a, b] for a in (0, 1) for b in (0, 1)])
# Calling the module runs its forward() on the whole batch.
y = mlp(x)
y

tensor([[0.5269],
        [0.5352],
        [0.5818],
        [0.5896]], grad_fn=<SigmoidBackward>)

In [0]:
# mlp.parameters() iterates over the model's parameters; collect them into a list for display.
[param for param in mlp.parameters()]

[Parameter containing:
 tensor([[-0.6887, -0.5148],
         [ 0.2710, -0.3233],
         [ 0.4694,  0.0850]], requires_grad=True), Parameter containing:
 tensor([-0.0259,  0.2843,  0.5544], requires_grad=True), Parameter containing:
 tensor([[0.3888, 0.0212, 0.4611]], requires_grad=True), Parameter containing:
 tensor([-0.1538], requires_grad=True)]

Q．Linear1とLinear2の間にLinear3を挿入して下さい。<br>
　 Linear3の入力層の次元は hid_dim、出力層の次元もhid_dim として下さい。<br>
　 また、活性化関数はreluを用いて順伝播処理を記載して下さい。


In [0]:
# Re-create the network (2 inputs, 3 hidden units, 1 output) from the current MLP definition
# and display its layer summary.
mlp = MLP(in_dim=2, hid_dim=3, out_dim=1)
mlp

MLP(
  (linear1): Linear(in_features=2, out_features=3, bias=True)
  (linear2): Linear(in_features=3, out_features=1, bias=True)
)

In [0]:
# Every combination of two binary inputs, one pair per row: (0,0), (0,1), (1,0), (1,1).
x = torch.Tensor([[a, b] for a in (0, 1) for b in (0, 1)])
# Calling the module runs its forward() on the whole batch.
y = mlp(x)
y

tensor([[0.6288],
        [0.6126],
        [0.5831],
        [0.6016]], grad_fn=<SigmoidBackward>)

In [0]:
# mlp.parameters() iterates over the model's parameters; collect them into a list for display.
[param for param in mlp.parameters()]

[Parameter containing:
 tensor([[-0.5066, -0.3044],
         [ 0.0983, -0.1137],
         [ 0.4075, -0.1291]], requires_grad=True), Parameter containing:
 tensor([ 0.2349,  0.1848, -0.2207], requires_grad=True), Parameter containing:
 tensor([[ 0.4135, -0.2487, -0.3735]], requires_grad=True), Parameter containing:
 tensor([0.4758], requires_grad=True)]

## 最適化

In [0]:
# Define the optimizer.
# A plain list of tensors also works, e.g. optim.SGD([W1, W2], lr=0.1);
# here the module's parameters are handed over instead.
optimizer = optim.SGD(params=mlp.parameters(), lr=0.1)

# Reset the gradients of every tracked parameter.
optimizer.zero_grad()

# Apply a parameter update. No backward() precedes it in this cell,
# so the call only illustrates where step() goes in the sequence.
optimizer.step()

# 学習

In [0]:
# Learn XOR with the MLP: x holds the four input pairs, t the matching XOR labels.
x = torch.Tensor([[a, b] for a in (0, 1) for b in (0, 1)])
t = torch.Tensor([a ^ b for a in (0, 1) for b in (0, 1)])

# Model
mlp = MLP(2, 3, 1)

# Loss function: binary cross entropy
criterion = nn.BCELoss()

# Optimizer: a Module exposes its trainable parameters through .parameters()
optimizer = optim.SGD(mlp.parameters(), lr=0.1)

# Put the model into training mode (matters for layers such as Dropout).
# train() is the last expression, so the cell displays what it returns.
mlp.train()

MLP(
  (linear1): Linear(in_features=2, out_features=3, bias=True)
  (linear2): Linear(in_features=3, out_features=1, bias=True)
)

In [0]:
for i in range(1000):
    # Clear gradients from the previous iteration before the new backward pass.
    optimizer.zero_grad()

    # Forward pass, then the loss against the targets viewed as a column
    # so that their shape lines up with the model output.
    y_pred = mlp(x)
    loss = criterion(y_pred, t[:, None])

    # Backpropagate and apply one optimizer update.
    loss.backward()
    optimizer.step()

    # Report the loss on every 100th iteration.
    if not i % 100:
        print(i, loss.item())

## モデルの保存・読み込み・再学習

In [0]:
# Show the current parameters, followed by a blank line.
print(list(mlp.parameters()), end='\n\n')

# Fetch the state_dict and show it.
state_dict = mlp.state_dict()
print(state_dict)

# Save the model by writing the state_dict to disk.
torch.save(state_dict, './model.pth')

In [0]:
# Define a second model with the same layer sizes.
mlp2 = MLP(2, 3, 1)
print(list(mlp2.parameters()), end='\n\n')  # random initial values

# Read the saved state_dict back and copy it into the new model.
state_dict = torch.load('./model.pth')
mlp2.load_state_dict(state_dict)
print(list(mlp2.parameters()))  # values loaded from the file

# PyTorchを用いたMLPの実装

これまでのコードを参考・活用し、MNISTの手書き数字を分類するMLPの学習コードを作成して下さい。

In [0]:
from torchvision import datasets, transforms, models
from sklearn.metrics import f1_score

In [0]:
# Hyperparameters
in_dim, hid_dim, out_dim = 784, 200, 10  # flattened input size, hidden width, output size
lr = 1e-3        # learning rate
batch_size = 32  # samples per mini-batch
n_epochs = 10    # number of passes over the training data

In [0]:
#  torchvision & DataLoader
from torchvision import transforms, datasets

# Turn each image into a tensor, then flatten it into a vector of length in_dim.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Lambda(lambda img: img.view(in_dim)),
])

# Both loaders share every setting except the split: train=True for training,
# train=False for the split used as validation data. They are built in that order.
dataloader_train, dataloader_valid = (
    torch.utils.data.DataLoader(
        datasets.MNIST('~/data/mnist', train=is_train, download=True, transform=transform),
        batch_size=batch_size,
        shuffle=True,
    )
    for is_train in (True, False)
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to /root/data/mnist/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Extracting /root/data/mnist/MNIST/raw/train-images-idx3-ubyte.gz to /root/data/mnist/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to /root/data/mnist/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Extracting /root/data/mnist/MNIST/raw/train-labels-idx1-ubyte.gz to /root/data/mnist/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to /root/data/mnist/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Extracting /root/data/mnist/MNIST/raw/t10k-images-idx3-ubyte.gz to /root/data/mnist/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to /root/data/mnist/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Extracting /root/data/mnist/MNIST/raw/t10k-labels-idx1-ubyte.gz to /root/data/mnist/MNIST/raw
Processing...
Done!


In [0]:
class MLP(nn.Module):
    """Two-layer perceptron: Linear -> ReLU -> Linear -> log_softmax.

    Args:
        in_dim: number of input features.
        hid_dim: width of the hidden layer.
        out_dim: number of output units.
    """

    def __init__(self, in_dim, hid_dim, out_dim):
        super().__init__()
        self.linear1 = nn.Linear(in_dim, hid_dim)
        self.linear2 = nn.Linear(hid_dim, out_dim)

    def forward(self, x):
        """Return log-softmax scores taken over the last dimension."""
        hidden = self.linear1(x).relu()
        logits = self.linear2(hidden)
        return logits.log_softmax(dim=-1)

In [0]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [0]:
# Build the model from the hyperparameters, then move it to the selected device.
mlp = MLP(in_dim, hid_dim, out_dim)
mlp = mlp.to(device)

In [0]:
# Training
# The loss and the optimizer are created here so that this cell does not rely on
# objects left over from the XOR section: the model's forward() ends in
# log_softmax, which pairs with NLLLoss, and the optimizer has to track the
# parameters of *this* model.
criterion = nn.NLLLoss()
optimizer = optim.SGD(mlp.parameters(), lr=lr)

for epoch in range(n_epochs):
    # Per-epoch accumulators: batch losses, predicted labels and true labels.
    losses_train = []
    losses_valid = []
    preds_train = []
    preds_valid = []
    trues_train = []
    trues_valid = []

    mlp.train()
    for x, t in dataloader_train:
        true = t.tolist()
        trues_train.extend(true)

        # Reset the gradients left by the previous batch.
        optimizer.zero_grad()

        # Move the batch to the device the model lives on.
        x = x.to(device)
        t = t.to(device)

        # Forward pass.
        y = mlp(x)

        # Compute the loss.
        loss = criterion(y, t)

        # Backpropagate the loss.
        loss.backward()

        # Update the parameters.
        optimizer.step()

        # Reduce the model output to one predicted class index per sample.
        pred = y.argmax(1).tolist()
        preds_train.extend(pred)

        losses_train.append(loss.item())

    mlp.eval()
    # No parameter update happens during validation, so skip gradient tracking.
    with torch.no_grad():
        for x, t in dataloader_valid:
            true = t.tolist()
            trues_valid.extend(true)

            # Move the batch to the device the model lives on.
            x = x.to(device)
            t = t.to(device)

            # Forward pass.
            y = mlp(x)

            # Compute the loss.
            loss = criterion(y, t)

            # Reduce the model output to one predicted class index per sample.
            pred = y.argmax(1).tolist()
            preds_valid.extend(pred)

            losses_valid.append(loss.item())

    print('EPOCH: {}, Train [Loss: {:.3f}, F1: {:.3f}], Valid [Loss: {:.3f}, F1: {:.3f}]'.format(
        epoch,
        np.mean(losses_train),
        f1_score(trues_train, preds_train, average='macro'),
        np.mean(losses_valid),
        f1_score(trues_valid, preds_valid, average='macro')
    ))

In [0]:
criterion = nn.NLLLoss()  # Negative Log Likelihood Loss