In [0]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.autograd as autograd
import torch.nn.functional as F

# PyTorchのネットワーク構築

## モジュール化

In [0]:
class MLP(nn.Module):  # nn.Moduleを継承する
    def __init__(self, in_dim, hid_dim, out_dim):  # __init__をoverride
        super(MLP, self).__init__()
        self.linear1 = nn.Linear(in_dim, hid_dim)
        self.linear2 = nn.Linear(hid_dim, out_dim)
    
    def forward(self, x):  # forwardをoverride
        x = F.relu(self.linear1(x))
        x = torch.sigmoid(self.linear2(x))
        return x

In [0]:
mlp = MLP(2, 3, 1)
mlp

MLP(
  (linear1): Linear(in_features=2, out_features=3, bias=True)
  (linear2): Linear(in_features=3, out_features=1, bias=True)
)

In [0]:
x = torch.Tensor([[0, 0], [0, 1], [1, 0], [1, 1]])
y = mlp(x) 
y

tensor([[0.6415],
        [0.6321],
        [0.6839],
        [0.6749]], grad_fn=<SigmoidBackward>)

In [0]:
# mlp.parameters()でモデルのパラメータ取得
list(mlp.parameters())

[Parameter containing:
 tensor([[-0.2056, -0.0538],
         [ 0.6857, -0.0459],
         [-0.0032,  0.2250]], requires_grad=True), Parameter containing:
 tensor([ 0.4665,  0.5142, -0.4793], requires_grad=True), Parameter containing:
 tensor([[ 0.4190,  0.4020, -0.4493]], requires_grad=True), Parameter containing:
 tensor([0.1799], requires_grad=True)]

Q．Linear1とLinear2の間にLiner3を挿入して下さい。<br>
　 Linear3の入力層の次元は hid_dim、出力層の次元もhid_dim として下さい。<br>
　 また、活性化関数はreluを用いて順伝播処理を記載して下さい。


In [0]:
class MLP(nn.Module):  # nn.Moduleを継承する
    def __init__(self, in_dim, hid_dim, out_dim):  # __init__をoverride
        super(MLP, self).__init__()
        self.linear1 = nn.Linear(in_dim, hid_dim)
        self.linear3 = nn.Linear(hid_dim, hid_dim)
        self.linear2 = nn.Linear(hid_dim, out_dim)
    
    def forward(self, x):  # forwardをoverride
        x = F.relu(self.linear1(x))
        x = F.relu(self.linear3(x))
        x = torch.sigmoid(self.linear2(x))
        return x

In [0]:
mlp = MLP(2, 3, 1)
mlp

MLP(
  (linear1): Linear(in_features=2, out_features=3, bias=True)
  (linear3): Linear(in_features=3, out_features=3, bias=True)
  (linear2): Linear(in_features=3, out_features=1, bias=True)
)

In [0]:
x = torch.Tensor([[0, 0], [0, 1], [1, 0], [1, 1]])
y = mlp(x) 
y

tensor([[0.5914],
        [0.6074],
        [0.6032],
        [0.6203]], grad_fn=<SigmoidBackward>)

In [0]:
# mlp.parameters()でモデルのパラメータ取得
list(mlp.parameters())

[Parameter containing:
 tensor([[ 0.4886,  0.4904],
         [-0.1855,  0.3246],
         [ 0.3962, -0.0270]], requires_grad=True), Parameter containing:
 tensor([ 0.2803, -0.1896, -0.4594], requires_grad=True), Parameter containing:
 tensor([[ 0.4381,  0.3960, -0.1258],
         [ 0.4643,  0.4595, -0.2213],
         [-0.2678,  0.2309,  0.1116]], requires_grad=True), Parameter containing:
 tensor([-0.3574, -0.0534, -0.1152], requires_grad=True), Parameter containing:
 tensor([[0.1148, 0.2168, 0.5229]], requires_grad=True), Parameter containing:
 tensor([0.3532], requires_grad=True)]

## 最適化

In [0]:
# optimizerの定義
#optimizer = optim.SGD([W1, W2], lr=0.1)
optimizer = optim.SGD(mlp.parameters(), lr=0.1)
 
# 勾配のリセット
optimizer.zero_grad()
 
# パラメータの更新
optimizer.step()

# 学習

In [0]:
# XORをMLPで行う
x = torch.Tensor([[0, 0], [0, 1], [1, 0], [1, 1]])
t = torch.Tensor([0, 1, 1, 0])

# モデルの定義
mlp = MLP(2, 3, 1)

# 誤差関数の定義
criterion = nn.BCELoss()  # Binary Cross Entropy Loss

# 最適化の定義
optimizer = optim.SGD(mlp.parameters(), lr=0.1)  # Moduleのパラメータは.parameters()で取得できる

# モデルを訓練モードにする（Dropout等に関係）
mlp.train()

MLP(
  (linear1): Linear(in_features=2, out_features=3, bias=True)
  (linear3): Linear(in_features=3, out_features=3, bias=True)
  (linear2): Linear(in_features=3, out_features=1, bias=True)
)

In [0]:
for i in range(1000):
    # 順伝播
    y_pred = mlp(x)

    # 誤差の計算
    loss = criterion(y_pred, t.unsqueeze(1))
    
    # 逆伝播
    optimizer.zero_grad()
    loss.backward()
    
    # パラメータの更新
    optimizer.step()

    if i % 100 == 0:
        print(i, loss.item())

0 0.6942846775054932
100 0.6841785907745361
200 0.6522042155265808
300 0.5632504820823669
400 0.5013538599014282
500 0.48634791374206543
600 0.48212167620658875
700 0.4805053174495697
800 0.47939532995224
900 0.47890934348106384


## モデルの保存・読み込み・再学習

In [0]:
print(list(mlp.parameters()))
print()

# state_dictの取得
state_dict = mlp.state_dict()
print(state_dict)

# モデルの保存
torch.save(state_dict, './model.pth')

[Parameter containing:
tensor([[-0.0951,  0.0600],
        [ 1.2153,  1.4018],
        [-0.5421, -0.2662]], requires_grad=True), Parameter containing:
tensor([-3.4029e-01, -2.3073e-04,  8.6199e-01], requires_grad=True), Parameter containing:
tensor([[-0.0516, -1.6340,  1.0783],
        [ 0.0906, -0.4099,  0.5750],
        [-0.2943,  0.0362, -0.2752]], requires_grad=True), Parameter containing:
tensor([ 1.5578, -0.0737, -0.1527], requires_grad=True), Parameter containing:
tensor([[-2.3417, -0.4039,  0.5517]], requires_grad=True), Parameter containing:
tensor([0.6842], requires_grad=True)]

OrderedDict([('linear1.weight', tensor([[-0.0951,  0.0600],
        [ 1.2153,  1.4018],
        [-0.5421, -0.2662]])), ('linear1.bias', tensor([-3.4029e-01, -2.3073e-04,  8.6199e-01])), ('linear3.weight', tensor([[-0.0516, -1.6340,  1.0783],
        [ 0.0906, -0.4099,  0.5750],
        [-0.2943,  0.0362, -0.2752]])), ('linear3.bias', tensor([ 1.5578, -0.0737, -0.1527])), ('linear2.weight', tensor([[-2

In [0]:
# モデルの定義
mlp2 = MLP(2, 3, 1)
print(list(mlp2.parameters()))  # ランダムな初期値
print()

# 学習済みパラメータの読み込み
state_dict = torch.load('./model.pth')
mlp2.load_state_dict(state_dict)
print(list(mlp2.parameters()))  # 学習済みパラメータ

[Parameter containing:
tensor([[-0.0662,  0.6434],
        [ 0.0107, -0.6635],
        [ 0.4920, -0.3294]], requires_grad=True), Parameter containing:
tensor([-0.2022, -0.3924, -0.4086], requires_grad=True), Parameter containing:
tensor([[ 0.0407,  0.3587, -0.0633],
        [ 0.5212, -0.0252,  0.0422],
        [-0.5043, -0.2827, -0.3099]], requires_grad=True), Parameter containing:
tensor([-0.0831, -0.4740,  0.2923], requires_grad=True), Parameter containing:
tensor([[ 0.0351,  0.4321, -0.1992]], requires_grad=True), Parameter containing:
tensor([0.4525], requires_grad=True)]

[Parameter containing:
tensor([[-0.0951,  0.0600],
        [ 1.2153,  1.4018],
        [-0.5421, -0.2662]], requires_grad=True), Parameter containing:
tensor([-3.4029e-01, -2.3073e-04,  8.6199e-01], requires_grad=True), Parameter containing:
tensor([[-0.0516, -1.6340,  1.0783],
        [ 0.0906, -0.4099,  0.5750],
        [-0.2943,  0.0362, -0.2752]], requires_grad=True), Parameter containing:
tensor([ 1.5578, -0

# PyTorchを用いたMLPの実装

これまでのコードを参考・活用し、MNISTを実装するコードを作成して下さい。

In [0]:
from torchvision import datasets, transforms, models
from sklearn.metrics import f1_score

In [0]:
# 参考
in_dim  = 784
hid_dim = 200
out_dim = 10
lr = 0.001
batch_size = 32
n_epochs = 10

In [9]:
#  torchvision & DataLoader
from torchvision import transforms, datasets
 
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x.view(in_dim))
])
 
dataloader = torch.utils.data.DataLoader(
    datasets.MNIST('~/data/mnist', train=True, download=True, transform=transform),
    batch_size=batch_size,
    shuffle=True
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to /root/data/mnist/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Extracting /root/data/mnist/MNIST/raw/train-images-idx3-ubyte.gz to /root/data/mnist/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to /root/data/mnist/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Extracting /root/data/mnist/MNIST/raw/train-labels-idx1-ubyte.gz to /root/data/mnist/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to /root/data/mnist/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Extracting /root/data/mnist/MNIST/raw/t10k-images-idx3-ubyte.gz to /root/data/mnist/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to /root/data/mnist/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Extracting /root/data/mnist/MNIST/raw/t10k-labels-idx1-ubyte.gz to /root/data/mnist/MNIST/raw
Processing...
Done!


In [10]:
# 参考
for epoch in range(n_epochs):
    losses_train = []
    losses_valid = []
    preds_train = []
    preds_valid = []
    trues_train = []
    trues_valid = []
    
    mlp.train()
    for x, t in dataloader_train:
        true = t.tolist()
        trues_train.extend(true)

        # 勾配の初期化
        
        
        # テンソルをGPUに移動
        
        
        
        # 順伝播
        
        
        # 誤差の計算
        
        
        # 誤差の逆伝播
        
        
        # パラメータの更新
        
        
        # モデルの出力を予測値のスカラーに変換
        pred = y.argmax(1).tolist()
        preds_train.extend(pred)
        
        losses_train.append(loss.tolist())
    
    mlp.eval()
    for x, t in dataloader_valid:
        true = t.tolist()
        trues_valid.extend(true)

        # テンソルをGPUに移動
        
        
        
        # 順伝播
        

        # 誤差の計算
        
        
        # モデルの出力を予測値のスカラーに変換
        pred = y.argmax(1).tolist()
        preds_valid.extend(pred)
        
        losses_valid.append(loss.tolist())
        
    print('EPOCH: {}, Train [Loss: {:.3f}, F1: {:.3f}], Valid [Loss: {:.3f}, F1: {:.3f}]'.format(
        epoch,
        np.mean(losses_train),
        f1_score(trues_train, preds_train, average='macro'),
        np.mean(losses_valid),
        f1_score(trues_valid, preds_valid, average='macro')
    ))

NameError: ignored

In [0]:
class MLP(nn.Module):
    def __init__(self, in_dim, hid_dim, out_dim):
        super(MLP, self).__init__()
        self.linear1 = nn.Linear(in_dim, hid_dim)
        self.linear2 = nn.Linear(hid_dim, out_dim)
    
    def forward(self, x):
        x = F.relu(self.linear1(x))
        x = F.log_softmax(self.linear2(x), dim=-1)
        return x

In [0]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [0]:
mlp = MLP(in_dim, hid_dim, out_dim).to(device)

In [0]:
in_dim  = 784
hid_dim = 200
out_dim = 10
lr = 0.001
batch_size = 32
n_epochs = 10

mlp = MLP(in_dim, hid_dim, out_dim).to(device)

optimizer = optim.SGD(mlp.parameters(), lr=lr)

criterion = nn.NLLLoss()  # Negative Log Liklihood Loss

In [19]:
# 前処理を定義
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x.view(in_dim))
])

# torchvisionのdatasetsを使ってMNISTのデータを取得
# ミニバッチ化や前処理などの処理を行ってくれるDataLoaderを定義
dataloader_train = torch.utils.data.DataLoader(
    datasets.MNIST('./data/mnist', train=True, download=True, transform=transform),
    batch_size=batch_size,
    shuffle=True
)

dataloader_valid = torch.utils.data.DataLoader(
    datasets.MNIST('./data/mnist', train=False, download=True, transform=transform),
    batch_size=batch_size,
    shuffle=False
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/mnist/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Extracting ./data/mnist/MNIST/raw/train-images-idx3-ubyte.gz to ./data/mnist/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/mnist/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Extracting ./data/mnist/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/mnist/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/mnist/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Extracting ./data/mnist/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/mnist/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/mnist/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Extracting ./data/mnist/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/mnist/MNIST/raw
Processing...
Done!


In [22]:
for epoch in range(n_epochs):
    losses_train = []
    losses_valid = []
    preds_train = []
    preds_valid = []
    trues_train = []
    trues_valid = []
    
    mlp.train()
    for x, t in dataloader_train:
        true = t.tolist()
        trues_train.extend(true)

        # 勾配の初期化
        mlp.zero_grad()
        
        # テンソルをGPUに移動
        x = x.to(device)
        t = t.to(device)
        
        # 順伝播
        y = mlp.forward(x)
        
        # 誤差の計算
        loss = criterion(y, t)
        
        # 誤差の逆伝播
        loss.backward()
        
        # パラメータの更新
        optimizer.step()
        
        # モデルの出力を予測値のスカラーに変換
        pred = y.argmax(1).tolist()
        preds_train.extend(pred)
        
        losses_train.append(loss.tolist())
    
    mlp.eval()
    for x, t in dataloader_valid:
        true = t.tolist()
        trues_valid.extend(true)

        # テンソルをGPUに移動
        x = x.to(device)
        t = t.to(device)
        
        # 順伝播
        y = mlp.forward(x)

        # 誤差の計算
        loss = criterion(y, t)
        
        # モデルの出力を予測値のスカラーに変換
        pred = y.argmax(1).tolist()
        preds_valid.extend(pred)
        
        losses_valid.append(loss.tolist())
        
    print('EPOCH: {}, Train [Loss: {:.3f}, F1: {:.3f}], Valid [Loss: {:.3f}, F1: {:.3f}]'.format(
        epoch,
        np.mean(losses_train),
        f1_score(trues_train, preds_train, average='macro'),
        np.mean(losses_valid),
        f1_score(trues_valid, preds_valid, average='macro')
    ))

EPOCH: 0, Train [Loss: 2.138, F1: 0.468], Valid [Loss: 1.909, F1: 0.673]
EPOCH: 1, Train [Loss: 1.616, F1: 0.700], Valid [Loss: 1.302, F1: 0.749]
EPOCH: 2, Train [Loss: 1.098, F1: 0.782], Valid [Loss: 0.900, F1: 0.818]
EPOCH: 3, Train [Loss: 0.812, F1: 0.826], Valid [Loss: 0.703, F1: 0.844]
EPOCH: 4, Train [Loss: 0.666, F1: 0.847], Valid [Loss: 0.596, F1: 0.859]
EPOCH: 5, Train [Loss: 0.580, F1: 0.859], Valid [Loss: 0.529, F1: 0.871]
EPOCH: 6, Train [Loss: 0.525, F1: 0.868], Valid [Loss: 0.483, F1: 0.877]
EPOCH: 7, Train [Loss: 0.486, F1: 0.874], Valid [Loss: 0.451, F1: 0.883]
EPOCH: 8, Train [Loss: 0.457, F1: 0.879], Valid [Loss: 0.426, F1: 0.888]
EPOCH: 9, Train [Loss: 0.435, F1: 0.884], Valid [Loss: 0.407, F1: 0.890]
