In [0]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.autograd as autograd
import torch.nn.functional as F

# PyTorchの基本モジュール

In [0]:
a = torch.ones((2, 3))
a

tensor([[1., 1., 1.],
        [1., 1., 1.]])

In [0]:
a = torch.zeros((2, 3))
a

tensor([[0., 0., 0.],
        [0., 0., 0.]])

In [0]:
a = torch.full((2, 3), fill_value=99)
a

tensor([[99., 99., 99.],
        [99., 99., 99.]])

In [0]:
a = torch.eye(3)
a

tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]])

In [0]:
torch.manual_seed(1234)

torch.randn(10)

tensor([ 0.0461,  0.4024, -1.0115,  0.2167, -0.6123,  0.5036,  0.2310,  0.6931,
        -0.2669,  2.1785])

In [0]:
torch.rand(10)

tensor([0.6012, 0.3043, 0.2548, 0.6294, 0.9665, 0.7399, 0.4517, 0.4757, 0.7842,
        0.1525])

In [0]:
a = torch.ones((2, 3), dtype=torch.float)
a.dtype

torch.float32

In [0]:
a = torch.ones((2, 3), dtype=torch.int)
a.dtype

torch.int32

In [0]:
a = torch.ones((2, 3), dtype=torch.long)
a.dtype

torch.int64

In [0]:
a.shape

torch.Size([2, 3])

In [0]:
a = torch.arange(6)
a

tensor([0, 1, 2, 3, 4, 5])

In [0]:
b = a.reshape(2, 3)
b

tensor([[0, 1, 2],
        [3, 4, 5]])

In [0]:
b = a.view(1, 2, 3, 1)
b

tensor([[[[0],
          [1],
          [2]],

         [[3],
          [4],
          [5]]]])

In [5]:
x = torch.randn(10, 3, 2)
x.shape

torch.Size([10, 3, 2])

In [6]:
x.view(30, 2).shape

tensor([[[-1.2173e-03,  2.6448e+00],
         [ 5.7298e-01,  9.8713e-01],
         [-4.6832e-01, -1.0484e+00]],

        [[-1.4515e+00, -1.2559e+00],
         [-1.0035e+00, -1.0795e+00],
         [-6.5369e-01, -8.4635e-01]],

        [[ 2.4921e-01,  2.2780e-01],
         [-1.5847e+00, -1.1936e+00],
         [ 1.3924e+00,  6.5906e-01]],

        [[ 5.0643e-01,  1.2524e+00],
         [-2.1019e+00,  8.2746e-01],
         [ 2.6221e-01, -1.0971e+00]],

        [[-1.2345e+00,  1.1696e+00],
         [-1.5736e+00, -5.6873e-01],
         [ 1.5185e-01,  2.9079e-01]],

        [[ 3.8827e-02, -4.4339e-01],
         [ 2.0334e+00,  3.4284e-01],
         [-1.7585e-01,  2.5590e-01]],

        [[-9.1082e-01,  2.0043e+00],
         [-1.5310e+00,  3.6885e-01],
         [ 1.2198e+00,  1.1759e+00]],

        [[-1.5012e+00, -1.3133e-01],
         [ 3.3896e-01,  4.9207e-01],
         [ 1.5713e+00, -1.7120e+00]],

        [[ 7.4851e-01,  3.7146e-01],
         [ 1.7962e-01, -6.3238e-01],
         [-7.6390e-01,

In [0]:
x.view(3, -1).shape

torch.Size([3, 20])

In [0]:
b = a.unsqueeze(dim=1)
b

tensor([[0],
        [1],
        [2],
        [3],
        [4],
        [5]])

In [0]:
c = b.squeeze()

In [0]:
c = b[:, None]
c

tensor([[[0]],

        [[1]],

        [[2]],

        [[3]],

        [[4]],

        [[5]]])

In [0]:
a = torch.arange(6).reshape(1, 2, 3)
a.shape

torch.Size([1, 2, 3])

In [0]:
b = a.transpose(0, 2)
b.shape

torch.Size([3, 2, 1])

In [0]:
b = a.permute(0, 2, 1)
b.shape

torch.Size([1, 3, 2])

In [14]:
# numpy、list、scalarへの変換
# 変換前
a = torch.arange(6).reshape(1, 2, 3)
a.shape

torch.Size([1, 2, 3])

In [20]:
# numpy.ndarrayへの変換
a.numpy()

array([[0, 1, 2],
       [3, 4, 5]])

In [16]:
# listへの変換
a.tolist()

[[[0, 1, 2], [3, 4, 5]]]

In [17]:
# scalarへの変換
a.sum().item()

15

**Q１. Tensor型の2行3列の行列を作成して下さい。1行目の値は、[0,1,2]、2行目の値は[3,4,5] とする。**

In [23]:
a = torch.arange(6).reshape(2, 3)
a

tensor([[0, 1, 2],
        [3, 4, 5]])

**Q２. Tensor型のデータをNumpy型に変換して下さい。**

In [21]:
a.numpy()

array([[0, 1, 2],
       [3, 4, 5]])

**Q３. Tensor型のデータをlist型に変換して下さい。**

In [24]:
a.tolist()

[[0, 1, 2], [3, 4, 5]]

**Q４. Tensor型のデータをscalar型に変換して下さい。**

In [25]:
a.sum().item()

15

# 演算

In [61]:
a = torch.rand((2, 3))
b = torch.rand((2, 3))
print(a)
print(b)

tensor([[0.5101, 0.8205, 0.1668],
        [0.1752, 0.8641, 0.7018]])
tensor([[0.8925, 0.2991, 0.4761],
        [0.7214, 0.2544, 0.7883]])


In [62]:
a + b

tensor([[1.4026, 1.1196, 0.6429],
        [0.8967, 1.1185, 1.4901]])

In [63]:
a - b

tensor([[-0.3825,  0.5213, -0.3094],
        [-0.5462,  0.6097, -0.0865]])

In [64]:
a * b

tensor([[0.4553, 0.2454, 0.0794],
        [0.1264, 0.2198, 0.5532]])

In [65]:
a / b

tensor([[0.5715, 2.7427, 0.3502],
        [0.2429, 3.3964, 0.8902]])

In [66]:
# log
torch.log(a)

tensor([[-0.6732, -0.1979, -1.7912],
        [-1.7417, -0.1461, -0.3542]])

In [67]:
# exp
torch.exp(a)

tensor([[1.6654, 2.2716, 1.1815],
        [1.1915, 2.3729, 2.0173]])

In [69]:
# ルート
torch.sqrt(a)

tensor([[0.7142, 0.9058, 0.4084],
        [0.4186, 0.9296, 0.8377]])

### 集約演算

In [70]:
a = torch.arange(10, dtype=torch.float32).reshape(2, 5)
print(a)

tensor([[0., 1., 2., 3., 4.],
        [5., 6., 7., 8., 9.]])


In [78]:
# 合計(dim0)
torch.sum(a, dim=0)  # a.sum(0)でも可

tensor([ 5.,  7.,  9., 11., 13.])

In [80]:
# 平均(dim1)
torch.mean(a, dim=1)

tensor([2., 7.])

In [81]:
# 分散(全体)
torch.var(a)    # dimを指定しない場合は全体に対して適用

tensor(9.1667)

In [82]:
# 標準偏差(dim -1, 最後の次元)
torch.std(a, dim=-1)   # dim=-1とすると最後の次元に対して適用

tensor([1.5811, 1.5811])

In [83]:
# 最大値、argmax(dim0)
torch.max(a, dim=0)  # torch.maxは, maxとargmaxの両方を返す(torch.minも同様)

torch.return_types.max(values=tensor([5., 6., 7., 8., 9.]), indices=tensor([1, 1, 1, 1, 1]))

In [84]:
# 最大値
torch.max(a)  # dimを指定しない場合はmax(or min)のみ返す

tensor(9.)

### 行列・テンソル積

In [58]:
a = torch.ones(4)
b = torch.ones(4)

c = torch.dot(a, b)
print(a)
print(c)

tensor([1., 1., 1., 1.])
tensor(4.)


In [60]:
a = torch.ones((2, 3))
b = torch.ones((3, 4))

c = torch.matmul(a, b)
print(a)
print(b)
print(c)

tensor([[1., 1., 1.],
        [1., 1., 1.]])
tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])
tensor([[3., 3., 3., 3.],
        [3., 3., 3., 3.]])


### 行列のノルム

In [85]:
a = torch.arange(3, dtype=torch.float)
print(a)
print(torch.norm(a, p=2))

tensor([0., 1., 2.])
tensor(2.2361)


**Ｑ１．次の行列A,Bの内積A・Ｂを Tensorを用いて計算してください。**

In [87]:
a = torch.arange(1, 5).reshape(2, 2)
b = torch.arange(5, 9).reshape(2, 2)

c = torch.matmul(a, b)
c

tensor([[19, 22],
        [43, 50]])

Ｑ２．次のy , tに関する二乗和誤差をTensor型を用いて求めて下さい。

y = [0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0]

t = [0, 0, 1, 0, 0, 0, 0, 0, 0, 0 ]

In [0]:
y = [0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0]
t = [0, 0, 1, 0, 0, 0, 0, 0, 0, 0 ]

In [0]:
y = torch.Tensor(y)
t = torch.Tensor(t)

In [12]:
loss = 0.5 * torch.sum((y-t)**2)
loss

tensor(0.0975)

# CPU／GPUの切替

In [2]:
a = torch.ones(1)
a

tensor([1.])

In [3]:
b = a.cuda()
b

tensor([1.], device='cuda:0')

In [4]:
b = a.to('cuda')
b

tensor([1.], device='cuda:0')

In [0]:
c = b.cpu()
c

tensor([1.])

In [0]:
c = b.to('cpu')
c

tensor([1.])

In [0]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
print(torch.ones(1, device=device))

cuda
tensor([1.], device='cuda:0')


**Q１. 以下のTensor型で宣言された変数aについて、GPUデバイスで操作できるように、定義した変数bを作成してください。**

　a = torch.ones((3, 3))

In [31]:
a = torch.ones((3, 3))
a

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])

In [32]:
b = a.cuda()
b

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], device='cuda:0')

**Q２. GPUデバイスで操作できるように定義した変数bにについて、CPUデバイスで操作できるように定義した変数cを作成してください。**

In [33]:
c = b.cpu()
c

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])

**Q３. 新規に定義する変数dについて、GPUデバイスでもCPUデバイスでも操作できるようにtorch.cuda.is_available()を用いて定義して下さい。**

In [35]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
d = torch.ones(1, device=device)
d

tensor([1.], device='cuda:0')

# Autograd

In [0]:
# 順伝播の計算
x = torch.randn(4, 4)
y = torch.randn(4, 1)

w = torch.randn(4, 1, requires_grad=True)
b = torch.randn(1, requires_grad=True)

y_pred = torch.matmul(x, w) + b

In [0]:
# 目的関数の定義
loss = (y_pred - y).pow(2).sum()

In [6]:
# ユーザが作成したTensorはgrad_fn=None
print(x.grad_fn)
print(y.grad_fn)
print(w.grad_fn)
print(b.grad_fn)
print()

None
None
None
None



In [7]:
# Functionによって計算されたTensorはgrad_fnを有する
print(y_pred.grad_fn)

<AddBackward0 object at 0x7f9aaa82d0f0>


In [8]:
# まだ勾配は計算されていない
print(x.grad)
print(y.grad)
print(w.grad)
print(b.grad)

None
None
None
None


In [0]:
# 逆伝播
loss.backward()

In [10]:
# requires_grad=Trueを指定した変数は勾配が計算されている
print(x.grad)
print(y.grad)
print(w.grad)
print(b.grad)

None
None
tensor([[ 0.0584],
        [ 0.0726],
        [ 0.8812],
        [-1.2759]])
tensor([6.1586])


In [12]:
# .detach()を使うことにより、Tensorの勾配計算を行わないようにすることも可能
x = torch.randn(4, 4)
y = torch.randn(4, 1)

w = torch.randn(4, 1, requires_grad=True)
b = torch.randn(1, requires_grad=True)
b = b.detach()  # bの勾配計算を停止

y_pred = torch.matmul(x, w) + b

loss = (y_pred - y).pow(2).sum()

loss.backward()

print(w.grad)  # 勾配を有する
print(b.grad)  # 勾配を有さない

tensor([[  4.7884],
        [ -5.4623],
        [ -9.8271],
        [-33.9275]])
None


In [13]:
# with torch.no_grad():でくくることで、その下で定義したTensorの勾配計算をまとめて停止させることが可能
with torch.no_grad():
    y_eval = torch.matmul(x, w) + b  # y_predと同様の計算を行う

print('requires_grad of y_pred:', y_pred.requires_grad)  # requires_grad=True
print('requires_grad of y_eval:', y_eval.requires_grad)  # requires_grad=False

requires_grad of y_pred: True
requires_grad of y_eval: False


# PyTorchのネットワーク構築

In [36]:
# 線形層
nn.Linear(input_dim, output_dim)

NameError: ignored

In [0]:
# 畳み込み層
nn.Conv1d(input_dim, output_dim, kernel_size)

In [37]:
# LSTM
nn.LSTM(input_dim, hidden_dim, num_layers)

NameError: ignored

## 活性化関数

In [0]:
import torch.nn.functional as F
torch.manual_seed(34)
x = torch.randn((2, 3))

In [39]:
# sigmoid
print(torch.sigmoid(x)) 

tensor([[0.8056, 0.4424, 0.5924],
        [0.3851, 0.3266, 0.5604]])


In [40]:
# relu()
print(F.relu(x))

tensor([[1.4219, 0.0000, 0.3739],
        [0.0000, 0.0000, 0.2429]])


In [41]:
# tanh()
print(torch.tanh(x))

tensor([[ 0.8900, -0.2274,  0.3574],
        [-0.4365, -0.6191,  0.2382]])


In [42]:
# Leakey_relu()
print(F.leaky_relu(x, 0.2))

tensor([[ 1.4219, -0.0463,  0.3739],
        [-0.0936, -0.1447,  0.2429]])


In [44]:
# softplus()
print(F.softplus(x))

tensor([[1.6380, 0.5841, 0.8975],
        [0.4863, 0.3954, 0.8219]])


In [45]:
# softmax()
print(F.softmax(x, dim=-1))

tensor([[0.6485, 0.1241, 0.2274],
        [0.2625, 0.2033, 0.5343]])


## 誤差関数

In [46]:
# L1誤差
nn.L1Loss()

L1Loss()

In [47]:
# 平均二乗誤差
nn.MSELoss()

MSELoss()

In [48]:
# 交差エントロピー誤差
nn.CrossEntropyLoss()

CrossEntropyLoss()

## モジュール化

In [0]:
class MLP(nn.Module):  # nn.Moduleを継承する
    def __init__(self, in_dim, hid_dim, out_dim):  # __init__をoverride
        super(MLP, self).__init__()
        self.linear1 = nn.Linear(in_dim, hid_dim)
        self.linear2 = nn.Linear(hid_dim, out_dim)
    
    def forward(self, x):  # forwardをoverride
        x = F.relu(self.linear1(x))
        x = torch.sigmoid(self.linear2(x))
        return x

In [50]:
mlp = MLP(2, 3, 1)
print(mlp)
print()

x = torch.Tensor([[0, 0], [0, 1], [1, 0], [1, 1]])
y = mlp(x) # forward(x)が呼ばれる
print("# feedforward：")
print(y)
print()

print("# mlp.parameters()でモデルのパラメータ取得：")
print(mlp.parameters())

MLP(
  (linear1): Linear(in_features=2, out_features=3, bias=True)
  (linear2): Linear(in_features=3, out_features=1, bias=True)
)

# feedforward：
tensor([[0.5169],
        [0.5112],
        [0.4913],
        [0.4899]], grad_fn=<SigmoidBackward>)

# mlp.parameters()でモデルのパラメータ取得：
<generator object Module.parameters at 0x7f9a5813f8e0>


## 最適化

In [51]:
# optimizerの定義
optimizer = optim.SGD([W1, W2], lr=0.1)
 
# 勾配のリセット
optimizer.zero_grad()
 
# パラメータの更新
optimizer.step()

NameError: ignored

# 学習

In [52]:
# XORをMLPで行う
x = torch.Tensor([[0, 0], [0, 1], [1, 0], [1, 1]])
t = torch.Tensor([0, 1, 1, 0])

# モデルの定義
mlp = MLP(2, 3, 1)

# 誤差関数の定義
criterion = nn.BCELoss()  # Binary Cross Entropy Loss

# 最適化の定義
optimizer = optim.SGD(mlp.parameters(), lr=0.1)  # Moduleのパラメータは.parameters()で取得できる

# モデルを訓練モードにする（Dropout等に関係）
mlp.train()

MLP(
  (linear1): Linear(in_features=2, out_features=3, bias=True)
  (linear2): Linear(in_features=3, out_features=1, bias=True)
)

In [53]:
for i in range(1000):
    # 順伝播
    y_pred = mlp(x)

    # 誤差の計算
    loss = criterion(y_pred, t.unsqueeze(1))
    
    # 逆伝播
    optimizer.zero_grad()
    loss.backward()
    
    # パラメータの更新
    optimizer.step()

    if i % 100 == 0:
        print(i, loss.item())

0 0.7523696422576904
100 0.6718162894248962
200 0.6125582456588745
300 0.5464031100273132
400 0.5096918344497681
500 0.4900974929332733
600 0.45896485447883606
700 0.3662274479866028
800 0.2544015645980835
900 0.1758209466934204


## モデルの保存・読み込み・再学習

In [54]:
print(list(mlp.parameters()))
print()

# state_dictの取得
state_dict = mlp.state_dict()
print(state_dict)

# モデルの保存
torch.save(state_dict, './model.pth')

[Parameter containing:
tensor([[-0.5302,  0.3653],
        [ 1.5943,  1.6213],
        [ 2.3035,  2.3063]], requires_grad=True), Parameter containing:
tensor([-3.6619e-01, -1.3849e-03, -2.2977e+00], requires_grad=True), Parameter containing:
tensor([[-0.2788,  2.2241, -3.8807]], requires_grad=True), Parameter containing:
tensor([-1.1215], requires_grad=True)]

OrderedDict([('linear1.weight', tensor([[-0.5302,  0.3653],
        [ 1.5943,  1.6213],
        [ 2.3035,  2.3063]])), ('linear1.bias', tensor([-3.6619e-01, -1.3849e-03, -2.2977e+00])), ('linear2.weight', tensor([[-0.2788,  2.2241, -3.8807]])), ('linear2.bias', tensor([-1.1215]))])


In [55]:
# モデルの定義
mlp2 = MLP(2, 3, 1)
print(list(mlp2.parameters()))  # ランダムな初期値
print()

# 学習済みパラメータの読み込み
state_dict = torch.load('./model.pth')
mlp2.load_state_dict(state_dict)
print(list(mlp2.parameters()))  # 学習済みパラメータ

[Parameter containing:
tensor([[-0.1315, -0.1273],
        [ 0.6624, -0.0621],
        [ 0.6638,  0.0511]], requires_grad=True), Parameter containing:
tensor([-0.0189, -0.1175,  0.2932], requires_grad=True), Parameter containing:
tensor([[-0.2391,  0.5006, -0.0370]], requires_grad=True), Parameter containing:
tensor([0.3289], requires_grad=True)]

[Parameter containing:
tensor([[-0.5302,  0.3653],
        [ 1.5943,  1.6213],
        [ 2.3035,  2.3063]], requires_grad=True), Parameter containing:
tensor([-3.6619e-01, -1.3849e-03, -2.2977e+00], requires_grad=True), Parameter containing:
tensor([[-0.2788,  2.2241, -3.8807]], requires_grad=True), Parameter containing:
tensor([-1.1215], requires_grad=True)]


# PyTorchを用いたMLPの実装

In [56]:
#  torchvision & DataLoader
from torchvision import transforms, datasets
 
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x.view(in_dim))
])
 
dataloader = torch.utils.data.DataLoader(
    datasets.MNIST('~/data/mnist', train=True, download=True, transform=transform),
    batch_size=batch_size,
    shuffle=True
)

  0%|          | 0/9912422 [00:00<?, ?it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to /root/data/mnist/MNIST/raw/train-images-idx3-ubyte.gz


9920512it [00:00, 28213255.78it/s]                            


Extracting /root/data/mnist/MNIST/raw/train-images-idx3-ubyte.gz to /root/data/mnist/MNIST/raw


32768it [00:00, 466334.22it/s]
  1%|          | 16384/1648877 [00:00<00:10, 150656.67it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to /root/data/mnist/MNIST/raw/train-labels-idx1-ubyte.gz
Extracting /root/data/mnist/MNIST/raw/train-labels-idx1-ubyte.gz to /root/data/mnist/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to /root/data/mnist/MNIST/raw/t10k-images-idx3-ubyte.gz


1654784it [00:00, 8021018.07it/s]                          
8192it [00:00, 191918.46it/s]


Extracting /root/data/mnist/MNIST/raw/t10k-images-idx3-ubyte.gz to /root/data/mnist/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to /root/data/mnist/MNIST/raw/t10k-labels-idx1-ubyte.gz
Extracting /root/data/mnist/MNIST/raw/t10k-labels-idx1-ubyte.gz to /root/data/mnist/MNIST/raw
Processing...
Done!


NameError: ignored

これまでのコードを参考・活用し、MNISTを実装するコードを作成して下さい。

In [0]:
from torchvision import datasets, transforms, models
from sklearn.metrics import f1_score

In [0]:
# 参考
in_dim  = 784
hid_dim = 200
out_dim = 10
lr = 0.001
batch_size = 32
n_epochs = 10

In [0]:
# 参考
for epoch in range(n_epochs):
    losses_train = []
    losses_valid = []
    preds_train = []
    preds_valid = []
    trues_train = []
    trues_valid = []
    
    mlp.train()
    for x, t in dataloader_train:
        true = t.tolist()
        trues_train.extend(true)

        # 勾配の初期化
        
        
        # テンソルをGPUに移動
        
        
        
        # 順伝播
        
        
        # 誤差の計算
        
        
        # 誤差の逆伝播
        
        
        # パラメータの更新
        
        
        # モデルの出力を予測値のスカラーに変換
        pred = y.argmax(1).tolist()
        preds_train.extend(pred)
        
        losses_train.append(loss.tolist())
    
    mlp.eval()
    for x, t in dataloader_valid:
        true = t.tolist()
        trues_valid.extend(true)

        # テンソルをGPUに移動
        
        
        
        # 順伝播
        

        # 誤差の計算
        
        
        # モデルの出力を予測値のスカラーに変換
        pred = y.argmax(1).tolist()
        preds_valid.extend(pred)
        
        losses_valid.append(loss.tolist())
        
    print('EPOCH: {}, Train [Loss: {:.3f}, F1: {:.3f}], Valid [Loss: {:.3f}, F1: {:.3f}]'.format(
        epoch,
        np.mean(losses_train),
        f1_score(trues_train, preds_train, average='macro'),
        np.mean(losses_valid),
        f1_score(trues_valid, preds_valid, average='macro')
    ))

In [0]:
class MLP(nn.Module):
    def __init__(self, in_dim, hid_dim, out_dim):
        super(MLP, self).__init__()
        self.linear1 = nn.Linear(in_dim, hid_dim)
        self.linear2 = nn.Linear(hid_dim, out_dim)
    
    def forward(self, x):
        x = F.relu(self.linear1(x))
        x = F.log_softmax(self.linear2(x), dim=-1)
        return x

In [0]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [0]:
in_dim  = 784
hid_dim = 200
out_dim = 10
lr = 0.001
batch_size = 32
n_epochs = 10

mlp = MLP(in_dim, hid_dim, out_dim).to(device)

optimizer = optim.SGD(mlp.parameters(), lr=lr)

criterion = nn.NLLLoss()  # Negative Log Liklihood Loss

In [0]:
# 前処理を定義
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x.view(in_dim))
])

# torchvisionのdatasetsを使ってMNISTのデータを取得
# ミニバッチ化や前処理などの処理を行ってくれるDataLoaderを定義
dataloader_train = torch.utils.data.DataLoader(
    datasets.MNIST('./data/mnist', train=True, download=True, transform=transform),
    batch_size=batch_size,
    shuffle=True
)

dataloader_valid = torch.utils.data.DataLoader(
    datasets.MNIST('./data/mnist', train=False, download=True, transform=transform),
    batch_size=batch_size,
    shuffle=False
)

0it [00:00, ?it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/mnist/MNIST/raw/train-images-idx3-ubyte.gz


9920512it [00:01, 8642707.92it/s]                            


Extracting ./data/mnist/MNIST/raw/train-images-idx3-ubyte.gz to ./data/mnist/MNIST/raw


  0%|          | 0/28881 [00:00<?, ?it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/mnist/MNIST/raw/train-labels-idx1-ubyte.gz


32768it [00:00, 130351.61it/s]           
  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./data/mnist/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/mnist/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/mnist/MNIST/raw/t10k-images-idx3-ubyte.gz


1654784it [00:00, 2140051.74it/s]                            
0it [00:00, ?it/s]

Extracting ./data/mnist/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/mnist/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/mnist/MNIST/raw/t10k-labels-idx1-ubyte.gz


8192it [00:00, 50004.71it/s]            


Extracting ./data/mnist/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/mnist/MNIST/raw
Processing...
Done!


In [0]:
for epoch in range(n_epochs):
    losses_train = []
    losses_valid = []
    preds_train = []
    preds_valid = []
    trues_train = []
    trues_valid = []
    
    mlp.train()
    for x, t in dataloader_train:
        true = t.tolist()
        trues_train.extend(true)

        # 勾配の初期化
        mlp.zero_grad()
        
        # テンソルをGPUに移動
        x = x.to(device)
        t = t.to(device)
        
        # 順伝播
        y = mlp.forward(x)
        
        # 誤差の計算
        loss = criterion(y, t)
        
        # 誤差の逆伝播
        loss.backward()
        
        # パラメータの更新
        optimizer.step()
        
        # モデルの出力を予測値のスカラーに変換
        pred = y.argmax(1).tolist()
        preds_train.extend(pred)
        
        losses_train.append(loss.tolist())
    
    mlp.eval()
    for x, t in dataloader_valid:
        true = t.tolist()
        trues_valid.extend(true)

        # テンソルをGPUに移動
        x = x.to(device)
        t = t.to(device)
        
        # 順伝播
        y = mlp.forward(x)

        # 誤差の計算
        loss = criterion(y, t)
        
        # モデルの出力を予測値のスカラーに変換
        pred = y.argmax(1).tolist()
        preds_valid.extend(pred)
        
        losses_valid.append(loss.tolist())
        
    print('EPOCH: {}, Train [Loss: {:.3f}, F1: {:.3f}], Valid [Loss: {:.3f}, F1: {:.3f}]'.format(
        epoch,
        np.mean(losses_train),
        f1_score(trues_train, preds_train, average='macro'),
        np.mean(losses_valid),
        f1_score(trues_valid, preds_valid, average='macro')
    ))

EPOCH: 0, Train [Loss: 2.104, F1: 0.474], Valid [Loss: 1.848, F1: 0.671]
EPOCH: 1, Train [Loss: 1.557, F1: 0.725], Valid [Loss: 1.244, F1: 0.779]
EPOCH: 2, Train [Loss: 1.055, F1: 0.794], Valid [Loss: 0.866, F1: 0.825]
EPOCH: 3, Train [Loss: 0.789, F1: 0.829], Valid [Loss: 0.683, F1: 0.849]
EPOCH: 4, Train [Loss: 0.651, F1: 0.848], Valid [Loss: 0.582, F1: 0.863]
EPOCH: 5, Train [Loss: 0.571, F1: 0.860], Valid [Loss: 0.518, F1: 0.874]
EPOCH: 6, Train [Loss: 0.518, F1: 0.869], Valid [Loss: 0.474, F1: 0.882]
EPOCH: 7, Train [Loss: 0.480, F1: 0.876], Valid [Loss: 0.443, F1: 0.887]
EPOCH: 8, Train [Loss: 0.452, F1: 0.881], Valid [Loss: 0.419, F1: 0.891]
EPOCH: 9, Train [Loss: 0.431, F1: 0.884], Valid [Loss: 0.400, F1: 0.892]
