In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot  as plt
from sklearn.metrics import accuracy_score
import torch
import torch.nn as nn
import torch.optim as optimizers
from torch.utils.data import DataLoader
from torchvision import datasets
from torch.utils.data import random_split
from callbacks import EarlyStopping
import torchvision.transforms as transforms

# Seed NumPy's and PyTorch's global random generators with the same value.
SEED = 123
np.random.seed(SEED)
torch.manual_seed(SEED)

<torch._C.Generator at 0x1ebc8feced0>

In [None]:
# Prepare the data
# NOTE(review): hard-coded, machine-specific location -- adjust for your environment.
path = "D:Statistics/data/deep_leraning/"
root = os.path.join(path, "torch", "mnist")
# ToTensor converts each image to a tensor; the lambda then flattens it to 1-D
# so it can be fed straight into the fully connected network.
transform = transforms.Compose([transforms.ToTensor(), lambda x: x.view(-1)])
mnist_train = datasets.MNIST(root=root, download=True, train=True, transform=transform)
mnist_test = datasets.MNIST(root=root, download=True, train=False, transform=transform)

# Hold out 20% of the training set for validation.
n_samples = len(mnist_train)
n_train = int(n_samples * 0.8)
n_val = n_samples - n_train
# The lengths must be an ordered sequence. A set has no defined order (the two
# sizes could be swapped) and would collapse to one element if both were equal.
mnist_train, mnist_val = random_split(mnist_train, [n_train, n_val])

# Mini-batch loaders; the test loader keeps the original sample order.
train_dataloader = DataLoader(mnist_train, batch_size=100, shuffle=True)
val_dataloader = DataLoader(mnist_val, batch_size=100, shuffle=True)
test_dataloader = DataLoader(mnist_test, batch_size=100, shuffle=False)

In [None]:
# Define the DNN
class DNN(nn.Module):
    """Fully connected classifier with three hidden blocks and a linear head.

    Each hidden block is Linear -> BatchNorm1d -> ReLU -> Dropout; the final
    layer is a plain Linear that returns unnormalised scores.

    Args:
        input_dim: number of input features per sample.
        hidden_dim: width shared by all three hidden blocks.
        output_dim: number of output scores per sample.
        rate: dropout probability used in every Dropout layer.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, rate):
        super().__init__()

        # Hidden block 1
        self.l1 = nn.Linear(in_features=input_dim, out_features=hidden_dim)
        self.b1 = nn.BatchNorm1d(num_features=hidden_dim)
        self.a1 = nn.ReLU()
        self.d1 = nn.Dropout(p=rate)

        # Hidden block 2
        self.l2 = nn.Linear(in_features=hidden_dim, out_features=hidden_dim)
        self.b2 = nn.BatchNorm1d(num_features=hidden_dim)
        self.a2 = nn.ReLU()
        self.d2 = nn.Dropout(p=rate)

        # Hidden block 3
        self.l3 = nn.Linear(in_features=hidden_dim, out_features=hidden_dim)
        self.b3 = nn.BatchNorm1d(num_features=hidden_dim)
        self.a3 = nn.ReLU()
        self.d3 = nn.Dropout(p=rate)

        # Output layer
        self.l4 = nn.Linear(in_features=hidden_dim, out_features=output_dim)

        # Plain Python list giving the order of application in forward().
        # The modules are registered through the attribute assignments above.
        first_block = [self.l1, self.b1, self.a1, self.d1]
        second_block = [self.l2, self.b2, self.a2, self.d2]
        third_block = [self.l3, self.b3, self.a3, self.d3]
        self.layers = [*first_block, *second_block, *third_block, self.l4]

    def forward(self, x):
        """Apply every entry of `self.layers` in order and return the result."""
        out = x
        for module in self.layers:
            out = module(out)
        return out

In [None]:
# Model training
# Define the algorithm
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# train_dataloader.dataset is the subset produced by random_split; its own
# .dataset attribute is the underlying full MNIST training set.
classes = np.unique(train_dataloader.dataset.dataset.classes).shape[0]   # number of distinct class labels
# Number of elements per sample after flattening, as a plain Python int
# rather than a NumPy scalar.
m = int(np.prod((train_dataloader.dataset.dataset.data).shape[1:]))
# Dropout probability. It has to be bound before DNN(...) is called below;
# otherwise this cell raises NameError when run on a fresh kernel.
rate = 0.5
model = DNN(m, 200, classes, rate).to(device)
criterion = nn.CrossEntropyLoss()

# Define the optimizers
# Every optimizer below is bound to the parameters of the `model` built above.
# If `model` is rebuilt later, its optimizer must be rebuilt from the new
# parameters as well.
optimizer_sgd = optimizers.SGD(model.parameters(), lr=0.01)   # stochastic gradient descent
optimizer_momemtum = optimizers.SGD(model.parameters(), lr=0.01, momentum=0.9)   # momentum
optimizer_nesterov = optimizers.SGD(model.parameters(), lr=0.01, momentum=0.9, nesterov=True)   # Nesterov momentum
optimizer_adagrad = optimizers.Adagrad(model.parameters(), lr=0.01)   # Adagrad
optimizer_rms = optimizers.RMSprop(model.parameters(), lr=0.01)   # RMSprop
optimizer_adadelta = optimizers.Adadelta(model.parameters(), rho=0.95)   # Adadelta
optimizer_adam = optimizers.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999))   # Adam
optimizer_amsgrad = optimizers.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999), amsgrad=True)   # AMSGrad

def compute_loss(t, y):
    """Return the loss of predictions `y` against targets `t`.

    Thin wrapper around the module-level `criterion`. The argument order here
    is (targets, predictions), whereas the criterion is called as
    criterion(predictions, targets).
    """
    loss = criterion(y, t)
    return loss

def train_step(x, t, optimizer):
    """Run one optimisation step of the module-level `model` on a mini-batch.

    Args:
        x: input batch passed to `model`.
        t: targets passed to `compute_loss`.
        optimizer: optimizer whose `zero_grad()` and `step()` are invoked.

    Returns:
        Tuple `(loss, preds)`: the loss returned by `compute_loss` and the raw
        model outputs for this batch.
    """
    model.train()           # switch the model to training mode
    optimizer.zero_grad()   # clear gradients left over from the previous step
    outputs = model(x)
    batch_loss = compute_loss(t, outputs)
    batch_loss.backward()
    optimizer.step()
    return batch_loss, outputs

def val_step(x, t):
    """Evaluate the module-level `model` on one mini-batch without updating it.

    Args:
        x: input batch passed to `model`.
        t: targets passed to `criterion`.

    Returns:
        Tuple `(loss, preds)`: the criterion value and the raw model outputs.
        Both are computed with gradient recording disabled.
    """
    model.eval()   # switch the model to evaluation mode
    # No backward pass follows, so skip autograd bookkeeping; this avoids
    # building a graph for every validation batch.
    with torch.no_grad():
        preds = model(x)
        loss = criterion(preds, t)
    return loss, preds

# Algorithm settings
epochs = 30   # number of epochs the training loop iterates over
disp = 5      # metrics are printed when epoch % disp == 0
rate = 0.5    # dropout probability handed to DNN
es = EarlyStopping(patience=5, verbose=1)   # early-stopping helper from the local `callbacks` module

In [None]:
# Train the model with Adam
# Build a fresh network together with the optimizer bound to its parameters
model = DNN(m, 200, classes, rate).to(device)
optimizer_adam = optimizers.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999))   # Adam
# Per-epoch history of the averaged metrics
hist_adam = {key: [] for key in ("train_loss", "train_accuracy", "val_loss", "val_accuracy")}

# Estimate the parameters
# NOTE(review): the `es` early-stopping object is not consulted in this loop,
# so every one of the `epochs` epochs is run.
for epoch in range(epochs):
    # Running sums over the mini-batches of this epoch
    train_loss = train_acc = 0.0
    val_loss = val_acc = 0.0

    # Training pass
    for x, t in train_dataloader:
        x = x.to(device)
        t = t.to(device)
        loss, preds = train_step(x, t, optimizer_adam)
        train_loss += loss.item()
        train_acc += accuracy_score(t.tolist(), preds.argmax(dim=-1).tolist())

    # Turn the sums into means of the per-batch values and record them
    train_loss /= len(train_dataloader)
    train_acc /= len(train_dataloader)
    hist_adam["train_loss"].append(train_loss)
    hist_adam["train_accuracy"].append(train_acc)

    # Validation pass
    for x, t in val_dataloader:
        x = x.to(device)
        t = t.to(device)
        loss, preds = val_step(x, t)
        val_loss += loss.item()
        val_acc += accuracy_score(t.tolist(), preds.argmax(dim=-1).tolist())

    val_loss /= len(val_dataloader)
    val_acc /= len(val_dataloader)
    hist_adam["val_loss"].append(val_loss)
    hist_adam["val_accuracy"].append(val_acc)

    # Report progress every `disp` epochs (epoch 0 included)
    if epoch % disp == 0:
        print(
            "epoch: {}, loss: {:.3}, acc: {:.3f}, val_loss: {:.3}, val_acc: {:.3f}".format(
                epoch, train_loss, train_acc, val_loss, val_acc
            )
        )