# fashion_mnist데이터로 MLP구현 \<Torch>
- nn.NLLLoss() 와 nn.CrossEntropyLoss()의 차이
- torch.nn.CrossEntropyLoss는 softmax + NLLLoss이다.
    - 즉 , 모델의 출력층에 softmax()가 없는 모델에 사용해야한다.
- 만약 지금처럼 model의 출력층에 LogSoftmax()가 있다면 NLLLoss()를 사용해야한다.

In [90]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from PIL import Image
from tensorflow import keras
from torch.utils.data import DataLoader, TensorDataset
%matplotlib inline


# 데이터셋 적재
fashion_mnist = keras.datasets.fashion_mnist
(X_train_full , y_train_full) , (X_test , y_test) = fashion_mnist.load_data()

# 정규화 값을 0 ~ 1사이로 맞추기
X_valid , X_train = X_train_full[:5000]/255.0 , X_train_full[5000:]/255.0
y_valid , y_train = y_train_full[:5000] , y_train_full[5000:]
X_test = X_test/255.0

X_valid , X_train = torch.tensor(X_valid,dtype=torch.float32) , torch.tensor(X_train,dtype=torch.float32)
y_valid , y_train = torch.tensor(y_valid) , torch.tensor(y_train)
X_test , y_test = torch.tensor(X_test,dtype=torch.float32) , torch.tensor(y_test)

# 클래스 이름 리스트 정의
class_names = ["T-shirt","Trouser","Pullover","Dress","Coat","Sandal","Shirt","Sneaker","Bag","Ankle Boot"]

두개의 은닉층으로 이루어진 MLP
    - 텐서플로의 Dense는 토치의 Linear와 동일
        - 단 Dense의 use_bias=True (default임)가 옵션이지만 Linear은 bias가 포함되어있음

In [98]:
'''
이 방법은 계산 그래프를 생성하지 않고 순전파에서 모든 계산을 수행함
따라서 파이토치의 자동미분 backward()를 수행할 때 참고할 계산 그래프가 없으므로 backward()를 사용할 수 없다.
따라서 이방법은 사용하지 않는다!
즉 , forward()에서 필요한 layer들은 미리 __init__에 선언해놓자!!!!!
class Mlp_Net(nn.module):
    def __init__(self) -> None:
        super(Mlp_Net,self).__init__()

    def forward(self,x):
        x = nn.ReLU(nn.Linear(28*28,300)(x.view(-1,28*28)))
        x = nn.ReLU(nn.Linear(300,100)(x))
        x = nn.Softmax(x,dim=1)
        return x
'''
class Mlp_Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.layer1 = nn.Linear(28*28,300) # 입력 -> 출력 차원
        self.layer2 = nn.Linear(300,100)
        self.layer3 = nn.Linear(100,10)
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)
    
    def forward(self,x):
        x = self.flatten(x)
        x = self.relu(self.layer1(x))
        x = self.relu(self.layer2(x))
        x = self.relu(self.layer3(x))
        x = self.softmax(x)
        return x


In [99]:
model = Mlp_Net()
print(model)

Mlp_Net(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (layer1): Linear(in_features=784, out_features=300, bias=True)
  (layer2): Linear(in_features=300, out_features=100, bias=True)
  (layer3): Linear(in_features=100, out_features=10, bias=True)
  (relu): ReLU()
  (softmax): Softmax(dim=1)
)


In [100]:
for name , param in model.named_parameters():
    print(name , param.shape,param.dtype)

layer1.weight torch.Size([300, 784]) torch.float32
layer1.bias torch.Size([300]) torch.float32
layer2.weight torch.Size([100, 300]) torch.float32
layer2.bias torch.Size([100]) torch.float32
layer3.weight torch.Size([10, 100]) torch.float32
layer3.bias torch.Size([10]) torch.float32


In [101]:
# 파라미터에 직접 접근하기
# 그래프에 직접 접근은 지양해야하며 .data로 접근할 수 있다.
'''
print(model.layer1.weight.shape,model.layer2.bias.shape)
print(model.layer1.weight[0,0])
model.layer1.weight.data[0,0] = 0
print(model.layer1.weight[0,0])
'''

'\nprint(model.layer1.weight.shape,model.layer2.bias.shape)\nprint(model.layer1.weight[0,0])\nmodel.layer1.weight.data[0,0] = 0\nprint(model.layer1.weight[0,0])\n'

In [102]:
# 데이터로더
batch_size = 32
train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(TensorDataset(X_valid, y_valid), batch_size=batch_size, shuffle=True)

In [105]:
# compile
loss_fn = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)
def acc(x,y):
    cnt = 0
    for a,b in zip(x,y):
        if a==b:
            cnt += 1
    return cnt

def train(model, train_loader, optimizer, metric):
    model.train()
    for image, label in train_loader:
        output = model(image)
        optimizer.zero_grad()
        loss = loss_fn(output, label)
        loss.backward()
        optimizer.step()

def evaluate(model, test_loader):
    model.eval()
    test_loss = 0
    correct = 0

    with torch.no_grad(): # 파라미터 업데이트 방지
        for image, label in test_loader:
            image = image
            label = label
            output = model(image)
            test_loss += loss_fn(output, label).item()
            prediction = output.max(1, keepdim=True)[1]
            correct += prediction.eq(label.view_as(prediction)).sum().item()
    
    test_loss /= len(test_loader.dataset)
    test_accuracy = 100. * correct / len(test_loader.dataset)
    return test_loss, test_accuracy

In [107]:
# 학습
from tqdm import tqdm

epoch = 40
for Epoch in tqdm(range(1+epoch)):
    train(model, train_loader, optimizer, acc)
    test_loss, test_accuracy = evaluate(model, valid_loader)
    print(f'epoch : {epoch} val_loss : {test_loss} val_acc : {test_accuracy}')

  2%|▏         | 1/41 [00:02<01:49,  2.73s/it]

epoch : 40 val_loss : -0.003747050677239895 val_acc : 41.9


  5%|▍         | 2/41 [00:05<01:43,  2.65s/it]

epoch : 40 val_loss : -0.012005953964591027 val_acc : 47.12


  7%|▋         | 3/41 [00:07<01:40,  2.64s/it]

epoch : 40 val_loss : -0.014598639488220214 val_acc : 47.48


 10%|▉         | 4/41 [00:10<01:37,  2.63s/it]

epoch : 40 val_loss : -0.015745650386810302 val_acc : 53.94


 12%|█▏        | 5/41 [00:13<01:34,  2.63s/it]

epoch : 40 val_loss : -0.016642165166139603 val_acc : 55.34


 15%|█▍        | 6/41 [00:15<01:31,  2.62s/it]

epoch : 40 val_loss : -0.017077452802658082 val_acc : 56.06


 17%|█▋        | 7/41 [00:18<01:28,  2.62s/it]

epoch : 40 val_loss : -0.017341858434677122 val_acc : 56.48


 20%|█▉        | 8/41 [00:20<01:26,  2.61s/it]

epoch : 40 val_loss : -0.017465568751096724 val_acc : 56.66


 22%|██▏       | 9/41 [00:23<01:23,  2.61s/it]

epoch : 40 val_loss : -0.017554527819156646 val_acc : 56.86


 24%|██▍       | 10/41 [00:26<01:20,  2.61s/it]

epoch : 40 val_loss : -0.01765144989490509 val_acc : 56.8


 27%|██▋       | 11/41 [00:28<01:18,  2.61s/it]

epoch : 40 val_loss : -0.01766821229457855 val_acc : 56.98


 29%|██▉       | 12/41 [00:31<01:15,  2.62s/it]

epoch : 40 val_loss : -0.017724383062124253 val_acc : 57.06


 32%|███▏      | 13/41 [00:34<01:12,  2.60s/it]

epoch : 40 val_loss : -0.01776611030101776 val_acc : 57.08


 34%|███▍      | 14/41 [00:36<01:10,  2.63s/it]

epoch : 40 val_loss : -0.017812043064832687 val_acc : 57.14


 37%|███▋      | 15/41 [00:39<01:08,  2.62s/it]

epoch : 40 val_loss : -0.01779782167673111 val_acc : 57.16


 39%|███▉      | 16/41 [00:41<01:05,  2.63s/it]

epoch : 40 val_loss : -0.017866564816236497 val_acc : 57.06


 41%|████▏     | 17/41 [00:44<01:02,  2.62s/it]

epoch : 40 val_loss : -0.017849369859695433 val_acc : 57.26


 44%|████▍     | 18/41 [00:47<01:00,  2.63s/it]

epoch : 40 val_loss : -0.01787747374176979 val_acc : 57.34


 46%|████▋     | 19/41 [00:49<00:57,  2.62s/it]

epoch : 40 val_loss : -0.017893722343444825 val_acc : 57.34


 49%|████▉     | 20/41 [00:52<00:55,  2.63s/it]

epoch : 40 val_loss : -0.01797194255590439 val_acc : 57.38


 51%|█████     | 21/41 [00:55<00:52,  2.61s/it]

epoch : 40 val_loss : -0.01795368294119835 val_acc : 57.38


 54%|█████▎    | 22/41 [00:57<00:49,  2.63s/it]

epoch : 40 val_loss : -0.017927207136154175 val_acc : 57.38


 56%|█████▌    | 23/41 [01:00<00:47,  2.62s/it]

epoch : 40 val_loss : -0.017922727411985397 val_acc : 57.44


 59%|█████▊    | 24/41 [01:02<00:44,  2.62s/it]

epoch : 40 val_loss : -0.017957025736570358 val_acc : 57.4


 61%|██████    | 25/41 [01:05<00:41,  2.62s/it]

epoch : 40 val_loss : -0.017962166011333467 val_acc : 57.48


 63%|██████▎   | 26/41 [01:08<00:39,  2.63s/it]

epoch : 40 val_loss : -0.017932823574543 val_acc : 57.48


 66%|██████▌   | 27/41 [01:10<00:36,  2.63s/it]

epoch : 40 val_loss : -0.017999548971652984 val_acc : 57.52


 68%|██████▊   | 28/41 [01:13<00:34,  2.64s/it]

epoch : 40 val_loss : -0.017942177510261535 val_acc : 57.48


 71%|███████   | 29/41 [01:16<00:31,  2.62s/it]

epoch : 40 val_loss : -0.018008049458265306 val_acc : 57.52


 73%|███████▎  | 30/41 [01:18<00:28,  2.63s/it]

epoch : 40 val_loss : -0.017985673123598098 val_acc : 57.46


 76%|███████▌  | 31/41 [01:21<00:26,  2.62s/it]

epoch : 40 val_loss : -0.017996062034368515 val_acc : 57.54


 78%|███████▊  | 32/41 [01:23<00:23,  2.63s/it]

epoch : 40 val_loss : -0.018017191165685655 val_acc : 57.54


 80%|████████  | 33/41 [01:26<00:20,  2.62s/it]

epoch : 40 val_loss : -0.017982900536060335 val_acc : 57.6


 83%|████████▎ | 34/41 [01:29<00:18,  2.65s/it]

epoch : 40 val_loss : -0.017995930689573288 val_acc : 57.6


 85%|████████▌ | 35/41 [01:31<00:15,  2.64s/it]

epoch : 40 val_loss : -0.017996872216463088 val_acc : 57.52


 88%|████████▊ | 36/41 [01:34<00:13,  2.64s/it]

epoch : 40 val_loss : -0.01798123550415039 val_acc : 57.6


 90%|█████████ | 37/41 [01:37<00:10,  2.63s/it]

epoch : 40 val_loss : -0.018054481214284896 val_acc : 57.62


 93%|█████████▎| 38/41 [01:39<00:07,  2.64s/it]

epoch : 40 val_loss : -0.018039374619722368 val_acc : 57.46


 95%|█████████▌| 39/41 [01:42<00:05,  2.63s/it]

epoch : 40 val_loss : -0.017999202811717987 val_acc : 57.48


 98%|█████████▊| 40/41 [01:45<00:02,  2.64s/it]

epoch : 40 val_loss : -0.01800446752309799 val_acc : 57.52


100%|██████████| 41/41 [01:47<00:00,  2.63s/it]

epoch : 40 val_loss : -0.017981407716870308 val_acc : 57.66





In [33]:
epochs =30
criterion = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(),lr=0.001)
for e in range(epochs):
  running_loss = 0
  for images, labels in train_loader:
    
    #Flatten the image into a 784 long vector
    
    images = images.view(images.shape[0],-1) #sqash the image in to 784*1 vector
    
    #reset the default gradients
    optimizer.zero_grad()
    
    # forward pass
    output = model(images)
    loss = criterion(output,labels)
    
    #backward pass calculate the gradients for loss
    loss.backward()
    
    # update the parameters
    optimizer.step()
    
    running_loss = running_loss+loss.item()
  else:
    print(f"Training loss: {running_loss/len(train_loader)}")

Training loss: -0.37929072535318853
Training loss: -0.3793148089973922
Training loss: -0.3793513963585194
Training loss: -0.3793505176060972
Training loss: -0.37938417107681677
Training loss: -0.3794169863463835
Training loss: -0.37938999119385375
Training loss: -0.37939091448072365
Training loss: -0.37943798652944905
Training loss: -0.379438490347331
Training loss: -0.3794707592756614
Training loss: -0.3794746707871322
Training loss: -0.37951438854502134


KeyboardInterrupt: 

# 안되서 긁어오기

In [197]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [198]:
import torch
from torchvision import datasets, transforms

transform = transforms.Compose([transforms.ToTensor()])
trainset = datasets.FashionMNIST('./data',download=True, train= True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size= 64, shuffle=True)

# Download and load test data
testset = datasets.FashionMNIST('./data',download=True, train= False, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size= 64, shuffle=True)

In [199]:
from torch import nn
from torch.nn import NLLLoss
from torch.optim import SGD

model = nn.Sequential(
nn.Flatten(),
nn.Linear(28*28,300),
nn.ReLU(),
nn.Linear(300,100),
nn.ReLU(),
nn.Linear(100,10)
)

model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = SGD(model.parameters(),lr=0.01)

In [200]:
print(model)

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=300, bias=True)
  (2): ReLU()
  (3): Linear(in_features=300, out_features=100, bias=True)
  (4): ReLU()
  (5): Linear(in_features=100, out_features=10, bias=True)
)


In [201]:
epochs =40

for e in range(epochs):
  total_loss , total_acc ,tot = 0 , 0 ,0
  for images, labels in trainloader:
    images, labels = images.to(device), labels.to(device)
    
    output = model(images)
    loss = criterion(output,labels)
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    pred = torch.max(output, 1)[1].to(device)
    total_loss += loss.item() * len(labels)
    total_acc += (pred == labels).sum()
  else:
    print(f"{e+1}epoch Train_loss: {total_loss/len(trainloader.dataset):.2f} Train_acc: {100*total_acc/len(trainloader.dataset):.2f}%",end='|')


  total_loss , total_acc = 0 , 0
  with torch.no_grad():
    for images, labels in testloader:
      images, labels = images.to(device), labels.to(device)
      output = model(images)
      loss = criterion(output,labels)

      pred = torch.max(output, 1)[1].to(device)
      total_loss += loss.item() * len(labels)
      total_acc += (pred == labels).sum()
    else:
      print(f"Test_loss: {total_loss/len(testloader.dataset):.2f} Test_acc: {100*total_acc/len(testloader.dataset):.2f}%")

1epoch Train_loss: 1.43 Train_acc: 53.69%|Test_loss: 0.85 Test_acc: 68.56%
2epoch Train_loss: 0.73 Train_acc: 73.69%|Test_loss: 0.67 Test_acc: 76.39%
3epoch Train_loss: 0.60 Train_acc: 78.79%|Test_loss: 0.59 Test_acc: 79.46%
4epoch Train_loss: 0.54 Train_acc: 81.26%|Test_loss: 0.54 Test_acc: 80.93%
5epoch Train_loss: 0.50 Train_acc: 82.52%|Test_loss: 0.51 Test_acc: 81.92%
6epoch Train_loss: 0.48 Train_acc: 83.22%|Test_loss: 0.52 Test_acc: 80.41%
7epoch Train_loss: 0.46 Train_acc: 83.81%|Test_loss: 0.48 Test_acc: 83.08%
8epoch Train_loss: 0.45 Train_acc: 84.35%|Test_loss: 0.48 Test_acc: 83.07%
9epoch Train_loss: 0.43 Train_acc: 84.82%|Test_loss: 0.47 Test_acc: 83.51%
10epoch Train_loss: 0.42 Train_acc: 85.24%|Test_loss: 0.46 Test_acc: 83.57%
11epoch Train_loss: 0.42 Train_acc: 85.58%|Test_loss: 0.45 Test_acc: 83.92%
12epoch Train_loss: 0.41 Train_acc: 85.75%|Test_loss: 0.47 Test_acc: 83.72%
13epoch Train_loss: 0.40 Train_acc: 86.11%|Test_loss: 0.44 Test_acc: 84.33%
14epoch Train_loss: 0