# fashion_mnist데이터로 MLP구현 \<Torch>
- nn.NLLLoss() 와 nn.CrossEntropyLoss()의 차이
- torch.nn.CrossEntropyLoss는 softmax + NLLLoss이다.
    - 즉 , 모델의 출력층에 softmax()가 없는 모델에 사용해야한다.
- 만약 지금처럼 model의 출력층에 softmax()가 있다면 NLLLoss()를 사용해야한다.

In [23]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from PIL import Image
from tensorflow import keras
from torch.utils.data import DataLoader, TensorDataset
%matplotlib inline


# 데이터셋 적재
fashion_mnist = keras.datasets.fashion_mnist
(X_train_full , y_train_full) , (X_test , y_test) = fashion_mnist.load_data()

# 정규화 값을 0 ~ 1사이로 맞추기
X_valid , X_train = X_train_full[:5000]/255.0 , X_train_full[5000:]/255.0
y_valid , y_train = y_train_full[:5000] , y_train_full[5000:]
X_test = X_test/255.0

# 클래스 이름 리스트 정의
class_names = ["T-shirt","Trouser","Pullover","Dress","Coat","Sandal","Shirt","Sneaker","Bag","Ankle Boot"]

두개의 은닉층으로 이루어진 MLP
    - 텐서플로의 Dense는 토치의 Linear와 동일
        - 단 Dense의 use_bias=True (default임)가 옵션이지만 Linear은 bias가 포함되어있음

In [24]:
'''
이 방법은 계산 그래프를 생성하지 않고 순전파에서 모든 계산을 수행함
따라서 파이토치의 자동미분 backward()를 수행할 때 참고할 계산 그래프가 없으므로 backward()를 사용할 수 없다.
따라서 이방법은 사용하지 않는다!
즉 , forward()에서 필요한 layer들은 미리 __init__에 선언해놓자!!!!!
class Mlp_Net(nn.module):
    def __init__(self) -> None:
        super(Mlp_Net,self).__init__()

    def forward(self,x):
        x = nn.ReLU(nn.Linear(28*28,300)(x.view(-1,28*28)))
        x = nn.ReLU(nn.Linear(300,100)(x))
        x = nn.Softmax(x,dim=1)
        return x
'''
class Mlp_Net(nn.Module):
    def __init__(self) -> None:
        super(Mlp_Net,self).__init__()
        self.flatten = nn.Flatten()
        self.layer1 = nn.Linear(28*28,300) # 입력 -> 출력 차원
        self.layer2 = nn.Linear(300,100)
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)
    
    def forward(self,x):
        x = self.flatten(x)
        x = self.relu(self.layer1(x))
        x = self.relu(self.layer2(x))
        x = self.softmax(x)
        return x


In [25]:
import torch.nn.init as init

def weight_init(m):
    if isinstance(m, nn.Linear):
        init.kaiming_uniform_(m.weight.data)

model = Mlp_Net()
model.apply(weight_init)
print(model)

Mlp_Net(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (layer1): Linear(in_features=784, out_features=300, bias=True)
  (layer2): Linear(in_features=300, out_features=100, bias=True)
  (relu): ReLU()
  (softmax): Softmax(dim=1)
)


In [26]:
for name , param in model.named_parameters():
    print(name , param.shape)

layer1.weight torch.Size([300, 784])
layer1.bias torch.Size([300])
layer2.weight torch.Size([100, 300])
layer2.bias torch.Size([100])


In [22]:
# 파라미터에 직접 접근하기
# 그래프에 직접 접근은 지양해야하며 .data로 접근할 수 있다.
'''
print(model.layer1.weight.shape,model.layer2.bias.shape)
print(model.layer1.weight[0,0])
model.layer1.weight.data[0,0] = 0
print(model.layer1.weight[0,0])
'''

torch.Size([300, 784]) torch.Size([100])
tensor(-0.0080, grad_fn=<SelectBackward0>)
tensor(0., grad_fn=<SelectBackward0>)


In [29]:
# 데이터로더
batch_size = 32
X_train , y_train = torch.tensor(X_train,dtype=torch.float32,requires_grad=False) , torch.tensor(y_train,requires_grad=False)
X_valid , y_valid = torch.tensor(X_valid,dtype=torch.float32,requires_grad=False) , torch.tensor(y_valid,requires_grad=False)
train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(TensorDataset(X_valid, y_valid), batch_size=batch_size, shuffle=True)

  X_train , y_train = torch.tensor(X_train,dtype=torch.float32,requires_grad=False) , torch.tensor(y_train,requires_grad=False)
  X_valid , y_valid = torch.tensor(X_valid,dtype=torch.float32,requires_grad=False) , torch.tensor(y_valid,requires_grad=False)


In [30]:
# compile
from tqdm import tqdm

loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)
def acc(x,y):
    cnt = 0
    for a,b in zip(x,y):
        if a==b:
            cnt += 1
    return cnt

def train(model, train_loader, optimizer, log_interval):
    model.train()
    for batch_idx, (image, label) in enumerate(train_loader):
        image = image
        label = label
        optimizer.zero_grad()
        output = model(image)
        loss = loss_fn(output, label)
        loss.backward()
        optimizer.step()

def evaluate(model, test_loader):
    model.eval()
    test_loss = 0
    correct = 0

    with torch.no_grad(): # 파라미터 업데이트 방지
        for image, label in test_loader:
            image = image
            label = label
            output = model(image)
            test_loss += loss_fn(output, label).item()
            prediction = output.max(1, keepdim=True)[1]
            correct += prediction.eq(label.view_as(prediction)).sum().item()
    
    test_loss /= len(test_loader.dataset)
    test_accuracy = 100. * correct / len(test_loader.dataset)
    return test_loss, test_accuracy

In [32]:
# 학습
epoch = 40
for Epoch in range(1, epoch + 1):
    train(model, train_loader, optimizer, log_interval=100)
    test_loss, test_accuracy = evaluate(model, valid_loader)
    print("[EPOCH: {}], \tValid Loss: {:.4f}, \tValid Accuracy: {:.2f} %".format(
        Epoch, test_loss, test_accuracy
    ))

[EPOCH: 1], 	Test Loss: 0.1362, 	Test Accuracy: 28.68 %
[EPOCH: 2], 	Test Loss: 0.1337, 	Test Accuracy: 38.14 %
[EPOCH: 3], 	Test Loss: 0.1334, 	Test Accuracy: 38.22 %
[EPOCH: 4], 	Test Loss: 0.1334, 	Test Accuracy: 38.26 %
[EPOCH: 5], 	Test Loss: 0.1332, 	Test Accuracy: 38.36 %
[EPOCH: 6], 	Test Loss: 0.1332, 	Test Accuracy: 38.48 %
[EPOCH: 7], 	Test Loss: 0.1332, 	Test Accuracy: 38.44 %
[EPOCH: 8], 	Test Loss: 0.1332, 	Test Accuracy: 38.50 %
[EPOCH: 9], 	Test Loss: 0.1331, 	Test Accuracy: 38.62 %
[EPOCH: 10], 	Test Loss: 0.1331, 	Test Accuracy: 38.48 %
[EPOCH: 11], 	Test Loss: 0.1331, 	Test Accuracy: 38.68 %
[EPOCH: 12], 	Test Loss: 0.1331, 	Test Accuracy: 38.70 %
[EPOCH: 13], 	Test Loss: 0.1331, 	Test Accuracy: 38.60 %
[EPOCH: 14], 	Test Loss: 0.1331, 	Test Accuracy: 38.56 %
[EPOCH: 15], 	Test Loss: 0.1331, 	Test Accuracy: 38.52 %
[EPOCH: 16], 	Test Loss: 0.1331, 	Test Accuracy: 38.60 %
[EPOCH: 17], 	Test Loss: 0.1330, 	Test Accuracy: 38.90 %
[EPOCH: 18], 	Test Loss: 0.1331, 	Test A

KeyboardInterrupt: 

In [33]:
epochs =30
criterion = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(),lr=0.001)
for e in range(epochs):
  running_loss = 0
  for images, labels in train_loader:
    
    #Flatten the image into a 784 long vector
    
    images = images.view(images.shape[0],-1) #sqash the image in to 784*1 vector
    
    #reset the default gradients
    optimizer.zero_grad()
    
    # forward pass
    output = model(images)
    loss = criterion(output,labels)
    
    #backward pass calculate the gradients for loss
    loss.backward()
    
    # update the parameters
    optimizer.step()
    
    running_loss = running_loss+loss.item()
  else:
    print(f"Training loss: {running_loss/len(train_loader)}")

Training loss: -0.37929072535318853
Training loss: -0.3793148089973922
Training loss: -0.3793513963585194
Training loss: -0.3793505176060972
Training loss: -0.37938417107681677
Training loss: -0.3794169863463835
Training loss: -0.37938999119385375
Training loss: -0.37939091448072365
Training loss: -0.37943798652944905
Training loss: -0.379438490347331
Training loss: -0.3794707592756614
Training loss: -0.3794746707871322
Training loss: -0.37951438854502134


KeyboardInterrupt: 

# 안되서 긁어오기

In [34]:
import torch
from torchvision import datasets, transforms

# Define a transform to normalize the data
transform = transforms.Compose([transforms.ToTensor()])

#transforms.ToTensor() convert our image to a tensor
#transforms.Normalize() will normalizae our image with provided mean and sd values

# Download and load training data

trainset = datasets.FashionMNIST('./data',download=True, train= True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size= 64, shuffle=True)

# Download and load test data
testset = datasets.FashionMNIST('./data',download=True, train= False, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size= 64, shuffle=True)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./data\FashionMNIST\raw\train-images-idx3-ubyte.gz


100%|██████████| 26421880/26421880 [01:23<00:00, 316014.21it/s]


Extracting ./data\FashionMNIST\raw\train-images-idx3-ubyte.gz to ./data\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./data\FashionMNIST\raw\train-labels-idx1-ubyte.gz


100%|██████████| 29515/29515 [00:00<00:00, 101044.01it/s]


Extracting ./data\FashionMNIST\raw\train-labels-idx1-ubyte.gz to ./data\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./data\FashionMNIST\raw\t10k-images-idx3-ubyte.gz


100%|██████████| 4422102/4422102 [00:11<00:00, 378168.82it/s]


Extracting ./data\FashionMNIST\raw\t10k-images-idx3-ubyte.gz to ./data\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./data\FashionMNIST\raw\t10k-labels-idx1-ubyte.gz


100%|██████████| 5148/5148 [00:00<00:00, 5153288.06it/s]

Extracting ./data\FashionMNIST\raw\t10k-labels-idx1-ubyte.gz to ./data\FashionMNIST\raw






In [35]:
#Network parameters

input_size = 784 #i.e 28*28*1 
hidden_size = [300,100]
out_size = 10

In [39]:
from torch import nn
from torch.nn import NLLLoss
from torch.optim import SGD

model = nn.Sequential(
nn.Flatten(),
nn.Linear(input_size,hidden_size[0]),
nn.ReLU(),
nn.Linear(hidden_size[0],hidden_size[1]),
nn.ReLU(),
nn.Linear(hidden_size[1],out_size),
nn.LogSoftmax(dim=1)
)

criterion = NLLLoss()
optimizer = SGD(model.parameters(),lr=0.001)

In [40]:
print(model)

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=300, bias=True)
  (2): ReLU()
  (3): Linear(in_features=300, out_features=100, bias=True)
  (4): ReLU()
  (5): Linear(in_features=100, out_features=10, bias=True)
  (6): LogSoftmax(dim=1)
)


In [41]:
epochs =30

for e in range(epochs):
  running_loss = 0
  for images, labels in trainloader:
    
    #Flatten the image into a 784 long vector
    
    #images = images.view(images.shape[0],-1) #sqash the image in to 784*1 vector
    
    #reset the default gradients
    optimizer.zero_grad()
    
    # forward pass
    output = model(images)
    loss = criterion(output,labels)
    
    #backward pass calculate the gradients for loss
    loss.backward()
    
    # update the parameters
    optimizer.step()
    
    running_loss = running_loss+loss.item()
  else:
    print(f"Training loss: {running_loss/len(trainloader)}")

Training loss: 2.264903717966222
Training loss: 2.1395309641162976
Training loss: 1.8974709959426668
Training loss: 1.581385413593829
Training loss: 1.3329489269236257


KeyboardInterrupt: 