# fashion_mnist데이터로 MLP구현 \<Torch>
- nn.NLLLoss() 와 nn.CrossEntropyLoss()의 차이
- torch.nn.CrossEntropyLoss는 LogSoftmax + NLLLoss를 합친 것이다.
    - 즉, 출력층에 softmax()/LogSoftmax()가 없고 로짓(logit)을 그대로 출력하는 모델에 사용해야 한다.
- 만약 model의 출력층에 LogSoftmax()가 있다면 NLLLoss()를 사용해야 한다. (아래 Mlp_Net은 로짓을 그대로 출력하므로 CrossEntropyLoss()를 사용한다.)

In [68]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from PIL import Image
from tensorflow import keras
from torch.utils.data import DataLoader, TensorDataset
%matplotlib inline


# Load the Fashion-MNIST arrays through the Keras dataset helper.
fashion_mnist = keras.datasets.fashion_mnist
(X_train_full, y_train_full), (X_test, y_test) = fashion_mnist.load_data()

# Hold out the first 5000 training samples as the validation split.
X_valid = X_train_full[:5000]
y_valid = y_train_full[:5000]
X_train = X_train_full[5000:]
y_train = y_train_full[5000:]

# Divide the pixel values by 255 (normalisation towards the 0-1 range) and
# store the images as float32 tensors.
X_train = torch.tensor(X_train / 255.0, dtype=torch.float32)
X_valid = torch.tensor(X_valid / 255.0, dtype=torch.float32)
X_test = torch.tensor(X_test / 255.0, dtype=torch.float32)

# Labels are converted as-is, keeping the dtype of the loaded arrays.
y_train = torch.tensor(y_train)
y_valid = torch.tensor(y_valid)
y_test = torch.tensor(y_test)

# Human-readable class names, indexed by label id.
class_names = [
    "T-shirt",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle Boot",
]

In [69]:
# Data loaders: mini-batches of `batch_size` samples, reshuffled on each pass.
batch_size = 32
train_loader = DataLoader(
    dataset=TensorDataset(X_train, y_train),
    batch_size=batch_size,
    shuffle=True,
)
valid_loader = DataLoader(
    dataset=TensorDataset(X_valid, y_valid),
    batch_size=batch_size,
    shuffle=True,
)

두개의 은닉층으로 이루어진 MLP
    - 텐서플로의 Dense는 토치의 Linear와 동일
        - Dense는 use_bias=True, Linear는 bias=True가 기본값이므로 둘 다 기본적으로 편향(bias)을 포함하며, 각각 False로 지정하면 끌 수 있음

In [70]:
'''
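이 방법은 forward()가 호출될 때마다 nn.Linear 층을 새로 만들기 때문에 가중치가 매번 무작위로 다시 초기화된다.
또한 이렇게 만든 층은 모듈에 등록되지 않아 model.parameters()에 나타나지 않으므로 옵티마이저가 학습시킬 수 없다.
(게다가 nn.ReLU(...) / nn.Softmax(...)는 모듈 생성자이므로 텐서를 인자로 넘겨서 바로 적용할 수 없다.)
따라서 이 방법은 사용하지 않는다!
즉, forward()에서 필요한 layer들은 미리 __init__에 선언해놓자!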
class Mlp_Net(nn.module):
    def __init__(self) -> None:
        super(Mlp_Net,self).__init__()

    def forward(self,x):
        x = nn.ReLU(nn.Linear(28*28,300)(x.view(-1,28*28)))
        x = nn.ReLU(nn.Linear(300,100)(x))
        x = nn.Softmax(x,dim=1)
        return x
'''
class Mlp_Net(nn.Module):
    """Fully connected classifier: 784 -> 300 -> 100 -> 10 with ReLU in between.

    The last linear layer's output is returned unchanged; no softmax is
    applied inside the model.
    """

    def __init__(self):
        super().__init__()
        # Attribute names and creation order determine the parameter names
        # and the order in which the weights are initialised.
        self.flatten = nn.Flatten()
        self.layer1 = nn.Linear(in_features=28 * 28, out_features=300)
        self.layer2 = nn.Linear(in_features=300, out_features=100)
        self.layer3 = nn.Linear(in_features=100, out_features=10)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Flatten ``x`` per sample and pass it through the three layers."""
        flat = self.flatten(x)
        hidden1 = self.relu(self.layer1(flat))
        hidden2 = self.relu(self.layer2(hidden1))
        return self.layer3(hidden2)


In [71]:
# Build a fresh Mlp_Net and print its registered submodules.
model = Mlp_Net()
print(model)

Mlp_Net(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (layer1): Linear(in_features=784, out_features=300, bias=True)
  (layer2): Linear(in_features=300, out_features=100, bias=True)
  (layer3): Linear(in_features=100, out_features=10, bias=True)
  (relu): ReLU()
)


In [72]:
# Print every registered parameter's name together with its shape and dtype.
for name , param in model.named_parameters():
    print(name , param.shape,param.dtype)

layer1.weight torch.Size([300, 784]) torch.float32
layer1.bias torch.Size([300]) torch.float32
layer2.weight torch.Size([100, 300]) torch.float32
layer2.bias torch.Size([100]) torch.float32
layer3.weight torch.Size([10, 100]) torch.float32
layer3.bias torch.Size([10]) torch.float32


In [73]:
# 파라미터에 직접 접근하기
# 그래프에 직접 접근은 지양해야하며 .data로 접근할 수 있다.
'''
print(model.layer1.weight.shape,model.layer2.bias.shape)
print(model.layer1.weight[0,0])
model.layer1.weight.data[0,0] = 0
print(model.layer1.weight[0,0])
'''

'\nprint(model.layer1.weight.shape,model.layer2.bias.shape)\nprint(model.layer1.weight[0,0])\nmodel.layer1.weight.data[0,0] = 0\nprint(model.layer1.weight[0,0])\n'

In [74]:
# compile
def train(model, train_loader, optimizer, criterion=None):
    """Run one training epoch and report the mean batch loss and the accuracy.

    Args:
        model: Module that maps an image batch to per-class scores.
        train_loader: DataLoader yielding ``(image, label)`` batches.
        optimizer: Optimizer that updates ``model``'s parameters.
        criterion: Loss callable invoked as ``criterion(output, label)``.
            When omitted, the notebook-level ``loss_fn`` is used, which is
            what the original code always did.

    Returns:
        ``(mean_loss, accuracy)``: the loss averaged over the batches as a
        Python float, and the fraction of correctly classified samples as a
        0-dim tensor.
    """
    if criterion is None:
        criterion = loss_fn  # notebook-level loss defined in the training cell

    model.train()
    total_loss, total_acc = 0.0, 0
    for image, label in train_loader:
        output = model(image)
        loss = criterion(output, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # .item() yields a plain float. Summing the loss tensor itself would
        # keep every batch's autograd graph alive until the epoch ends.
        total_loss += loss.item()

        pred = torch.max(output, 1)[1]  # index of the highest score per sample
        total_acc += (pred == label).sum()
    return (total_loss / len(train_loader)), (total_acc / len(train_loader.dataset))


def evaluate(model, test_loader, criterion=None):
    """Compute the mean batch loss and the accuracy without updating the model.

    Args:
        model: Module that maps an image batch to per-class scores.
        test_loader: DataLoader yielding ``(image, label)`` batches.
        criterion: Loss callable invoked as ``criterion(output, label)``.
            When omitted, the notebook-level ``loss_fn`` is used, which is
            what the original code always did.

    Returns:
        ``(mean_loss, accuracy)``: the loss averaged over the batches as a
        Python float, and the fraction of correctly classified samples as a
        0-dim tensor.
    """
    if criterion is None:
        criterion = loss_fn  # notebook-level loss defined in the training cell

    total_loss, total_acc = 0.0, 0
    was_training = model.training
    model.eval()  # inference behaviour for layers such as dropout/batch norm
    try:
        with torch.no_grad():  # no gradient tracking during evaluation
            for image, label in test_loader:
                output = model(image)
                loss = criterion(output, label)
                total_loss += loss.item()
                pred = torch.max(output, 1)[1]
                total_acc += (pred == label).sum()
    finally:
        # Leave the model in the mode the caller had it in.
        model.train(was_training)
    return (total_loss / len(test_loader)), (total_acc / len(test_loader.dataset))

In [75]:
# 학습
from tqdm import tqdm
# Fresh model, loss and optimizer for the training run.
model = Mlp_Net()
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

epoch = 40
# range(epoch) runs exactly `epoch` passes; the previous range(1+epoch)
# ran one extra pass.
for Epoch in range(epoch):
    train_loss, train_acc = train(model, train_loader, optimizer)
    test_loss, test_acc = evaluate(model, valid_loader)  # metrics on the validation split
    # The second field is the training accuracy; it used to be printed under "val_acc".
    print(f'epoch : {Epoch+1} train_loss : {train_loss:.2f} train_acc : {train_acc*100:.0f}% val_loss : {test_loss:.2f} val_acc : {test_acc*100:.0f}%')

epoch : 1 train_loss : 1.11 val_acc : 64% val_loss : 0.66 val_acc : 76%
epoch : 2 train_loss : 0.59 val_acc : 79% val_loss : 0.52 val_acc : 82%
epoch : 3 train_loss : 0.51 val_acc : 82% val_loss : 0.47 val_acc : 84%


KeyboardInterrupt: 

# 안되서 긁어오기

In [25]:
# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [198]:
import torch
from torchvision import datasets, transforms

'''
transform = transforms.Compose([transforms.ToTensor()])
trainset = datasets.FashionMNIST('./data',download=True, train= True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size= 64, shuffle=True)

# Download and load test data
testset = datasets.FashionMNIST('./data',download=True, train= False, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size= 64, shuffle=True)
'''

In [51]:
from torch import nn
from torch.nn import NLLLoss
from torch.optim import SGD

# 784 -> 300 -> 100 -> 10 MLP as a Sequential stack, moved to `device`
# as part of its construction (Module.to returns the module itself).
model = nn.Sequential(
    nn.Flatten(),
    nn.Linear(in_features=28 * 28, out_features=300),
    nn.ReLU(),
    nn.Linear(in_features=300, out_features=100),
    nn.ReLU(),
    nn.Linear(in_features=100, out_features=10),
).to(device)

# The model's output is passed directly to CrossEntropyLoss.
criterion = nn.CrossEntropyLoss()
optimizer = SGD(params=model.parameters(), lr=0.01)

In [37]:
# Print the structure of the current `model`.
# NOTE(review): the output recorded below shows Mlp_Net rather than the
# nn.Sequential model built in the previous cell -- presumably captured in an
# earlier run (cell In [37] vs In [51]); re-run to confirm.
print(model)

Mlp_Net(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (layer1): Linear(in_features=784, out_features=300, bias=True)
  (layer2): Linear(in_features=300, out_features=100, bias=True)
  (layer3): Linear(in_features=100, out_features=10, bias=True)
  (relu): ReLU()
)


In [52]:
epochs = 40
model.to(device)
for e in range(epochs):
    # ---- training pass ----
    model.train()
    total_loss, total_acc = 0.0, 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        output = model(images)
        loss = criterion(output, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # `output` already lives on `device`, so the predictions do too.
        pred = torch.max(output, 1)[1]
        # Weight the batch loss by the batch size so that dividing by the
        # dataset size below gives a per-sample average.
        total_loss += loss.item() * len(labels)
        total_acc += (pred == labels).sum().item()
    # A plain statement after the loop: the previous for/else always ran its
    # else branch because the loop never breaks.
    print(f"{e+1}epoch Train_loss: {total_loss/len(train_loader.dataset):.2f} Train_acc: {100*total_acc/len(train_loader.dataset):.2f}%",end='|')

    # ---- validation pass (printed under the "Test_" labels) ----
    model.eval()
    total_loss, total_acc = 0.0, 0
    with torch.no_grad():
        for images, labels in valid_loader:
            images, labels = images.to(device), labels.to(device)
            output = model(images)
            loss = criterion(output, labels)

            pred = torch.max(output, 1)[1]
            total_loss += loss.item() * len(labels)
            total_acc += (pred == labels).sum().item()
    print(f"Test_loss: {total_loss/len(valid_loader.dataset):.2f} Test_acc: {100*total_acc/len(valid_loader.dataset):.2f}%")

1epoch Train_loss: 1.12 Train_acc: 63.01%|Test_loss: 0.67 Test_acc: 75.78%
2epoch Train_loss: 0.61 Train_acc: 78.51%|Test_loss: 0.53 Test_acc: 81.82%


KeyboardInterrupt: 

# 캘리포니아 주택 시작! MLP

In [11]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline


import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

In [12]:
# Load the California housing data and split it into train / validation / test.
housing = fetch_california_housing()

X_train_full , X_test , y_train_full , y_test = train_test_split(housing.data,housing.target)
X_train , X_valid , y_train , y_valid = train_test_split(X_train_full,y_train_full)

scaler = StandardScaler()

# Fit the scaler on the training split only, then reuse those statistics for
# the other splits. Calling fit_transform on each split would re-fit the
# scaler and scale every split differently.
X_train = scaler.fit_transform(X_train)
X_valid = scaler.transform(X_valid)
X_test = scaler.transform(X_test)

# Convert features and targets to float32 tensors.
X_train = torch.tensor(X_train,dtype=torch.float32)
X_valid = torch.tensor(X_valid,dtype=torch.float32)
X_test = torch.tensor(X_test,dtype=torch.float32)
y_train = torch.tensor(y_train,dtype=torch.float32)
y_valid = torch.tensor(y_valid,dtype=torch.float32)
y_test = torch.tensor(y_test,dtype=torch.float32)

print(X_train.shape , X_valid.shape , X_test.shape)
print(y_train.shape , y_valid.shape , y_test.shape)

torch.Size([11610, 8]) torch.Size([3870, 8]) torch.Size([5160, 8])
torch.Size([11610]) torch.Size([3870]) torch.Size([5160])


In [13]:
# Data loaders for the three splits: mini-batches of `batch_size` samples,
# reshuffled on each pass.
batch_size = 32
train_loader = DataLoader(
    dataset=TensorDataset(X_train, y_train),
    batch_size=batch_size,
    shuffle=True,
)
valid_loader = DataLoader(
    dataset=TensorDataset(X_valid, y_valid),
    batch_size=batch_size,
    shuffle=True,
)
test_loader = DataLoader(
    dataset=TensorDataset(X_test, y_test),
    batch_size=batch_size,
    shuffle=True,
)

In [8]:
def train(model, data_loader, epochs=10, criterion=torch.nn.MSELoss, optimizer=None):
    """Fit ``model`` on ``data_loader`` for ``epochs`` passes.

    Args:
        model: Module mapping a feature batch to predictions.
        data_loader: Iterable of ``(features, targets)`` batches.
        epochs: Number of passes over ``data_loader``.
        criterion: Loss module instance, or a loss class (such as the default
            ``torch.nn.MSELoss``) which is instantiated without arguments.
        optimizer: Optimizer for ``model``'s parameters. When omitted, the
            module-level ``optimizer`` variable is used, which is what the
            original code relied on.

    Returns:
        The batch-size-weighted mean of the batch losses over the last epoch,
        as a float (``nan`` when no sample was processed).

    Raises:
        NameError: If no optimizer is passed and no module-level
            ``optimizer`` exists.
    """
    if optimizer is None:
        optimizer = globals().get("optimizer")
        if optimizer is None:
            raise NameError(
                "train() needs an optimizer: pass one or define a global `optimizer`"
            )
    # The default is the loss *class*; calling it with (output, target) would
    # build a module instead of computing a loss.
    if isinstance(criterion, type):
        criterion = criterion()

    epoch_loss = float("nan")
    model.train()
    for _ in range(epochs):
        total_loss, n_samples = 0.0, 0
        for features, targets in data_loader:
            output = model(features)
            # Align e.g. (N,) targets with (N, 1) predictions so the loss
            # does not silently broadcast to an (N, N) comparison.
            if targets.shape != output.shape and targets.numel() == output.numel():
                targets = targets.reshape(output.shape)
            loss = criterion(output, targets)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item() * len(targets)
            n_samples += len(targets)
        if n_samples:
            epoch_loss = total_loss / n_samples
    return epoch_loss

def evaluate(model, data_loader, epochs=10, criterion=torch.nn.MSELoss):
    """Compute the average loss of ``model`` on ``data_loader`` without training.

    Args:
        model: Module mapping a feature batch to predictions.
        data_loader: Iterable of ``(features, targets)`` batches.
        epochs: Accepted for backward compatibility and ignored; a single
            pass over the data is enough to measure the loss.
        criterion: Loss module instance, or a loss class (such as the default
            ``torch.nn.MSELoss``) which is instantiated without arguments.

    Returns:
        The batch-size-weighted mean of the batch losses as a float
        (``nan`` when no sample was processed).
    """
    # The default is the loss *class*; calling it with (output, target) would
    # build a module instead of computing a loss.
    if isinstance(criterion, type):
        criterion = criterion()

    total_loss, n_samples = 0.0, 0
    was_training = model.training
    model.eval()
    try:
        # Evaluation must not update the model: no backward pass, no
        # optimizer step, no gradient tracking.
        with torch.no_grad():
            for features, targets in data_loader:
                output = model(features)
                # Align e.g. (N,) targets with (N, 1) predictions so the loss
                # does not silently broadcast to an (N, N) comparison.
                if targets.shape != output.shape and targets.numel() == output.numel():
                    targets = targets.reshape(output.shape)
                loss = criterion(output, targets)
                total_loss += loss.item() * len(targets)
                n_samples += len(targets)
    finally:
        # Leave the model in the mode the caller had it in.
        model.train(was_training)
    return total_loss / n_samples if n_samples else float("nan")



11610