# fashion_mnist데이터로 MLP구현 \<Torch>
- nn.NLLLoss() 와 nn.CrossEntropyLoss()의 차이
- torch.nn.CrossEntropyLoss는 softmax + NLLLoss이다.
    - 즉 , 모델의 출력층에 softmax()가 없는 모델에 사용해야한다.
- 만약 지금처럼 model의 출력층에 LogSoftmax()가 있다면 NLLLoss()를 사용해야한다.

In [68]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from PIL import Image
from tensorflow import keras
from torch.utils.data import DataLoader, TensorDataset
%matplotlib inline


# 데이터셋 적재
fashion_mnist = keras.datasets.fashion_mnist
(X_train_full , y_train_full) , (X_test , y_test) = fashion_mnist.load_data()

# 정규화 값을 0 ~ 1사이로 맞추기
X_valid , X_train = X_train_full[:5000] , X_train_full[5000:]
y_valid , y_train = y_train_full[:5000] , y_train_full[5000:]
X_train = X_train / 255.0
X_valid = X_valid / 255.0
X_test = X_test / 255.0

X_valid , X_train = torch.tensor(X_valid,dtype=torch.float32) , torch.tensor(X_train,dtype=torch.float32)
y_valid , y_train = torch.tensor(y_valid) , torch.tensor(y_train)
X_test , y_test = torch.tensor(X_test,dtype=torch.float32) , torch.tensor(y_test)

# 클래스 이름 리스트 정의
class_names = ["T-shirt","Trouser","Pullover","Dress","Coat","Sandal","Shirt","Sneaker","Bag","Ankle Boot"]

In [69]:
# 데이터로더
batch_size = 32
train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(TensorDataset(X_valid, y_valid), batch_size=batch_size, shuffle=True)

두개의 은닉층으로 이루어진 MLP
    - 텐서플로의 Dense는 토치의 Linear와 동일
        - 단 Dense의 use_bias=True (default임)가 옵션이지만 Linear은 bias가 포함되어있음

In [70]:
'''
이 방법은 계산 그래프를 생성하지 않고 순전파에서 모든 계산을 수행함
따라서 파이토치의 자동미분 backward()를 수행할 때 참고할 계산 그래프가 없으므로 backward()를 사용할 수 없다.
따라서 이방법은 사용하지 않는다!
즉 , forward()에서 필요한 layer들은 미리 __init__에 선언해놓자!!!!!
class Mlp_Net(nn.module):
    def __init__(self) -> None:
        super(Mlp_Net,self).__init__()

    def forward(self,x):
        x = nn.ReLU(nn.Linear(28*28,300)(x.view(-1,28*28)))
        x = nn.ReLU(nn.Linear(300,100)(x))
        x = nn.Softmax(x,dim=1)
        return x
'''
class Mlp_Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.layer1 = nn.Linear(28*28,300) # 입력 -> 출력 차원
        self.layer2 = nn.Linear(300,100)
        self.layer3 = nn.Linear(100,10)
        self.relu = nn.ReLU()
    
    def forward(self,x):
        x = self.flatten(x)
        x = self.relu(self.layer1(x))
        x = self.relu(self.layer2(x))
        x = self.layer3(x)
        return x


In [71]:
model = Mlp_Net()
print(model)

Mlp_Net(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (layer1): Linear(in_features=784, out_features=300, bias=True)
  (layer2): Linear(in_features=300, out_features=100, bias=True)
  (layer3): Linear(in_features=100, out_features=10, bias=True)
  (relu): ReLU()
)


In [72]:
for name , param in model.named_parameters():
    print(name , param.shape,param.dtype)

layer1.weight torch.Size([300, 784]) torch.float32
layer1.bias torch.Size([300]) torch.float32
layer2.weight torch.Size([100, 300]) torch.float32
layer2.bias torch.Size([100]) torch.float32
layer3.weight torch.Size([10, 100]) torch.float32
layer3.bias torch.Size([10]) torch.float32


In [73]:
# 파라미터에 직접 접근하기
# 그래프에 직접 접근은 지양해야하며 .data로 접근할 수 있다.
'''
print(model.layer1.weight.shape,model.layer2.bias.shape)
print(model.layer1.weight[0,0])
model.layer1.weight.data[0,0] = 0
print(model.layer1.weight[0,0])
'''

'\nprint(model.layer1.weight.shape,model.layer2.bias.shape)\nprint(model.layer1.weight[0,0])\nmodel.layer1.weight.data[0,0] = 0\nprint(model.layer1.weight[0,0])\n'

In [74]:
# compile
def train(model, train_loader, optimizer):
    total_loss , total_acc = 0 , 0
    for image, label in train_loader:
        output = model(image)
        loss = loss_fn(output, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss

        pred = torch.max(output, 1)[1]
        total_acc += (pred == label).sum()
    return (total_loss / len(train_loader)) , (total_acc/len(train_loader.dataset))


def evaluate(model, test_loader):
    total_loss , total_acc = 0 , 0
    with torch.no_grad(): # 파라미터 업데이트 방지
        for image, label in test_loader:
            output = model(image)
            loss = loss_fn(output,label)
            total_loss += loss.item()
            pred = torch.max(output,1)[1]
            total_acc += (pred == label).sum()
        return (total_loss / len(test_loader)) , (total_acc/len(test_loader.dataset))

In [75]:
# 학습
from tqdm import tqdm
model = Mlp_Net()
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

epoch = 40
for Epoch in range(1+epoch):
    train_loss , train_acc= train(model, train_loader, optimizer)
    test_loss, test_acc = evaluate(model, valid_loader)
    print(f'epoch : {Epoch+1} train_loss : {train_loss:.2f} val_acc : {train_acc*100:.0f}% val_loss : {test_loss:.2f} val_acc : {test_acc*100:.0f}%')

epoch : 1 train_loss : 1.11 val_acc : 64% val_loss : 0.66 val_acc : 76%
epoch : 2 train_loss : 0.59 val_acc : 79% val_loss : 0.52 val_acc : 82%
epoch : 3 train_loss : 0.51 val_acc : 82% val_loss : 0.47 val_acc : 84%


KeyboardInterrupt: 

# 안되서 긁어오기

In [25]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [198]:
import torch
from torchvision import datasets, transforms

'''
transform = transforms.Compose([transforms.ToTensor()])
trainset = datasets.FashionMNIST('./data',download=True, train= True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size= 64, shuffle=True)

# Download and load test data
testset = datasets.FashionMNIST('./data',download=True, train= False, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size= 64, shuffle=True)
'''

In [51]:
from torch import nn
from torch.nn import NLLLoss
from torch.optim import SGD

model = nn.Sequential(
nn.Flatten(),
nn.Linear(28*28,300),
nn.ReLU(),
nn.Linear(300,100),
nn.ReLU(),
nn.Linear(100,10)
)

model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = SGD(model.parameters(),lr=0.01)

In [37]:
print(model)

Mlp_Net(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (layer1): Linear(in_features=784, out_features=300, bias=True)
  (layer2): Linear(in_features=300, out_features=100, bias=True)
  (layer3): Linear(in_features=100, out_features=10, bias=True)
  (relu): ReLU()
)


In [52]:
epochs =40
model.to(device)
for e in range(epochs):
  total_loss , total_acc ,tot = 0 , 0 ,0
  for images, labels in train_loader:
    images, labels = images.to(device), labels.to(device)
    
    output = model(images)
    loss = criterion(output,labels)
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    pred = torch.max(output, 1)[1].to(device)
    total_loss += loss.item() * len(labels)
    total_acc += (pred == labels).sum()
  else:
    print(f"{e+1}epoch Train_loss: {total_loss/len(train_loader.dataset):.2f} Train_acc: {100*total_acc/len(train_loader.dataset):.2f}%",end='|')


  total_loss , total_acc = 0 , 0
  with torch.no_grad():
    for images, labels in valid_loader:
      images, labels = images.to(device), labels.to(device)
      output = model(images)
      loss = criterion(output,labels)

      pred = torch.max(output, 1)[1].to(device)
      total_loss += loss.item() * len(labels)
      total_acc += (pred == labels).sum()
    else:
      print(f"Test_loss: {total_loss/len(valid_loader.dataset):.2f} Test_acc: {100*total_acc/len(valid_loader.dataset):.2f}%")

1epoch Train_loss: 1.12 Train_acc: 63.01%|Test_loss: 0.67 Test_acc: 75.78%
2epoch Train_loss: 0.61 Train_acc: 78.51%|Test_loss: 0.53 Test_acc: 81.82%


KeyboardInterrupt: 

# 캘리포니아 주택 시작! MLP

In [109]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline


import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

In [110]:
housing = fetch_california_housing()

X_train_full , X_test , y_train_full , y_test = train_test_split(housing.data,housing.target)
X_train , X_valid , y_train , y_valid = train_test_split(X_train_full,y_train_full)

scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_valid = scaler.fit_transform(X_valid)
X_test = scaler.fit_transform(X_test)

X_train = torch.tensor(X_train,dtype=torch.float32)
X_valid = torch.tensor(X_valid,dtype=torch.float32)
X_test = torch.tensor(X_test,dtype=torch.float32)
y_train = torch.tensor(y_train,dtype=torch.float32).view(-1,1)          # batch*차원으로 shape을 맞춰주기 위해
y_valid = torch.tensor(y_valid,dtype=torch.float32).view(-1,1)
y_test = torch.tensor(y_test,dtype=torch.float32).view(-1,1)

print(X_train.shape , X_valid.shape , X_test.shape)
print(y_train.shape , y_valid.shape , y_test.shape)

torch.Size([11610, 8]) torch.Size([3870, 8]) torch.Size([5160, 8])
torch.Size([11610, 1]) torch.Size([3870, 1]) torch.Size([5160, 1])


In [111]:
# 데이터로더
batch_size = 32
train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(TensorDataset(X_valid, y_valid), batch_size=batch_size, shuffle=True)
test_loader = DataLoader(TensorDataset(X_test, y_test), batch_size=batch_size, shuffle=True)

In [112]:
def train(model,data_loader,valid_data_loader,epochs=10,criterion = nn.MSELoss):
    for e in range(epochs):
        total_train_loss = 0
        for images, labels in data_loader:
            output = model(images)
            loss = criterion(output,labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_train_loss += loss.item()
        print(f'{e+1}Epoch - loss : {(total_train_loss/len(data_loader)):.4f}',end=' ')
        with torch.no_grad():
            total_valid_loss = 0
            for images, labels in valid_data_loader:
                output = model(images)
                loss = criterion(output,labels)
                total_valid_loss += loss.item()
            print(f'valid_loss : {(total_valid_loss/len(valid_data_loader)):.4f}')



In [113]:
# 간단한 모델 만들기
model = nn.Sequential(
    nn.Linear(8, 30),
    nn.ReLU(),
    nn.Linear(30, 1)
)
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(),lr=0.01)
train(model,train_loader,valid_loader,20,criterion)

1Epoch - loss : 1.0401 valid_loss : 0.6295
2Epoch - loss : 0.5450 valid_loss : 0.4899
3Epoch - loss : 0.4844 valid_loss : 0.5214
4Epoch - loss : 0.4721 valid_loss : 0.5788
5Epoch - loss : 0.4560 valid_loss : 0.7330
6Epoch - loss : 0.4417 valid_loss : 0.7047
7Epoch - loss : 0.4256 valid_loss : 0.8761
8Epoch - loss : 0.4183 valid_loss : 1.0018
9Epoch - loss : 0.4118 valid_loss : 1.1116
10Epoch - loss : 0.4073 valid_loss : 1.3104
11Epoch - loss : 0.4032 valid_loss : 1.4673
12Epoch - loss : 0.4006 valid_loss : 1.6333
13Epoch - loss : 0.4006 valid_loss : 1.8938
14Epoch - loss : 0.3979 valid_loss : 1.9609
15Epoch - loss : 0.3927 valid_loss : 2.1853
16Epoch - loss : 0.3937 valid_loss : 2.2330
17Epoch - loss : 0.3911 valid_loss : 2.4467
18Epoch - loss : 0.3823 valid_loss : 2.6208
19Epoch - loss : 0.3859 valid_loss : 2.8306
20Epoch - loss : 0.3838 valid_loss : 3.1272


## skip connection 또는 residual connection 구현
ResNet 등에서 사용되는 구조로, 이전 층의 출력을 현재 층의 입력에 더하여 정보의 유실을 방지하고, 학습의 안정성을 높이는 효과가 있습니다.

In [52]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.hidden1 = nn.Linear(8, 30)
        self.hidden2 = nn.Linear(30,30)
        self.hidden3 = nn.Linear(38,1)
        self.relu = nn.ReLU()

    def forward(self, x):
        x1 = self.relu(self.hidden1(x))
        x1 = self.relu(self.hidden2(x1))
        x_concat = torch.cat((x,x1),dim=1)
        result = self.hidden3(x_concat)
        return result

    
model = Net()
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(),lr=0.005)
train(model,train_loader,valid_loader,20,criterion)

1Epoch - loss : 0.9443 valid_loss : 0.8979
2Epoch - loss : 0.6804 valid_loss : 1.0330
3Epoch - loss : 0.6243 valid_loss : 0.6081
4Epoch - loss : 0.6898 valid_loss : 0.5121
5Epoch - loss : 0.5706 valid_loss : 0.4810
6Epoch - loss : 0.4971 valid_loss : 0.4645
7Epoch - loss : 0.4836 valid_loss : 0.4693
8Epoch - loss : 0.4785 valid_loss : 0.4514
9Epoch - loss : 0.4692 valid_loss : 0.4463
10Epoch - loss : 0.4626 valid_loss : 0.4363
11Epoch - loss : 0.4560 valid_loss : 0.4478
12Epoch - loss : 0.4523 valid_loss : 0.4417
13Epoch - loss : 0.4470 valid_loss : 0.4443
14Epoch - loss : 0.4415 valid_loss : 0.4465
15Epoch - loss : 0.4387 valid_loss : 0.4483
16Epoch - loss : 0.4322 valid_loss : 0.4609
17Epoch - loss : 0.4271 valid_loss : 0.5014
18Epoch - loss : 0.4264 valid_loss : 0.4906
19Epoch - loss : 0.4291 valid_loss : 0.5699
20Epoch - loss : 0.4235 valid_loss : 0.5710


In [62]:
# 입력을 두개로 하는 모델 , 특성 나눈 후 넣기

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.hidden1 = nn.Linear(5, 30)
        self.hidden2 = nn.Linear(30,30)
        self.hidden3 = nn.Linear(36,1)
        self.relu = nn.ReLU()

    def forward(self, x):
        input1 , input2 = x[:,:5] , x[:,2:]
        x1 = self.relu(self.hidden1(input1))
        x1 = self.relu(self.hidden2(x1))
        concat = torch.cat((input2,x1),dim=1)
        result = self.hidden3(concat)
        return result

    
model = Net()
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(),lr=0.001)
train(model,train_loader,valid_loader,20,criterion)

1Epoch - loss : 2.1284 valid_loss : 1.0338
2Epoch - loss : 0.8560 valid_loss : 0.7920
3Epoch - loss : 0.7273 valid_loss : 0.7070
4Epoch - loss : 0.6749 valid_loss : 0.6675
5Epoch - loss : 0.6427 valid_loss : 0.6415
6Epoch - loss : 0.6189 valid_loss : 0.6212
7Epoch - loss : 0.5987 valid_loss : 0.6052
8Epoch - loss : 0.5823 valid_loss : 0.5913
9Epoch - loss : 0.5684 valid_loss : 0.5802
10Epoch - loss : 0.5570 valid_loss : 0.5707
11Epoch - loss : 0.5470 valid_loss : 0.5634
12Epoch - loss : 0.5390 valid_loss : 0.5574
13Epoch - loss : 0.5321 valid_loss : 0.5517
14Epoch - loss : 0.5258 valid_loss : 0.5476
15Epoch - loss : 0.5212 valid_loss : 0.5427
16Epoch - loss : 0.5169 valid_loss : 0.5405
17Epoch - loss : 0.5131 valid_loss : 0.5372
18Epoch - loss : 0.5096 valid_loss : 0.5339
19Epoch - loss : 0.5062 valid_loss : 0.5324
20Epoch - loss : 0.5041 valid_loss : 0.5289


여러 개의 출력
- 예로들어 이미지에서 주요 물체를 분류하는 작업을 할 때 혹은 규제를 목적으로 사용됨
    - 하위 layer가 상위 layer를 그대로 출력하나? 너무 의존하나 확인가능
    - 얼굴사진으로 다중 작업 분류를 할 때
        - 한 출력은 감정 분류
        - 한 출력은 안경의 유무 확인
- 여러개의 출력을 갖고 싶을 때 사용할 수 있다.
- 이때 각 출력은 각자의 손실함수를 가지고 있어야 함!

In [66]:
# 출력이 여러개인 모델 ,입력을 두개로 하는 모델 , 특성 나눈 후 넣기

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.hidden1 = nn.Linear(5, 30)
        self.hidden2 = nn.Linear(30,30)
        self.hidden3 = nn.Linear(36,1)
        self.relu = nn.ReLU()

    def forward(self, x):
        input1 , input2 = x[:,:5] , x[:,2:]
        x1 = self.relu(self.hidden1(input1))
        x1 = self.relu(self.hidden2(x1))
        concat = torch.cat((input2,x1),dim=1)
        result = self.hidden3(concat)
        return result , x1
    
    def train(self,data_loader,valid_data_loader,epochs=10,criterion = nn.MSELoss):
        for e in range(epochs):
            total_train_loss = 0
            for images, labels in data_loader:
                output1 , output2 = self(images)
                loss1 , loss2 = criterion(output1,labels) , criterion(output2,labels)
                loss = 0.9*loss1 + 0.1*loss2
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                total_train_loss += loss.item()
            print(f'{e+1}Epoch - loss : {(total_train_loss/len(data_loader)):.4f}',end=' ')
            with torch.no_grad():
                total_valid_loss = 0
                for images, labels in valid_data_loader:
                    output1 , output2 = self(images)
                    loss1 , loss2 = criterion(output1,labels) , criterion(output2,labels)
                    loss = 0.9*loss1 + 0.1*loss2
                    total_valid_loss += loss.item()
                print(f'valid_loss : {(total_valid_loss/len(valid_data_loader)):.4f}')
    
model = Net()
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(),lr=0.01)
model.train(train_loader,valid_loader,20,criterion)

1Epoch - loss : 1.2815 valid_loss : 1.0629
2Epoch - loss : 1.0725 valid_loss : 0.9390
3Epoch - loss : 0.8470 valid_loss : 0.9446
4Epoch - loss : 0.7969 valid_loss : 0.8212
5Epoch - loss : 0.7372 valid_loss : 0.7868
6Epoch - loss : 0.7675 valid_loss : 0.7664
7Epoch - loss : 0.6980 valid_loss : 0.6973
8Epoch - loss : 0.6572 valid_loss : 0.7084
9Epoch - loss : 0.6712 valid_loss : 0.6895
10Epoch - loss : 0.6781 valid_loss : 0.7365
11Epoch - loss : 0.7641 valid_loss : 0.6171
12Epoch - loss : 0.7358 valid_loss : 3.8050
13Epoch - loss : 0.6623 valid_loss : 0.6396
14Epoch - loss : 0.9069 valid_loss : 0.6205
15Epoch - loss : 0.6496 valid_loss : 0.6980
16Epoch - loss : 1.0644 valid_loss : 0.6257
17Epoch - loss : 0.6230 valid_loss : 0.6101
18Epoch - loss : 0.6501 valid_loss : 0.6124
19Epoch - loss : 0.6290 valid_loss : 0.7340
20Epoch - loss : 0.6491 valid_loss : 1.2392


# 모델 저장 & 로드

https://pytorch.org/tutorials/beginner/saving_loading_models.html#what-is-a-state-dict
1. 스크립트를 사용하면 모델을 고대로 저장하고 불러올 수 있다.
2. 모델의 가중치만 저장하고 불러올 땐 모델을 정의하고 가중치만 불러올 수 있다.
3. 모델의 에포크 등등 하이퍼 파라미터를 같이 저장 할 수 있다.
    - Saving Multiple Models in One File

In [90]:
# 1. 스크립트를 사용하여 모델 저장
model_scripted = torch.jit.script(model) # Export to TorchScript
model_scripted.save('model_scripted.pt') # Save

In [87]:
# 새로운 모델로 불러오기
model_new = torch.jit.load('model_scripted.pt')
model_new.eval()

RecursiveScriptModule(
  original_name=Net
  (hidden1): RecursiveScriptModule(original_name=Linear)
  (hidden2): RecursiveScriptModule(original_name=Linear)
  (hidden3): RecursiveScriptModule(original_name=Linear)
  (relu): RecursiveScriptModule(original_name=ReLU)
)

In [88]:
# 2. 가중치를 저장 (불러올 때 모델 class를 정의해야 함)
torch.save(model.state_dict(), 'weight.pt')
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.hidden1 = nn.Linear(5, 30)
        self.hidden2 = nn.Linear(30,30)
        self.hidden3 = nn.Linear(36,1)
        self.relu = nn.ReLU()

    def forward(self, x):
        input1 , input2 = x[:,:5] , x[:,2:]
        x1 = self.relu(self.hidden1(input1))
        x1 = self.relu(self.hidden2(x1))
        concat = torch.cat((input2,x1),dim=1)
        result = self.hidden3(concat)
        return result , x1
model = Net()
model.load_state_dict(torch.load('weight.pt'))
model.eval()

Net(
  (hidden1): Linear(in_features=5, out_features=30, bias=True)
  (hidden2): Linear(in_features=30, out_features=30, bias=True)
  (hidden3): Linear(in_features=36, out_features=1, bias=True)
  (relu): ReLU()
)

In [98]:
# 3.Saving Multiple Models in One File
# 여러 가지 값 저장, 학습 중 진행 상황 저장을 위해 epoch, loss 값 등 일반 scalar값 저장 가능

torch.save({
    'model': model.state_dict(),
    'optimizer': optimizer.state_dict()
}, 'all.tar') 
checkpoint = torch.load('all.tar')   # dict 불러오기

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.hidden1 = nn.Linear(5, 30)
        self.hidden2 = nn.Linear(30,30)
        self.hidden3 = nn.Linear(36,1)
        self.relu = nn.ReLU()

    def forward(self, x):
        input1 , input2 = x[:,:5] , x[:,2:]
        x1 = self.relu(self.hidden1(input1))
        x1 = self.relu(self.hidden2(x1))
        concat = torch.cat((input2,x1),dim=1)
        result = self.hidden3(concat)
        return result , x1
model_new = Net()

model_new.load_state_dict(checkpoint['model'])
optimizer = optim.SGD(model.parameters(),lr=0.001)
optimizer.load_state_dict(checkpoint['optimizer'])

# 콜백

In [115]:
# 최상의 모델을 저장 checkpoint 구현
# 조기 종료

model = nn.Sequential(
    nn.Linear(8, 30),
    nn.ReLU(),
    nn.Linear(30, 1)
)
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(),lr=0.005)
def train(model,data_loader,valid_data_loader,epochs=10,patience=10,criterion = nn.MSELoss):
    early_stop_cnt , pre_loss = 0 , float('inf')
    for e in range(epochs):
        total_train_loss = 0
        for images, labels in data_loader:
            output = model(images)
            loss = criterion(output,labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_train_loss += loss.item()
        train_loss = total_train_loss/len(data_loader)
        print(f'{e+1}Epoch - loss : {train_loss:.4f}',end=' ')
        with torch.no_grad():
            total_valid_loss = 0
            for images, labels in valid_data_loader:
                output = model(images)
                loss = criterion(output,labels)
                total_valid_loss += loss.item()
            valid_loss = total_valid_loss/len(valid_data_loader)
            print(f'valid_loss : {valid_loss:.4f}')
            try:
                ck_loss = torch.load('ck.pt')['loss'] 
                if valid_loss < ck_loss:
                    print('save ck')
                    torch.save({
                        'epoch' : e,
                        'loss' : valid_loss,
                        'model_state_dict': model.state_dict(),
                    },'ck.pt')
            except:
                torch.save({
                        'epoch' : e,
                        'loss' : valid_loss,
                        'model_state_dict': model.state_dict(),
                    },'ck.pt')
            if train_loss - pre_loss < 1e-3:
                early_stop_cnt += 1
                if early_stop_cnt == patience:
                    torch.save({
                        'epoch' : e,
                        'loss' : valid_loss,
                        'model_state_dict': model.state_dict(),
                    },'ck.pt')
                    print("####Early stop####")
                    return 0
            pre_loss = train_loss

train(model,train_loader,valid_loader,epochs=50,patience=10,criterion=criterion)


1Epoch - loss : 1.0764 valid_loss : 0.5549
2Epoch - loss : 0.5853 valid_loss : 0.4941
3Epoch - loss : 0.5370 valid_loss : 0.4672
save ck
4Epoch - loss : 0.5053 valid_loss : 0.4514
save ck
5Epoch - loss : 0.4912 valid_loss : 0.4854
6Epoch - loss : 0.4878 valid_loss : 0.4717
7Epoch - loss : 0.4755 valid_loss : 0.4716
8Epoch - loss : 0.4650 valid_loss : 0.5141
9Epoch - loss : 0.4577 valid_loss : 0.5240
10Epoch - loss : 0.4521 valid_loss : 0.5604
####Early stop####


0

In [119]:
checkpoint = torch.load('ck.pt')
checkpoint['epoch'] , checkpoint['loss']

(9, 0.5603854461642336)

# 토치에서 텐서보드 활용하기
- 기록은 디폴트로 .runs/에 저장됨
- log_dir로 지정 가능

In [130]:
from torch.utils.tensorboard import SummaryWriter
import os
root_logdir = os.path.join(os.curdir,"my_logs")
def get_run_logdir():
    import time
    run_id = time.strftime('run_%Y_%m_%d-%H_%M_%S')
    return os.path.join(root_logdir, run_id)
run_logdir = get_run_logdir()
writer = SummaryWriter(run_logdir)

model = nn.Sequential(
    nn.Linear(8, 30),
    nn.ReLU(),
    nn.Linear(30, 1)
)

def train(model,data_loader,valid_data_loader,epochs=10,criterion = nn.MSELoss):
    for e in range(epochs):
        total_train_loss = 0
        for images, labels in data_loader:
            output = model(images)
            loss = criterion(output,labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_train_loss += loss.item()
        print(f'{e+1}Epoch - loss : {(total_train_loss/len(data_loader)):.4f}',end=' ')
        with torch.no_grad():
            total_valid_loss = 0
            for images, labels in valid_data_loader:
                output = model(images)
                loss = criterion(output,labels)
                total_valid_loss += loss.item()
            print(f'valid_loss : {(total_valid_loss/len(valid_data_loader)):.4f}')
        # 로그 저장
        writer.add_scalar("Loss/train", (total_train_loss/len(data_loader)), e)
    writer.close()

criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(),lr=0.01)
train(model,train_loader,valid_loader,20,criterion)


1Epoch - loss : 0.7962 valid_loss : 0.7518
2Epoch - loss : 0.5816 valid_loss : 0.4435
3Epoch - loss : 0.4953 valid_loss : 0.4918
4Epoch - loss : 0.4648 valid_loss : 0.5159
5Epoch - loss : 0.4443 valid_loss : 0.5603
6Epoch - loss : 0.4379 valid_loss : 0.6495
7Epoch - loss : 0.4292 valid_loss : 0.7422
8Epoch - loss : 0.4230 valid_loss : 0.8221
9Epoch - loss : 0.4144 valid_loss : 0.9591
10Epoch - loss : 0.4088 valid_loss : 1.0896
11Epoch - loss : 0.4035 valid_loss : 1.2331
12Epoch - loss : 0.3976 valid_loss : 1.3666
13Epoch - loss : 0.3942 valid_loss : 1.6192
14Epoch - loss : 0.3928 valid_loss : 1.6868
15Epoch - loss : 0.3886 valid_loss : 1.9327
16Epoch - loss : 0.3907 valid_loss : 2.1442
17Epoch - loss : 0.3821 valid_loss : 2.3347
18Epoch - loss : 0.3805 valid_loss : 2.7081
19Epoch - loss : 0.3800 valid_loss : 2.6961
20Epoch - loss : 0.3791 valid_loss : 2.8549


In [131]:
%load_ext tensorboard
%tensorboard --logdir=./my_logs --port=6006