# CH01.5. **Model Checkpoint**

## 00. **작업 환경 설정**

#### 00.0. **사전 변수 설정**

In [1]:
# Seed value handed to every random number generator configured in this notebook
SEED_NUM: int = 2025
# Location of the trained regression checkpoint that is loaded later on
MODEL_PTH: str = "../../model/bostonReg.pt"

#### 00.1. **라이브러리 호출 및 옵션 설정**

In [2]:
#(1) Import libraries
import os
import random
import numpy as np
import torch

#(2) Set up options
# Seed the Python, NumPy and PyTorch random number generators with the same value
random.seed(SEED_NUM)
np.random.seed(SEED_NUM)
torch.manual_seed(SEED_NUM)
# NOTE: the running interpreter reads PYTHONHASHSEED only at startup, so this
# assignment is only picked up by processes launched from here
os.environ['PYTHONHASHSEED'] = str(SEED_NUM)
# Ask PyTorch to use deterministic algorithm implementations
torch.use_deterministic_algorithms(True)
# Print tensors with 10 decimal places and without scientific notation
torch.set_printoptions(precision=10, sci_mode=False)

#### 00.2. **사용자정의함수 정의**

In [3]:
pass  # placeholder cell: this section defines no user-defined functions

#### 00.3. **클래스 정의**

In [4]:
class MyRegressionModel(torch.nn.Module) :
    """Fully connected regression network built from named ``fc_block_{i}`` stages.

    Every entry of ``hidden_dims`` adds one hidden block made of
    ``Linear -> BatchNorm1d -> ReLU -> Dropout(p=0.2)``. A final block holding a
    single ``Linear`` layer maps the last width to ``input_shape[1]`` outputs.
    Blocks are registered in order as ``fc_block_0``, ``fc_block_1``, ... so the
    state-dict keys follow that numbering.

    Args:
        input_shape: Pair whose first item is the number of input features and
            whose second item is the number of output values.
        hidden_dims: Widths of the hidden blocks, in order. May be empty, in
            which case the network is the single output ``Linear`` layer.
        device: Device the module is moved to and that inputs are sent to in
            ``forward``.
    """
    def __init__(self, input_shape:tuple, hidden_dims:list, device:torch.device) :
        super().__init__()
        self.device = device
        # Copy into a list so any sequence (list or tuple) of widths is accepted
        dims = [input_shape[0]] + list(hidden_dims)
        # Consecutive pairs of `dims` are the (in, out) widths of each hidden block
        for i, (in_dim, out_dim) in enumerate(zip(dims[:-1], dims[1:])):
            self.add_module(
                name=f'fc_block_{i}',
                module=torch.nn.Sequential(
                    torch.nn.Linear(in_features=in_dim, out_features=out_dim),
                    torch.nn.BatchNorm1d(num_features=out_dim),
                    torch.nn.ReLU(),
                    torch.nn.Dropout(p=0.2)
                )
            )
        # Number the output block from the count of hidden blocks rather than
        # from the loop variable, which is undefined when `hidden_dims` is empty
        self.add_module(
            name=f'fc_block_{len(dims) - 1}',
            module=torch.nn.Sequential(
                torch.nn.Linear(in_features=dims[-1], out_features=input_shape[1])
            )
        )
        self.to(device=device)
    def forward(self, x:torch.Tensor) -> torch.Tensor :
        """Move ``x`` to the model's device and pass it through every block in registration order."""
        x = x.to(device=self.device)
        for block in self.children():
            x = block(x)
        return x

<b></b>

## 01. **모델 체크포인트(Model Checkpoint)**

#### 01.1. **정의** : 학습 중간의 결과물(손실, 에포크, 버퍼 등)을 저장한 파일

#### **(`PLUS`)** 버퍼(Buffer) : 옵티마이저가 업데이트하지 않는, 학습 대상이 아닌 모델의 상태 정보

#### 01.2. **특징** : 
##### $ \hspace{0.15cm} $ ① 체크포인트를 통해 학습이 중단된 지점부터 이어서 학습 가능
##### $ \hspace{0.15cm} $ ② 여러 버전의 모델 파라미터를 보존함으로써 실험 결과 비교 용이
##### $ \hspace{0.15cm} $ ③ 성능이 좋았던 특정 에포크의 모델을 저장해 바로 추론에 사용 가능

<b></b>

## 02. **모델 학습 정보 저장 및 로드**

#### 02.1. **모델 정의**

In [5]:
#(1) Define `model`
# Use the Apple MPS backend when it is available and fall back to the CPU
# otherwise, so the cell also runs on machines without MPS support
model = MyRegressionModel(
    input_shape=(13, 1),
    hidden_dims=[128, 64, 32],
    device='mps' if torch.backends.mps.is_available() else 'cpu'
)

#(2) Display `model`
model

MyRegressionModel(
  (fc_block_0): Sequential(
    (0): Linear(in_features=13, out_features=128, bias=True)
    (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.2, inplace=False)
  )
  (fc_block_1): Sequential(
    (0): Linear(in_features=128, out_features=64, bias=True)
    (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.2, inplace=False)
  )
  (fc_block_2): Sequential(
    (0): Linear(in_features=64, out_features=32, bias=True)
    (1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.2, inplace=False)
  )
  (fc_block_3): Sequential(
    (0): Linear(in_features=32, out_features=1, bias=True)
  )
)

In [6]:
#(3) Define optimizer(optimization method)
# Adam over every parameter of `model`, with learning rate 1e-3 and weight decay 1e-7
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=1e-3,
    weight_decay=1e-7,
)

In [None]:
#(4) Print parameter of `model`
# Walk the parameters in registration order, showing each name followed by its values
for layer_name, weights in model.named_parameters():
    print(f'>> Layer Name : {layer_name}')
    print(f'  - Weight Values : \n{weights.data}\n')

>> Layer Name : fc_block_0.0.weight
  - Weight Values : 
tensor([[ 0.1026033014,  0.2415883541, -0.1164668947,  ...,
          0.2466686368, -0.0126117039, -0.1186499894],
        [-0.0617195442, -0.2163682431, -0.0773328468,  ...,
          0.2061441392,  0.0997358114, -0.1133674309],
        [ 0.2669299245,  0.2669528425,  0.1729592532,  ...,
          0.1827413291, -0.0905306190,  0.0783521682],
        ...,
        [ 0.1072650105, -0.0613389276,  0.0277455635,  ...,
          0.0013323941,  0.0551764369,  0.0749951825],
        [ 0.2212283313,  0.0268744938,  0.1415645927,  ...,
          0.1468064636,  0.2351855636, -0.0614205934],
        [-0.0549327321,  0.0075067226, -0.2231051773,  ...,
         -0.2565746903,  0.1734197289, -0.0734199062]], device='mps:0')

>> Layer Name : fc_block_0.0.bias
  - Weight Values : 
tensor([     0.0557192601,     -0.2066111118,     -0.2100090384,
             0.0611609519,     -0.0960554630,     -0.2267509252,
            -0.2545593083,     -0.026

#### 02.2. **모델 학습 중 정보 저장** : `torch.save()`

In [8]:
# Save a checkpoint dictionary holding the training progress together with the
# model and optimizer states
torch.save(
    obj={
        'epoch'     : 0,
        # Stored as a list because the checkpoint reader indexes it as a
        # per-epoch history; nothing has been recorded yet at epoch 0
        'loss_hist' : [],
        'model'     : model.state_dict(),
        'optimizer' : optimizer.state_dict() # and so on ..
    }, 
    f='../../model/TempModelState.pt'
)

#### **(`PLUS`)** `.pt` 파일은 PyTorch 객체를 직렬화하여 저장하거나 불러올 때 사용하는 파일이며, 일반적으로 모델의 상태(가중치, 옵티마이저 상태 등)를 담은 딕셔너리를 저장함

#### 02.3. **학습된 모델 로드** : `torch.load()`

In [9]:
#(1) load checkpoint
# `map_location` remaps the stored tensors onto the device the model uses, so a
# checkpoint saved on another device can still be restored here
# NOTE(review): `torch.load` unpickles the file - only load checkpoints from a trusted source
checkpoint = torch.load(f=MODEL_PTH, map_location=model.device)
model.load_state_dict(state_dict=checkpoint['model'])
# The last entry of the loss history is reported as the best loss
print(f'>> Best Epoch : {checkpoint["epoch"]}, Best Loss : {checkpoint["loss_hist"][-1]}')

>> Best Epoch : 966, Best Loss : 0.005921941153549899


In [10]:
#(2) Print parameter of `model`
# Same listing as before the load, so the restored values can be compared by eye
for layer_name, weights in model.named_parameters():
    print(f'>> Layer Name : {layer_name}')
    print(f'  - Weight Values : \n{weights.data}\n')

>> Layer Name : fc_block_0.0.weight
  - Weight Values : 
tensor([[ 0.1330938488,  0.1676050425, -0.1022319421,  ...,
          0.2323826253,  0.0455545112, -0.1297184825],
        [ 0.0575231835, -0.0763843879,  0.0214604586,  ...,
          0.1022378132,  0.0815754011, -0.3119116426],
        [ 0.4090193212,  0.3140781522,  0.0580098256,  ...,
          0.2155340016, -0.0310003590, -0.0273446627],
        ...,
        [ 0.1333041489, -0.0059883799,  0.0947068930,  ...,
         -0.2438832819,  0.1574664563,  0.0893128067],
        [ 0.3040444553,  0.1094021127,  0.1627599150,  ...,
          0.2186575085,  0.0246630609, -0.0052742832],
        [ 0.0150746563,  0.0112289274, -0.2422903627,  ...,
         -0.2414046675,  0.1598390341, -0.1794553250]], device='mps:0')

>> Layer Name : fc_block_0.0.bias
  - Weight Values : 
tensor([     0.0016604043,      0.0015135173,      0.0019560163,
            -0.0001374914,      0.0045228167,      0.0109997429,
             0.0074160034,     -0.044