# CH01.5. **Model Checkpoint**

## 00. **작업 환경 설정**

#### 00.0. **사전 변수 설정**

In [1]:
# Seed value handed to every random number generator configured in this notebook.
SEED_NUM: int = 2025
# Path of the checkpoint file that is later read back with `torch.load`.
MODEL_PTH: str = '../../model/bostonReg.pt'

#### 00.1. **라이브러리 호출 및 옵션 설정**

In [2]:
#(1) Import libraries
import os
import random
import numpy as np
import torch

#(2) Set options
# Seed every random number generator in use so repeated runs print the same values.
# PYTHONHASHSEED is exported for processes started later; the hash seed of the
# already-running interpreter is not changed by this assignment.
os.environ['PYTHONHASHSEED'] = str(SEED_NUM)
random.seed(SEED_NUM)                     # Python standard-library RNG
np.random.seed(SEED_NUM)                  # NumPy global RNG
torch.use_deterministic_algorithms(True)  # raise on ops lacking a deterministic implementation
torch.manual_seed(SEED_NUM)               # PyTorch default generator
torch.mps.manual_seed(SEED_NUM)           # PyTorch MPS generator
# Print tensors with 10 decimal places and without scientific notation.
torch.set_printoptions(precision=10, sci_mode=False)

#### 00.2. **사용자정의함수 정의**

In [3]:
# Intentional placeholder: this cell defines no user-defined functions.
pass

#### 00.3. **클래스 정의**

In [4]:
class MyRegressionModel(torch.nn.Module):
    """Fully connected regression network with a single scalar output.

    Each hidden block is ``Linear -> BatchNorm1d -> ReLU -> Dropout`` and is
    registered as ``layers.fc_0``, ``layers.fc_1``, ... The final entry of
    ``layers`` is a plain ``Linear`` head producing one value per sample, so
    ``state_dict`` keys look like ``layers.fc_0.0.weight`` and
    ``layers.fc_3.bias`` for the default architecture.

    Args:
        dims: Layer widths. ``dims[0]`` is the number of input features and
            every following entry is the width of one hidden block. Defaults
            to ``(13, 128, 64, 32)``.
        dropout_p: Dropout probability applied at the end of every hidden
            block. Defaults to ``0.2``.

    Raises:
        ValueError: If ``dims`` is empty.
    """

    def __init__(self, dims=(13, 128, 64, 32), dropout_p=0.2):
        super().__init__()
        dims = list(dims)
        if not dims:
            raise ValueError('`dims` must contain at least the input feature count')
        self.layers = torch.nn.ModuleDict()
        # Blocks are created in network order (same order as before), so a
        # seeded run initialises the Linear weights identically.
        for idx, (n_in, n_out) in enumerate(zip(dims[:-1], dims[1:])):
            self.layers[f'fc_{idx}'] = torch.nn.ModuleList(modules=[
                torch.nn.Linear(in_features=n_in, out_features=n_out),
                torch.nn.BatchNorm1d(num_features=n_out),
                torch.nn.ReLU(),
                torch.nn.Dropout(p=dropout_p),
            ])
        # The output head is keyed right after the last hidden block; the key
        # is derived from `dims` instead of a leaked loop variable.
        self.layers[f'fc_{len(dims) - 1}'] = torch.nn.Linear(
            in_features=dims[-1], out_features=1
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run ``x`` through every hidden block and then the output head.

        Args:
            x: Input tensor whose last dimension equals ``dims[0]``.

        Returns:
            The output of the final ``Linear`` head (last dimension of size 1).
        """
        # ModuleDict preserves insertion order, so the head is always last.
        *hidden_blocks, head = self.layers.values()
        for block in hidden_blocks:
            for layer in block:
                x = layer(x)
        return head(x)

<b></b>

## 01. **모델 체크포인트(Model Checkpoint)**

#### 01.1. **정의** : 학습 중간의 결과물(손실, 에포크, 버퍼 등)을 저장한 파일

#### **(`PLUS`)** 버퍼(Buffer) : 옵티마이저가 업데이트하지 않는, 학습 대상이 아닌 모델의 상태 정보

#### 01.2. **특징** : 
##### $ \hspace{0.15cm} $ ① 체크포인트를 통해 학습이 중단된 지점부터 이어서 학습 가능
##### $ \hspace{0.15cm} $ ② 여러 버전의 모델 파라미터를 보존함으로써 실험 결과 비교 용이
##### $ \hspace{0.15cm} $ ③ 성능이 좋았던 특정 에포크의 모델을 저장해 바로 추론에 사용 가능

<b></b>

## 02. **모델 학습 정보 저장 및 로드**

#### 02.1. **모델 정의**

In [5]:
#(1) Define `model`
# Build the network first, then cast its floating-point tensors to float32.
model = MyRegressionModel()
model = model.to(dtype=torch.float32)

#(2) Display `model`
# A bare expression on the last line makes the notebook render the module tree.
model

MyRegressionModel(
  (layers): ModuleDict(
    (fc_0): ModuleList(
      (0): Linear(in_features=13, out_features=128, bias=True)
      (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
      (3): Dropout(p=0.2, inplace=False)
    )
    (fc_1): ModuleList(
      (0): Linear(in_features=128, out_features=64, bias=True)
      (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
      (3): Dropout(p=0.2, inplace=False)
    )
    (fc_2): ModuleList(
      (0): Linear(in_features=64, out_features=32, bias=True)
      (1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
      (3): Dropout(p=0.2, inplace=False)
    )
    (fc_3): Linear(in_features=32, out_features=1, bias=True)
  )
)

In [6]:
#(3) Define loss function
# NOTE(review): left commented out — no visible cell computes a loss; confirm before removing.
# criterion = torch.nn.MSELoss()

#(4) Define optimizer(optimization method)
# Adam over all model parameters; its state is stored in the checkpoint alongside the model.
optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=1e-2,
    weight_decay=1e-7,
)

In [7]:
#(5) Print parameter of `model`
# The model has not been trained or loaded yet, so these are the initial values.
for name, params in model.named_parameters():
    print(
        f'>> Layer Name : {name}',
        f'  - Weight Values : \n{params.data}\n',
        sep='\n',
    )

>> Layer Name : layers.fc_0.0.weight
  - Weight Values : 
tensor([[ 0.1026033014,  0.2415883541, -0.1164668947,  ...,
          0.2466686368, -0.0126117039, -0.1186499894],
        [-0.0617195442, -0.2163682431, -0.0773328468,  ...,
          0.2061441392,  0.0997358114, -0.1133674309],
        [ 0.2669299245,  0.2669528425,  0.1729592532,  ...,
          0.1827413291, -0.0905306190,  0.0783521682],
        ...,
        [ 0.1072650105, -0.0613389276,  0.0277455635,  ...,
          0.0013323941,  0.0551764369,  0.0749951825],
        [ 0.2212283313,  0.0268744938,  0.1415645927,  ...,
          0.1468064636,  0.2351855636, -0.0614205934],
        [-0.0549327321,  0.0075067226, -0.2231051773,  ...,
         -0.2565746903,  0.1734197289, -0.0734199062]])

>> Layer Name : layers.fc_0.0.bias
  - Weight Values : 
tensor([     0.0557192601,     -0.2066111118,     -0.2100090384,
             0.0611609519,     -0.0960554630,     -0.2267509252,
            -0.2545593083,     -0.0260495450,      

#### 02.2. **모델 학습 중 정보 저장** : `torch.save()`

In [8]:
# Bundle the progress counters (placeholder zeros here) together with the model
# and optimizer state dicts, and write them to a single checkpoint file.
torch.save(
    dict(
        epoch=0,
        loss_hist=0,
        model=model.state_dict(),
        optimizer=optimizer.state_dict(),  # and so on ..
    ),
    '../../model/TempModelState.pt',
)

#### **(`PLUS`)** `.pt` 파일은 `torch.save()`로 직렬화한 객체를 담는 PyTorch 파일로, 보통 모델의 상태(가중치, 옵티마이저 상태 등)를 딕셔너리 구조로 묶어 저장하거나 불러올 때 사용함

#### 02.3. **학습된 모델 로드** : `torch.load()`

In [None]:
#(1) load checkpoint
# NOTE: `torch.load` unpickles the file, so only open checkpoints from a trusted source.
# `map_location='cpu'` keeps the load working when the checkpoint was written from an
# accelerator (CUDA/MPS) that this machine lacks; `load_state_dict` then copies the
# values into the model's existing parameters.
checkpoint = torch.load(f=MODEL_PTH, map_location='cpu')
model.load_state_dict(state_dict=checkpoint['model'])
# Report the stored epoch and the most recent entry of the stored loss history.
print(f'>> Epoch : {checkpoint["epoch"]}, Loss : {checkpoint["loss_hist"][-1]}')

>> Epoch : 74, Loss : 0.00872905362242212


In [None]:
#(2) Print parameter of `model`
# Same listing as before loading, so the restored values can be compared with the initial ones.
for name, params in model.named_parameters():
    print(f'>> Layer Name : {name}\n  - Weight Values : \n{params.data}\n\n', end='')

>> Layer Name : layers.fc_0.0.weight
  - Weight Values : 
tensor([[ 0.0511536188,  0.1724647284,  0.0390866548,  ...,
          0.3271007240, -0.0750861168, -0.1463790983],
        [-0.0873598754, -0.0848669708, -0.1629631966,  ...,
          0.1386610568,  0.1623478234, -0.2532407045],
        [ 0.2504421771,  0.2563405335,  0.2271374911,  ...,
          0.0106048146, -0.1349478066,  0.1465401053],
        ...,
        [ 0.2511723638, -0.1656336635, -0.0859760568,  ...,
         -0.0594360754,  0.0097782798,  0.2271948904],
        [ 0.1708459854, -0.0853803605,  0.0781905949,  ...,
          0.1647127867,  0.2737581134, -0.0048183710],
        [-0.1149299666,  0.1478869766, -0.3266734481,  ...,
         -0.1805072129,  0.2110962719, -0.2810185850]])

>> Layer Name : layers.fc_0.0.bias
  - Weight Values : 
tensor([    -0.0000456529,      0.0000225843,     -0.0000068984,
            -0.0002211666,     -0.0000016902,     -0.0001654439,
             0.0000675317,     -0.0007127478,      