In [2]:
import argparse
import datetime
import json
import numpy as np
import os
import time
from pathlib import Path

import torch
import torch.backends.cudnn as cudnn
from torch.utils.tensorboard import SummaryWriter

import timm

assert timm.__version__ == "0.3.2" # 버전 체크 - timm 0.3.2 버전 확인
from timm.models.layers import trunc_normal_
from timm.data.mixup import Mixup
from timm.loss import LabelSmoothingCrossEntropy, SoftTargetCrossEntropy

# 커스텀 유틸리티 모듈들 임포트
import util.lr_decay as lrd  # 학습률 스케줄링
import util.misc as misc  # 기타 유틸리티 함수들
from util.datasets import build_dataset  # 데이터셋 생성
from util.pos_embed import interpolate_pos_embed  # 위치 임베딩 보간
from util.misc import NativeScalerWithGradNormCount as NativeScaler

import models_vit  # Vision Transformer 모델

from engine_finetune import train_one_epoch, evaluate  # 훈련 및 평가 엔진


In [4]:
from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
from torchvision import datasets, transforms
import PIL
def build_transform(is_train, input_size=224):
    """Build the evaluation preprocessing pipeline for ImageNet-style images.

    Args:
        is_train: Currently ignored. No training augmentation is implemented
            here, so the evaluation pipeline is returned for either value.
        input_size: Side length of the final square center crop. Defaults to
            224, which reproduces the previously hard-coded behaviour.

    Returns:
        A ``transforms.Compose`` applying, in order: bicubic ``Resize``,
        ``CenterCrop(input_size)``, ``ToTensor`` and ``Normalize`` with the
        timm ``IMAGENET_DEFAULT_MEAN`` / ``IMAGENET_DEFAULT_STD`` constants.
    """
    # Inputs up to 224 px keep the 224/256 resize-to-crop ratio (e.g. resize
    # to 256, then crop 224); larger inputs are resized to `input_size` directly.
    crop_pct = 224 / 256 if input_size <= 224 else 1.0
    resize_size = int(input_size / crop_pct)

    return transforms.Compose([
        transforms.Resize(resize_size, interpolation=PIL.Image.BICUBIC),
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        transforms.Normalize(IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD),
    ])


# NOTE: this definition rebinds the name `build_dataset` that the first cell
# imports from `util.datasets`; after this cell runs, the local version is used.
def build_dataset(is_train, data_path="/data/docparser_mh/temp/data/ImageNet"):
    """Create an ``ImageFolder`` dataset for one split under ``data_path``.

    Args:
        is_train: Selects the ``train`` sub-directory when truthy, otherwise
            ``val``. It is also forwarded to ``build_transform``.
        data_path: Directory containing the ``train`` / ``val`` folders.
            Defaults to the location this notebook was originally written for.

    Returns:
        The ``datasets.ImageFolder`` instance. Its summary is printed as a
        side effect.
    """
    split = 'train' if is_train else 'val'
    root = os.path.join(data_path, split)
    dataset = datasets.ImageFolder(root, transform=build_transform(is_train))

    print(dataset)

    return dataset

In [None]:
# Evaluation-only setup: seed the RNGs, build the validation loader and
# instantiate the Vision Transformer.

# Fixed seed for reproducibility.
seed = 0
np.random.seed(seed)
torch.manual_seed(seed)

# Enable cuDNN benchmark mode (performance optimisation).
cudnn.benchmark = True

device = torch.device("cuda")
# Checkpoint file that the following cell passes to torch.load.
FineTune = "/data/docparser_mh/temp/mae_temp/output_dir/checkpoint-1.pth"

# No TensorBoard writer is created in this notebook.
log_writer = None

# Only the validation split is built; no training dataset is created here.
dataset_val = build_dataset(is_train=False)

# Plain sequential sampling (no distributed sampler is set up).
sampler_val = torch.utils.data.SequentialSampler(dataset_val)

# Validation data loader.
data_loader_val = torch.utils.data.DataLoader(
    dataset_val,
    batch_size=4,
    sampler=sampler_val,
    num_workers=0,
    pin_memory=False,
    drop_last=False,  # keep every sample during validation
)

# Build the ViT-Base/16 model through the project's `models_vit` module.
# NOTE(review): `mask_ratio` is a keyword of this project's model factory;
# its meaning is not visible from this notebook — confirm in models_vit.
model = getattr(models_vit, "vit_base_patch16")(
    num_classes=1000,
    global_pool=True,
    drop_path_rate=0.1,
    mask_ratio=0.2,
)

Dataset ImageFolder
    Number of datapoints: 50000
    Root location: /data/docparser_mh/temp/data/ImageNet/val
    StandardTransform
Transform: Compose(
               Resize(size=256, interpolation=bicubic, max_size=None, antialias=True)
               CenterCrop(size=(224, 224))
               ToTensor()
               Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
           )


NameError: name 'args' is not defined

In [None]:
# Read the checkpoint file with its tensors mapped to the CPU.
# NOTE(review): torch.load unpickles the file — only load checkpoints from a
# trusted source.
checkpoint = torch.load(FineTune, map_location='cpu')
# Presumably the saved model state dict, stored under the 'model' key — verify
# against the code that wrote the checkpoint.
checkpoint_model = checkpoint['model']
# State dict of the freshly constructed model.
state_dict = model.state_dict()

# NOTE(review): nothing visible in this cell copies `checkpoint_model` into
# `model` — confirm that a later step loads the weights before evaluation.
model.to(device)