# CIFAR-10 (C10) でFine-tuningしたViTモデルのフォワードパスを実行する

## ライブラリのインポート
transformersに関しては，.vscode/settings.jsonのextraPathsにパスを記載したらうまくインポートできた．

In [6]:
import os, sys, math
sys.path.append("../src")
import numpy as np
import torch
from datasets import load_dataset, load_metric
from transformers import DefaultDataCollator, ViTForImageClassification, TrainingArguments, Trainer
from utils.helper import get_device
from utils.vit_util import processor, transforms, compute_metrics

## 初期設定

In [2]:
# Resolve the compute device (cuda or cpu) through the project helper.
device = get_device()

# Load the CIFAR-10 dataset (only the very first load takes a while).
cifar10 = load_dataset("cifar10")
# Register the preprocessing so that it is applied on the fly whenever
# samples are read, instead of materialising a preprocessed copy.
cifar10_preprocessed = cifar10.with_transform(transforms)
# List of the class-name strings for the labels.
labels = cifar10_preprocessed["train"].features["label"].names

# Collator used to assemble samples into batches.
data_collator = DefaultDataCollator()

# Restore the fine-tuned model, move it to the device, and switch it to
# evaluation mode.
pretrained_dir = "/src/src/out_vit_c10"
model = ViTForImageClassification.from_pretrained(pretrained_dir)
model = model.to(device)
model.eval()

# Reload the settings that were used for training.
# NOTE(review): torch.load unpickles the file, so only point this at a
# checkpoint directory you trust.
training_args_path = os.path.join(pretrained_dir, "training_args.bin")
training_args = torch.load(training_args_path)

# Build the Trainer object that drives prediction in the following cells.
trainer_kwargs = {
    "model": model,
    "args": training_args,
    "data_collator": data_collator,
    "compute_metrics": compute_metrics,
    "train_dataset": cifar10_preprocessed["train"],
    "eval_dataset": cifar10_preprocessed["test"],
    "tokenizer": processor,
}
trainer = Trainer(**trainer_kwargs)

Device: cuda


Found cached dataset parquet (/root/.cache/huggingface/datasets/parquet/plain_text-d4c080360fb556b0/0.0.0/14a00e99c0d15a23649d0db8944380ac81082d4b021f398733dd84f3a6c569a7)


  0%|          | 0/2 [00:00<?, ?it/s]

## 推論の実行

In [3]:
# Work out how many iterations each prediction run will take from the dataset
# sizes and the batch size.
training_args_dict = training_args.to_dict()
# Per-device sizes are kept under their original names for any later cells.
train_batch_size = training_args_dict["per_device_train_batch_size"]
eval_batch_size = training_args_dict["per_device_eval_batch_size"]
# trainer.predict() batches every dataset -- including the training split --
# with the evaluation batch size (per-device size scaled by the GPU count),
# so both iteration counts must be derived from that value rather than from
# the training batch size.
predict_batch_size = training_args.eval_batch_size
num_train_samples = len(cifar10_preprocessed["train"])
num_test_samples = len(cifar10_preprocessed["test"])
train_iter = math.ceil(num_train_samples / predict_batch_size)
eval_iter = math.ceil(num_test_samples / predict_batch_size)

# Run inference on the training data and then on the test data.
print(f"predict training data... #iter = {train_iter} ({num_train_samples} samples / {predict_batch_size} batches)")
train_pred = trainer.predict(cifar10_preprocessed["train"])
print(f"predict evaluation data... #iter = {eval_iter} ({num_test_samples} samples / {predict_batch_size} batches)")
test_pred = trainer.predict(cifar10_preprocessed["test"])

predict training data... #iter = 1563 (50000 samples / 32 batches)


predict evaluation data... #iter = 313 (10000 samples / 32 batches)


## 推論結果の確認

In [4]:
# Show the metrics from the prediction run on the training split. The keys in
# the recorded output carry a "test_" prefix even though this is training data.
train_pred.metrics

{'test_loss': 0.005070374347269535,
 'test_accuracy': {'accuracy': 0.99916},
 'test_f1': {'f1': 0.9991599698695797},
 'test_runtime': 2008.7579,
 'test_samples_per_second': 24.891,
 'test_steps_per_second': 0.778}

In [5]:
# Show the metrics from the prediction run on the CIFAR-10 test split.
test_pred.metrics

{'test_loss': 0.05960104987025261,
 'test_accuracy': {'accuracy': 0.9829},
 'test_f1': {'f1': 0.9829188809430107},
 'test_runtime': 397.1701,
 'test_samples_per_second': 25.178,
 'test_steps_per_second': 0.788}