# C10でFine-tuningしたViTモデルのフォワードパスを実行し，C10を予測する

## ライブラリのインポート
transformersに関しては，.vscode/settings.jsonのextraPathsにパスを記載したらうまくインポートできた．

In [1]:
import os, sys, math
sys.path.append("../src")
import numpy as np
import torch
from datasets import load_from_disk
from transformers import DefaultDataCollator, ViTForImageClassification, Trainer
from utils.helper import get_device
from utils.vit_util import processor, transforms, compute_metrics
from utils.constant import ViTExperiment

2024-04-25 17:07:30.567072: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-04-25 17:07:31.626156: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2024-04-25 17:07:31.626279: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory


## 初期設定

In [2]:
# Resolve the compute device (cuda or cpu).
device = get_device()

# Load the dataset from disk (only the first load is somewhat slow).
dataset_dir = ViTExperiment.DATASET_DIR
cifar10 = load_from_disk(os.path.join(dataset_dir, "c10"))
# Apply the preprocessing on the fly, at the moment examples are read.
cifar10_preprocessed = cifar10.with_transform(transforms)
# List of label strings, taken from the "label" feature of the train split.
labels = cifar10_preprocessed["train"].features["label"].names

# Load the pretrained model, move it to the device and switch to eval mode.
pretrained_dir = ViTExperiment.OUTPUT_DIR
model = ViTForImageClassification.from_pretrained(pretrained_dir)
model = model.to(device)
model.eval()

# Load the settings that were used at training time.
# NOTE(review): torch.load unpickles the file; only load files you trust.
training_args = torch.load(os.path.join(pretrained_dir, "training_args.bin"))

# Collator used to assemble examples into batches.
data_collator = DefaultDataCollator()
# Build the Trainer object that later cells use for prediction.
trainer = Trainer(
    args=training_args,
    model=model,
    tokenizer=processor,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    train_dataset=cifar10_preprocessed["train"],
    eval_dataset=cifar10_preprocessed["test"],
)

Device: cuda


## 推論の実行

In [3]:
# Run trainer.predict on a small subset of the test split.
# The subset is the first 33 samples (indices 0..32), selected deterministically.
first_test_samples = cifar10_preprocessed["test"].select(range(33))
predictions = trainer.predict(first_test_samples)

self.label_names=['labels']
dict_keys(['pixel_values', 'labels'])
True


self.label_names=['labels']
dict_keys(['pixel_values', 'labels'])
True


In [4]:
# Echo the PredictionOutput stored in `predictions` as the cell result.
predictions

PredictionOutput(predictions=array([[-0.9750511 , -1.3120453 , -0.68255013,  7.2387214 , -1.4510149 ,
         0.6184644 , -1.412275  , -1.4985118 , -1.291169  , -1.5076542 ],
       [ 0.27925816, -0.02822503, -0.8741759 , -1.0265126 , -1.1006895 ,
        -0.99763364, -0.7431901 , -1.2049388 ,  8.397488  , -0.7810269 ],
       [ 0.9514833 ,  0.42746207, -0.97584736, -1.1565424 , -1.1435704 ,
        -1.1687392 , -0.87914205, -1.0246634 ,  7.980294  , -1.0627933 ],
       [ 7.6908236 , -1.6353428 , -0.5374553 , -0.46603408, -1.3144487 ,
        -1.1595294 , -1.0332788 , -1.0855677 ,  0.0606713 , -1.0656741 ],
       [-0.8904549 , -0.88948923, -0.5693022 , -0.78478754, -0.45565078,
        -1.289307  ,  8.26966   , -1.0143343 , -0.889244  , -0.6667624 ],
       [-0.962662  , -1.1077971 , -1.0339864 , -0.16285956, -0.46044672,
        -0.66822964,  8.15542   , -1.0189737 , -1.2799773 , -0.91285896],
       [-1.2852596 ,  8.031684  , -1.1237736 , -0.551937  , -0.8456878 ,
        -0.73492

In [6]:
# Shape of element 1 of predictions.predictions.
# NOTE(review): the recorded (33, 10) result implies predictions.predictions was a
# tuple when this cell ran; cells were executed out of order, so confirm before relying on it.
predictions.predictions[1].shape

(33, 10)

In [3]:
# Compute the expected number of prediction iterations from the dataset sizes
# and the batch size.
training_args_dict = training_args.to_dict()
# Kept so other cells can still read it; it does not affect trainer.predict.
train_batch_size = training_args_dict["per_device_train_batch_size"]
eval_batch_size = training_args_dict["per_device_eval_batch_size"]
n_train = len(cifar10_preprocessed["train"])
n_test = len(cifar10_preprocessed["test"])
# trainer.predict batches through the evaluation dataloader, so the evaluation
# batch size applies to the training split as well.
# NOTE(review): this uses the per-device size; with several GPUs the effective
# batch may be larger -- confirm if running multi-GPU.
train_iter = math.ceil(n_train / eval_batch_size)
eval_iter = math.ceil(n_test / eval_batch_size)

# Run inference on the training and the test data.
print(f"predict training data... #iter = {train_iter} ({n_train} samples / {eval_batch_size} batches)")
train_pred = trainer.predict(cifar10_preprocessed["train"])
print(f"predict evaluation data... #iter = {eval_iter} ({n_test} samples / {eval_batch_size} batches)")
test_pred = trainer.predict(cifar10_preprocessed["test"])

predict training data... #iter = 1563 (50000 samples / 32 batches)


predict evaluation data... #iter = 313 (10000 samples / 32 batches)


In [4]:
# Echo the PredictionOutput obtained for the training split.
train_pred

PredictionOutput(predictions=(array([0.00301972, 0.00218944, 0.00168165, ..., 0.00236561, 0.00414773,
       0.00364854], dtype=float32), array([[ 7.4847946 , -0.9837806 , -1.219498  , ..., -1.3068287 ,
         1.6573851 , -0.6881158 ],
       [-1.0095965 , -0.5473271 , -0.9478028 , ..., -1.1993959 ,
        -0.73019946, -0.61273026],
       [ 7.719038  , -1.345912  , -1.0711552 , ..., -0.7733431 ,
        -0.23427938, -0.9757286 ],
       ...,
       [-1.2854414 ,  7.546252  , -1.419086  , ..., -1.171309  ,
        -0.8146533 ,  2.654647  ],
       [-0.9504717 ,  8.123409  , -0.87898463, ..., -1.1007679 ,
        -0.873811  , -0.03186027],
       [-1.3561383 , -1.3551763 , -1.0653834 , ..., -1.160192  ,
        -1.6577327 , -1.7272881 ]], dtype=float32)), label_ids=None, metrics={'test_runtime': 1993.4852, 'test_samples_per_second': 25.082, 'test_steps_per_second': 0.784})

## 推論結果をnpyで保存する

In [6]:
# Sanity check: distinct label values in the training split and how often each occurs.
np.unique(np.array(cifar10["train"]["label"]), return_counts=True)

(array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
 array([5000, 5000, 5000, 5000, 5000, 5000, 5000, 5000, 5000, 5000]))

In [7]:
# Labels of the training split as a NumPy array (read from the untransformed dataset).
train_labels = np.array(cifar10["train"]["label"])

In [19]:
# Pass train_pred.predictions[1] through softmax (over the last axis) to get probabilities.
train_pred_proba = torch.nn.functional.softmax(torch.tensor(train_pred.predictions[1]), dim=-1)
# Convert the probabilities to a NumPy array.
train_pred_proba = train_pred_proba.cpu().numpy()
# np.save does not create missing directories, so make sure the output folder exists.
pred_results_dir = os.path.join(pretrained_dir, "pred_results")
os.makedirs(pred_results_dir, exist_ok=True)
# Save a separate file for each label.
for c in range(len(labels)):
    # Rows whose label equals c.
    # NOTE(review): assumes the predictions are in the same order as train_labels -- confirm.
    tgt_proba = train_pred_proba[train_labels == c]
    # Save this label's slice of train_pred_proba.
    np.save(os.path.join(pred_results_dir, f"train_proba_{c}.npy"), tgt_proba)
    print(f"train_proba_{c}.npy ({tgt_proba.shape}) saved")

train_proba_0.npy ((5000, 10)) saved
train_proba_1.npy ((5000, 10)) saved
train_proba_2.npy ((5000, 10)) saved
train_proba_3.npy ((5000, 10)) saved
train_proba_4.npy ((5000, 10)) saved
train_proba_5.npy ((5000, 10)) saved
train_proba_6.npy ((5000, 10)) saved
train_proba_7.npy ((5000, 10)) saved
train_proba_8.npy ((5000, 10)) saved
train_proba_9.npy ((5000, 10)) saved


In [20]:
# Sanity check: distinct label values in the test split and how often each occurs.
np.unique(np.array(cifar10["test"]["label"]), return_counts=True)

(array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
 array([1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]))

In [21]:
# Labels of the test split as a NumPy array (read from the untransformed dataset).
test_labels = np.array(cifar10["test"]["label"])

In [23]:
# Pass test_pred.predictions[1] through softmax (over the last axis) to get probabilities,
# then convert the result to a NumPy array.
test_pred_proba = torch.nn.functional.softmax(torch.tensor(test_pred.predictions[1]), dim=-1)
test_pred_proba = test_pred_proba.cpu().numpy()
# np.save does not create missing directories, so make sure the output folder exists.
pred_results_dir = os.path.join(pretrained_dir, "pred_results")
os.makedirs(pred_results_dir, exist_ok=True)
# Save a separate file for each label.
for c in range(len(labels)):
    # Rows whose label equals c.
    # NOTE(review): assumes the predictions are in the same order as test_labels -- confirm.
    tgt_proba = test_pred_proba[test_labels == c]
    print(tgt_proba.shape)
    # Save this label's slice of test_pred_proba.
    np.save(os.path.join(pred_results_dir, f"test_proba_{c}.npy"), tgt_proba)
    print(f"test_proba_{c}.npy ({tgt_proba.shape}) saved")

(1000, 10)
test_proba_0.npy ((1000, 10)) saved
(1000, 10)
test_proba_1.npy ((1000, 10)) saved
(1000, 10)
test_proba_2.npy ((1000, 10)) saved
(1000, 10)
test_proba_3.npy ((1000, 10)) saved
(1000, 10)
test_proba_4.npy ((1000, 10)) saved
(1000, 10)
test_proba_5.npy ((1000, 10)) saved
(1000, 10)
test_proba_6.npy ((1000, 10)) saved
(1000, 10)
test_proba_7.npy ((1000, 10)) saved
(1000, 10)
test_proba_8.npy ((1000, 10)) saved
(1000, 10)
test_proba_9.npy ((1000, 10)) saved


# C10でFine-tuningしたViTモデルのフォワードパスを実行し，C10Cを予測する

## 初期設定

In [3]:
# Resolve the compute device (cuda or cpu).
device = get_device()
# Load the "c10c" dataset from disk (only the first load is somewhat slow).
dataset_dir = ViTExperiment.DATASET_DIR
c10c_path = os.path.join(dataset_dir, "c10c")
c10c = load_from_disk(c10c_path)
# Key of the c10c entry to work with; the trailing expression displays that entry.
tmp_key = "zoom_blur"
c10c[tmp_key]

Device: cuda


Dataset({
    features: ['img', 'label'],
    num_rows: 50000
})

In [6]:
# Apply the preprocessing on the fly, at the moment examples are read.
# The variable keeps its name from the C10 section, but it now holds c10c[tmp_key].
cifar10_preprocessed = c10c[tmp_key].with_transform(transforms)
# List of label strings, taken from the "label" feature.
labels = cifar10_preprocessed.features["label"].names

# Load the pretrained model, move it to the device and switch to eval mode.
pretrained_dir = ViTExperiment.OUTPUT_DIR
model = ViTForImageClassification.from_pretrained(pretrained_dir)
model = model.to(device)
model.eval()

# Load the settings that were used at training time.
# NOTE(review): torch.load unpickles the file; only load files you trust.
training_args = torch.load(os.path.join(pretrained_dir, "training_args.bin"))

# Collator used to assemble examples into batches.
data_collator = DefaultDataCollator()
# Build the Trainer object; only an evaluation dataset is supplied here.
trainer = Trainer(
    args=training_args,
    model=model,
    tokenizer=processor,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    eval_dataset=cifar10_preprocessed,
)

## 推論の実行

In [5]:
# Derive the number of iterations from the dataset size and the batch size.
training_args_dict = training_args.to_dict()
eval_batch_size = training_args_dict["per_device_eval_batch_size"]
n_eval_samples = len(cifar10_preprocessed)
eval_iter = math.ceil(n_eval_samples / eval_batch_size)

# Announce the run, then execute inference on the whole dataset.
print(f"predict evaluation data... #iter = {eval_iter} ({n_eval_samples} samples / {eval_batch_size} batches)")
test_pred = trainer.predict(cifar10_preprocessed)

predict evaluation data... #iter = 1563 (50000 samples / 32 batches)


KeyboardInterrupt: 

In [None]:
# Echo the PredictionOutput stored in `test_pred`.
# NOTE(review): the prediction cell's recorded output ends in KeyboardInterrupt, so the
# value shown here presumably comes from a different run -- confirm before relying on it.
test_pred

PredictionOutput(predictions=(array([0.00530046, 0.4829699 , 0.14570853, ..., 0.31633762, 0.3146287 ,
       0.2560988 ], dtype=float32), array([[-0.8668216 , -1.2971003 , -0.75566244, ..., -1.4945534 ,
        -1.2481924 , -1.515257  ],
       [ 0.3388922 , -0.08191378, -0.8742339 , ..., -1.2310234 ,
         8.353749  , -0.87099814],
       [ 1.4878969 ,  0.73712045, -1.9341159 , ..., -0.74034   ,
         7.126801  ,  0.6780986 ],
       ...,
       [-0.9040583 , -1.6468737 , -0.5728902 , ..., -0.32825708,
        -1.1810881 , -1.380819  ],
       [ 5.4605    ,  2.0525274 ,  0.1788212 , ..., -1.6962085 ,
        -1.8953818 , -0.3313951 ],
       [-1.1177558 , -1.2211878 , -0.4425445 , ...,  8.182668  ,
        -0.87377185, -0.9647563 ]], dtype=float32)), label_ids=None, metrics={'test_runtime': 1963.0797, 'test_samples_per_second': 25.47, 'test_steps_per_second': 0.796})