In [1]:
"""
Run a forward pass of the ViT model fine-tuned on C10 (CIFAR-10).
"""

import os, sys, math
sys.path.append("../src")
import numpy as np
import torch
from datasets import load_dataset, load_metric
from transformers import DefaultDataCollator, ViTForImageClassification, TrainingArguments, Trainer
from utils.helper import get_device
from utils.vit_util import processor, transforms, compute_metrics

2024-04-18 21:34:50.598382: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-04-18 21:34:51.861626: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2024-04-18 21:34:51.861759: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory
  met_acc = load_metric("accuracy")


In [2]:
# Resolve the compute device (cuda or cpu).
device = get_device()
# Load the dataset (only the very first load takes a little while).
cifar10 = load_dataset("cifar10")
# Apply the preprocessing lazily, at the moment samples are read.
cifar10_preprocessed = cifar10.with_transform(transforms)
# Collator that assembles individual samples into batches.
data_collator = DefaultDataCollator()
# List of the label strings.
labels = cifar10_preprocessed["train"].features["label"].names
# Load the pretrained model, move it to the device, and switch to eval mode.
pretrained_dir = "/src/src/out_vit_c10"
model = ViTForImageClassification.from_pretrained(pretrained_dir)
model = model.to(device)
model.eval()
# Restore the settings that were used for training.
# NOTE(review): torch.load deserializes with pickle, so only point this at a
# trusted output directory.
training_args = torch.load(os.path.join(pretrained_dir, "training_args.bin"))
# Build the Trainer object.
trainer = Trainer(
    # model and its run configuration
    model=model,
    args=training_args,
    # data
    train_dataset=cifar10_preprocessed["train"],
    eval_dataset=cifar10_preprocessed["test"],
    data_collator=data_collator,
    # evaluation / preprocessing helpers
    compute_metrics=compute_metrics,
    tokenizer=processor,
)

Device: cuda


Found cached dataset parquet (/root/.cache/huggingface/datasets/parquet/plain_text-d4c080360fb556b0/0.0.0/14a00e99c0d15a23649d0db8944380ac81082d4b021f398733dd84f3a6c569a7)


  0%|          | 0/2 [00:00<?, ?it/s]

In [3]:
# Index used to build the parameter-name prefix below.
li = 11
key = f"vit.encoder.layer.{li}.intermediate.dense"

# Print the name and shape of every model parameter whose name starts with
# the prefix; everything else is skipped.
for name, param in model.named_parameters():
    if not name.startswith(key):
        continue
    print(name, param.shape)

vit.encoder.layer.11.intermediate.dense.weight torch.Size([3072, 768])
vit.encoder.layer.11.intermediate.dense.bias torch.Size([3072])


In [4]:
# Run the first five training samples through the model and keep the
# per-layer hidden states in the returned output object.
#
# - The module is called directly instead of via `.forward()` so that
#   nn.Module's call machinery (registered hooks) is not bypassed.
# - `torch.no_grad()` disables autograd tracking: this cell only inspects
#   outputs, so no backward graph needs to be built or kept in memory.
with torch.no_grad():
    mf = model(
        cifar10_preprocessed["train"][:5]["pixel_values"].to(device),
        output_hidden_states=True,
    )
mf

ImageClassifierOutput(loss=None, logits=tensor([[ 7.4848, -0.9838, -1.2195, -1.0950, -1.5244, -0.9806, -1.3396, -1.3068,
          1.6574, -0.6881],
        [-1.0096, -0.5473, -0.9478, -0.7244, -0.4915, -1.1115,  8.1529, -1.1994,
         -0.7302, -0.6127],
        [ 7.7190, -1.3459, -1.0712, -0.8000, -1.6794, -0.7609, -0.8340, -0.7733,
         -0.2343, -0.9757],
        [-0.6484, -1.3911,  7.7926, -0.7089, -0.2967, -1.1537, -0.6101, -1.5104,
         -1.7120, -0.8013],
        [-1.0980, -0.9440, -0.6875, -1.4231, -0.8058,  0.1417, -1.0279,  8.1610,
         -0.8917, -1.0262]], device='cuda:0', grad_fn=<AddmmBackward0>), hidden_states=(tensor([[[-4.1266e-03,  1.6501e-02, -5.0006e-01,  ..., -3.1188e-03,
           1.0603e-03, -7.6647e-01],
         [-3.9816e-02, -2.6767e-01,  1.3697e+00,  ..., -6.7182e-03,
           5.1438e-02,  2.1739e-01],
         [-1.8105e-01, -1.6210e-01,  1.1505e+00,  ...,  2.3543e-02,
           1.5401e-01,  2.8036e-01],
         ...,
         [ 6.9615e-02,  1.

In [57]:
# Shape of the classification logits returned by the forward pass.
mf.logits.shape

torch.Size([5, 10])

In [64]:
# Number of tensors in the returned hidden_states tuple.
len(mf.hidden_states)

13

In [67]:
# For each entry of hidden_states, report the shape of the full tensor and
# the shape of the slice taken at index 0 of the second dimension.
for i in range(len(mf.hidden_states)):
    hs = mf.hidden_states[i]
    print(f"mf.hidden_states[{i}].shape = {hs.shape}")
    print(f"mf.hidden_states[{i}][:, 0, :].shape = {hs[:, 0, :].shape}")

mf.hidden_states[0].shape = torch.Size([5, 197, 768])
mf.hidden_states[1].shape = torch.Size([5, 197, 768])
mf.hidden_states[2].shape = torch.Size([5, 197, 768])
mf.hidden_states[3].shape = torch.Size([5, 197, 768])
mf.hidden_states[4].shape = torch.Size([5, 197, 768])
mf.hidden_states[5].shape = torch.Size([5, 197, 768])
mf.hidden_states[6].shape = torch.Size([5, 197, 768])
mf.hidden_states[7].shape = torch.Size([5, 197, 768])
mf.hidden_states[8].shape = torch.Size([5, 197, 768])
mf.hidden_states[9].shape = torch.Size([5, 197, 768])
mf.hidden_states[10].shape = torch.Size([5, 197, 768])
mf.hidden_states[11].shape = torch.Size([5, 197, 768])
mf.hidden_states[12].shape = torch.Size([5, 197, 768])


In [5]:
# Predicted class index per sample: position of the largest logit along
# dimension 1 of mf.logits. `torch` is already imported in the notebook's
# first cell, so it is not re-imported here; `dim` is the keyword documented
# for torch.argmax.
torch.argmax(mf.logits, dim=1)

tensor([0, 6, 0, 2, 7], device='cuda:0')

In [6]:
# Labels stored in the dataset for the same five training samples, shown for
# comparison with the predicted class indices.
cifar10_preprocessed["train"][:5]["labels"]

[0, 6, 0, 2, 7]