<a href="https://colab.research.google.com/github/tb268/Atcoder/blob/main/vit%E5%8D%92%E7%A0%94.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install reformer-pytorch

In [1]:
import os
# Create the data folder; exist_ok=True keeps the cell re-runnable when the
# folder is already present (plain makedirs raises FileExistsError).
os.makedirs('./data', exist_ok=True)

In [2]:
# Reset the data folder: delete it (if present) and create it again, empty.
import os
import shutil

# Use the same relative path for deletion and creation so the cell does not
# depend on the working directory being /content.
if os.path.isdir('./data'):
    shutil.rmtree('./data')

# Create the folder
os.makedirs('./data', exist_ok=True)

In [4]:
# Print the NVIDIA driver/CUDA versions and the attached GPU's memory and utilisation.
!nvidia-smi

Wed May 15 07:30:42 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   40C    P8              10W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [28]:
# Reformer-based 3D pose estimation
# This cell runs end to end on randomly generated dummy data
import torch
import torch.nn as nn
import torch.optim as optim
from reformer_pytorch import Reformer, Autopadder

# Model definition
class PoseReformer3D(nn.Module):
    """Regress 3D keypoints from an image with a ViT-style Reformer encoder.

    The image is cut into non-overlapping ``patch_size`` x ``patch_size``
    patches, each patch is linearly embedded, a learnable class token is
    prepended, a learnable position embedding is added, and the sequence is
    passed through a Reformer. The class-token output is mapped by an MLP head
    to ``num_keypoints`` (x, y, z) triples.

    Args:
        patch_size: Side length of the square patches.
        num_keypoints: Number of keypoints predicted per image.
        dim: Width of the token embeddings.
        depth: Forwarded to ``Reformer``.
        heads: Forwarded to ``Reformer``.
        bucket_size: Forwarded to ``Reformer``.
        n_hashes: Forwarded to ``Reformer``.
        max_patches: Largest number of patches per image supported by the
            position-embedding table (the table has ``max_patches + 1`` rows,
            the extra one being for the class token).
    """

    def __init__(self, patch_size=32, num_keypoints=17, dim=512, depth=6, heads=8,
                 bucket_size=16, n_hashes=4, max_patches=256):
        super().__init__()
        self.patch_size = patch_size
        self.num_keypoints = num_keypoints
        self.dim = dim
        self.max_patches = max_patches
        # Patch embedding: flattened RGB patch -> dim
        self.patch_to_embedding = nn.Linear(3 * patch_size * patch_size, dim)
        # Class token
        self.cls_token = nn.Parameter(torch.randn(1, 1, dim))
        # Position embedding, registered once so that it is trained by the
        # optimizer, saved in the state dict, moved with .to(device) and
        # identical between forward passes.
        self.pos_embedding = nn.Parameter(torch.randn(1, max_patches + 1, dim))
        # Reformer encoder, wrapped in Autopadder
        self.reformer = Reformer(
            dim=dim,
            depth=depth,
            heads=heads,
            bucket_size=bucket_size,
            n_hashes=n_hashes
        )
        self.reformer = Autopadder(self.reformer)
        self.to_cls_token = nn.Identity()
        # Final MLP head: one (x, y, z) triple per keypoint
        self.mlp_head = nn.Sequential(
            nn.LayerNorm(dim),
            nn.Linear(dim, num_keypoints * 3)
        )

    def forward(self, img):
        """Predict keypoints for a batch of images.

        Args:
            img: Tensor of shape (B, C, H, W). ``C`` must be 3 to match the
                patch-embedding layer. Trailing rows/columns that do not fill
                a whole patch are dropped by ``unfold``.

        Returns:
            Tensor of shape (B, num_keypoints, 3).

        Raises:
            ValueError: If the image produces more than ``max_patches`` patches.
        """
        B, C, H, W = img.shape
        p = self.patch_size
        # Split the image into non-overlapping p x p patches
        x = img.unfold(2, p, p).unfold(3, p, p)  # (B, C, H/p, W/p, p, p)
        x = x.permute(0, 2, 3, 1, 4, 5)  # (B, H/p, W/p, C, p, p)
        x = x.reshape(B, -1, C * p * p)  # (B, num_patches, patch_dim)
        num_patches = x.shape[1]
        if num_patches > self.max_patches:
            raise ValueError(
                f'image yields {num_patches} patches but max_patches={self.max_patches}'
            )
        # Patch embedding
        x = self.patch_to_embedding(x)  # (B, num_patches, dim)

        # Prepend the class token
        cls_tokens = self.cls_token.expand(B, -1, -1)  # (B, 1, dim)
        x = torch.cat((cls_tokens, x), dim=1)  # (B, num_patches + 1, dim)

        # Add the learned position embedding for the tokens actually present
        x = x + self.pos_embedding[:, :x.size(1)]

        # Reformer encoder
        x = self.reformer(x)  # (B, num_patches + 1, dim)
        x = self.to_cls_token(x[:, 0])  # keep the class-token output only
        # MLP head predicts the flattened keypoints
        keypoints = self.mlp_head(x)  # (B, num_keypoints * 3)
        keypoints = keypoints.view(B, -1, 3)  # (B, num_keypoints, 3)
        return keypoints

# Dummy data generator
def generate_dummy_data(num_samples, num_keypoints, img_height, img_width):
    """Draw random stand-ins for a pose dataset.

    Returns:
        A pair ``(images, keypoints)``: standard-normal images of shape
        (num_samples, 3, img_height, img_width) and standard-normal 3D
        keypoints scaled by 1000, of shape (num_samples, num_keypoints, 3).
    """
    image_shape = (num_samples, 3, img_height, img_width)
    keypoint_shape = (num_samples, num_keypoints, 3)
    # Images are sampled first, then keypoints, so the RNG stream is consumed
    # in a fixed order.
    fake_images = torch.randn(image_shape)
    fake_keypoints = torch.randn(keypoint_shape).mul(1000)  # scaled as if in mm
    return fake_images, fake_keypoints

# MPJPE metric
def compute_mpjpe(predicted, true):
    """Return the mean per-joint position error as a scalar tensor.

    The norm of ``predicted - true`` is taken along the last axis, and those
    values are averaged over all remaining axes.
    """
    offsets = predicted - true
    per_joint_error = offsets.norm(dim=-1)
    return per_joint_error.mean()

# Training routine
def train_model(model, images, true_keypoints, epochs=10, learning_rate=0.001):
    """Fit ``model`` with Adam on an MSE loss, one full-batch step per epoch.

    The model is put into training mode and updated in place; the loss of each
    epoch is printed. Nothing is returned.
    """
    model.train()
    mse = nn.MSELoss()
    adam = optim.Adam(model.parameters(), lr=learning_rate)

    for epoch in range(epochs):
        # Forward pass over the whole data set at once
        loss = mse(model(images), true_keypoints)

        # Clear stale gradients, backpropagate, then update the weights
        adam.zero_grad()
        loss.backward()
        adam.step()

        print(f'Epoch {epoch + 1}/{epochs}, Loss: {loss.item():.4f}')

# Seed torch's global RNG so the dummy data, the weight initialisation and the
# numbers printed below are repeatable from run to run.
torch.manual_seed(42)

# Generate the dummy data
num_samples = 100
num_keypoints = 17
img_height, img_width = 320, 240  # arbitrary image size
images, true_keypoints = generate_dummy_data(num_samples, num_keypoints, img_height, img_width)

# Instantiate the model
model = PoseReformer3D()

# Train the model
train_model(model, images, true_keypoints, epochs=20, learning_rate=0.001)

# Switch to evaluation mode and predict without tracking gradients
model.eval()
with torch.no_grad():
    predicted_keypoints = model(images)

# Compute MPJPE (measured on the same samples the model was trained on)
mpjpe = compute_mpjpe(predicted_keypoints, true_keypoints)
print(f'Average MPJPE: {mpjpe.item():.2f} mm')



Epoch 1/20, Loss: 991823.6875
Epoch 2/20, Loss: 991529.7500
Epoch 3/20, Loss: 991393.5000
Epoch 4/20, Loss: 991319.9375
Epoch 5/20, Loss: 991254.1875
Epoch 6/20, Loss: 991175.0625
Epoch 7/20, Loss: 991077.0000
Epoch 8/20, Loss: 990928.1875
Epoch 9/20, Loss: 990676.0000
Epoch 10/20, Loss: 990340.1250
Epoch 11/20, Loss: 990015.9375
Epoch 12/20, Loss: 989773.5000
Epoch 13/20, Loss: 989578.5625
Epoch 14/20, Loss: 989220.6250
Epoch 15/20, Loss: 989109.4375
Epoch 16/20, Loss: 988820.1875
Epoch 17/20, Loss: 988622.4375
Epoch 18/20, Loss: 988411.0000
Epoch 19/20, Loss: 988197.3750
Epoch 20/20, Loss: 987991.3125
Average MPJPE: 1582.11 mm
