In [5]:
# -*- coding: utf-8 -*-
# 2D-CNN(足圧5x7) + 1D-CNN(IMU) → Transformer → 回帰
# 学習・検証・保存・推論まで

import os
import math
import numpy as np
import pandas as pd
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler


# =========================
# 0) 共通設定
# =========================
WINDOW_SIZE = 10
BATCH_SIZE = 128
EPOCHS = 30
LR = 3e-4
PRESSURE_COLS = [f'P{i}' for i in range(1, 36)]   # 35チャンネル
IMU_COLS = ['Gyro_x','Gyro_y','Gyro_z','Acc_x','Acc_y','Acc_z']
PRESSURE_SHAPE = (5, 7)     # P1..P5が1行目, 行優先で並んでいる前提
OUT_CSV = "output/2DCNNTrans_predicted_skeleton.csv"
WEIGHT_PATH = "weight/2dcnn_trans_pose_regressor.pth"


# =========================
# 1) データ読み込み（既存のあなたの前処理と整合）
#    ここは質問に貼っていただいたコードのロジックを踏襲
# =========================
# 学習データのペア（そのまま流用）
data_pairs = [
    ('./data/20250517old_data/20241212test4/Opti-track/Take 2024-12-12 03.06.59 PM.csv',
     './data/20250517old_data/20241212test4/insoleSensor/20241212_152700_left.csv',
     './data/20250517old_data/20241212test4/insoleSensor/20241212_152700_right.csv'),
    ('./data/20250517old_data/20241212test4/Opti-track/Take 2024-12-12 03.45.00 PM.csv',
     './data/20250517old_data/20241212test4/insoleSensor/20241212_160501_left.csv',
     './data/20250517old_data/20241212test4/insoleSensor/20241212_160501_right.csv'),
    ('./data/20250517old_data/20241212test4/Opti-track/Take 2024-12-12 04.28.00 PM.csv',
     './data/20250517old_data/20241212test4/insoleSensor/20241212_164800_left.csv',
     './data/20250517old_data/20241212test4/insoleSensor/20241212_164800_right.csv'),
    ('./data/20250517old_data/20241212test4/Opti-track/Take 2024-12-12 05.17.59 PM.csv',
     './data/20250517old_data/20241212test4/insoleSensor/20241212_173800_left.csv',
     './data/20250517old_data/20241212test4/insoleSensor/20241212_173800_right.csv')
]

# テストデータ（そのまま流用）
test_data = ('./data/20250517old_data/20241115test3/Opti-track/Take 2024-11-15 03.50.00 PM.csv', 
              './data/20250517old_data/20241115test3/insoleSensor/20241115_155500_left.csv', 
              './data/20250517old_data/20241115test3/insoleSensor/20241115_155500_right.csv')

# 学習セット作成（左右CSVを横結合して1つの特徴行列に）
X_train_list, y_train_list = [], []
for opti_path, left_path, right_path in data_pairs:
    left_df = pd.read_csv(left_path).fillna(0.0)
    right_df = pd.read_csv(right_path).fillna(0.0)
    y_df = pd.read_csv(opti_path).fillna(0.0)

    # 時間長を揃える
    T = min(len(left_df), len(right_df), len(y_df))
    left_df = left_df.iloc[:T]
    right_df = right_df.iloc[:T]
    y_df = y_df.iloc[:T]

    X_df = pd.concat([left_df, right_df], axis=1)

    X_train_list.append(X_df.values)
    y_train_list.append(y_df.values)

X_train = np.concatenate(X_train_list, axis=0)
y_train = np.concatenate(y_train_list, axis=0)

# スケーリング
x_scaler = StandardScaler()
X_train_scaled = x_scaler.fit_transform(X_train)

y_scaler = StandardScaler()
y_train_scaled = y_scaler.fit_transform(y_train)

print("X_train_scaled.shape:", X_train_scaled.shape)
print("y_train_scaled.shape:", y_train_scaled.shape)

# テスト読み込み
opti_path_test, left_path_test, right_path_test = test_data
left_df_test = pd.read_csv(left_path_test).fillna(0.0)
right_df_test = pd.read_csv(right_path_test).fillna(0.0)
y_df_test = pd.read_csv(opti_path_test).fillna(0.0)

Tt = min(len(left_df_test), len(right_df_test), len(y_df_test))
left_df_test = left_df_test.iloc[:Tt]
right_df_test = right_df_test.iloc[:Tt]
y_df_test = y_df_test.iloc[:Tt]

X_test = pd.concat([left_df_test, right_df_test], axis=1).values
y_test = y_df_test.values

X_test_scaled = x_scaler.transform(X_test)
y_test_scaled = y_scaler.transform(y_test)

# =========================
# 2) スライディング窓（X全体を窓切り）
#    ここでは X の列のうち:
#    - 先頭35列を足圧5x7
#    - 末尾6列をIMU
#    とみなす実装にしています。
#    列順が違う場合は適宜抽出方法を変えてください。
# =========================
def create_sliding_window_sequences(X, y, window_size):
    X_seq, y_seq, mask_seq = [], [], []
    N = len(X)
    for i in range(N - window_size + 1):
        xw = X[i:i+window_size]
        yw = y[i+window_size-1]   # 末尾フレームの姿勢を回帰
        mw = np.ones((window_size,), dtype=np.int64)  # 欠損対応するなら修正

        X_seq.append(xw)
        y_seq.append(yw)
        mask_seq.append(mw)
    return np.array(X_seq), np.array(y_seq), np.array(mask_seq)

X_seq_train, y_seq_train, mask_seq_train = create_sliding_window_sequences(
    X_train_scaled, y_train_scaled, WINDOW_SIZE
)
X_seq_test, y_seq_test, mask_seq_test = create_sliding_window_sequences(
    X_test_scaled, y_test_scaled, WINDOW_SIZE
)

print("X_seq_train:", X_seq_train.shape, "y_seq_train:", y_seq_train.shape)


# =========================
# 3) Dataset
#    Dataset内で [B,S,D] → 足圧2D + IMU に分割
# =========================
class PostureDataset2D(Dataset):
    def __init__(self, X_seq, y_seq, mask_seq, press_dim=35, imu_dim=6, grid_hw=(5,7)):
        self.X = torch.tensor(X_seq, dtype=torch.float32)      # [B,S,D]
        self.y = torch.tensor(y_seq, dtype=torch.float32)      # [B,out_dim]
        self.m = torch.tensor(mask_seq, dtype=torch.long)      # [B,S]
        self.press_dim = press_dim
        self.imu_dim = imu_dim
        self.H, self.W = grid_hw
        assert self.press_dim == self.H*self.W, "PRESSURE_SHAPEとP列数が一致しません"

        D = self.X.shape[2]
        assert D >= (press_dim + imu_dim), "特徴次元が不足しています"

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        x = self.X[idx]           # [S, D]
        y = self.y[idx]
        m = self.m[idx]           # [S]

        # [S, press_dim] と [S, imu_dim] に切り分け
        press = x[:, :self.press_dim]             # [S, 35]
        imu = x[:, -self.imu_dim:]                # [S, 6]

        # 2D reshape → [S, 1, H, W]
        press = press.reshape(-1, 1, self.H, self.W)

        return press, imu, y, m


train_ds = PostureDataset2D(X_seq_train, y_seq_train, mask_seq_train,
                            press_dim=35, imu_dim=6, grid_hw=PRESSURE_SHAPE)
test_ds  = PostureDataset2D(X_seq_test, y_seq_test, mask_seq_test,
                            press_dim=35, imu_dim=6, grid_hw=PRESSURE_SHAPE)

train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
test_loader  = DataLoader(test_ds,  batch_size=BATCH_SIZE, shuffle=False)





X_train_scaled.shape: (47963, 82)
y_train_scaled.shape: (47963, 63)
X_seq_train: (47954, 10, 82) y_seq_train: (47954, 63)


In [6]:

# =========================
# 4) モデル
# =========================
class PressureCNN2D(nn.Module):
    """ 時刻ごとの 1×H×W マップを2D-CNNで埋め込みに圧縮 """
    def __init__(self, out_dim=256, dropout=0.1):
        super().__init__()
        # 入力チャネル1
        self.net = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d((1,1))  # [B*S,64,1,1]
        )
        self.fc = nn.Linear(64, out_dim)

    def forward(self, press_seq):
        # press_seq: [B,S,1,H,W]
        B, S, C, H, W = press_seq.shape
        x = press_seq.reshape(B*S, C, H, W)
        h = self.net(x).reshape(B*S, -1)         # [B*S, 64]
        h = self.fc(h)                            # [B*S, out_dim]
        return h.reshape(B, S, -1)                # [B,S,out_dim]


class IMUCNN1D(nn.Module):
    """ IMUの短時間特徴（1D-CNN） """
    def __init__(self, in_dim=6, out_dim=256, k=5, dropout=0.1):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv1d(in_dim, 128, kernel_size=k, padding=k//2),
            nn.ReLU(),
            nn.Conv1d(128, out_dim, kernel_size=k, padding=k//2),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Conv1d(out_dim, out_dim, kernel_size=k, padding=k//2),
            nn.ReLU()
        )

    def forward(self, imu_seq):
        # imu_seq: [B,S,D]
        x = imu_seq.transpose(1, 2)          # [B,D,S]
        h = self.net(x).transpose(1, 2)      # [B,S,out_dim]
        return h


class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=20000):
        super().__init__()
        pe = torch.zeros(max_len, d_model)
        pos = torch.arange(0, max_len, dtype=torch.float32).unsqueeze(1)
        div = torch.exp(torch.arange(0, d_model, 2).float()
                        * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(pos * div)[:, :pe[:,0::2].shape[1]]
        pe[:, 1::2] = torch.cos(pos * div)[:, :pe[:,1::2].shape[1]]
        self.register_buffer('pe', pe.unsqueeze(0))  # [1,L,D]

    def forward(self, x):
        return x + self.pe[:, :x.size(1), :]


class CNN2DTransformerRegressor(nn.Module):
    def __init__(self, d_cnn=256, d_model=512, nhead=8, num_layers=6,
                 dim_ff=1024, dropout=0.1, out_dim=63):
        super().__init__()
        self.cnn2d = PressureCNN2D(out_dim=d_cnn, dropout=dropout)
        self.cnn1d = IMUCNN1D(in_dim=6, out_dim=d_cnn, dropout=dropout)

        self.fuse = nn.Sequential(
            nn.Linear(d_cnn*2, d_model),
            nn.LayerNorm(d_model),
            nn.Dropout(dropout)
        )
        self.posenc = PositionalEncoding(d_model)

        enc_layer = nn.TransformerEncoderLayer(
            d_model=d_model, nhead=nhead, dim_feedforward=dim_ff,
            dropout=dropout, activation='gelu',
            batch_first=True, norm_first=True
        )
        self.encoder = nn.TransformerEncoder(enc_layer, num_layers=num_layers)

        self.head = nn.Sequential(
            nn.Linear(d_model, d_model),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(d_model, out_dim)
        )

    @staticmethod
    def last_valid_token(h, mask):
        lengths = mask.sum(dim=1)
        idx = torch.clamp(lengths - 1, min=0)
        b = torch.arange(h.size(0), device=h.device)
        return h[b, idx, :]

    def forward(self, press_seq, imu_seq, mask):
        # press_seq: [B,S,1,H,W], imu_seq: [B,S,6], mask: [B,S]
        hp = self.cnn2d(press_seq)   # [B,S,d_cnn]
        hi = self.cnn1d(imu_seq)     # [B,S,d_cnn]
        h = torch.cat([hp, hi], dim=-1)
        h = self.fuse(h)
        h = self.posenc(h)

        pad_mask = (mask == 0)
        h = self.encoder(h, src_key_padding_mask=pad_mask)

        cls = self.last_valid_token(h, mask)
        yhat = self.head(cls)
        return yhat


# =========================
# 5) 学習
# =========================
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

out_dim = y_seq_train.shape[1]
model = CNN2DTransformerRegressor(
    d_cnn=256, d_model=512, nhead=8, num_layers=6,
    dim_ff=1024, dropout=0.1, out_dim=out_dim
).to(device)

criterion = nn.MSELoss()
optimizer = optim.AdamW(model.parameters(), lr=LR, weight_decay=1e-4)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)

for epoch in range(1, EPOCHS+1):
    model.train()
    total = 0.0
    for press, imu, y, m in tqdm(train_loader, desc=f"Train {epoch}/{EPOCHS}"):
        press = press.to(device)   # [B,S,1,H,W]
        imu   = imu.to(device)     # [B,S,6]
        y     = y.to(device)       # [B,out_dim]
        m     = m.to(device)       # [B,S]

        optimizer.zero_grad()
        yhat = model(press, imu, m)
        loss = criterion(yhat, y)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        total += loss.item()
    scheduler.step()
    train_loss = total / len(train_loader)

    model.eval()
    val_total = 0.0
    with torch.no_grad():
        for press, imu, y, m in test_loader:
            press = press.to(device)
            imu   = imu.to(device)
            y     = y.to(device)
            m     = m.to(device)
            yhat = model(press, imu, m)
            loss = criterion(yhat, y)
            val_total += loss.item()
    val_loss = val_total / len(test_loader)

    print(f"[Epoch {epoch:02d}] Train {train_loss:.5f} | Val {val_loss:.5f}")

# 保存
os.makedirs(os.path.dirname(WEIGHT_PATH), exist_ok=True)
torch.save(model.state_dict(), WEIGHT_PATH)
print("Saved weights:", WEIGHT_PATH)



Train 1/30: 100%|██████████| 375/375 [00:07<00:00, 47.10it/s]


[Epoch 01] Train 0.57609 | Val 1.50263


Train 2/30: 100%|██████████| 375/375 [00:07<00:00, 48.76it/s]


[Epoch 02] Train 0.32793 | Val 1.53029


Train 3/30: 100%|██████████| 375/375 [00:07<00:00, 48.08it/s]


[Epoch 03] Train 0.24561 | Val 1.30728


Train 4/30: 100%|██████████| 375/375 [00:07<00:00, 48.31it/s]


[Epoch 04] Train 0.20466 | Val 1.42405


Train 5/30: 100%|██████████| 375/375 [00:07<00:00, 48.84it/s]


[Epoch 05] Train 0.17218 | Val 1.56545


Train 6/30: 100%|██████████| 375/375 [00:07<00:00, 49.88it/s]


[Epoch 06] Train 0.15160 | Val 1.30964


Train 7/30: 100%|██████████| 375/375 [00:07<00:00, 48.89it/s]


[Epoch 07] Train 0.13407 | Val 1.51664


Train 8/30: 100%|██████████| 375/375 [00:07<00:00, 49.96it/s]


[Epoch 08] Train 0.12344 | Val 1.44438


Train 9/30: 100%|██████████| 375/375 [00:07<00:00, 48.83it/s]


[Epoch 09] Train 0.11159 | Val 1.46569


Train 10/30: 100%|██████████| 375/375 [00:07<00:00, 48.68it/s]


[Epoch 10] Train 0.10192 | Val 1.44488


Train 11/30: 100%|██████████| 375/375 [00:07<00:00, 48.79it/s]


[Epoch 11] Train 0.09412 | Val 1.45045


Train 12/30: 100%|██████████| 375/375 [00:07<00:00, 47.43it/s]


[Epoch 12] Train 0.08866 | Val 1.51036


Train 13/30: 100%|██████████| 375/375 [00:07<00:00, 48.30it/s]


[Epoch 13] Train 0.08202 | Val 1.49408


Train 14/30: 100%|██████████| 375/375 [00:07<00:00, 48.39it/s]


[Epoch 14] Train 0.07655 | Val 1.43623


Train 15/30: 100%|██████████| 375/375 [00:07<00:00, 47.94it/s]


[Epoch 15] Train 0.07124 | Val 1.46637


Train 16/30: 100%|██████████| 375/375 [00:07<00:00, 47.49it/s]


[Epoch 16] Train 0.06727 | Val 1.42822


Train 17/30: 100%|██████████| 375/375 [00:07<00:00, 47.79it/s]


[Epoch 17] Train 0.06274 | Val 1.51052


Train 18/30: 100%|██████████| 375/375 [00:07<00:00, 47.82it/s]


[Epoch 18] Train 0.05933 | Val 1.46984


Train 19/30: 100%|██████████| 375/375 [00:07<00:00, 48.23it/s]


[Epoch 19] Train 0.05578 | Val 1.45888


Train 20/30: 100%|██████████| 375/375 [00:07<00:00, 47.57it/s]


[Epoch 20] Train 0.05402 | Val 1.45265


Train 21/30: 100%|██████████| 375/375 [00:07<00:00, 48.74it/s]


[Epoch 21] Train 0.05027 | Val 1.46845


Train 22/30: 100%|██████████| 375/375 [00:07<00:00, 48.60it/s]


[Epoch 22] Train 0.04775 | Val 1.46641


Train 23/30: 100%|██████████| 375/375 [00:07<00:00, 48.55it/s]


[Epoch 23] Train 0.04595 | Val 1.43773


Train 24/30: 100%|██████████| 375/375 [00:07<00:00, 47.94it/s]


[Epoch 24] Train 0.04435 | Val 1.49703


Train 25/30: 100%|██████████| 375/375 [00:07<00:00, 47.61it/s]


[Epoch 25] Train 0.04314 | Val 1.46717


Train 26/30: 100%|██████████| 375/375 [00:07<00:00, 47.68it/s]


[Epoch 26] Train 0.04212 | Val 1.46491


Train 27/30: 100%|██████████| 375/375 [00:07<00:00, 47.81it/s]


[Epoch 27] Train 0.04165 | Val 1.47389


Train 28/30: 100%|██████████| 375/375 [00:07<00:00, 47.39it/s]


[Epoch 28] Train 0.04094 | Val 1.46967


Train 29/30: 100%|██████████| 375/375 [00:07<00:00, 48.46it/s]


[Epoch 29] Train 0.04060 | Val 1.47226


Train 30/30: 100%|██████████| 375/375 [00:07<00:00, 47.92it/s]


[Epoch 30] Train 0.04025 | Val 1.47249
Saved weights: weight/2dcnn_trans_pose_regressor.pth


In [7]:
# =========================
# 6) 推論
# =========================
model.eval()
preds = []
with torch.no_grad():
    for press, imu, _, m in test_loader:
        press = press.to(device)
        imu   = imu.to(device)
        m     = m.to(device)
        yhat  = model(press, imu, m)
        preds.append(yhat.cpu().numpy())

pred_scaled = np.concatenate(preds, axis=0)
pred = y_scaler.inverse_transform(pred_scaled)

J = pred.shape[1] // 3
cols = [f'{ax}.{j*2 +1}' for j in range(J) for ax in ['X','Y','Z']]
os.makedirs("output", exist_ok=True)
pd.DataFrame(pred, columns=cols).to_csv(OUT_CSV, index=False)
print("Saved:", OUT_CSV)

Saved: output/2DCNNTrans_predicted_skeleton.csv
