In [None]:
# OGVC
# 量産版 + hidden_states 保存版
# 感情次元ベクトル推定器で使用されている wav2vec2 の出力を
# CSV (VA_results_bulk/OGVC_VAD_last_hidden_bulk-re.csv) に保存

import os
import re
import csv
from pathlib import Path

import numpy as np
import soundfile as sf
import librosa
import torch
import torch.nn as nn
from transformers import Wav2Vec2Processor
from transformers.models.wav2vec2.modeling_wav2vec2 import (
    Wav2Vec2Model,
    Wav2Vec2PreTrainedModel,
)
from tqdm import tqdm

# ===== 設定 =====
# Root of the OGVC acted-speech WAV tree. The original cluster location stays
# the default; set the OGVC_ROOT_PATH environment variable to run elsewhere
# without editing the notebook.
ROOT_PATH = os.environ.get(
    "OGVC_ROOT_PATH", "/autofs/diamond/share/corpus/OGVC/Vol2/Acted/wav"
)
# Directory that receives the generated CSV.
OUTPUT_DIR = "VA_results_bulk"
# Run on the GPU when one is visible to torch, otherwise on the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# Sampling rate (Hz) that audio is resampled to before it is given to the model.
SAMPLE_RATE = 16000
MODEL_NAME = "audeering/wav2vec2-large-robust-12-ft-emotion-msp-dim"

# Same regular expression as Bagus: <utterance id><3-letter emotion><1-digit intensity>.wav
FILENAME_PATTERN = re.compile(r"^[A-Z0-9]+([A-Z]{3})(\d)\.wav$", re.IGNORECASE)

# ===== モデル定義 =====
class RegressionHead(nn.Module):
    """Regression head applied to a pooled feature vector.

    Pipeline: dropout -> linear (hidden_size -> hidden_size) -> tanh ->
    dropout -> linear (hidden_size -> num_labels).
    """

    def __init__(self, config):
        """Create the layers from ``config.hidden_size``, ``config.final_dropout``
        and ``config.num_labels``."""
        super().__init__()
        width = config.hidden_size
        self.dense = nn.Linear(width, width)
        self.dropout = nn.Dropout(config.final_dropout)
        self.out_proj = nn.Linear(width, config.num_labels)

    def forward(self, features):
        """Return the ``num_labels`` outputs for ``features``."""
        activated = torch.tanh(self.dense(self.dropout(features)))
        return self.out_proj(self.dropout(activated))


class EmotionModel(Wav2Vec2PreTrainedModel):
    """wav2vec2 encoder followed by a RegressionHead.

    ``forward`` returns both the time-averaged encoder output and the head's
    predictions, so callers can store the embedding alongside the scores.
    """

    def __init__(self, config):
        """Build the encoder and the head, then initialise the weights."""
        super().__init__(config)
        self.wav2vec2 = Wav2Vec2Model(config)
        self.classifier = RegressionHead(config)
        self.init_weights()

    def forward(self, input_values):
        """Return ``(pooled, logits)`` for a batch of input values."""
        encoder_out = self.wav2vec2(input_values)
        frame_features = encoder_out[0]  # first encoder output, (B, T, D)
        # Average over dim 1 (T) to get one vector per batch item.
        utterance_vec = frame_features.mean(dim=1)
        predictions = self.classifier(utterance_vec)
        return utterance_vec, predictions


# ===== モデルロード =====
# Load the processor and the EmotionModel weights published under MODEL_NAME,
# and move the model to DEVICE.
processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
model = EmotionModel.from_pretrained(MODEL_NAME).to(DEVICE)
# Switch to evaluation mode for inference (this is what turns dropout off).
model.eval()

# ===== ユーティリティ =====
def collect_wav_files(root: str):
    """Recursively collect the WAV files below ``root``.

    The extension is compared case-insensitively so the result agrees with
    FILENAME_PATTERN, which is compiled with ``re.IGNORECASE`` and therefore
    accepts names such as ``X.WAV``. Directories whose name ends in ``.wav``
    are ignored.

    Args:
        root: Directory to search.

    Returns:
        Sorted list of file paths as strings.
    """
    return sorted(
        str(candidate)
        for candidate in Path(root).rglob("*")
        if candidate.name.lower().endswith(".wav") and candidate.is_file()
    )


def parse_filename_info(path: str):
    """
    Split an OGVC file name into utterance id, emotion code and intensity.

    Same specification as Bagus.
    Example: FOY0101ANT0.wav
    -> utt_id=FOY0101, emotion=ANT, intensity=0

    Returns (None, None, None) when the base name does not match
    FILENAME_PATTERN; otherwise (utt_id, upper-cased emotion, int intensity).
    """
    name = os.path.basename(path)
    hit = FILENAME_PATTERN.match(name)
    if hit is None:
        return None, None, None

    emotion_code = hit.group(1).upper()
    level = int(hit.group(2))
    # Everything in front of the emotion code is the utterance id.
    utterance = name[: hit.start(1)]

    return utterance, emotion_code, level


def load_audio(path, target_sr=SAMPLE_RATE):
    """Read an audio file and return ``(samples, sampling_rate)``.

    Arrays with more than one dimension are averaged over axis 1, the signal
    is resampled when the file's rate differs from ``target_sr``, and the
    samples are returned as float32.
    """
    samples, native_sr = sf.read(path)

    # Collapse multi-channel audio to a single channel.
    if samples.ndim > 1:
        samples = samples.mean(axis=1)

    if native_sr == target_sr:
        return samples.astype(np.float32), native_sr

    resampled = librosa.resample(samples, orig_sr=native_sr, target_sr=target_sr)
    return resampled.astype(np.float32), target_sr


def process_func(x: np.ndarray, sampling_rate: int):
    """Run one waveform through the processor and the model.

    Returns the pooled hidden vector and the regression outputs of the single
    batch item, both as NumPy arrays.
    """
    encoded = processor(x, sampling_rate=sampling_rate)
    # Take the first (only) item and give it a batch dimension on DEVICE.
    batch = torch.tensor(encoded["input_values"][0]).to(DEVICE).unsqueeze(0)

    with torch.no_grad():
        pooled, scores = model(batch)

    pooled_np = pooled.cpu().numpy()
    scores_np = scores.cpu().numpy()
    return pooled_np[0], scores_np[0]


# ===== メイン =====
def extract_vad_bulk(root_dir=ROOT_PATH, output_dir=OUTPUT_DIR):
    """Extract VAD scores and pooled hidden states for every WAV under ``root_dir``.

    One CSV row is written per successfully processed file to
    ``<output_dir>/OGVC_VAD_last_hidden_bulk-re.csv`` with the columns
    utt_id, filename, valence, arousal, dominance, intensity, emotion and
    h_0 .. h_{hidden_size-1}.

    Files whose name does not match FILENAME_PATTERN are skipped, and files
    that raise during loading or inference are reported and skipped
    (best effort). A final summary states how many rows were written and how
    many files were skipped or failed, so dropped data does not go unnoticed.

    Args:
        root_dir: Directory searched recursively for WAV files.
        output_dir: Directory that receives the CSV (created if missing).
    """
    os.makedirs(output_dir, exist_ok=True)

    wav_files = collect_wav_files(root_dir)
    print(f"Found {len(wav_files)} wav files")

    # Probe the model with SAMPLE_RATE zero samples to learn the width of the
    # pooled hidden vector, which decides how many h_* columns the CSV gets.
    dummy_hidden, _ = process_func(np.zeros(SAMPLE_RATE), SAMPLE_RATE)
    hidden_size = dummy_hidden.shape[0]

    csv_path = os.path.join(output_dir, "OGVC_VAD_last_hidden_bulk-re.csv")

    header = [
        "utt_id",
        "filename",
        "valence",
        "arousal",
        "dominance",
        "intensity",
        "emotion",
    ] + [f"h_{i}" for i in range(hidden_size)]

    written = 0
    skipped = 0
    failed = 0

    with open(csv_path, "w", newline="", encoding="utf-8") as wf:
        writer = csv.writer(wf)
        writer.writerow(header)

        for path in tqdm(wav_files, desc="Processing WAV files"):
            utt_id, emo, inten = parse_filename_info(path)
            if utt_id is None:
                # File name does not follow the expected pattern.
                skipped += 1
                continue

            try:
                wav, sr = load_audio(path)
                hidden_vec, vad = process_func(wav, sr)

                # The model output is read as index 0 = arousal,
                # 1 = dominance, 2 = valence; the CSV stores valence first.
                row = [
                    utt_id,
                    os.path.basename(path),
                    float(vad[2]),   # valence
                    float(vad[0]),   # arousal
                    float(vad[1]),   # dominance
                    inten,
                    emo,
                ] + hidden_vec.tolist()

                writer.writerow(row)
                written += 1

            except Exception as e:
                # Keep going on a bad file, but count it; tqdm.write prints
                # the message without breaking the progress bar.
                failed += 1
                tqdm.write(f"Error processing {path}: {e}")

    print(f"Saved all results to {csv_path}")
    print(
        f"Rows written: {written} "
        f"(skipped unmatched file names: {skipped}, failed: {failed})"
    )


# Run the extraction with the default corpus root and output directory when
# this code is executed directly.
if __name__ == "__main__":
    extract_vad_bulk()

Found 2656 wav files


Processing WAV files: 100%|██████████| 2656/2656 [01:12<00:00, 36.53it/s]

Saved all results to VA_results_bulk/OGVC_VAD_last_hidden_bulk-re.csv





In [1]:
# ======================================================
# OGVC 版：ロジスティック回帰で Emotion / Intensity を分類
# ======================================================

import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression

# ================================
# 設定
# ================================
# Directory that receives prediction CSVs and confusion-matrix images.
RESULT_DIR = "results_ogvc_emo_int"
os.makedirs(RESULT_DIR, exist_ok=True)

# Emotion code -> integer class id; ids follow the listing order, starting at 0.
EMOTION_LABELS = {
    code: class_id
    for class_id, code in enumerate(
        ["JOY", "ACC", "FEA", "SUR", "SAD", "DIS", "ANG", "ANT", "NEU", "OTH"]
    )
}

# =========================================================
# 混同行列プロット
# =========================================================
def plot_conf_matrix(true, pred, labels, title, save_path, exclude_labels=None):
    """Draw a confusion-matrix heat map and save it to ``save_path``.

    Class ids are taken to be 0 .. len(labels)-1 in the order of ``labels``.
    When ``exclude_labels`` is given, the rows and columns of those labels are
    removed from the matrix before plotting.
    """
    matrix = confusion_matrix(true, pred, labels=range(len(labels)))
    tick_names = labels

    if exclude_labels is not None:
        drop_positions = [labels.index(name) for name in exclude_labels]
        matrix = np.delete(
            np.delete(matrix, drop_positions, axis=0), drop_positions, axis=1
        )
        tick_names = [name for name in labels if name not in exclude_labels]

    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(
        matrix,
        annot=True,
        fmt="d",
        xticklabels=tick_names,
        yticklabels=tick_names,
        cmap="Blues",
        annot_kws={"size": 16},
        ax=ax,
    )
    ax.set_title(title)
    ax.set_xlabel("Predicted")
    ax.set_ylabel("True")
    fig.tight_layout()
    fig.savefig(save_path)
    plt.close(fig)

# =========================================================
# Step1: データ読み込み
# =========================================================
def step1_load_data(csv_path):
    """Read the feature CSV at ``csv_path``, print a preview and return the frame."""
    loaded = pd.read_csv(csv_path)
    print("=== Step1: データ読み込み完了 ===")
    preview = loaded.head()
    print(preview)
    return loaded

# =========================================================
# Step2: データ分割 & モデル学習
# =========================================================
def step2_train_model(df):
    """Split the rows 80/20 and fit emotion and intensity classifiers.

    Features are all numeric columns that remain after dropping "emotion" and
    "intensity"; with the CSV written by the extraction cell that means
    valence, arousal, dominance and the h_* columns. The split is stratified
    on the emotion id with a fixed random_state, and the train/test rows are
    also written to ogvc_split_data/train and ogvc_split_data/test.

    NOTE(review): the split is per row, so rows sharing an utt_id (the same
    utterance at different intensities) can land on both sides of the split —
    confirm this is intended.

    Args:
        df: DataFrame with "emotion", "intensity" and the feature columns.

    Returns:
        dict with keys "X_test", "filename_test" (None when df has no
        "filename" column), "emo_test", "int_test", "emo_model", "int_model".

    Raises:
        ValueError: if "emotion" holds a value that is not a key of
            EMOTION_LABELS. Such values would otherwise become NaN targets.
    """
    # Map the emotion codes first so unknown codes are reported clearly and
    # before any directory or split file is created.
    y_emo = df["emotion"].map(EMOTION_LABELS)
    unmapped = y_emo.isna()
    if unmapped.any():
        unknown = sorted(df.loc[unmapped, "emotion"].astype(str).unique())
        raise ValueError(
            f"Emotion labels missing from EMOTION_LABELS: {unknown}"
        )
    y_int = df["intensity"]

    SPLIT_DIR = "ogvc_split_data"
    TRAIN_DIR = os.path.join(SPLIT_DIR, "train")
    TEST_DIR = os.path.join(SPLIT_DIR, "test")
    os.makedirs(TRAIN_DIR, exist_ok=True)
    os.makedirs(TEST_DIR, exist_ok=True)

    # Keep only numeric columns; string columns such as utt_id/filename drop out.
    feature_df = df.drop(columns=["emotion", "intensity"])
    numeric_cols = feature_df.select_dtypes(include=[np.number]).columns
    X = feature_df[numeric_cols]

    X_train, X_test, emo_train, emo_test, int_train, int_test = train_test_split(
        X, y_emo, y_int, test_size=0.2, random_state=42, stratify=y_emo
    )

    # Persist the exact rows of each side of the split.
    train_df = df.loc[X_train.index]
    test_df = df.loc[X_test.index]
    train_df.to_csv(os.path.join(TRAIN_DIR, "train_data-re1.csv"), index=False)
    test_df.to_csv(os.path.join(TEST_DIR, "test_data-re1.csv"), index=False)

    emo_model = LogisticRegression(max_iter=3000).fit(X_train, emo_train)
    int_model = LogisticRegression(max_iter=3000).fit(X_train, int_train)

    print("=== Step2: モデル学習完了 ===")

    return {
        "X_test": X_test,
        "filename_test": df.loc[X_test.index, "filename"] if "filename" in df.columns else None,
        "emo_test": emo_test,
        "int_test": int_test,
        "emo_model": emo_model,
        "int_model": int_model
    }

# =========================================================
# Step3: 評価 & CSV出力
# =========================================================
def step3_evaluate(data):
    """Evaluate both classifiers on the held-out rows.

    Prints a classification report for emotion and for intensity, writes the
    per-file predictions (with a correct/incorrect mark per task) to
    RESULT_DIR, and saves one confusion-matrix image per task.

    Args:
        data: dict returned by step2_train_model.
    """
    features, filenames, emo_true, int_true, emo_clf, int_clf = (
        data[key]
        for key in (
            "X_test", "filename_test", "emo_test", "int_test", "emo_model", "int_model"
        )
    )

    emo_guess = emo_clf.predict(features)
    int_guess = int_clf.predict(features)

    print("\n=== Step3: 評価 ===")
    for heading, truth, guess in (
        ("\nEmotion Classification Report", emo_true, emo_guess),
        ("\nIntensity Classification Report", int_true, int_guess),
    ):
        print(heading)
        print(classification_report(truth, guess))

    def mark_hits(truth, guess):
        # One mark per test row: circle when the prediction equals the truth.
        return ["〇" if t == p else "×" for t, p in zip(truth, guess)]

    # Save the per-file predictions.
    results = pd.DataFrame({
        "filename": filenames,
        "emotion_true": emo_true,
        "emotion_pred": emo_guess,
        "intensity_true": int_true,
        "intensity_pred": int_guess,
        "emotion_correct": mark_hits(emo_true, emo_guess),
        "intensity_correct": mark_hits(int_true, int_guess),
    })
    pred_path = os.path.join(RESULT_DIR, "prediction_results-re1.csv")
    results.to_csv(pred_path, index=False)
    print("\n予測結果を保存しました →", pred_path)

    # Confusion matrices.
    emo_cm_path = os.path.join(RESULT_DIR, "confusion_emotion-re1.png")
    int_cm_path = os.path.join(RESULT_DIR, "confusion_intensity-re1.png")

    # NEU and OTH rows/columns are left out of the emotion plot.
    plot_conf_matrix(
        emo_true,
        emo_guess,
        list(EMOTION_LABELS.keys()),
        " OGVC Emotion Confusion Matrix",
        emo_cm_path,
        exclude_labels=["NEU", "OTH"],
    )

    # Intensity levels 0-3 (level 3 was added to the label list).
    plot_conf_matrix(
        int_true,
        int_guess,
        ["0", "1", "2", "3"],
        "OGVC Intensity Confusion Matrix",
        int_cm_path,
    )

    for saved in (emo_cm_path, int_cm_path):
        print("混同行列保存 →", saved)

# =========================================================
# メイン処理
# =========================================================
if __name__ == "__main__":
    # Load the feature CSV written by the extraction cell.
    df = step1_load_data("VA_results_bulk/OGVC_VAD_last_hidden_bulk-re.csv")
    # Split the rows and fit the emotion and intensity models.
    data = step2_train_model(df)
    # Print the reports and save the predictions and confusion matrices.
    step3_evaluate(data)
    print("\n=== 完了：Step1 → Step2 → Step3 ===")


=== Step1: データ読み込み完了 ===
    utt_id         filename   valence   arousal  dominance  intensity emotion  \
0  FOY0101  FOY0101ANT0.wav  0.343053  0.333032   0.417036          0     ANT   
1  FOY0101  FOY0101ANT1.wav  0.358071  0.425703   0.479033          1     ANT   
2  FOY0101  FOY0101ANT2.wav  0.449238  0.607365   0.616360          2     ANT   
3  FOY0101  FOY0101ANT3.wav  0.427220  0.620275   0.616097          3     ANT   
4  FOY0104  FOY0104FEA0.wav  0.337510  0.301136   0.398823          0     FEA   

        h_0       h_1       h_2  ...    h_1014    h_1015    h_1016    h_1017  \
0 -0.007525  0.006666 -0.010108  ...  0.003701  0.011701 -0.019870  0.007911   
1 -0.007513  0.006847 -0.007410  ...  0.003669  0.011756 -0.013553  0.008053   
2 -0.007452  0.006693 -0.007730  ...  0.003666  0.011478 -0.031439  0.008050   
3 -0.007458  0.006260 -0.007765  ...  0.003586  0.011238 -0.018411  0.008144   
4 -0.007517  0.005590 -0.009640  ...  0.004394  0.011929 -0.033647  0.008054   

     h_

In [2]:
# 強度を回帰で求める
# 保存先；results_ogvc_emo_int/pred_emo-bunrui_int-kaiki-re2.csv

# ======================================================
# OGVC 完全版
# Emotion : 分類（Logistic Regression）
# Intensity : 回帰（Ridge Regression）
# ======================================================

import os
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    confusion_matrix,
    classification_report,
    mean_absolute_error,
    mean_squared_error
)
from sklearn.linear_model import LogisticRegression, Ridge

import seaborn as sns
import matplotlib.pyplot as plt

# ================================
# 設定
# ================================
# Directory that receives prediction CSVs and the confusion-matrix image.
RESULT_DIR = "results_ogvc_emo_int"
os.makedirs(RESULT_DIR, exist_ok=True)

# Emotion code -> integer class id (0-9, in the order listed).
EMOTION_LABELS = dict(
    zip(
        ["JOY", "ACC", "FEA", "SUR", "SAD", "DIS", "ANG", "ANT", "NEU", "OTH"],
        range(10),
    )
)

# =========================================================
# 混同行列プロット（Emotion 用）
# =========================================================
def plot_conf_matrix(true, pred, labels, title, save_path, exclude_labels=None):
    """Plot a confusion matrix as a heat map and write it to ``save_path``.

    The matrix is computed for class ids 0 .. len(labels)-1, named by
    ``labels`` in order. Labels listed in ``exclude_labels`` have their row
    and column removed before the figure is drawn.
    """
    counts = confusion_matrix(true, pred, labels=range(len(labels)))
    shown = labels

    if exclude_labels is not None:
        hidden_idx = [labels.index(name) for name in exclude_labels]
        for axis in (0, 1):
            counts = np.delete(counts, hidden_idx, axis=axis)
        shown = [name for name in labels if name not in exclude_labels]

    # Larger annotation font so the cell counts stay readable.
    heatmap_options = dict(
        annot=True,
        fmt="d",
        xticklabels=shown,
        yticklabels=shown,
        cmap="Blues",
        annot_kws={"size": 16},
    )

    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(counts, ax=ax, **heatmap_options)
    ax.set_title(title)
    ax.set_xlabel("Predicted")
    ax.set_ylabel("True")
    fig.tight_layout()
    fig.savefig(save_path)
    plt.close(fig)

# =========================================================
# Step1: データ読み込み
# =========================================================
def step1_load_data(csv_path):
    """Load the CSV at ``csv_path`` into a DataFrame, show its first rows, return it."""
    table = pd.read_csv(csv_path)
    print("=== Step1: データ読み込み完了 ===")
    print(table.head())
    return table

# =========================================================
# Step2: データ分割 & モデル学習
# =========================================================
def step2_train_model(df):
    """Split the rows 80/20, fit an emotion classifier and an intensity regressor.

    Features are all numeric columns that remain after dropping "emotion" and
    "intensity"; with the CSV written by the extraction cell that means
    valence, arousal, dominance and the h_* columns. Emotion is modelled with
    LogisticRegression and intensity with Ridge (alpha=1.0). The split is
    stratified on the emotion id with a fixed random_state, and the train/test
    rows are also written to ogvc_split_data/train and ogvc_split_data/test.

    NOTE(review): the split is per row, so rows sharing an utt_id (the same
    utterance at different intensities) can land on both sides of the split —
    confirm this is intended.

    Args:
        df: DataFrame with "emotion", "intensity" and the feature columns.

    Returns:
        dict with keys "X_test", "filename_test" (None when df has no
        "filename" column), "emo_test", "int_test", "emo_model", "int_model".

    Raises:
        ValueError: if "emotion" holds a value that is not a key of
            EMOTION_LABELS. Such values would otherwise become NaN targets.
    """
    # Labels: map the emotion codes first so unknown codes are reported
    # clearly and before any directory or split file is created.
    y_emo = df["emotion"].map(EMOTION_LABELS)
    unmapped = y_emo.isna()
    if unmapped.any():
        unknown = sorted(df.loc[unmapped, "emotion"].astype(str).unique())
        raise ValueError(
            f"Emotion labels missing from EMOTION_LABELS: {unknown}"
        )
    y_int = df["intensity"]  # used as a regression target, left as is

    SPLIT_DIR = "ogvc_split_data"
    TRAIN_DIR = os.path.join(SPLIT_DIR, "train")
    TEST_DIR = os.path.join(SPLIT_DIR, "test")
    os.makedirs(TRAIN_DIR, exist_ok=True)
    os.makedirs(TEST_DIR, exist_ok=True)

    # Features: numeric columns only; string columns such as utt_id/filename drop out.
    feature_df = df.drop(columns=["emotion", "intensity"])
    numeric_cols = feature_df.select_dtypes(include=[np.number]).columns
    X = feature_df[numeric_cols]

    # Split, stratified on emotion.
    X_train, X_test, emo_train, emo_test, int_train, int_test = train_test_split(
        X, y_emo, y_int,
        test_size=0.2,
        random_state=42,
        stratify=y_emo
    )

    # Persist the exact rows of each side of the split.
    train_df = df.loc[X_train.index]
    test_df = df.loc[X_test.index]
    train_df.to_csv(os.path.join(TRAIN_DIR, "train_data-re2.csv"), index=False)
    test_df.to_csv(os.path.join(TEST_DIR, "test_data-re2.csv"), index=False)

    # ===== Models =====
    emo_model = LogisticRegression(max_iter=3000)
    emo_model.fit(X_train, emo_train)

    int_model = Ridge(alpha=1.0)
    int_model.fit(X_train, int_train)

    print("=== Step2: モデル学習完了 ===")

    return {
        "X_test": X_test,
        "filename_test": df.loc[X_test.index, "filename"] if "filename" in df.columns else None,
        "emo_test": emo_test,
        "int_test": int_test,
        "emo_model": emo_model,
        "int_model": int_model
    }

# =========================================================
# Step3: 評価 & CSV出力
# =========================================================
def step3_evaluate(data):
    """Evaluate the emotion classifier and the intensity regressor.

    Prints the emotion classification report and saves its confusion matrix
    (without the NEU/OTH rows and columns), prints MAE / MSE / RMSE for the
    intensity predictions, and writes the per-file predictions together with
    the signed intensity error (pred - true) to RESULT_DIR.

    Args:
        data: dict returned by step2_train_model.
    """
    features, filenames, emo_true, int_true, emo_clf, int_reg = (
        data[key]
        for key in (
            "X_test", "filename_test", "emo_test", "int_test", "emo_model", "int_model"
        )
    )

    # Predictions; the intensity output is continuous.
    emo_guess = emo_clf.predict(features)
    int_guess = int_reg.predict(features)

    # ---- Emotion (classification) ----
    print("\n=== Emotion Classification ===")
    print(classification_report(emo_true, emo_guess))

    emo_cm_path = os.path.join(RESULT_DIR, "confusion_emotion-re2.png")
    plot_conf_matrix(
        emo_true,
        emo_guess,
        list(EMOTION_LABELS.keys()),
        "OGVC Emotion Confusion Matrix",
        emo_cm_path,
        exclude_labels=["NEU", "OTH"],
    )
    print("Emotion 混同行列保存 →", emo_cm_path)

    # ---- Intensity (regression) ----
    mae = mean_absolute_error(int_true, int_guess)
    mse = mean_squared_error(int_true, int_guess)
    rmse = np.sqrt(mse)

    print("\n=== Intensity Regression ===")
    for metric_name, value in (("MAE  ", mae), ("MSE  ", mse), ("RMSE ", rmse)):
        print(f"{metric_name}: {value:.4f}")

    # ---- Per-file predictions ----
    columns = {
        "filename": filenames,
        "emotion_true": emo_true,
        "emotion_pred": emo_guess,
        "intensity_true": int_true,
        "intensity_pred": int_guess,
        "intensity_error": int_guess - int_true,
    }
    pred_path = os.path.join(RESULT_DIR, "pred_emo-bunrui_int-kaiki-re2.csv")
    pd.DataFrame(columns).to_csv(pred_path, index=False)
    print("予測結果 CSV 保存 →", pred_path)

# =========================================================
# メイン処理
# =========================================================
if __name__ == "__main__":
    # Load the feature CSV written by the extraction cell.
    df = step1_load_data("VA_results_bulk/OGVC_VAD_last_hidden_bulk-re.csv")
    # Split the rows, then fit the emotion classifier and intensity regressor.
    data = step2_train_model(df)
    # Print the metrics and save the predictions and the confusion matrix.
    step3_evaluate(data)
    print("\n=== 完了：Emotion=分類 / Intensity=回帰 ===")

=== Step1: データ読み込み完了 ===
    utt_id         filename   valence   arousal  dominance  intensity emotion  \
0  FOY0101  FOY0101ANT0.wav  0.343053  0.333032   0.417036          0     ANT   
1  FOY0101  FOY0101ANT1.wav  0.358071  0.425703   0.479033          1     ANT   
2  FOY0101  FOY0101ANT2.wav  0.449238  0.607365   0.616360          2     ANT   
3  FOY0101  FOY0101ANT3.wav  0.427220  0.620275   0.616097          3     ANT   
4  FOY0104  FOY0104FEA0.wav  0.337510  0.301136   0.398823          0     FEA   

        h_0       h_1       h_2  ...    h_1014    h_1015    h_1016    h_1017  \
0 -0.007525  0.006666 -0.010108  ...  0.003701  0.011701 -0.019870  0.007911   
1 -0.007513  0.006847 -0.007410  ...  0.003669  0.011756 -0.013553  0.008053   
2 -0.007452  0.006693 -0.007730  ...  0.003666  0.011478 -0.031439  0.008050   
3 -0.007458  0.006260 -0.007765  ...  0.003586  0.011238 -0.018411  0.008144   
4 -0.007517  0.005590 -0.009640  ...  0.004394  0.011929 -0.033647  0.008054   

     h_

In [3]:
# 予測結果の感情を数値からラベルに変換
import os
import pandas as pd

# ================================
# 設定
# ================================
RESULT_DIR = "results_ogvc_emo_int"
pred_path = os.path.join(RESULT_DIR, "pred_emo-bunrui_int-kaiki-re2.csv")

out_path = os.path.join(
    RESULT_DIR,
    "pred_results_with_emotion_label-re2.csv"
)

# ================================
# Emotion id -> label table
# ================================
# Inverse of the EMOTION_LABELS mapping used for training. Ids 8 (NEU) and
# 9 (OTH) are included so that such rows get a label instead of NaN.
EMOTION_ID2LABEL = {
    0: "JOY",
    1: "ACC",
    2: "FEA",
    3: "SUR",
    4: "SAD",
    5: "DIS",
    6: "ANG",
    7: "ANT",
    8: "NEU",
    9: "OTH",
}

# ================================
# Read the prediction CSV
# ================================
df = pd.read_csv(pred_path)

# ================================
# Convert only the emotion columns to labels
# ================================
df["emotion_true_label"] = df["emotion_true"].map(EMOTION_ID2LABEL)
df["emotion_pred_label"] = df["emotion_pred"].map(EMOTION_ID2LABEL)

# Note: intensity_true / intensity_pred are kept unchanged.

# ================================
# Save
# ================================
df.to_csv(out_path, index=False)

print(f"Saved emotion-labeled results to: {out_path}")

Saved emotion-labeled results to: results_ogvc_emo_int/pred_results_with_emotion_label-re2.csv


In [4]:
# 予測結果(pred_results_with_emotion_label.csv)を読み込んで，全体＋感情別 MAE / RMSEを出力
import os
import pandas as pd
import numpy as np

# ================================
# 設定
# ================================
RESULT_DIR = "results_ogvc_emo_int"
csv_path = os.path.join(RESULT_DIR, "pred_results_with_emotion_label-re2.csv")
out_path = os.path.join(RESULT_DIR, "eval_emotion_wise_intensity_mae_rmse-re2.csv")

# --------------------------------
# Read the labelled predictions
# --------------------------------
df = pd.read_csv(csv_path)

# --------------------------------
# Error columns, derived from the existing intensity_error (= pred - true)
# --------------------------------
df["abs_error"] = np.abs(df["intensity_error"])
df["sq_error"] = df["intensity_error"].pow(2)

# --------------------------------
# 1) MAE / RMSE over all rows
# --------------------------------
overall_mae = df["abs_error"].mean()
overall_rmse = np.sqrt(df["sq_error"].mean())

print("=== Overall Intensity Performance ===")
print(f"MAE  : {overall_mae:.4f}")
print(f"RMSE : {overall_rmse:.4f}")


# --------------------------------
# 2) MAE / RMSE per true emotion label
# --------------------------------
def _root_of_mean(squared_errors):
    """Square root of the mean of a group's squared errors."""
    return np.sqrt(squared_errors.mean())


emotion_metrics = df.groupby("emotion_true_label").agg(
    MAE=("abs_error", "mean"),
    RMSE=("sq_error", _root_of_mean),
    Count=("abs_error", "count"),
).reset_index()

print("\n=== Emotion-wise Intensity Performance ===")
print(emotion_metrics)

# --------------------------------
# 3) Save (for the paper / figures and tables)
# --------------------------------
emotion_metrics.to_csv(out_path, index=False)
print(f"\nSaved emotion-wise MAE / RMSE to: {out_path}")

=== Overall Intensity Performance ===
MAE  : 0.4987
RMSE : 0.6294

=== Emotion-wise Intensity Performance ===
  emotion_true_label       MAE      RMSE  Count
0                ACC  0.591620  0.730829     64
1                ANG  0.489957  0.639905     64
2                ANT  0.521203  0.641177     64
3                DIS  0.531545  0.687275     64
4                FEA  0.442996  0.553311     64
5                JOY  0.386811  0.471833     67
6                SAD  0.562957  0.696239     68
7                SUR  0.469864  0.585042     77

Saved emotion-wise MAE / RMSE to: results_ogvc_emo_int/eval_emotion_wise_intensity_mae_rmse-re2.csv
