In [None]:
# OGVC
# 産総研のizanamiで使用されているwav2vec2の出力をogvc_izanami_features.csvに保存
# wav2vec2_vad_ogvc.ipynbとの比較, カーネル；ser

import os
import re
import csv
import soundfile as sf
import torch
import numpy as np
from transformers import AutoFeatureExtractor, AutoModel
import librosa
import numpy as np

# ===============================
# 設定
# ===============================
ROOT_WAV_DIR = "/autofs/diamond2/share/diamond/corpus/OGVC/Vol2/Acted/wav"
MODEL_NAME = "imprt/izanami-wav2vec2-large"
OUTPUT_CSV = "ogvc_izanami_features.csv"

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# ===============================
# モデルロード
# ===============================
feature_extractor = AutoFeatureExtractor.from_pretrained(MODEL_NAME)
model = AutoModel.from_pretrained(MODEL_NAME).to(DEVICE)
model.eval()

# ===============================
# ファイル名解析用 regex
# FOY0101ANT0.wav → emotion=ANT, intensity=0
# ===============================
FILENAME_PATTERN = re.compile(r"^[A-Z0-9]+([A-Z]{3})(\d)\.wav$")

# ===============================
# CSV ヘッダ作成
# ===============================
rows = []
header_written = False

# ===============================
# wav 以下を再帰探索
# ===============================
for root, _, files in os.walk(ROOT_WAV_DIR):
    for fname in files:
        if not fname.lower().endswith(".wav"):
            continue

        match = FILENAME_PATTERN.match(fname)
        if match is None:
            print(f"Skip (filename mismatch): {fname}")
            continue

        emotion = match.group(1)
        intensity = int(match.group(2))
        wav_path = os.path.join(root, fname)

        # ===============================
        # 音声ロード & リサンプリング
        # ===============================
        audio, sr = sf.read(wav_path)

        # stereo → mono
        if audio.ndim > 1:
            audio = audio.mean(axis=1)

        # resample to 16kHz（←ここが重要）
        if sr != 16000:
            audio = librosa.resample(audio, orig_sr=sr, target_sr=16000)
            sr = 16000

        # ===============================
        # 特徴量抽出
        # ===============================
        inputs = feature_extractor(
            audio,
            sampling_rate=sr,
            return_tensors="pt"
        )

        inputs = {k: v.to(DEVICE) for k, v in inputs.items()}

        with torch.no_grad():
            outputs = model(**inputs)
            hidden_states = outputs.last_hidden_state  # (T, D) or (1, T, D)

        # (1, T, D) → (T, D)
        hidden_states = hidden_states.squeeze(0)

        # ===============================
        # 時間方向 mean pooling
        # ===============================
        feature_vector = hidden_states.mean(dim=0).cpu().numpy()  # (D,)

        # ===============================
        # CSV 用 row 作成
        # ===============================
        row = [fname, emotion, intensity] + feature_vector.tolist()
        rows.append(row)

        # ヘッダは最初の1回だけ作る
        if not header_written:
            header = (
                ["filename", "emotion", "intensity"]
                + [f"ssl_{i}" for i in range(len(feature_vector))]
            )
            header_written = True

# ===============================
# CSV 書き込み
# ===============================
with open(OUTPUT_CSV, "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(header)
    writer.writerows(rows)

print(f"Saved to {OUTPUT_CSV}")

  from .autonotebook import tqdm as notebook_tqdm


Saved to ogvc_izanami_features.csv


In [13]:
# ======================================================
# OGVC 版：sklearnで Emotion→分類/intensity→回帰
# ======================================================

import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import Ridge              # 強度用の回帰
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# ================================
# 設定
# ================================
RESULT_DIR = "results_ogvc_emo_int-izanami"     # 結果
os.makedirs(RESULT_DIR, exist_ok=True)

EMOTION_LABELS = {
    "JOY": 0, "ACC": 1, "FEA": 2, "SUR": 3,
    "SAD": 4, "DIS": 5, "ANG": 6, "ANT": 7,
    "NEU": 8, "OTH": 9
}

# =========================================================
# 混同行列プロット
# =========================================================
def plot_conf_matrix(true, pred, labels, title, save_path, exclude_labels=None):
    cm = confusion_matrix(true, pred, labels=range(len(labels)))

    if exclude_labels is not None:
        exclude_idx = [labels.index(l) for l in exclude_labels]
        cm = np.delete(cm, exclude_idx, axis=0)
        cm = np.delete(cm, exclude_idx, axis=1)
        labels = [l for l in labels if l not in exclude_labels]

    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt="d", xticklabels=labels, yticklabels=labels, cmap="Blues")
    plt.title(title)
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.tight_layout()
    plt.savefig(save_path)
    plt.close()

# =========================================================
# Step1: データ読み込み
# =========================================================
def step1_load_data(csv_path):
    df = pd.read_csv(csv_path)
    print("=== Step1: データ読み込み完了 ===")
    print(df.head())
    return df

# =========================================================
# Step2: データ分割 & モデル学習
# =========================================================
def step2_train_model(df):
    import numpy as np
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import StandardScaler
    from sklearn.linear_model import LogisticRegression, Ridge
    import os

    SPLIT_DIR = "ogvc_split_data-izanami"
    TRAIN_DIR = os.path.join(SPLIT_DIR, "train")
    TEST_DIR = os.path.join(SPLIT_DIR, "test")
    os.makedirs(TRAIN_DIR, exist_ok=True)
    os.makedirs(TEST_DIR, exist_ok=True)

    # 特徴量とラベル
    feature_df = df.drop(columns=["emotion", "intensity"])
    numeric_cols = feature_df.select_dtypes(include=[np.number]).columns
    X = feature_df[numeric_cols]

    y_emo = df["emotion"].map(EMOTION_LABELS)
    y_int = df["intensity"]

    filename = df["filename"]

    # 分割
    X_train, X_test, emo_train, emo_test, int_train, int_test, filename_train, filename_test = train_test_split(
        X, y_emo, y_int, filename, test_size=0.2, random_state=42, stratify=y_emo
    )

    # === ここが本質 ===
    X_train_np = X_train.to_numpy(dtype=np.float32)
    X_test_np  = X_test.to_numpy(dtype=np.float32)

    scaler = StandardScaler()
    X_train_np = scaler.fit_transform(X_train_np)
    X_test_np  = scaler.transform(X_test_np)

    # 学習
    emo_model = LogisticRegression(
        max_iter=3000,
        class_weight="balanced"
    ).fit(X_train_np, emo_train)

    int_model = Ridge().fit(X_train_np, int_train)

    return {
        "emo_model": emo_model,
        "int_model": int_model,
        "X_test": X_test_np,
        "emo_test": emo_test,
        "int_test": int_test,
        "filename_test": filename_test.values,
    }

# =========================================================
# Step3: 評価 & CSV出力
# =========================================================
def step3_evaluate(data):
    X_test = data["X_test"]
    filename_test = data["filename_test"]
    emo_test = data["emo_test"]
    int_test = data["int_test"]
    emo_model = data["emo_model"]
    int_model = data["int_model"]

    emo_pred = emo_model.predict(X_test)
    int_pred = int_model.predict(X_test)

    print("\n=== Step3: 評価 ===")

    # ===== 感情（分類）=====
    print("\nEmotion Classification Report")
    print(classification_report(emo_test, emo_pred))

    # ===== 強度（回帰）=====
    mae = mean_absolute_error(int_test, int_pred)
    rmse = np.sqrt(mean_squared_error(int_test, int_pred))

    print("\nIntensity Regression Metrics")
    print(f"MAE : {mae:.4f}")
    print(f"RMSE: {rmse:.4f}")

    # ===== CSV 保存 =====
    emotion_correct = ["〇" if t == p else "×" for t, p in zip(emo_test, emo_pred)]
    intensity_error = np.abs(int_test - int_pred)

    pred_df = pd.DataFrame({
        "filename": filename_test,
        "emotion_true": emo_test,
        "emotion_pred": emo_pred,
        "emotion_correct": emotion_correct,
        "intensity_true": int_test,
        "intensity_pred": int_pred,
        "intensity_abs_error": intensity_error
    })

    pred_path = os.path.join(RESULT_DIR, "prediction_results.csv")
    pred_df.to_csv(pred_path, index=False)
    print("\n予測結果を保存しました →", pred_path)

    # ===== 混同行列（感情のみ）=====
    emo_cm_path = os.path.join(RESULT_DIR, "confusion_emotion.png")

    plot_conf_matrix(
        emo_test,
        emo_pred,
        list(EMOTION_LABELS.keys()),
        "OGVC Emotion Confusion Matrix",
        emo_cm_path,
        exclude_labels=["NEU", "OTH"]
    )

    print("混同行列保存 →", emo_cm_path)

# =========================================================
# 実行部
# =========================================================
if __name__ == "__main__":
    df = step1_load_data("ogvc_izanami_features.csv")
    data = step2_train_model(df)
    step3_evaluate(data)
    print("\n=== 完了：Step1 → Step2 → Step3 ===")

=== Step1: データ読み込み完了 ===
          filename emotion  intensity    ssl_0     ssl_1     ssl_2     ssl_3  \
0  FOY0806ANG2.wav     ANG          2  0.00653 -0.107648 -0.213318 -0.308118   
1  FOY0101ANT2.wav     ANT          2  0.00653 -0.107648 -0.213318 -0.308119   
2  FOY0101ANT3.wav     ANT          3  0.00653 -0.107648 -0.213318 -0.308119   
3  FOY0104FEA0.wav     FEA          0  0.00653 -0.107648 -0.213318 -0.308119   
4  FOY0104FEA3.wav     FEA          3  0.00653 -0.107648 -0.213318 -0.308118   

      ssl_4     ssl_5     ssl_6  ...  ssl_1014  ssl_1015  ssl_1016  ssl_1017  \
0 -0.065852 -0.095156 -0.152033  ... -0.239009  0.015603 -0.278374 -0.082475   
1 -0.065852 -0.095156 -0.152033  ... -0.239009  0.015603 -0.278374 -0.082475   
2 -0.065852 -0.095156 -0.152033  ... -0.239009  0.015603 -0.278374 -0.082475   
3 -0.065852 -0.095156 -0.152033  ... -0.239009  0.015603 -0.278374 -0.082475   
4 -0.065852 -0.095156 -0.152033  ... -0.239009  0.015603 -0.278374 -0.082475   

   ssl_1018  

In [1]:
# 予測結果の感情を数値からラベルに変換
import os
import pandas as pd

# ================================
# 設定
# ================================
RESULT_DIR = "results_ogvc_emo_int-izanami"
pred_path = os.path.join(RESULT_DIR, "prediction_results.csv")

out_path = os.path.join(
    RESULT_DIR,
    "pred_results_with_emotion_label.csv"
)

# ================================
# 感情ラベル対応表
# ================================
EMOTION_ID2LABEL = {
    0: "JOY",
    1: "ACC",
    2: "FEA",
    3: "SUR",
    4: "SAD",
    5: "DIS",
    6: "ANG",
    7: "ANT"
}

# ================================
# CSV読み込み
# ================================
df = pd.read_csv(pred_path)

# ================================
# 感情カテゴリのみラベル化
# ================================
df["emotion_true_label"] = df["emotion_true"].map(EMOTION_ID2LABEL)
df["emotion_pred_label"] = df["emotion_pred"].map(EMOTION_ID2LABEL)

# ※ intensity_true / intensity_pred はそのまま残す

# ================================
# 保存
# ================================
df.to_csv(out_path, index=False)

print(f"Saved emotion-labeled results to: {out_path}")

Saved emotion-labeled results to: results_ogvc_emo_int-izanami/pred_results_with_emotion_label.csv


In [3]:
# 予測結果(pred_results_with_emotion_label.csv)を読み込んで，全体＋感情別 MAE / RMSEを出力
import os
import pandas as pd
import numpy as np

# ================================
# 設定
# ================================
RESULT_DIR = "results_ogvc_emo_int-izanami"
csv_path = os.path.join(
    RESULT_DIR,
    "pred_results_with_emotion_label.csv"
)

out_path = os.path.join(
    RESULT_DIR,
    "eval_emotion_wise_intensity_mae_rmse.csv"
)

# ================================
# CSV読み込み
# ================================
df = pd.read_csv(csv_path)

# ================================
# 誤差計算（RMSE用）
# ================================
signed_error = df["intensity_pred"] - df["intensity_true"]
df["sq_error"] = signed_error ** 2

# ================================
# 1️⃣ モデル全体 MAE / RMSE
# ================================
overall_mae = df["intensity_abs_error"].mean()
overall_rmse = np.sqrt(df["sq_error"].mean())

print("=== Overall Intensity Performance ===")
print(f"MAE  : {overall_mae:.4f}")
print(f"RMSE : {overall_rmse:.4f}")

# ================================
# 2️⃣ 感情カテゴリ別 MAE / RMSE
# ================================
emotion_metrics = (
    df.groupby("emotion_true_label")
      .agg(
          MAE=("intensity_abs_error", "mean"),
          RMSE=("sq_error", lambda x: np.sqrt(x.mean())),
          Count=("filename", "count")
      )
      .reset_index()
)

print("\n=== Emotion-wise Intensity Performance ===")
print(emotion_metrics)

# ================================
# 保存
# ================================
emotion_metrics.to_csv(out_path, index=False)
print(f"\nSaved to: {out_path}")


=== Overall Intensity Performance ===
MAE  : 1.0229
RMSE : 1.2662

=== Emotion-wise Intensity Performance ===
  emotion_true_label       MAE      RMSE  Count
0                ACC  1.101547  1.371877     64
1                ANG  0.972356  1.176386     64
2                ANT  1.247847  1.576385     64
3                DIS  1.080584  1.341359     64
4                FEA  0.936044  1.109407     64
5                JOY  0.908127  1.120762     67
6                SAD  1.112807  1.299792     68
7                SUR  0.857064  1.092550     77

Saved to: results_ogvc_emo_int-izanami/eval_emotion_wise_intensity_mae_rmse.csv


In [None]:
# 強度を回帰ではなく分類してみる(番外編)
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import Ridge              # 強度用の回帰
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# ================================
# 設定
# ================================
RESULT_DIR = "results_ogvc_emo_int-izanami-bunrui"     # 結果
os.makedirs(RESULT_DIR, exist_ok=True)

EMOTION_LABELS = {
    "JOY": 0, "ACC": 1, "FEA": 2, "SUR": 3,
    "SAD": 4, "DIS": 5, "ANG": 6, "ANT": 7,
    "NEU": 8, "OTH": 9
}

# =========================================================
# 混同行列プロット
# =========================================================
def plot_conf_matrix(true, pred, labels, title, save_path, exclude_labels=None):
    cm = confusion_matrix(true, pred, labels=range(len(labels)))

    if exclude_labels is not None:
        exclude_idx = [labels.index(l) for l in exclude_labels]
        cm = np.delete(cm, exclude_idx, axis=0)
        cm = np.delete(cm, exclude_idx, axis=1)
        labels = [l for l in labels if l not in exclude_labels]

    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt="d", xticklabels=labels, yticklabels=labels, cmap="Blues")
    plt.title(title)
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.tight_layout()
    plt.savefig(save_path)
    plt.close()

# =========================================================
# Step1: データ読み込み
# =========================================================
def step1_load_data(csv_path):
    df = pd.read_csv(csv_path)
    print("=== Step1: データ読み込み完了 ===")
    print(df.head())
    return df

def step2_train_model(df):
    from sklearn.preprocessing import StandardScaler
    from sklearn.linear_model import LogisticRegression
    import os
    import numpy as np

    SPLIT_DIR = "ogvc_split_data-izanami"
    TRAIN_DIR = os.path.join(SPLIT_DIR, "train")
    TEST_DIR = os.path.join(SPLIT_DIR, "test")
    os.makedirs(TRAIN_DIR, exist_ok=True)
    os.makedirs(TEST_DIR, exist_ok=True)

    # 特徴量
    feature_df = df.drop(columns=["emotion", "intensity"])
    numeric_cols = feature_df.select_dtypes(include=[np.number]).columns
    X = feature_df[numeric_cols]

    # ラベル
    y_emo = df["emotion"].map(EMOTION_LABELS)
    y_int_cls = df["intensity"].astype(int)   # ★ 分類なので int

    filename = df["filename"]

    # 分割
    X_train, X_test, emo_train, emo_test, int_train, int_test, filename_train, filename_test = train_test_split(
        X, y_emo, y_int_cls, filename,
        test_size=0.2,
        random_state=42,
        stratify=y_emo
    )

    # numpy 化 + 標準化
    X_train_np = X_train.to_numpy(dtype=np.float32)
    X_test_np  = X_test.to_numpy(dtype=np.float32)

    scaler = StandardScaler()
    X_train_np = scaler.fit_transform(X_train_np)
    X_test_np  = scaler.transform(X_test_np)

    # ===== モデル学習 =====
    emo_model = LogisticRegression(
        max_iter=3000,
        class_weight="balanced"
    ).fit(X_train_np, emo_train)

    int_model = LogisticRegression(
        max_iter=3000,
        class_weight="balanced"
    ).fit(X_train_np, int_train)

    return {
        "emo_model": emo_model,
        "int_model": int_model,
        "X_test": X_test_np,
        "emo_test": emo_test,
        "int_test": int_test,
        "filename_test": filename_test.values,
    }

def step3_evaluate(data):
    X_test = data["X_test"]
    filename_test = data["filename_test"]
    emo_test = data["emo_test"]
    int_test = data["int_test"]
    emo_model = data["emo_model"]
    int_model = data["int_model"]

    emo_pred = emo_model.predict(X_test)
    int_pred = int_model.predict(X_test)

    print("\n=== Step3: 評価 ===")

    # ===== 感情 =====
    print("\nEmotion Classification Report")
    print(classification_report(emo_test, emo_pred))

    # ===== 強度（分類）=====
    print("\nIntensity Classification Report")
    print(classification_report(int_test, int_pred))

    # ===== CSV 保存 =====
    emotion_correct = ["〇" if t == p else "×" for t, p in zip(emo_test, emo_pred)]
    intensity_correct = ["〇" if t == p else "×" for t, p in zip(int_test, int_pred)]

    pred_df = pd.DataFrame({
        "filename": filename_test,
        "emotion_true": emo_test,
        "emotion_pred": emo_pred,
        "emotion_correct": emotion_correct,
        "intensity_true": int_test,
        "intensity_pred": int_pred,
        "intensity_correct": intensity_correct
    })

    pred_path = os.path.join(RESULT_DIR, "prediction_results.csv")
    pred_df.to_csv(pred_path, index=False)
    print("\n予測結果を保存しました →", pred_path)

    # ===== 混同行列 =====
    emo_cm_path = os.path.join(RESULT_DIR, "confusion_emotion.png")
    int_cm_path = os.path.join(RESULT_DIR, "confusion_intensity.png")

    plot_conf_matrix(
        emo_test,
        emo_pred,
        list(EMOTION_LABELS.keys()),
        "OGVC Emotion Confusion Matrix",
        emo_cm_path,
        exclude_labels=["NEU", "OTH"]
    )

    plot_conf_matrix(       # 3を追加
        int_test,
        int_pred,
        ["0", "1", "2", "3"],
        "OGVC Intensity Classification Confusion Matrix",
        int_cm_path
    )

    print("混同行列保存 →", emo_cm_path)
    print("混同行列保存 →", int_cm_path)

# =========================================================
# 実行部
# =========================================================
if __name__ == "__main__":
    df = step1_load_data("ogvc_izanami_features.csv")
    data = step2_train_model(df)
    step3_evaluate(data)
    print("\n=== 完了：Step1 → Step2 → Step3 ===")

=== Step1: データ読み込み完了 ===
          filename emotion  intensity    ssl_0     ssl_1     ssl_2     ssl_3  \
0  FOY0806ANG2.wav     ANG          2  0.00653 -0.107648 -0.213318 -0.308118   
1  FOY0101ANT2.wav     ANT          2  0.00653 -0.107648 -0.213318 -0.308119   
2  FOY0101ANT3.wav     ANT          3  0.00653 -0.107648 -0.213318 -0.308119   
3  FOY0104FEA0.wav     FEA          0  0.00653 -0.107648 -0.213318 -0.308119   
4  FOY0104FEA3.wav     FEA          3  0.00653 -0.107648 -0.213318 -0.308118   

      ssl_4     ssl_5     ssl_6  ...  ssl_1014  ssl_1015  ssl_1016  ssl_1017  \
0 -0.065852 -0.095156 -0.152033  ... -0.239009  0.015603 -0.278374 -0.082475   
1 -0.065852 -0.095156 -0.152033  ... -0.239009  0.015603 -0.278374 -0.082475   
2 -0.065852 -0.095156 -0.152033  ... -0.239009  0.015603 -0.278374 -0.082475   
3 -0.065852 -0.095156 -0.152033  ... -0.239009  0.015603 -0.278374 -0.082475   
4 -0.065852 -0.095156 -0.152033  ... -0.239009  0.015603 -0.278374 -0.082475   

   ssl_1018  