In [1]:
import os
import pandas as pd

def load_accent_info(accent_file):
    """
    Load accent labels from a comma-separated text file.

    Every line of the file is parsed into one list of integers. A blank line
    is kept as an empty list instead of being dropped, so the position of each
    entry in the result still equals the position of its line in the file.

    Args:
        accent_file: Path of the accent file to read.

    Returns:
        list[list[int]]: One list of integers per line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a non-blank line contains a field that is not an integer.
    """
    accent_info = []
    # "utf-8-sig" reads plain ASCII/UTF-8 unchanged and drops a leading BOM,
    # which would otherwise make int() fail on the very first field.
    with open(accent_file, "r", encoding="utf-8-sig") as f:
        for line in f:
            stripped = line.strip()
            if not stripped:
                # Blank line (e.g. an extra newline at the end of the file):
                # int("") would raise, so record an empty row instead.
                accent_info.append([])
                continue
            accent_info.append([int(field) for field in stripped.split(",")])
    return accent_info

def combine_features_with_accent(features_dir, accent_info, output_dir):
    """
    Append an "Accent" column to every feature CSV and save the result.

    The ".csv" files in ``features_dir`` are sorted by name and paired with
    the rows of ``accent_info`` by position (first file with first row, and
    so on). A file is written to ``output_dir`` under its original name only
    when its row count equals the length of its accent row; otherwise a
    message is printed and the file is skipped.

    NOTE(review): pairing is purely positional, so this assumes the rows of
    ``accent_info`` are in the same order as the sorted file names - confirm.

    Args:
        features_dir: Directory containing the feature CSV files.
        accent_info: Sequence of accent rows, one per feature file.
        output_dir: Directory the combined CSV files are written to
            (created if it does not exist).
    """
    csv_names = [name for name in os.listdir(features_dir) if name.endswith(".csv")]
    csv_names.sort()

    os.makedirs(output_dir, exist_ok=True)

    # Files beyond the last accent row have nothing to be paired with.
    covered = csv_names[:len(accent_info)]
    uncovered = csv_names[len(accent_info):]

    for csv_name, labels in zip(covered, accent_info):
        table = pd.read_csv(os.path.join(features_dir, csv_name))

        # One accent value is required per row of the feature table.
        if len(table) != len(labels):
            print(f"Error: Mismatch in length for {csv_name}. Features: {len(table)}, Accents: {len(labels)}")
            continue

        table["Accent"] = labels
        destination = os.path.join(output_dir, csv_name)
        table.to_csv(destination, index=False)
        print(f"Saved combined features with accent to {destination}")

    for csv_name in uncovered:
        print(f"Warning: Accent information is missing for {csv_name}. Skipping.")

# Main processing: set the input/output locations, load the accent rows,
# then merge them into the feature CSV files.
if __name__ == "__main__":
    features_directory = "./basic5000_HuBERT_features_csv"  # directory holding the phoneme/feature CSV files
    accent_file_path = "./jsut_accent_onsoretu/onsoretu_re_re_split.csv"  # accent information file
    output_directory = "./basic5000_features_with_accent"  # output directory

    # Load the accent information (one list of integers per line of the file)
    accent_information = load_accent_info(accent_file_path)

    # Attach the accent values to the phoneme/feature tables and save them
    combine_features_with_accent(features_directory, accent_information, output_directory)

Saved combined features with accent to ./basic5000_features_with_accent/alignment_0001.csv
Saved combined features with accent to ./basic5000_features_with_accent/alignment_0002.csv
Saved combined features with accent to ./basic5000_features_with_accent/alignment_0003.csv
Saved combined features with accent to ./basic5000_features_with_accent/alignment_0004.csv
Saved combined features with accent to ./basic5000_features_with_accent/alignment_0005.csv
Saved combined features with accent to ./basic5000_features_with_accent/alignment_0006.csv
Saved combined features with accent to ./basic5000_features_with_accent/alignment_0007.csv
Saved combined features with accent to ./basic5000_features_with_accent/alignment_0008.csv
Saved combined features with accent to ./basic5000_features_with_accent/alignment_0009.csv
Saved combined features with accent to ./basic5000_features_with_accent/alignment_0010.csv
Saved combined features with accent to ./basic5000_features_with_accent/alignment_0011.csv

In [7]:
# Save each error to its own file (for debugging)
import os
import pandas as pd

def load_accent_info(accent_file):
    """
    Load accent labels from a comma-separated text file.

    Every line of the file is parsed into one list of integers. A blank line
    is kept as an empty list instead of being dropped, so the position of each
    entry in the result still equals the position of its line in the file.

    Args:
        accent_file: Path of the accent file to read.

    Returns:
        list[list[int]]: One list of integers per line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a non-blank line contains a field that is not an integer.
    """
    accent_info = []
    # "utf-8-sig" reads plain ASCII/UTF-8 unchanged and drops a leading BOM,
    # which would otherwise make int() fail on the very first field.
    with open(accent_file, "r", encoding="utf-8-sig") as f:
        for line in f:
            stripped = line.strip()
            if not stripped:
                # Blank line (e.g. an extra newline at the end of the file):
                # int("") would raise, so record an empty row instead.
                accent_info.append([])
                continue
            accent_info.append([int(field) for field in stripped.split(",")])
    return accent_info

def log_error(error_message, error_file_name):
    """
    Write an error message to a per-file log inside the "error_logs" directory.

    The log file is named after ``error_file_name``: a trailing ".csv" is
    removed (only at the end of the name) and "_error.txt" is appended, so
    "alignment_0001.csv" is logged to "error_logs/alignment_0001_error.txt".
    An existing log with the same name is overwritten.

    Args:
        error_message: Text to store in the log file.
        error_file_name: Name of the file the error refers to.
    """
    error_dir = "error_logs"
    os.makedirs(error_dir, exist_ok=True)  # create the log directory on first use

    # Strip the extension only when it is really the suffix; a plain
    # str.replace would also rewrite ".csv" occurring inside the name and
    # would add no "_error.txt" suffix to names that lack the extension.
    suffix = ".csv"
    if error_file_name.endswith(suffix):
        stem = error_file_name[:-len(suffix)]
    else:
        stem = error_file_name

    error_log_path = os.path.join(error_dir, stem + "_error.txt")
    # Explicit UTF-8: the message embeds a file name, which may not be
    # representable in the platform's default encoding.
    with open(error_log_path, "w", encoding="utf-8") as f:
        f.write(error_message)

def combine_features_with_accent(features_dir, accent_info, output_dir):
    """
    Append an "Accent" column to every feature CSV and save the result.

    The ".csv" files in ``features_dir`` are sorted by name and paired with
    the rows of ``accent_info`` by position. A file is written to
    ``output_dir`` under its original name only when its row count equals the
    length of its accent row. On a length mismatch the message is printed and
    also handed to ``log_error`` together with the file name, and the file is
    skipped. Files with no accent row at their position are skipped with a
    printed warning.

    NOTE(review): pairing is purely positional, so this assumes the rows of
    ``accent_info`` are in the same order as the sorted file names - confirm.

    Args:
        features_dir: Directory containing the feature CSV files.
        accent_info: Sequence of accent rows, one per feature file.
        output_dir: Directory the combined CSV files are written to
            (created if it does not exist).
    """
    csv_names = sorted(entry for entry in os.listdir(features_dir) if entry.endswith(".csv"))

    os.makedirs(output_dir, exist_ok=True)

    for position, csv_name in enumerate(csv_names):
        # No accent row left for this file.
        if position >= len(accent_info):
            print(f"Warning: Accent information is missing for {csv_name}. Skipping.")
            continue

        table = pd.read_csv(os.path.join(features_dir, csv_name))
        labels = accent_info[position]

        if len(table) == len(labels):
            # Lengths agree: one accent value per row of the feature table.
            table["Accent"] = labels
            destination = os.path.join(output_dir, csv_name)
            table.to_csv(destination, index=False)
            print(f"Saved combined features with accent to {destination}")
        else:
            report = f"Error: Mismatch in length for {csv_name}. Features: {len(table)}, Accents: {len(labels)}"
            print(report)
            log_error(report, csv_name)  # keep a per-file record of the failure

# Main processing: set the input/output locations, load the accent rows,
# then merge them into the feature CSV files.
if __name__ == "__main__":
    features_directory = "./basic5000_HuBERT_features_csv"  # directory holding the phoneme/feature CSV files
    accent_file_path = "./jsut_accent_onsoretu/onsoretu_re_re_fi_split.csv"  # accent information file; original note: "fi" (フィ) counts as one character
    output_directory = "./basic5000_features_with_accent"  # output directory

    # Load the accent information (one list of integers per line of the file)
    accent_information = load_accent_info(accent_file_path)

    # Attach the accent values to the phoneme/feature tables and save them
    combine_features_with_accent(features_directory, accent_information, output_directory)

Saved combined features with accent to ./basic5000_features_with_accent/alignment_0001.csv
Saved combined features with accent to ./basic5000_features_with_accent/alignment_0002.csv
Saved combined features with accent to ./basic5000_features_with_accent/alignment_0003.csv
Saved combined features with accent to ./basic5000_features_with_accent/alignment_0004.csv
Saved combined features with accent to ./basic5000_features_with_accent/alignment_0005.csv
Saved combined features with accent to ./basic5000_features_with_accent/alignment_0006.csv
Saved combined features with accent to ./basic5000_features_with_accent/alignment_0007.csv
Saved combined features with accent to ./basic5000_features_with_accent/alignment_0008.csv
Saved combined features with accent to ./basic5000_features_with_accent/alignment_0009.csv
Saved combined features with accent to ./basic5000_features_with_accent/alignment_0010.csv
Saved combined features with accent to ./basic5000_features_with_accent/alignment_0011.csv

In [8]:
import os
import pandas as pd

def load_accent_info(accent_file):
    """
    Load accent labels from a comma-separated text file.

    Every line of the file is parsed into one list of integers. A blank line
    is kept as an empty list instead of being dropped, so the position of each
    entry in the result still equals the position of its line in the file.

    Args:
        accent_file: Path of the accent file to read.

    Returns:
        list[list[int]]: One list of integers per line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a non-blank line contains a field that is not an integer.
    """
    accent_info = []
    # "utf-8-sig" reads plain ASCII/UTF-8 unchanged and drops a leading BOM,
    # which would otherwise make int() fail on the very first field.
    with open(accent_file, "r", encoding="utf-8-sig") as f:
        for line in f:
            stripped = line.strip()
            if not stripped:
                # Blank line (e.g. an extra newline at the end of the file):
                # int("") would raise, so record an empty row instead.
                accent_info.append([])
                continue
            accent_info.append([int(field) for field in stripped.split(",")])
    return accent_info

def log_error(error_message, error_file_name):
    """
    Write an error message to a per-file log inside the "error_logs" directory.

    The log file is named after ``error_file_name``: a trailing ".csv" is
    removed (only at the end of the name) and "_error.txt" is appended, so
    "alignment_0001.csv" is logged to "error_logs/alignment_0001_error.txt".
    An existing log with the same name is overwritten.

    Args:
        error_message: Text to store in the log file.
        error_file_name: Name of the file the error refers to.
    """
    error_dir = "error_logs"
    os.makedirs(error_dir, exist_ok=True)  # create the log directory on first use

    # Strip the extension only when it is really the suffix; a plain
    # str.replace would also rewrite ".csv" occurring inside the name and
    # would add no "_error.txt" suffix to names that lack the extension.
    suffix = ".csv"
    if error_file_name.endswith(suffix):
        stem = error_file_name[:-len(suffix)]
    else:
        stem = error_file_name

    error_log_path = os.path.join(error_dir, stem + "_error.txt")
    # Explicit UTF-8: the message embeds a file name, which may not be
    # representable in the platform's default encoding.
    with open(error_log_path, "w", encoding="utf-8") as f:
        f.write(error_message)

def combine_features_with_accent(features_dir, accent_info, output_dir):
    """
    Append an "Accent" column to every feature CSV, save it, and report errors.

    The ".csv" files in ``features_dir`` are sorted by name and paired with
    the rows of ``accent_info`` by position. A file is written to
    ``output_dir`` under its original name only when its row count equals the
    length of its accent row. On a length mismatch the message is printed and
    also handed to ``log_error`` together with the file name, and the file is
    skipped. After all files are processed the number of length mismatches is
    printed as "Total errors"; files skipped because no accent row exists for
    them are warned about but not included in that count.

    NOTE(review): pairing is purely positional, so this assumes the rows of
    ``accent_info`` are in the same order as the sorted file names - confirm.

    Args:
        features_dir: Directory containing the feature CSV files.
        accent_info: Sequence of accent rows, one per feature file.
        output_dir: Directory the combined CSV files are written to
            (created if it does not exist).
    """
    csv_names = [entry for entry in os.listdir(features_dir) if entry.endswith(".csv")]
    csv_names.sort()

    os.makedirs(output_dir, exist_ok=True)

    mismatched = []  # names of the files whose length check failed

    for row_no, csv_name in enumerate(csv_names):
        # No accent row left for this file.
        if row_no >= len(accent_info):
            print(f"Warning: Accent information is missing for {csv_name}. Skipping.")
            continue

        table = pd.read_csv(os.path.join(features_dir, csv_name))
        labels = accent_info[row_no]

        if len(table) == len(labels):
            # Lengths agree: one accent value per row of the feature table.
            table["Accent"] = labels
            target = os.path.join(output_dir, csv_name)
            table.to_csv(target, index=False)
            print(f"Saved combined features with accent to {target}")
        else:
            report = f"Error: Mismatch in length for {csv_name}. Features: {len(table)}, Accents: {len(labels)}"
            print(report)
            log_error(report, csv_name)  # keep a per-file record of the failure
            mismatched.append(csv_name)

    # Summary line: how many files failed the length check.
    print(f"Total errors: {len(mismatched)}")

# Main processing: set the input/output locations, load the accent rows,
# then merge them into the feature CSV files.
if __name__ == "__main__":
    features_directory = "./basic5000_HuBERT_features_csv"  # directory holding the phoneme/feature CSV files
    accent_file_path = "./jsut_accent_onsoretu/onsoretu_re_re_fi_split.csv"  # accent information file; original note: "fi" (フィ) counts as one character
    output_directory = "./basic5000_features_with_accent"  # output directory

    # Load the accent information (one list of integers per line of the file)
    accent_information = load_accent_info(accent_file_path)

    # Attach the accent values to the phoneme/feature tables and save them
    combine_features_with_accent(features_directory, accent_information, output_directory)

Saved combined features with accent to ./basic5000_features_with_accent/alignment_0001.csv
Saved combined features with accent to ./basic5000_features_with_accent/alignment_0002.csv
Saved combined features with accent to ./basic5000_features_with_accent/alignment_0003.csv
Saved combined features with accent to ./basic5000_features_with_accent/alignment_0004.csv
Saved combined features with accent to ./basic5000_features_with_accent/alignment_0005.csv
Saved combined features with accent to ./basic5000_features_with_accent/alignment_0006.csv
Saved combined features with accent to ./basic5000_features_with_accent/alignment_0007.csv
Saved combined features with accent to ./basic5000_features_with_accent/alignment_0008.csv
Saved combined features with accent to ./basic5000_features_with_accent/alignment_0009.csv
Saved combined features with accent to ./basic5000_features_with_accent/alignment_0010.csv
Saved combined features with accent to ./basic5000_features_with_accent/alignment_0011.csv