In [17]:
import polars as pl
import seaborn as sns

# Configure seaborn's global theme: white-grid background, gray palette,
# and fonts scaled by 1.4.
sns.set_theme(
    style="whitegrid",
    palette="gray",
    font_scale=1.4,
)

In [18]:
# Read the sample metadata export and rename its generic "id" column to
# "sample_meta_data_id".
df_sample = pl.read_csv("./result/SampleMetaData_rows.csv").rename(
    {"id": "sample_meta_data_id"}
)

# Displayed result: number of rows per (model_id, model_name, kind),
# ordered by model_id.
(
    df_sample.group_by(["model_id", "model_name", "kind"])
    .len()
    .sort(["model_id"])
)

model_id,model_name,kind,len
i64,str,str,u32
-1,"""dummy""","""dummy""",3
0,"""20240621_134621""","""abs""",212
1,"""20240621_134621""","""gt""",212
2,"""20240621_134621""","""pred""",212
3,"""20240621_155144""","""pred""",212
4,"""20240621_202419""","""pred""",212
5,"""20240622_003027""","""pred""",212
6,"""20240622_103111""","""pred""",212
7,"""20240622_161416""","""pred""",212
8,"""20240623_001016""","""pred""",212


In [19]:
# Load the three exported tables. df_sample is re-read from disk (instead of
# reusing the frame from the previous cell) so that re-running this cell never
# applies the model_id relabelling below to an already-relabelled column.
df_answer = pl.read_csv("./result/Answers_rows.csv")
df_sample = pl.read_csv("./result/SampleMetaData_rows.csv").rename(
    {"id": "sample_meta_data_id"}
)
df_respondent = pl.read_csv("./result/Respondents_rows.csv").rename(
    {"id": "respondent_id"}
)

# Map the Japanese answer values to short English codes; values that are not
# listed are kept as they are.
df_respondent = df_respondent.with_columns(
    pl.col("sex").replace({"男性": "M", "女性": "F", "無回答": "N"}),
    pl.col("audio_device").replace({"イヤホン": "Earphone", "ヘッドホン": "Headphone"}),
)

# Numeric model_id (as text) -> condition label.
# Note that ids 7 and 8 intentionally map to "(3-c)" and "(3-b)" in that order.
MODEL_ID_LABELS = {
    "0": "(4)",  # analysis-synthesis
    "1": "(5)",  # original speech
    "2": "(1)",  # baseline
    "3": "(2-a)",  # without HuBERT: mel + continuous
    "4": "(2-b)",  # without HuBERT: mel + discrete
    "5": "(2-c)",  # without HuBERT: mel + continuous + discrete
    "6": "(3-a)",  # with HuBERT: mel + continuous
    "7": "(3-c)",  # with HuBERT: mel + continuous + discrete
    "8": "(3-b)",  # with HuBERT: mel + discrete
}

# Cast to String *before* replacing. `Expr.replace` keeps the column dtype on
# polars >= 1.0, so mapping an integer column straight to string labels fails
# there. Casting first yields the same String column on old and new versions;
# ids without a label (e.g. -1) simply become their text form ("-1").
df_sample = df_sample.with_columns(
    pl.col("model_id").cast(pl.String).replace(MODEL_ID_LABELS)
)

# Attach sample metadata and respondent attributes to every answer row.
df_answer = df_answer.join(
    other=df_sample, on=["sample_meta_data_id"], how="left", coalesce=True
)
df_answer = df_answer.join(
    other=df_respondent,
    on="respondent_id",
    how="left",
    coalesce=True,
)

# Keep only rows that are not flagged as dummy or invalid and that belong to
# the "eval_1" sample page.
df_answer = df_answer.filter(
    pl.col("is_dummy").not_()
    & pl.col("is_invalid").not_()
    & (pl.col("sample_page_name") == "eval_1")
)

In [20]:
# Columns that identify one evaluated utterance.
SAMPLE_KEYS = ["speaker_name", "sample_name"]
# Condition labels excluded when looking for the best-scoring model.
REFERENCE_MODEL_IDS = ["(4)", "(5)"]
# Condition that must be the best one for a sample to be kept.
TARGET_MODEL_ID = "(2-b)"


def _best_model_per_sample(
    df_scores: pl.DataFrame, score_col: str, out_col: str
) -> pl.DataFrame:
    """Return, per (speaker_name, sample_name), the model_id with the highest score.

    Args:
        df_scores: Frame with the SAMPLE_KEYS columns, "model_id" and `score_col`.
        score_col: Name of the column to maximise.
        out_col: Name given to the resulting model_id column.

    Returns:
        One row per sample with the SAMPLE_KEYS columns and `out_col`.

    Rows whose model_id is in REFERENCE_MODEL_IDS are ignored. Within each
    sample the rows are sorted ascending by score and the last one is taken.
    "model_id" is included as the final sort key so that ties in the score are
    resolved deterministically (the largest model_id label wins) instead of
    depending on the unspecified order of an unstable sort.
    """
    return (
        df_scores.filter(pl.col("model_id").is_in(REFERENCE_MODEL_IDS).not_())
        .sort([*SAMPLE_KEYS, score_col, "model_id"], descending=False)
        .group_by(SAMPLE_KEYS)
        .last()
        .select([*SAMPLE_KEYS, "model_id"])
        .rename({"model_id": out_col})
    )


# Mean of the naturalness / intelligibility answers per sample and model.
df_agg = (
    df_answer.group_by([*SAMPLE_KEYS, "model_id"])
    .agg(
        pl.col("naturalness_id").mean().alias("nat_mean"),
        pl.col("intelligibility_id").mean().alias("int_mean"),
    )
    .sort(by=[*SAMPLE_KEYS, "model_id"])
)

# Best model per sample for each of the two criteria, side by side.
df_model_id_int_max = _best_model_per_sample(df_agg, "int_mean", "model_id_int_max")
df_model_id_nat_max = _best_model_per_sample(df_agg, "nat_mean", "model_id_nat_max")
df_model_id_max = df_model_id_int_max.join(
    df_model_id_nat_max, on=SAMPLE_KEYS, how="inner"
)

df_agg = (
    df_agg.join(
        other=df_model_id_max,
        on=SAMPLE_KEYS,
        how="left",
        coalesce=True,
    )
    # Keep only samples where the target condition is best on both criteria.
    .filter(
        (pl.col("model_id_int_max") == TARGET_MODEL_ID)
        & (pl.col("model_id_nat_max") == TARGET_MODEL_ID)
    )
    # Add model_name / kind. This relies on df_sample["model_id"] already
    # holding the condition labels assigned in the previous cell.
    .join(
        df_sample.select(["model_id", "model_name", "kind"]).unique(),
        on=["model_id"],
        how="left",
        coalesce=True,
    )
    .sort([*SAMPLE_KEYS, "model_id"])
)
df_agg.write_csv("./df_agg.csv")
df_agg

speaker_name,sample_name,model_id,nat_mean,int_mean,model_id_int_max,model_id_nat_max,model_name,kind
str,str,str,f64,f64,str,str,str,str
"""F01_kablab""","""ATR503_j36""","""(1)""",2.333333,2.0,"""(2-b)""","""(2-b)""","""20240621_134621""","""pred"""
"""F01_kablab""","""ATR503_j36""","""(2-a)""",1.333333,3.0,"""(2-b)""","""(2-b)""","""20240621_155144""","""pred"""
"""F01_kablab""","""ATR503_j36""","""(2-b)""",3.333333,3.666667,"""(2-b)""","""(2-b)""","""20240621_202419""","""pred"""
"""F01_kablab""","""ATR503_j36""","""(2-c)""",2.333333,2.0,"""(2-b)""","""(2-b)""","""20240622_003027""","""pred"""
"""F01_kablab""","""ATR503_j36""","""(3-a)""",1.666667,2.333333,"""(2-b)""","""(2-b)""","""20240622_103111""","""pred"""
…,…,…,…,…,…,…,…,…
"""M04_kablab""","""ATR503_j15""","""(3-a)""",1.666667,1.666667,"""(2-b)""","""(2-b)""","""20240622_103111""","""pred"""
"""M04_kablab""","""ATR503_j15""","""(3-b)""",1.666667,2.333333,"""(2-b)""","""(2-b)""","""20240623_001016""","""pred"""
"""M04_kablab""","""ATR503_j15""","""(3-c)""",2.333333,2.333333,"""(2-b)""","""(2-b)""","""20240622_161416""","""pred"""
"""M04_kablab""","""ATR503_j15""","""(4)""",3.333333,4.0,"""(2-b)""","""(2-b)""","""20240621_134621""","""abs"""
