In [1]:
import os
import pandas as pd

# ---------- Paths ----------
# Project root. Defaults to the original local folder so existing runs behave
# exactly as before; set the POETRY_PROJECT_DIR environment variable to point
# the notebook at a different checkout/machine without editing this cell.
BASE_DIR = os.environ.get(
    "POETRY_PROJECT_DIR",
    r"C:\PG, IELTS, DOCS\research paper\poetry project",
)
# Input: per-rating feature table; output: one row per participant.
IN_FILE = os.path.join(BASE_DIR, "data_processed", "poetry_features_with_scores_v1.csv")
OUT_FILE = os.path.join(BASE_DIR, "data_processed", "participant_profiles_step1.csv")


def _build_profiles(df):
    """Aggregate a per-rating table into one profile row per participant.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``participant_id``, ``PoemType``,
        ``aesthetic_appeal``, ``order_score`` and ``surprise_score``.

    Returns
    -------
    pandas.DataFrame
        One row per ``participant_id`` with the mean/std of
        ``aesthetic_appeal``, the means of ``order_score`` and
        ``surprise_score``, the mean rating per poem type, and (when the
        relevant poem types are present) the ``pref_H_minus_C`` and
        ``pref_S_minus_C`` gap columns.

    Raises
    ------
    KeyError
        If any required column is absent from ``df``.
    """
    required = ("participant_id", "PoemType", "aesthetic_appeal", "order_score", "surprise_score")
    missing = [col for col in required if col not in df.columns]
    if missing:
        # Fail early with the full list instead of whichever pandas call trips first.
        raise KeyError(f"Input data is missing required column(s): {missing}")

    # Core participant metrics.
    # NOTE(review): in the recorded run, avg_order_preference and
    # avg_surprise_preference come out at ~1e-16 for every previewed
    # participant. That suggests order_score / surprise_score are standardized
    # and every participant rated the same poems, in which case these plain
    # means carry no information. A rating-weighted measure (e.g. the
    # per-participant correlation between aesthetic_appeal and each score) may
    # be what is intended -- confirm before using these columns downstream.
    base = (
        df.groupby("participant_id")
        .agg(
            avg_beauty_given=("aesthetic_appeal", "mean"),
            beauty_std=("aesthetic_appeal", "std"),
            avg_order_preference=("order_score", "mean"),
            avg_surprise_preference=("surprise_score", "mean"),
        )
        .reset_index()
    )

    # Average rating by poem type per participant (one column per PoemType value).
    type_pivot = (
        df.pivot_table(
            index="participant_id",
            columns="PoemType",
            values="aesthetic_appeal",
            aggfunc="mean",
        )
        .reset_index()
    )

    # Give the known poem types descriptive names; any other type keeps its raw label.
    rename_map = {
        poem_type: f"avg_rating_{poem_type}"
        for poem_type in ("H", "S", "C")
        if poem_type in type_pivot.columns
    }
    type_pivot = type_pivot.rename(columns=rename_map)

    # Left merge keeps every participant from the core metrics even if the
    # pivot has no row for them.
    profiles = base.merge(type_pivot, on="participant_id", how="left")

    # Preference gap features (nice for segmentation); only computed when both
    # poem types involved actually occur in the data.
    if {"avg_rating_H", "avg_rating_C"}.issubset(profiles.columns):
        profiles["pref_H_minus_C"] = profiles["avg_rating_H"] - profiles["avg_rating_C"]

    if {"avg_rating_S", "avg_rating_C"}.issubset(profiles.columns):
        profiles["pref_S_minus_C"] = profiles["avg_rating_S"] - profiles["avg_rating_C"]

    return profiles


def main():
    """Read IN_FILE, build participant profiles, write them to OUT_FILE and print a report.

    Raises
    ------
    KeyError
        If the input CSV lacks one of the columns the profile needs.
    """
    df = pd.read_csv(IN_FILE)
    profiles = _build_profiles(df)

    # Save (create the target folder first so a fresh checkout does not fail here).
    out_dir = os.path.dirname(OUT_FILE)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    profiles.to_csv(OUT_FILE, index=False)

    print("Done!")
    print("Saved participant profiles to:", OUT_FILE)
    print("Participants:", len(profiles))

    print("\nPreview:")
    print(profiles.head())

    print("\nSummary:")
    print(profiles.describe(include="all").T)


# Run the pipeline when this code is executed directly (as a script or as a
# notebook cell); importing it as a module does not trigger the run.
if __name__ == "__main__":
    main()

Done!
Saved participant profiles to: C:\PG, IELTS, DOCS\research paper\poetry project\data_processed\participant_profiles_step1.csv
Participants: 51

Preview:
  participant_id  avg_beauty_given  beauty_std  avg_order_preference  \
0           P101          4.523810    1.394420         -1.900596e-16   
1           P102          2.990476    1.528017         -1.924387e-16   
2           P103          4.709524    1.715788         -1.913813e-16   
3           P104          3.890476    0.519850         -1.913813e-16   
4           P105          3.828571    1.039480         -1.913813e-16   

   avg_surprise_preference  avg_rating_C  avg_rating_H  avg_rating_S  \
0            -2.643388e-19      3.185714      5.357143      5.028571   
1            -3.172066e-18      1.185714      4.042857      3.742857   
2             0.000000e+00      3.771429      5.314286      5.042857   
3            -4.758099e-18      3.842857      3.985714      3.842857   
4             4.493760e-18      3.085714      4.