In [8]:
import os
import pandas as pd

# Directory containing merged user info files.
# NOTE: the relative path is resolved against the current working directory,
# so the result depends on where the notebook/script is launched from.
merged_data_dir = os.path.abspath("../genre_user_info")

# Fail early with a clear message. `isdir` (rather than `exists`) also rejects
# a regular file at that path, which would otherwise break `os.listdir` below.
if not os.path.isdir(merged_data_dir):
    raise FileNotFoundError(f"⚠️ The directory '{merged_data_dir}' does not exist. Check the path!")

# Keep only the per-genre input files. The prefix filter also excludes the
# summary CSVs this script writes into the same directory.
# `os.listdir` returns names in arbitrary order, so sort them to make the row
# and column order of the generated files reproducible between runs.
merged_files = sorted(
    f
    for f in os.listdir(merged_data_dir)
    if f.startswith("genre_user_info_merged_") and f.endswith(".csv")
)

# genre -> {"total_users", "negative_users", "negative_percentage"}
genre_stats = {}

# All rows with negative discrimination, across every genre
negative_users_all_genres = pd.DataFrame()

# user_id -> {"user_id", "<genre>_discrimination", "<genre>_difficulty", ...}
user_discrimination_difficulty = {}

# ------------------ 1) Read and Process Each Genre ------------------
# Input files are named "genre_user_info_merged_<genre>.csv".
file_prefix = "genre_user_info_merged_"
file_suffix = ".csv"

# Per-genre frames of negative-discrimination rows. They are concatenated once
# after the loop: calling pd.concat inside the loop re-copies everything
# accumulated so far on every iteration.
negative_frames = []

for file in merged_files:
    # Slice by length instead of str.replace so a genre name that happens to
    # contain the prefix or ".csv" is not mangled.
    genre = file[len(file_prefix):-len(file_suffix)]
    file_path = os.path.join(merged_data_dir, file)

    # Read the merged CSV file
    df = pd.read_csv(file_path)

    # Remove leading/trailing spaces in column names
    df.columns = df.columns.str.strip()

    # A file without a "discrimination" column cannot be analysed; skip it,
    # but say so instead of dropping the genre silently.
    if "discrimination" not in df.columns:
        print(f"⚠️ Skipping '{file}': no 'discrimination' column found.")
        continue

    # Rows whose discrimination value is strictly negative (NaN never matches)
    negative_users = df[df["discrimination"] < 0].copy()

    total_users = len(df)
    negative_count = len(negative_users)

    # Share of rows with negative discrimination; 0 for an empty file
    negative_percentage = (negative_count / total_users) * 100 if total_users > 0 else 0

    # Store genre-wise stats
    genre_stats[genre] = {
        "total_users": total_users,
        "negative_users": negative_count,
        "negative_percentage": negative_percentage,
    }

    # Tag the rows with their genre and keep them for the combined table
    negative_users["genre"] = genre
    negative_frames.append(negative_users)

    # Record discrimination & difficulty per user, one pair of keys per genre.
    # Columns are only accessed when there is at least one negative row, so a
    # file lacking "user_id"/"difficulty" fails (KeyError) only in that case.
    if negative_count:
        for user_id, discrimination, difficulty in zip(
            negative_users["user_id"],
            negative_users["discrimination"],
            negative_users["difficulty"],
        ):
            user_record = user_discrimination_difficulty.setdefault(user_id, {"user_id": user_id})
            user_record[f"{genre}_discrimination"] = discrimination
            user_record[f"{genre}_difficulty"] = difficulty

# Build the combined table in a single concat. When no file was processed the
# previously initialised (empty) DataFrame is left as is.
if negative_frames:
    negative_users_all_genres = pd.concat(negative_frames, ignore_index=True)

# ------------------ 2) Save Genre-Wise Stats ------------------
# One row per genre: the dict keys become the index, which is then turned into
# a regular "genre" column.
stats_df = (
    pd.DataFrame.from_dict(genre_stats, orient="index")
    .reset_index()
    .rename(columns={"index": "genre"})
)

# Written next to the input files
stats_output_path = os.path.join(merged_data_dir, "genre_wise_negative_discrimination_stats.csv")
stats_df.to_csv(stats_output_path, index=False)

# ------------------ 3) Save Negative Users Data ------------------
# Every negative-discrimination row from every genre, in long format
negative_users_output_path = os.path.join(merged_data_dir, "users_with_negative_discrimination.csv")
negative_users_all_genres.to_csv(negative_users_output_path, index=False)

# ------------------ 4) Save Users' Discrimination & Difficulty Across Genres ------------------
# Wide format: one row per user, one discrimination/difficulty column pair per
# genre. The user id already lives in the "user_id" field of each record, so
# the dict-key index is discarded.
user_discrimination_difficulty_df = pd.DataFrame.from_dict(
    user_discrimination_difficulty, orient="index"
).reset_index(drop=True)

user_stats_output_path = os.path.join(merged_data_dir, "negative_users_discrimination_difficulty_across_genres.csv")
user_discrimination_difficulty_df.to_csv(user_stats_output_path, index=False)

# ------------------ 5) Display Results ------------------
# Interactive display of the three result frames is left disabled:
# import ace_tools as tools
# tools.display_dataframe_to_user(name="Genre-Wise Negative Discrimination Stats", dataframe=stats_df)
# tools.display_dataframe_to_user(name="Users with Negative Discrimination", dataframe=negative_users_all_genres)
# tools.display_dataframe_to_user(name="Negative Users' Discrimination & Difficulty Across Genres", dataframe=user_discrimination_difficulty_df)

# Summary of where each output file was written
print(f"\n✅ Process completed! Data saved in '{merged_data_dir}'")
for summary_line in (
    f"📊 Genre-wise stats saved at: {stats_output_path}",
    f"📄 Users with negative discrimination saved at: {negative_users_output_path}",
    f"📊 Users' discrimination & difficulty across genres saved at: {user_stats_output_path}",
):
    print(summary_line)



✅ Process completed! Data saved in 'D:\master_reserch\genre_user_info'
📊 Genre-wise stats saved at: D:\master_reserch\genre_user_info\genre_wise_negative_discrimination_stats.csv
📄 Users with negative discrimination saved at: D:\master_reserch\genre_user_info\users_with_negative_discrimination.csv
📊 Users' discrimination & difficulty across genres saved at: D:\master_reserch\genre_user_info\negative_users_discrimination_difficulty_across_genres.csv
