In [None]:
# Megu
# Goal: look for relationships between which body parts participants edit and their BDD scores


# setup
import pandas as pd
from pathlib import Path
from collections import Counter
from functools import reduce

# Slider objects whose edits are counted: one entry per body part and axis.
# The head is the only part without an X slider in this list.
object_list = [
    f"{part}SliderBackground{axis}"
    for part in ("Head", "Bust", "Arms", "Waist", "Hips", "Legs")
    for axis in ("X", "Y", "Z", "XYZ")
    if (part, axis) != ("Head", "X")
]

# Per-participant logs live in the folder BP_AllL_S as P<id>_L_S files
# (P1_L_S ... P43_L_S); only the participant ids listed here are analysed.
filenames = ['1', '4', '5', '6', '7', '11', '13', '14', '17', '18', '21', '22', '23', '24', '31', '32', '33', '34', '35', '36', '37', '39', '40', '42', '43']
root_folder = "BP_AllL_S"

# One "Count_<id>" column label per participant, in the same order as filenames.
cols = [f"Count_{participant}" for participant in filenames]
print(cols)

# Load the participant id and the seven pre-study BDD items from the raw export.
BDD_ITEMS = [f"Pre_BDD_{i}" for i in range(1, 8)]
qualtrics = pd.read_csv("[BP-Winter25]-RawData.csv", header=0, usecols=["Participant ID", *BDD_ITEMS])
# Index rows 0 and 1 are removed before any numeric work
# (presumably the extra descriptive header rows of the export — TODO confirm).
qualtrics = qualtrics.drop([0, 1])

# Normalise the ids to bare numbers so they can be matched against `filenames`:
# remove the "P" prefix and any DISCARD tag, then strip the whitespace the tag
# leaves behind (e.g. "P12 (DISCARD)" -> "12", not "12 ").
# NOTE(review): rows tagged DISCARD are kept, only the tag is removed — confirm
# whether those participants should be excluded instead.
p_col = []  # not used in the cells visible here; kept in case a later cell reads it
qualtrics['Participant ID'] = (
    qualtrics['Participant ID']
    .str.replace('P', '', regex=False)
    .str.replace('(DISCARD)', '', regex=False)
    .str.replace('DISCARD', '', regex=False)
    .str.strip()
)
# Drop every row with a missing id or a missing BDD item.
qualtrics = qualtrics.dropna()

# Create the composite variables: the row-wise sum of the seven BDD items.
# The items are converted column-by-column (pd.to_numeric raises on values it
# cannot parse) and summed in one vectorised step.
qualtrics["Pre_BDD_Composite"] = qualtrics[BDD_ITEMS].apply(pd.to_numeric).sum(axis=1)
bdd_col = qualtrics["Pre_BDD_Composite"].tolist()
print("bdd_col: ", bdd_col)

# Transposed view: one column per participant, rows = id and composite score.
bp_simplified = qualtrics[['Participant ID', 'Pre_BDD_Composite']].T
bp_simplified


['Count_1', 'Count_4', 'Count_5', 'Count_6', 'Count_7', 'Count_11', 'Count_13', 'Count_14', 'Count_17', 'Count_18', 'Count_21', 'Count_22', 'Count_23', 'Count_24', 'Count_31', 'Count_32', 'Count_33', 'Count_34', 'Count_35', 'Count_36', 'Count_37', 'Count_39', 'Count_40', 'Count_42', 'Count_43']
bdd_col:  [15, 11, 18, 19, 15, 13, 16, 10, 11, 14, 16, 15, 17, 13, 13, 8, 10, 17, 8, 13, 13, 16, 25, 10, 22, 23, 9, 21, 16, 13, 15, 13, 14, 13, 12, 13, 13, 12]


Unnamed: 0,2,3,4,5,6,7,8,9,10,11,...,30,31,32,33,35,36,37,38,39,40
Participant ID,1,4,5,7,8,9,11,5,13,14,...,34,35,36,37,38,39,40,41,42,43
Pre_BDD_Composite,15,11,18,19,15,13,16,10,11,14,...,16,13,15,13,14,13,12,13,13,12


In [2]:
# make the count dataframe

def make_countdf():
    root_dir = Path(root_folder)
    total_dfx = pd.DataFrame()
    dfs = []
    
    for x in filenames:
        # read the csv for all files
        file_path = root_dir / f"P{x}_L_S.csv"
        px = pd.read_csv(file_path, header = 0, delimiter=",", usecols = [2,6])
                
        # create a frequency count
        countsx = px['Slider Name'].value_counts()
        selected_countsx = {object:countsx.get(object, 0) for object in object_list}
        
        # create a dataframe with rows = slider parts, col = individual count
        total_dfx = pd.DataFrame(list(selected_countsx.items()), columns = ['Slider Name', "Counts"])
        total_dfx.rename(columns={"Counts":f"Count_{x}"}, inplace=True)
        dfs.append(total_dfx)
                
        # merge all of the individual columns into one, so row = slider parts, col = all counts
        merged_df = reduce(lambda left, right: pd.merge(left, right, on='Slider Name', how='outer'), dfs)

    # return all
    return merged_df    
    
# Build the slider-count table once and keep it as the master dataframe;
# the bare last expression displays its first five rows.
master_df = make_countdf()        
master_df.head()

Unnamed: 0,Slider Name,Count_1,Count_4,Count_5,Count_6,Count_7,Count_11,Count_13,Count_14,Count_17,...,Count_32,Count_33,Count_34,Count_35,Count_36,Count_37,Count_39,Count_40,Count_42,Count_43
0,ArmsSliderBackgroundX,0,224,0,0,0,0,172,0,26,...,42,15,72,100,344,57,157,0,16,56
1,ArmsSliderBackgroundXYZ,0,146,0,0,70,0,311,59,198,...,114,0,239,52,418,0,163,0,0,133
2,ArmsSliderBackgroundY,0,111,191,0,0,0,149,0,19,...,34,28,33,82,317,124,222,0,136,11
3,ArmsSliderBackgroundZ,0,198,1778,0,0,159,94,0,22,...,9,26,102,272,121,131,170,56,212,149
4,BustSliderBackgroundX,0,0,0,0,57,0,43,0,10,...,135,25,0,0,168,0,71,9,0,85


In [3]:
# combine the body part categories 

# Body parts used to group the individual slider objects.
body_categories = ['Head', 'Bust', 'Arms', 'Waist', 'Hips', 'Legs']

def get_categ(slider_name):
    """Return the body-part category a slider name belongs to.

    The categories in ``body_categories`` are checked in order and the first
    one that occurs as a substring of ``slider_name`` is returned.

    Parameters
    ----------
    slider_name : str
        Name of a slider object, e.g. "HeadSliderBackgroundY".

    Returns
    -------
    str
        The matching category, or "Other" when no category matches.
    """
    for categ in body_categories:
        if categ in slider_name:
            return categ
    return "Other"

# Label each slider row with its body part, then total every participant's
# count columns within each body-part group.
master_df["Group"] = master_df["Slider Name"].map(get_categ)
grouped_df = master_df.groupby("Group")[cols].agg("sum")
grouped_df.head()
    

Unnamed: 0_level_0,Count_1,Count_4,Count_5,Count_6,Count_7,Count_11,Count_13,Count_14,Count_17,Count_18,...,Count_32,Count_33,Count_34,Count_35,Count_36,Count_37,Count_39,Count_40,Count_42,Count_43
Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Arms,0,679,1969,0,70,159,726,59,265,184,...,199,69,446,506,1200,312,712,56,364,349
Bust,0,128,0,0,168,0,190,148,82,145,...,403,43,0,58,407,0,71,34,141,370
Head,0,58,4,0,105,0,267,0,124,0,...,207,70,103,120,203,83,102,0,0,0
Hips,0,260,90,482,340,0,271,111,41,41,...,645,228,35,198,151,0,144,13,300,0
Legs,0,278,656,385,205,0,387,156,158,113,...,501,204,787,515,343,241,482,42,101,127


In [None]:
# Goal: add a BDD value to the bottom of this table
# (presumably grouped_df — not implemented yet, see the draft below).

# Column labels of grouped_df; the bare name displays them as the cell output.
count_col = grouped_df.columns
count_col

# NOT UPDATED CODE BEYOND THIS POINT
# TODO: the draft below is unfinished and is left commented out on purpose;
# its last assignment has no right-hand side, so uncommenting it as-is
# is a syntax error.

# bdd_count_df = bp_simplified
# bdd_count_df

# for col in grouped_df:
#     col['Participant ID'] = 


SyntaxError: invalid syntax (2440982272.py, line 12)