In [1]:
import pandas as pd
import os
import glob
import re 

In [2]:
# The raw collider names are too detailed for analysis, so this table maps regular
# expressions over collider names onto the categories of interest.
# Regex keys are written as raw strings so that `\d` is not treated as an (invalid)
# string escape sequence. Key insertion order is kept stable for any first-match
# lookup that iterates over `patterns.items()`.

patterns = {r'\d{2}_Sa': 'Passive_Agent', r'\d{2}_Cma': 'Active_Agent', r'Building_\d+': 'Building'}
patterns.update(dict.fromkeys(['Castle-TaskBuilding_56', 'Crane_59', 'HighSilo-TaskBuilding_49', 'Windmill-TaskBuilding_10_1', 'Church-TaskBuilding_16'], 'Global_Landmark'))

# Numeric IDs shared by each task building and its graffiti collider.
# TaskBuilding names use the plain number; Graffity names zero-pad it to two digits.
PUBLIC_TASK_BUILDING_IDS = [2, 3, 5, 8, 9, 11, 13, 14, 20, 21, 23, 27,
                            29, 32, 34, 38, 41, 42, 44, 45, 47, 50, 51, 52]
RESIDENTIAL_TASK_BUILDING_IDS = [1, 4, 6, 7, 12, 15, 17, 18, 19, 22, 24, 25, 26,
                                 28, 30, 31, 33, 35, 36, 37, 39, 40, 43, 48, 54, 55]

# Public: exact-match task buildings, two extra named colliders, then the graffiti.
patterns.update(dict.fromkeys(
    [f'^TaskBuilding_{building_id}$' for building_id in PUBLIC_TASK_BUILDING_IDS]
    + ['BasketballCourt_58', 'Construction_57']
    + [f'^Graffity_{building_id:02d}$' for building_id in PUBLIC_TASK_BUILDING_IDS],
    'TaskBuilding_Public'))

# Residential: exact-match task buildings followed by their graffiti.
patterns.update(dict.fromkeys(
    [f'^TaskBuilding_{building_id}$' for building_id in RESIDENTIAL_TASK_BUILDING_IDS]
    + [f'^Graffity_{building_id:02d}$' for building_id in RESIDENTIAL_TASK_BUILDING_IDS],
    'TaskBuilding_Residential'))

# Category assigned to any collider name that matches none of the patterns above.
default_val = 'Background'

In [3]:
# Define the directory where individual entropy results are saved, and the path of
# the combined log written at the end of this cell.
output_entropy_dir = "/Volumes/TwoTeras/0_Experiment_1/Entropy_Results/Window/entropy_results/"
entropy_log_path = "/Volumes/TwoTeras/0_Experiment_1/Entropy_Results/entropy_log_Window_reduced.csv"

# Agent IDs strictly below this value are labelled 'Active'; all others 'Passive'.
FIRST_PASSIVE_AGENT_ID = 28

# Get list of all entropy result CSV files.
# glob gives no ordering guarantee (it depends on the file system), so the paths are
# sorted to keep the row order of the combined log reproducible between runs.
entropy_files = sorted(glob.glob(os.path.join(output_entropy_dir, "*.csv")))

# Initialize a list to collect DataFrames
dfs = []

# Loop through each file and append its DataFrame with additional columns
for file in entropy_files:
    try:
        # Extract Participant_ID and Session from the file name
        # (assuming format participantID_Session.csv)
        filename = os.path.basename(file)
        name_parts = filename.split("_")
        participant_id, session = name_parts[0], name_parts[1].split(".")[0]

        # Read the file
        df = pd.read_csv(file)

        # Add Participant_ID and Session columns (both kept as strings)
        df['Participant_ID'] = participant_id
        df['Session'] = session

        # Append to the list of DataFrames
        dfs.append(df)
    except Exception as e:
        # Best effort: report the offending file and carry on with the rest.
        print(f"Error reading file {file}: {e}")

# Concatenate all DataFrames into one
if dfs:
    general_entropy_df = pd.concat(dfs, ignore_index=True)

    # Create the new column Agent_ID from the first two characters of the collider name
    general_entropy_df['Agent_ID'] = general_entropy_df['Collider_Name'].str[:2]

    # Category classification via `patterns` is intentionally disabled for this log:
    #general_entropy_df['Category'] = general_entropy_df['Collider_Name'].apply( lambda x: next((val for key, val in patterns.items() if re.match(key, x)), default_val))

    # Create the new column Agent_Type from the numeric value of Agent_ID.
    # astype(int) raises if any Agent_ID is not a two-digit number.
    agent_numbers = general_entropy_df['Agent_ID'].astype(int)
    general_entropy_df['Agent_Type'] = agent_numbers.apply(
        lambda agent_number: 'Active' if agent_number < FIRST_PASSIVE_AGENT_ID else 'Passive')
    general_entropy_df['Experiment'] = 1

    # Save the concatenated DataFrame
    general_entropy_df.to_csv(entropy_log_path, index=False)
    print(f"General entropy log saved to {entropy_log_path}")
else:
    # Note: general_entropy_df is not defined on this path.
    print("No entropy result files found or unable to concatenate.")


General entropy log saved to /Volumes/TwoTeras/0_Experiment_1/Entropy_Results/entropy_log_Window_reduced.csv


In [4]:
# Preview the last five rows of the combined entropy log.
general_entropy_df.tail(n=5)

Unnamed: 0,Trial_ID,Collider_Name,Occurrence_Order,Gaze_Sequence_Length,Overall_Transition_Entropy,Transition_Entropy_Background,Stationary_Entropy_Background,Transition_Entropy_Task_Building,Stationary_Entropy_Task_Building,Transition_Entropy_Agent_Body,...,Stationary_Entropy_Agent_Face,Transition_Entropy_Building,Stationary_Entropy_Building,Transition_Entropy_Global_Landmark,Stationary_Entropy_Global_Landmark,Participant_ID,Session,Agent_ID,Agent_Type,Experiment
4843,40_Sa_Trial_3,40_Sa,3,150,0.562394,0.499885,0.257607,0.637925,0.255293,0.0,...,,0.607563,0.253149,,,9586,5,40,Passive,1
4844,46_Sa_Trial_16,46_Sa,16,151,0.444617,0.384948,0.164654,0.75,0.123695,0.0,...,,0.583826,0.232115,,,9586,5,46,Passive,1
4845,51_Sa_Trial_1,51_Sa,1,194,0.425357,0.397248,0.195825,0.541615,0.179259,0.580279,...,0.014425,0.395843,0.199684,0.0,0.015225,9586,5,51,Passive,1
4846,52_Sa_Trial_18,52_Sa,18,177,0.530779,0.476561,0.228094,0.693172,0.175534,0.623971,...,0.018253,0.522522,0.226794,,,9586,5,52,Passive,1
4847,55_Sa_Trial_14,55_Sa,14,120,0.393099,0.354238,0.163678,,,0.0,...,,0.596645,0.284069,,,9586,5,55,Passive,1


In [5]:
# Derive Agent_ID as the first two characters of each collider name.
collider_names = general_entropy_df['Collider_Name']
general_entropy_df['Agent_ID'] = collider_names.str.slice(stop=2)

In [6]:
# Preview the first five rows of the combined entropy log.
general_entropy_df.head(n=5)

Unnamed: 0,Trial_ID,Collider_Name,Occurrence_Order,Gaze_Sequence_Length,Overall_Transition_Entropy,Transition_Entropy_Background,Stationary_Entropy_Background,Transition_Entropy_Task_Building,Stationary_Entropy_Task_Building,Transition_Entropy_Agent_Body,...,Stationary_Entropy_Agent_Face,Transition_Entropy_Building,Stationary_Entropy_Building,Transition_Entropy_Global_Landmark,Stationary_Entropy_Global_Landmark,Participant_ID,Session,Agent_ID,Agent_Type,Experiment
0,01_Cma_Trial_29,01_Cma,29,67,0.555086,0.493764,0.220533,0.457068,0.213448,0.853095,...,0.135445,0.430677,0.065833,,,479,2,1,Active,1
1,01_Cma_Trial_30,01_Cma,30,162,0.510063,0.402488,0.191296,0.703778,0.152449,0.70946,...,0.10613,0.621311,0.175451,0.0,0.016861,479,2,1,Active,1
2,01_Cma_Trial_4,01_Cma,4,209,0.412627,0.382464,0.165358,0.294292,0.163017,0.637429,...,0.014322,0.699894,0.101369,0.384543,0.086765,479,2,1,Active,1
3,02_Cma_Trial_15,02_Cma,15,216,0.410557,0.314163,0.165863,0.680774,0.144406,0.355245,...,0.073459,0.525876,0.173873,0.0,0.014112,479,2,2,Active,1
4,03_Cma_Trial_6,03_Cma,6,232,0.465332,0.454917,0.201529,0.409081,0.200927,0.470101,...,0.19939,0.424315,0.086615,,,479,2,3,Active,1
