In [1]:
import pandas as pd
import os
import glob
import re

In [2]:
# Collider names are too detailed for analysis, so this table maps regular-expression
# patterns over collider names to the coarse categories of interest.
# Insertion order is preserved, so a first-match lookup sees agents first, then generic
# buildings, then global landmarks, then the two task-building groups.
# Patterns containing `\d` are raw strings so the backslash reaches the regex engine
# verbatim instead of being parsed as an (invalid) string escape sequence.

# IDs of the task buildings in each group. Each listed ID appears twice in the table:
# as `TaskBuilding_<id>` and as `Graffity_<id>` with the ID zero-padded to two digits.
_public_task_ids = [2, 3, 5, 8, 9, 11, 13, 14, 20, 21, 23, 27, 29, 32, 34, 38, 41, 42,
                    44, 45, 47, 50, 51, 52]
_residential_task_ids = [1, 4, 6, 7, 12, 15, 17, 18, 19, 22, 24, 25, 26, 28, 30, 31, 33, 35,
                         36, 37, 39, 40, 43, 48, 54, 55]


def _task_building_patterns(ids, extra=()):
    """Build the pattern list for one task-building group.

    Returns the exact-match `^TaskBuilding_<id>$` patterns for `ids`, followed by the
    patterns in `extra` (used verbatim), followed by the exact-match
    `^Graffity_<id>$` patterns with each ID zero-padded to two digits.
    """
    buildings = [f'^TaskBuilding_{task_id}$' for task_id in ids]
    graffiti = [f'^Graffity_{task_id:02d}$' for task_id in ids]
    return buildings + list(extra) + graffiti


patterns = {r'\d{2}_Sa': 'Passive_Agent', r'\d{2}_Cma': 'Active_Agent', r'Building_\d+': 'Building'}
patterns.update(dict.fromkeys(
    ['Castle-TaskBuilding_56', 'Crane_59', 'HighSilo-TaskBuilding_49',
     'Windmill-TaskBuilding_10_1', 'Church-TaskBuilding_16'],
    'Global_Landmark'))
patterns.update(dict.fromkeys(
    _task_building_patterns(_public_task_ids, extra=['BasketballCourt_58', 'Construction_57']),
    'TaskBuilding_Public'))
patterns.update(dict.fromkeys(
    _task_building_patterns(_residential_task_ids),
    'TaskBuilding_Residential'))

# Category assigned to any collider name that matches none of the patterns
default_val = 'Background'

In [3]:
# Directory holding the individual entropy result CSVs, and the path of the combined log
output_entropy_dir = "/Volumes/TwoTeras/1_Experiment_2/Entropy_Results/Window/entropy_results/Chao_Shen/"
entropy_log_path = "/Volumes/TwoTeras/1_Experiment_2/Entropy_Results/entropy_log_Window_reduced.csv"

# Agent IDs strictly below this value are labelled 'Active'; all others 'Passive'
ACTIVE_AGENT_ID_LIMIT = 28

# glob returns matches in arbitrary, filesystem-dependent order; sort them so the row
# order of the combined log is reproducible across runs and machines.
entropy_files = sorted(glob.glob(os.path.join(output_entropy_dir, "*.csv")))

# One DataFrame per successfully read file
dfs = []

for file in entropy_files:
    try:
        # The file name is split on "_": field 2 is the participant ID and field 3
        # (up to the first ".") is the session, e.g.
        # "Chao_Shen_1031_1_entropy_results.csv" -> participant "1031", session "1".
        filename = os.path.basename(file)
        name_fields = filename.split("_")
        participant_id, session = name_fields[2], name_fields[3].split(".")[0]

        df = pd.read_csv(file)

        # Tag every row with the participant and session parsed from the file name
        # (both are kept as strings)
        df['Participant_ID'] = participant_id
        df['Session'] = session

        dfs.append(df)
    except Exception as e:
        # Best effort: report the offending file and carry on with the remaining ones
        print(f"Error reading file {file}: {e}")

# Concatenate all DataFrames into one
if dfs:
    general_entropy_df = pd.concat(dfs, ignore_index=True)

    # Agent_ID is the first two characters of the collider name (kept as a string)
    general_entropy_df['Agent_ID'] = general_entropy_df['Collider_Name'].str[:2]
    # Classification of Collider_Name through `patterns` is currently disabled:
    #general_entropy_df['Category'] = general_entropy_df['Collider_Name'].apply( lambda x: next((val for key, val in patterns.items() if re.match(key, x)), default_val))

    # Agent_Type from the numeric agent ID. astype(int) raises if a prefix is not
    # numeric, so an unexpected collider name fails loudly instead of being mislabelled.
    agent_numbers = general_entropy_df['Agent_ID'].astype(int)
    general_entropy_df['Agent_Type'] = agent_numbers.lt(ACTIVE_AGENT_ID_LIMIT).map({True: 'Active', False: 'Passive'})
    general_entropy_df['Experiment'] = 2

    # Save the concatenated DataFrame
    general_entropy_df.to_csv(entropy_log_path, index=False)
    print(f"General entropy log saved to {entropy_log_path}")
else:
    print("No entropy result files found or unable to concatenate.")



General entropy log saved to /Volumes/TwoTeras/1_Experiment_2/Entropy_Results/entropy_log_Window_reduced.csv


In [4]:
# Preview the first five rows of the combined entropy log
general_entropy_df.head(n=5)

Unnamed: 0,Trial_ID,Collider_Name,Occurrence_Order,Gaze_Sequence_Length,Chao_Shen_Overall_Transition_Entropy,Window_Start,Window_End,Chao_Shen_Transition_Entropy_Agent,Chao_Shen_Transition_Entropy_Background,Chao_Shen_Transition_Entropy_Task_Building,Chao_Shen_Transition_Entropy_Building,Chao_Shen_Transition_Entropy_Global_Landmark,Participant_ID,Session,Agent_ID,Agent_Type,Experiment
0,01_Cma_Trial_22,01_Cma,22,101,0.656785,2022-11-14 15:05:49.643743488,2022-11-14 15:06:19.643743488,0.413817,0.796446,0.542213,0.638921,,1031,1,1,Active,2
1,03_Cma_Trial_16,03_Cma,16,105,0.42018,2022-11-14 15:05:49.643743488,2022-11-14 15:06:19.643743488,0.593945,0.383358,,0.578749,,1031,1,3,Active,2
2,09_Cma_Trial_14,09_Cma,14,120,0.338976,2022-11-14 15:05:49.643743488,2022-11-14 15:06:19.643743488,0.440854,0.250277,0.774386,0.44822,,1031,1,9,Active,2
3,09_Cma_Trial_24,09_Cma,24,103,0.388994,2022-11-14 15:05:49.643743488,2022-11-14 15:06:19.643743488,0.0,0.307365,0.829424,0.581089,0.662497,1031,1,9,Active,2
4,09_Cma_Trial_26,09_Cma,26,104,0.420628,2022-11-14 15:05:49.643743488,2022-11-14 15:06:19.643743488,0.662497,0.367091,0.781214,0.605045,0.0,1031,1,9,Active,2


In [5]:
# Check the file-name parsing on one example path: after splitting the base name
# on "_", field 2 is the participant ID and field 3 (up to the first ".") is the session.
string = "/Volumes/TwoTeras/1_Experiment_2/Entropy_Results/Window/entropy_results/Chao_Shen_1031_1_entropy_results.csv"
filename = os.path.basename(string)
name_fields = filename.split("_")
participant_id = name_fields[2]
session = name_fields[3].split(".")[0]
session

'1'

In [6]:
# Summary statistics of the gaze sequence length across all rows
general_entropy_df['Gaze_Sequence_Length'].describe()

count    5613.000000
mean       94.851416
std        17.434421
min         1.000000
25%        87.000000
50%        97.000000
75%       106.000000
max       143.000000
Name: Gaze_Sequence_Length, dtype: float64

In [7]:
# Agent_ID is the two-character prefix of each collider name
general_entropy_df['Agent_ID'] = general_entropy_df['Collider_Name'].str.slice(stop=2)

In [8]:
# Preview the last five rows of the combined entropy log
general_entropy_df.tail(n=5)

Unnamed: 0,Trial_ID,Collider_Name,Occurrence_Order,Gaze_Sequence_Length,Chao_Shen_Overall_Transition_Entropy,Window_Start,Window_End,Chao_Shen_Transition_Entropy_Agent,Chao_Shen_Transition_Entropy_Background,Chao_Shen_Transition_Entropy_Task_Building,Chao_Shen_Transition_Entropy_Building,Chao_Shen_Transition_Entropy_Global_Landmark,Participant_ID,Session,Agent_ID,Agent_Type,Experiment
5608,50_Sa_Trial_29,50_Sa,29,95,0.401273,2023-08-24 12:30:36.971539456,2023-08-24 12:31:06.971539456,0.0,0.378452,0.36965,0.498757,,5191,5,50,Passive,2
5609,52_Sa_Trial_22,52_Sa,22,103,0.650696,2023-08-24 12:30:36.971539456,2023-08-24 12:31:06.971539456,1.142857,0.64051,0.712898,0.631254,,5191,5,52,Passive,2
5610,53_Sa_Trial_12,53_Sa,12,107,0.552651,2023-08-24 12:30:36.971539456,2023-08-24 12:31:06.971539456,0.0,0.538624,0.760766,0.515511,,5191,5,53,Passive,2
5611,54_Sa_Trial_32,54_Sa,32,93,0.617715,2023-08-24 12:30:36.971539456,2023-08-24 12:31:06.971539456,0.906962,0.503092,0.800225,0.708914,,5191,5,54,Passive,2
5612,56_Sa_Trial_14,56_Sa,14,100,0.676142,2023-08-24 12:30:36.971539456,2023-08-24 12:31:06.971539456,0.978478,0.566974,0.841168,0.693578,,5191,5,56,Passive,2
