In [1]:
import pandas as pd
import numpy as np
from math import log2
from collections import Counter
import glob
import os

In [2]:
# Define paths
path = "/Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/05_Debbies_gaze"
save_matrix_path = "/Volumes/TwoTeras/1_Experiment_2/Entropy_Results/Matrices/Reduced/"
entropy_log_path = "/Volumes/TwoTeras/1_Experiment_2/Entropy_Results/entropy_log_reduced.csv"

# Ensure directories exist
os.makedirs(save_matrix_path, exist_ok=True)

# Load all CSV files in the path.
# glob returns matches in arbitrary (filesystem-dependent) order, so the list is
# sorted to make the processing order and the row order of the results log
# reproducible between runs.
files = sorted(glob.glob(os.path.join(path, "*.csv")))

# Mapping dictionary: collapses the fine-grained collider categories into
# coarser classes before transitions are counted. Categories that are not
# listed here are left unchanged by `Series.replace`.
mapping = {
    "TaskBuilding_Public": "Task_Building",
    "TaskBuilding_Residential": "Task_Building",
    "Active_Agent": "Agent_Body",
    "Passive_Agent": "Agent_Body",
    "Active_Agent_Face": "Agent_Face",
    "Passive_Agent_Face": "Agent_Face"
}

# Accumulators filled by the processing loop:
# one result dict per successfully processed file ...
entropy_results = []
# ... and one {'filename', 'error'} dict per file that was skipped or failed
failed_files = []

# Entropy of a transition matrix: per-row entropy H(X|Y=y) plus the weighted total
def calculate_transition_entropy(matrix, stationary_distribution):
    """Compute row-wise and overall entropy of a transition matrix.

    Args:
        matrix: DataFrame whose rows hold the outgoing transition
            probabilities of one category each (row label = category).
        stationary_distribution: mapping indexable by row label that gives the
            weight of each row in the overall entropy.

    Returns:
        Tuple ``(total_entropy, category_entropies)`` where
        ``category_entropies`` maps each row label to its entropy in bits and
        ``total_entropy`` is the sum of those entropies weighted by
        ``stationary_distribution``.

    Raises:
        KeyError: if a row label is missing from a dict-like
            ``stationary_distribution``.
    """
    weighted_sum = 0
    per_category = {}
    for label, probabilities in matrix.iterrows():
        bits = 0
        for prob in probabilities:
            # Zero (and NaN) entries contribute nothing: 0 * log2(0) is taken as 0
            if not prob > 0:
                continue
            bits = bits - prob * log2(prob)
        per_category[label] = bits
        # Weight each row entropy by its stationary probability
        weighted_sum = weighted_sum + bits * stationary_distribution[label]
    return weighted_sum, per_category

# Columns every participant file must contain for this analysis
_REQUIRED_COLUMNS = ('timeStampDataPointEnd', 'Collider_CategoricalN', 'events')

# Destination of the list of files that were skipped or failed.
# NOTE(review): this points into the "Proxemics" folder while every other output
# of this cell goes to "Entropy_Results" - confirm this is intended.
_FAILED_LOG_PATH = "/Volumes/TwoTeras/1_Experiment_2/Proxemics/failed_files_log.csv"


def _participant_id(csv_path):
    """Return the file name without directory and extension (e.g. '1031_1')."""
    return os.path.splitext(os.path.basename(csv_path))[0]


def _stationary_distribution(transition_matrix):
    """Return the stationary distribution of a row-normalised transition matrix.

    The result is the left eigenvector belonging to eigenvalue 1, scaled to
    sum to 1 and ordered like the rows of ``transition_matrix``.

    Raises:
        ValueError: if there is not exactly one eigenvalue equal to 1. With no
            such eigenvalue (e.g. a category without outgoing transitions) the
            distribution is undefined; with several it is not unique. The
            previous code flattened all matching eigenvectors into one array,
            which silently produced wrong values in the second case.
    """
    eigvals, eigvecs = np.linalg.eig(transition_matrix.to_numpy().T)
    unit_positions = np.flatnonzero(np.isclose(eigvals, 1))
    if len(unit_positions) != 1:
        raise ValueError(
            f"expected exactly one eigenvalue equal to 1, found {len(unit_positions)}; "
            "the stationary distribution is undefined or not unique"
        )
    vector = np.real(eigvecs[:, unit_positions[0]])
    return vector / vector.sum()


# Loop through all files
for filename in files:
    # Derived from the file name itself instead of a fixed-width slice, so
    # stems that are not exactly six characters long are handled as well.
    participant_id = _participant_id(filename)
    try:
        print(f"Processing file: {filename}")  # Debugging statement

        # Read participant data
        One_participant = pd.read_csv(filename)
        if any(column not in One_participant.columns for column in _REQUIRED_COLUMNS):
            print(f"Skipping {filename} due to missing columns.")
            failed_files.append({'filename': participant_id, 'error': 'Missing required columns'})
            continue

        # Convert timestamps; a file whose timestamps cannot be parsed is skipped
        try:
            One_participant['date_seconds'] = pd.to_datetime(One_participant['timeStampDataPointEnd'], unit='s')
        except Exception as e:
            print(f"Timestamp conversion error in {filename}: {e}")
            failed_files.append({'filename': participant_id, 'error': str(e)})
            continue

        # Filter for the desired gaze events (rows whose `events` value is -2)
        data_Reduced = One_participant[One_participant['events'] == -2].copy()

        # Check if data_Reduced is empty to avoid errors in transition matrix creation
        if data_Reduced.empty:
            print(f"No valid events for {participant_id}, skipping.")
            failed_files.append({'filename': participant_id, 'error': 'No valid events in filtered data'})
            continue

        # Apply the category mapping and extract the gaze sequence
        data_Reduced["Mapped_Column_Collider_Categorical"] = data_Reduced["Collider_CategoricalN"].replace(mapping)
        gaze_sequence = data_Reduced['Mapped_Column_Collider_Categorical'].reset_index(drop=True)

        # Create transition matrix (categories in order of first appearance)
        categories = gaze_sequence.unique()
        transition_matrix = pd.DataFrame(0, index=categories, columns=categories, dtype=float)

        # Count transitions between consecutive gaze events
        for current_category, next_category in zip(gaze_sequence.iloc[:-1], gaze_sequence.iloc[1:]):
            transition_matrix.at[current_category, next_category] += 1

        # Normalize the transition matrix so each row sums to 1 (row normalization only);
        # rows without any outgoing transition become all zeros
        transition_matrix = transition_matrix.div(transition_matrix.sum(axis=1), axis=0).fillna(0)

        # Save the normalized transition matrix
        matrix_filename = os.path.join(save_matrix_path, f"{participant_id}_transition_matrix.csv")
        transition_matrix.to_csv(matrix_filename, index=True)
        print(f"Saved transition matrix for {participant_id}")  # Debugging statement

        # Stationary distribution, keyed by category (same order as the matrix rows)
        stationary_distribution = _stationary_distribution(transition_matrix)
        stationary_distribution_dict = {
            category: float(probability)
            for category, probability in zip(categories, stationary_distribution)
        }

        # Calculate overall transition entropy and per-category entropies using stationary distribution
        overall_transition_entropy, transition_entropy_per_category = calculate_transition_entropy(transition_matrix, stationary_distribution_dict)

        # Contribution of each category to the entropy of the stationary distribution
        stationary_entropy_per_category = {
            category: -probability * log2(probability) if probability > 0 else 0
            for category, probability in stationary_distribution_dict.items()
        }

        # Normalize entropies by log2 of the number of categories (maximum possible entropy)
        num_categories = len(transition_matrix)
        max_entropy = log2(num_categories) if num_categories > 1 else None
        normalized_overall_transition_entropy = (
            overall_transition_entropy / max_entropy if max_entropy is not None else 0
        )

        # Add entropy values to results
        result = {
            'Filename': participant_id,
            'Overall_Transition_Entropy': normalized_overall_transition_entropy
        }

        # Add each category's transition and stationary entropy to the result
        for category in categories:
            if max_entropy is not None:
                result[f'Transition_Entropy_{category}'] = transition_entropy_per_category.get(category, 0) / max_entropy
                result[f'Stationary_Entropy_{category}'] = stationary_entropy_per_category.get(category, 0) / max_entropy
            else:
                result[f'Transition_Entropy_{category}'] = 0
                result[f'Stationary_Entropy_{category}'] = 0

        entropy_results.append(result)

    except Exception as e:
        # Log any failed files and carry on with the next one
        print(f"Error processing {filename}: {e}")  # Debugging statement
        failed_files.append({'filename': participant_id, 'error': str(e)})

# Save entropy results to a CSV
entropy_df = pd.DataFrame(entropy_results)
entropy_df.to_csv(entropy_log_path, index=False)

# Log any failed files if they exist
if failed_files:
    failed_df = pd.DataFrame(failed_files)
    # Make sure the target folder exists so the failure log is not lost to an
    # OSError after all files have already been processed
    os.makedirs(os.path.dirname(_FAILED_LOG_PATH), exist_ok=True)
    failed_df.to_csv(_FAILED_LOG_PATH, index=False)
    print(f"Failures logged for {len(failed_files)} files.")
else:
    print("No failures detected.")


Processing file: /Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/05_Debbies_gaze/1031_1.csv
Saved transition matrix for 1031_1
Processing file: /Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/05_Debbies_gaze/1031_2.csv
Saved transition matrix for 1031_2
Processing file: /Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/05_Debbies_gaze/1031_3.csv
Saved transition matrix for 1031_3
Processing file: /Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/05_Debbies_gaze/1031_4.csv
Saved transition matrix for 1031_4
Processing file: /Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/05_Debbies_gaze/1031_5.csv
Saved transition matrix for 1031_5
Processing file: /Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/05_Debbies_gaze/1268_1.csv
Saved transition matrix for 1268_1
Processing file: /Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/05_Debbies_gaze/1268_2.csv
Saved transition matrix for 1268_2
Processing file: /Volumes/T

Saved transition matrix for 5161_5
Processing file: /Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/05_Debbies_gaze/5189_1.csv
Saved transition matrix for 5189_1
Processing file: /Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/05_Debbies_gaze/5189_2.csv
Saved transition matrix for 5189_2
Processing file: /Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/05_Debbies_gaze/5189_3.csv
Saved transition matrix for 5189_3
Processing file: /Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/05_Debbies_gaze/5189_4.csv
Saved transition matrix for 5189_4
Processing file: /Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/05_Debbies_gaze/5189_5.csv
Saved transition matrix for 5189_5
Processing file: /Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/05_Debbies_gaze/5743_1.csv
Saved transition matrix for 5743_1
Processing file: /Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/05_Debbies_gaze/5743_2.csv
Saved transition matrix for

Saved transition matrix for 9297_4
Processing file: /Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/05_Debbies_gaze/9297_5.csv
Saved transition matrix for 9297_5
Processing file: /Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/05_Debbies_gaze/9627_1.csv
Saved transition matrix for 9627_1
Processing file: /Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/05_Debbies_gaze/9627_2.csv
Saved transition matrix for 9627_2
Processing file: /Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/05_Debbies_gaze/9627_3.csv
Saved transition matrix for 9627_3
Processing file: /Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/05_Debbies_gaze/9627_4.csv
Saved transition matrix for 9627_4
Processing file: /Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/05_Debbies_gaze/9627_5.csv
Saved transition matrix for 9627_5
Processing file: /Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/05_Debbies_gaze/1142_1.csv
Saved transition matrix for

In [3]:
# Load a single recording (file 1031_1) for interactive inspection
data = pd.read_csv("/Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/05_Debbies_gaze/1031_1.csv")
# Interpret timeStampDataPointEnd as seconds since the Unix epoch (unit='s')
data['date_seconds'] = pd.to_datetime(data['timeStampDataPointEnd'], unit='s')      

In [17]:
# List the distinct values of the `names` column in the loaded recording
data.names.unique()

array(['terrain_O.001', 'terrain_S.001', 'pavement_O.002', 'Fence_34',
       'Building_161', 'CollisionObject1', 'TaskBuilding_35',
       'Graffity_35', '23_Cma', 'road_base_network.004',
       'pavement_Vb.003', 'pavement_T.002', 'Building_100',
       'TaskBuilding_27', 'pavement_S.002', 'terrain_T.001', 'body',
       'Lamppost_v1 (14)', 'road.009', 'Fence_10', 'Complete_fence.002',
       '20_Cma', 'TaskBuilding_30', 'Fence_12', 'Graffity_30', 'Hedge_9',
       'Complete_fence.001', 'Bench_v1', 'Fence_31', 'Bench_v1 (1)',
       'Trash_v1_1', 'Lamppost_v2', 'Trash_v2', 'Fence.001',
       'Building_102', 'road.016', 'Building_101', 'Building_158',
       'Euro_v7_3_Roof_floor', 'Building_92', 'Fence_14', 'Fence_35',
       'Tree_Planters_v1_3HR (1)', 'Cypress_v1_1 (3)', 'Cypress_v1_2 (4)',
       'Cypress_v1_2 (3)', 'Cypress_v1_2 (5)', 'Cypress_v1_1 (2)',
       'TaskBuilding_29', 'Body', 'pavement_F.004', 'OldCityWall_9',
       'Wall', 'CollisionObject0', 'terrain_A.001', 'Old

In [5]:
# Keep only the rows whose `events` value is -2 (the gaze events analysed in this notebook)
data_Reduced = data[data['events'] == -2]

In [6]:
# Preview timestamp, object name and category of the first 30 filtered rows
data_Reduced[['date_seconds','names', 'Collider_CategoricalN']].head(30)

Unnamed: 0,date_seconds,names,Collider_CategoricalN
2,2022-11-14 14:27:48.858779136,pavement_O.002,Background
12,2022-11-14 14:27:49.063130880,pavement_O.002,Background
35,2022-11-14 14:27:49.447531264,Wall,Background
52,2022-11-14 14:27:49.703467520,building01_LOD1,Background
70,2022-11-14 14:27:49.964859904,TaskBuilding_27,TaskBuilding_Public
97,2022-11-14 14:27:50.322476288,Building_100,Building
137,2022-11-14 14:27:50.812525056,Building_97,Building
168,2022-11-14 14:27:51.215277312,barbwire0,Background
206,2022-11-14 14:27:51.802046720,Building_161,Building
217,2022-11-14 14:27:52.023759104,TaskBuilding_35,TaskBuilding_Residential


In [7]:
# Distinct category labels present before the mapping to coarser classes is applied
data_Reduced.Collider_CategoricalN.unique()

array(['Background', 'TaskBuilding_Public', 'Building',
       'TaskBuilding_Residential', 'Active_Agent', 'Global_Landmark',
       'Active_Agent_Face', 'Passive_Agent', 'Passive_Agent_Face'],
      dtype=object)

In [12]:
import pandas as pd
import numpy as np
from math import log2

# Read one participant recording used to prototype the windowed analysis
data = pd.read_csv("/Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/05_Debbies_gaze/1031_1.csv")
data['date_seconds'] = pd.to_datetime(data['timeStampDataPointEnd'], unit='s')

# Restrict the recording to the gaze events of interest (events == -2)
data_Reduced = data.loc[data['events'].eq(-2)]

# Object names that open a trial window when they are looked at
collider_list = [
    '56_Sa', '39_Sa', '19_Cma', '55_Sa', '25_Cma', '40_Sa', '41_Sa', '17_Cma',
    '47_Sa', '03_Cma', '13_Cma', '24_Cma', '01_Cma', '54_Sa', '15_Cma', '29_Sa',
    '04_Cma', '49_Sa', '30_Sa', '02_Cma', '51_Sa', '08_Cma', '28_Cma', '26_Cma',
    '44_Sa', '06_Cma', '53_Sa', '37_Sa', '32_Sa', '20_Cma', '16_Cma', '50_Sa',
    '34_Sa', '11_Cma', '38_Sa', '33_Sa', '12_Cma', '22_Cma', '42_Sa', '05_Cma',
    '23_Cma', '18_Cma', '27_Cma', '45_Sa', '43_Sa', '09_Cma', '31_Sa', '48_Sa',
    '10_Cma', '52_Sa', '07_Cma', '46_Sa', '35_Sa', '36_Sa', '21_Cma', '14_Cma',
]

# Gaze events that landed on one of the listed colliders, numbered per collider
filtered_df = data_Reduced[data_Reduced['names'].isin(collider_list)].copy()
filtered_df['Occurrence_Order'] = filtered_df.groupby('names').cumcount() + 1

# Bounds of the filtered recording; they do not change while trials are cut out
recording_start = data_Reduced['date_seconds'].min()
recording_end = data_Reduced['date_seconds'].max()

# Time at which the most recent trial of each collider was opened
last_processed_time = {}

# One DataFrame per trial: all gaze events in the 30 s after a collider hit
trials = []
for collider_name, occurrence_time in zip(filtered_df['names'], filtered_df['date_seconds']):
    previous_time = last_processed_time.get(collider_name)

    # A hit within 30 s of the last opened trial of the same collider belongs
    # to that trial and does not open a new one
    if previous_time is not None and (occurrence_time - previous_time).total_seconds() <= 30:
        continue

    # Remember when this collider's current trial started
    last_processed_time[collider_name] = occurrence_time

    # The window starts at the hit and lasts 30 s, clipped to the recording bounds
    window_start = max(recording_start, occurrence_time)
    window_end = min(recording_end, occurrence_time + pd.Timedelta(seconds=30))

    # Gaze events inside the window (both ends inclusive)
    in_window = data_Reduced['date_seconds'].between(window_start, window_end)
    trial_segment = data_Reduced[in_window].copy()

    if trial_segment.empty:
        print(f"No data found in window for {collider_name}. Skipping.")
        continue

    # Label the segment; the trial number is a running count over all colliders
    trial_number = len(trials) + 1
    trial_segment['Collider_Name'] = collider_name
    trial_segment['Occurrence_Order'] = trial_number
    trial_segment['Trial_ID'] = f"{collider_name}_Trial_{trial_number}"
    trials.append(trial_segment)

# Stack all trials; fall back to an empty frame so later cells can test `.empty`
if not trials:
    print("No trials found. Exiting.")
    trials_df = pd.DataFrame()
else:
    trials_df = pd.concat(trials, ignore_index=True)

# Function to calculate conditional entropy
def calculate_transition_entropy(matrix, stationary_distribution):
    """Compute row-wise and overall entropy of a transition matrix.

    Args:
        matrix: DataFrame of row-normalised transition probabilities
            (row label = category the transition starts from).
        stationary_distribution: dict mapping row labels to their weight;
            labels that are missing count as weight 0.

    Returns:
        Tuple ``(total_entropy, category_entropies)``: the weighted sum of the
        row entropies and a dict with the entropy (in bits) of every row.
    """
    total_entropy = 0
    category_entropies = {}
    for i, row in matrix.iterrows():
        row_entropy = 0
        for p in row:
            if p > 0:
                row_entropy -= p * log2(p)
        category_entropies[i] = row_entropy
        total_entropy += row_entropy * stationary_distribution.get(i, 0)
    return total_entropy, category_entropies


# Initialize list to store entropy results. It is created even when there are
# no trials, so `entropy_df` below is always defined by this cell instead of
# being left over from an earlier cell.
entropy_results = []

# Continue with entropy calculation if trials_df is not empty
if not trials_df.empty:
    # Calculate entropy for each Trial_ID
    for trial_id, trial_data in trials_df.groupby('Trial_ID'):
        # Map the categories on the Series itself; assigning a new column into
        # the groupby slice is not needed and can trigger SettingWithCopyWarning
        gaze_sequence = trial_data["Collider_CategoricalN"].replace(mapping).reset_index(drop=True)
        categories = gaze_sequence.unique()
        transition_matrix = pd.DataFrame(0, index=categories, columns=categories, dtype=float)

        # Build the transition matrix by counting consecutive category pairs
        for current_category, next_category in zip(gaze_sequence.iloc[:-1], gaze_sequence.iloc[1:]):
            transition_matrix.at[current_category, next_category] += 1

        # Normalize the transition matrix (rows without transitions become all zeros)
        transition_matrix = transition_matrix.div(transition_matrix.sum(axis=1), axis=0).fillna(0)

        # Skip trials without a single transition. A NaN check is not needed
        # here because fillna(0) above has already removed every NaN.
        if transition_matrix.sum().sum() == 0:
            print(f"Skipping Trial_ID {trial_id}: Transition matrix is invalid or empty.")
            continue

        # Calculate stationary distribution: the left eigenvector for eigenvalue 1.
        # Exactly one such eigenvector is required; flattening several matching
        # eigenvectors into one array (as done previously) mixes their entries
        # and yields wrong probabilities.
        try:
            eigvals, eigvecs = np.linalg.eig(transition_matrix.to_numpy().T)
            unit_positions = np.flatnonzero(np.isclose(eigvals, 1))
            if len(unit_positions) != 1:
                raise ValueError(
                    f"expected exactly one eigenvalue equal to 1, found {len(unit_positions)}"
                )
            stationary_distribution = np.real(eigvecs[:, unit_positions[0]])
            stationary_distribution = stationary_distribution / stationary_distribution.sum()

            stationary_distribution_dict = {
                category: float(probability)
                for category, probability in zip(categories, stationary_distribution)
            }
        except (ValueError, np.linalg.LinAlgError) as e:
            # Fall back to a uniform distribution over the observed categories
            print(f"Error calculating stationary distribution for Trial_ID {trial_id}: {e}")
            stationary_distribution_dict = {category: 1 / len(categories) for category in categories}

        # Calculate entropy
        overall_transition_entropy, transition_entropy_per_category = calculate_transition_entropy(
            transition_matrix, stationary_distribution_dict
        )

        # Contribution of each category to the entropy of the stationary distribution
        stationary_entropy_per_category = {
            category: -probability * log2(probability) if probability > 0 else 0
            for category, probability in stationary_distribution_dict.items()
        }

        # Normalise by log2(number of categories), the maximum possible entropy
        num_categories = len(transition_matrix)
        normalized_overall_entropy = overall_transition_entropy / log2(num_categories) if num_categories > 1 else 0

        result = {
            'Trial_ID': trial_id,
            'Overall_Transition_Entropy': normalized_overall_entropy
        }

        for category in categories:
            result[f'Transition_Entropy_{category}'] = (
                transition_entropy_per_category.get(category, 0) / log2(num_categories)
            ) if num_categories > 1 else 0
            result[f'Stationary_Entropy_{category}'] = (
                stationary_entropy_per_category.get(category, 0) / log2(num_categories)
            ) if num_categories > 1 else 0

        entropy_results.append(result)

# Convert results into a DataFrame (empty when no trial could be analysed)
entropy_df = pd.DataFrame(entropy_results)


In [14]:
# Show the first rows of the per-trial entropy table
entropy_df.head()

Unnamed: 0,Trial_ID,Overall_Transition_Entropy,Transition_Entropy_Agent_Face,Stationary_Entropy_Agent_Face,Transition_Entropy_Agent_Body,Stationary_Entropy_Agent_Body,Transition_Entropy_Background,Stationary_Entropy_Background,Transition_Entropy_Task_Building,Stationary_Entropy_Task_Building,Transition_Entropy_Building,Stationary_Entropy_Building,Transition_Entropy_Global_Landmark,Stationary_Entropy_Global_Landmark
0,01_Cma_Trial_22,0.570817,0.605376,0.09128,0.430677,0.062735,0.689468,0.228565,0.440598,0.206797,0.534424,0.227872,,
1,03_Cma_Trial_16,0.34064,0.0,0.027601,0.619451,0.168669,0.30132,0.117293,,,0.43156,0.133281,,
2,09_Cma_Trial_14,0.289236,0.266753,0.124505,0.424315,0.068234,0.204848,0.161853,0.808547,0.13017,0.379551,0.159054,,
3,09_Cma_Trial_24,0.361897,,,0.0,0.024192,0.305478,0.139582,0.539047,0.091174,0.559766,0.190264,0.395488,0.064483
4,09_Cma_Trial_26,0.376103,,,0.395488,0.047812,0.342352,0.111824,0.628421,0.103383,0.511955,0.153667,0.0,0.028284


In [16]:
import pandas as pd
import numpy as np
from math import log2
import os
import glob

# Input recordings and the three output folders of the windowed analysis
input_dir = "/Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/05_Debbies_gaze/"
output_trials_dir = "/Volumes/TwoTeras/1_Experiment_2/Entropy_Results/Window/trials_df/"
output_transition_dir = "/Volumes/TwoTeras/1_Experiment_2/Entropy_Results/Window/transition_matrix/"
output_entropy_dir = "/Volumes/TwoTeras/1_Experiment_2/Entropy_Results/Window/entropy_results/"

# Fine-grained collider category -> coarser class
mapping = {
    "TaskBuilding_Public": "Task_Building",
    "TaskBuilding_Residential": "Task_Building",
    "Active_Agent": "Agent_Body",
    "Passive_Agent": "Agent_Body",
    "Active_Agent_Face": "Agent_Face",
    "Passive_Agent_Face": "Agent_Face",
}

# Create the output folders up front (no error if they already exist)
for _output_dir in (output_trials_dir, output_transition_dir, output_entropy_dir):
    os.makedirs(_output_dir, exist_ok=True)

# Object names that open a trial window when they are looked at
collider_list = [
    '56_Sa', '39_Sa', '19_Cma', '55_Sa', '25_Cma', '40_Sa', '41_Sa', '17_Cma',
    '47_Sa', '03_Cma', '13_Cma', '24_Cma', '01_Cma', '54_Sa', '15_Cma', '29_Sa',
    '04_Cma', '49_Sa', '30_Sa', '02_Cma', '51_Sa', '08_Cma', '28_Cma', '26_Cma',
    '44_Sa', '06_Cma', '53_Sa', '37_Sa', '32_Sa', '20_Cma', '16_Cma', '50_Sa',
    '34_Sa', '11_Cma', '38_Sa', '33_Sa', '12_Cma', '22_Cma', '42_Sa', '05_Cma',
    '23_Cma', '18_Cma', '27_Cma', '45_Sa', '43_Sa', '09_Cma', '31_Sa', '48_Sa',
    '10_Cma', '52_Sa', '07_Cma', '46_Sa', '35_Sa', '36_Sa', '21_Cma', '14_Cma',
]

# Columns every recording must provide for the windowed analysis
_WINDOW_REQUIRED_COLUMNS = ('timeStampDataPointEnd', 'events', 'names', 'Collider_CategoricalN')


# Defined once here instead of being re-created for every trial inside the loop
def calculate_transition_entropy(matrix, stationary_distribution):
    """Return the weighted entropy (in bits) of a row-normalised transition matrix.

    Args:
        matrix: DataFrame of transition probabilities (row label = category
            the transition starts from).
        stationary_distribution: dict mapping row labels to their weight;
            labels that are missing count as weight 0.

    Returns:
        The sum of the row entropies, each weighted by its entry in
        ``stationary_distribution``.
    """
    total_entropy = 0
    for i, row in matrix.iterrows():
        row_entropy = sum(-p * log2(p) for p in row if p > 0)
        total_entropy += row_entropy * stationary_distribution.get(i, 0)
    return total_entropy


def _stationary_distribution_or_uniform(transition_matrix, categories):
    """Return {category: stationary probability}, or a uniform dict as fallback.

    The stationary distribution is the left eigenvector for eigenvalue 1,
    scaled to sum to 1. The uniform fallback is used when the eigen
    decomposition fails or when there is not exactly one eigenvalue equal to 1
    (distribution undefined or not unique). Flattening several matching
    eigenvectors into one array, as done previously, mixed their entries.
    """
    uniform = {category: 1 / len(categories) for category in categories}
    try:
        eigvals, eigvecs = np.linalg.eig(transition_matrix.to_numpy().T)
    except np.linalg.LinAlgError:
        # Only numerical failures are handled; KeyboardInterrupt and programming
        # errors are no longer swallowed as they were by the bare `except:`
        return uniform
    unit_positions = np.flatnonzero(np.isclose(eigvals, 1))
    if len(unit_positions) != 1:
        return uniform
    vector = np.real(eigvecs[:, unit_positions[0]])
    vector = vector / vector.sum()
    return dict(zip(categories, vector))


# Process all CSV files (sorted, because glob returns them in arbitrary order)
for file_path in sorted(glob.glob(os.path.join(input_dir, "*.csv"))):
    # File name without directory and extension, e.g. '1031_1'
    participant_id = os.path.splitext(os.path.basename(file_path))[0]

    # Load the data
    data = pd.read_csv(file_path)
    if any(column not in data.columns for column in _WINDOW_REQUIRED_COLUMNS):
        print(f"Skipping {file_path} due to missing columns.")
        continue
    data['date_seconds'] = pd.to_datetime(data['timeStampDataPointEnd'], unit='s')

    # Filter for the desired gaze events
    data_Reduced = data[data['events'] == -2]

    # Bounds of the filtered recording (constant for all trials of this file)
    recording_start = data_Reduced['date_seconds'].min()
    recording_end = data_Reduced['date_seconds'].max()

    # Filter and label rows with colliders
    filtered_df = data_Reduced[data_Reduced['names'].isin(collider_list)].copy()
    filtered_df['Occurrence_Order'] = filtered_df.groupby('names').cumcount() + 1

    # Maintain a dictionary to track the last processed time for each collider
    last_processed_time = {}

    # Segment data by each occurrence of colliders
    trials = []
    for index, row in filtered_df.iterrows():
        collider_name = row['names']
        occurrence_time = row['date_seconds']

        # Check if this occurrence falls within the active window
        if (
            collider_name in last_processed_time
            and (occurrence_time - last_processed_time[collider_name]).total_seconds() <= 30
        ):
            # Skip this occurrence since it's within the 30-second window
            continue

        # Update the last processed time for this collider
        last_processed_time[collider_name] = occurrence_time

        # Constrain the 30-second window to the dataset bounds
        window_start = max(recording_start, occurrence_time)
        window_end = min(recording_end, occurrence_time + pd.Timedelta(seconds=30))

        # Extract the constrained window
        trial_segment = data_Reduced[
            (data_Reduced['date_seconds'] >= window_start) &
            (data_Reduced['date_seconds'] <= window_end)
        ].copy()

        if trial_segment.empty:
            continue

        # Add trial-specific labels; the trial number is a running count over
        # all colliders of this file.
        # NOTE(review): 'Occurrence_Order' therefore holds the global trial
        # number, not a per-collider count - confirm this is intended.
        trial_number = len(trials) + 1
        trial_segment['Collider_Name'] = collider_name
        trial_segment['Occurrence_Order'] = trial_number
        trial_segment['Trial_ID'] = f"{collider_name}_Trial_{trial_number}"
        trials.append(trial_segment)

    # Combine all trials into a single DataFrame
    if trials:
        trials_df = pd.concat(trials, ignore_index=True)
    else:
        continue  # Skip this file if no trials are found

    # Save trials_df
    trials_df.to_csv(os.path.join(output_trials_dir, f"{participant_id}_trials_df.csv"), index=False)

    # Calculate transition matrices and entropy
    entropy_results = []
    for trial_id, trial_data in trials_df.groupby('Trial_ID'):
        collider_name = trial_data['Collider_Name'].iloc[0]
        occurrence_order = trial_data['Occurrence_Order'].iloc[0]

        # Apply the category mapping defined for this analysis, as the
        # single-file version of this computation does. It was defined in this
        # cell but never used, so the raw categories were analysed instead.
        gaze_sequence = trial_data['Collider_CategoricalN'].replace(mapping).reset_index(drop=True)
        categories = gaze_sequence.unique()
        transition_matrix = pd.DataFrame(0, index=categories, columns=categories, dtype=float)

        # Build the transition matrix by counting consecutive category pairs
        for current_category, next_category in zip(gaze_sequence.iloc[:-1], gaze_sequence.iloc[1:]):
            transition_matrix.at[current_category, next_category] += 1

        # Normalize the transition matrix (rows without transitions become all zeros)
        transition_matrix = transition_matrix.div(transition_matrix.sum(axis=1), axis=0).fillna(0)

        # Save transition matrix. The trial id is part of the file name;
        # without it every trial overwrote the matrix of the previous one and
        # only the last trial of each participant was kept.
        transition_matrix.to_csv(
            os.path.join(output_transition_dir, f"{participant_id}_{trial_id}_transition_matrix.csv")
        )

        # Calculate stationary distribution (uniform fallback, see helper)
        stationary_distribution_dict = _stationary_distribution_or_uniform(transition_matrix, categories)

        # Calculate entropy
        overall_transition_entropy = calculate_transition_entropy(transition_matrix, stationary_distribution_dict)

        # Normalise by log2(number of categories), the maximum possible entropy
        num_categories = len(transition_matrix)
        normalized_overall_entropy = overall_transition_entropy / log2(num_categories) if num_categories > 1 else 0

        result = {
            'Trial_ID': trial_id,
            'Collider_Name': collider_name,
            'Occurrence_Order': occurrence_order,
            'Overall_Transition_Entropy': normalized_overall_entropy
        }
        entropy_results.append(result)

    # Save entropy results
    entropy_df = pd.DataFrame(entropy_results)
    entropy_df.to_csv(os.path.join(output_entropy_dir, f"{participant_id}_entropy_results.csv"), index=False)



KeyboardInterrupt



In [13]:
# Summary statistics of the normalised overall transition entropy.
# NOTE(review): this cell's execution count (13) is lower than that of the cell
# placed before it (16), so `entropy_df` here presumably comes from the
# single-recording analysis - confirm with Restart & Run All.
entropy_df.Overall_Transition_Entropy.describe()

count    29.000000
mean      0.455161
std       0.126668
min       0.221541
25%       0.367364
50%       0.468951
75%       0.516154
max       0.691470
Name: Overall_Transition_Entropy, dtype: float64

In [15]:
# Show the first rows of the stacked trial segments, including the trial labels
trials_df.head()

Unnamed: 0.2,Unnamed: 0.1,level_0,Unnamed: 0,index,SubjectID,Session,SessionSubsection,timeStampDataPointEnd,combinedGazeValidityBitmask,eyePositionCombinedWorld.x,...,events,length,distance,avg_dist,names,Collider_CategoricalN,date_seconds,Collider_Name,Occurrence_Order,Trial_ID
0,246,246,246,246,1031,1,1,1668436000.0,3,-59.089291,...,-2.0,0.310001,7.234847,7.232347,23_Cma,Active_Agent,2022-11-14 14:27:52.612511488,23_Cma,1,23_Cma_Trial_1
1,267,267,267,267,1031,1,1,1668436000.0,3,-59.123661,...,-2.0,0.311487,12.349276,12.375113,Graffity_35,TaskBuilding_Residential,2022-11-14 14:27:52.968639488,23_Cma,1,23_Cma_Trial_1
2,307,307,307,307,1031,1,1,1668436000.0,3,-59.026291,...,-2.0,0.599665,12.322412,12.307854,Graffity_35,TaskBuilding_Residential,2022-11-14 14:27:53.611952896,23_Cma,1,23_Cma_Trial_1
3,320,320,320,320,1031,1,1,1668436000.0,3,-59.018215,...,-2.0,0.18848,12.394278,12.390313,Graffity_35,TaskBuilding_Residential,2022-11-14 14:27:53.811841280,23_Cma,1,23_Cma_Trial_1
4,331,331,331,331,1031,1,1,1668436000.0,3,-59.008221,...,-2.0,0.088783,12.808676,12.806624,Graffity_35,TaskBuilding_Residential,2022-11-14 14:27:53.945265152,23_Cma,1,23_Cma_Trial_1
