In [1]:
import pandas as pd
import numpy as np
from math import log2
from collections import Counter
import glob
import os
import re

In [12]:
# Input and output locations.
# `path` is the directory that is globbed for *.csv gaze files further down;
# the other three are where the per-file matrices, the entropy table and the
# pooled transition matrix are written.
path = "/Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/05_Debbies_gaze"
save_matrix_path = "/Volumes/TwoTeras/1_Experiment_2/Entropy_Results/Matrices/"
entropy_log_path = "/Volumes/TwoTeras/1_Experiment_2/Entropy_Results/entropy_log.csv"
final_matrix_path = "/Volumes/TwoTeras/1_Experiment_2/Entropy_Results/final_transition_matrix.csv"

# Create the matrix output directory (and any missing parents) up front
os.makedirs(save_matrix_path, exist_ok=True)

# Category labels used as the row/column index of every transition matrix.
# Gaze samples whose label is not in this list are ignored when counting transitions.
# NOTE(review): this list still contains the two *_Face labels, but the processing
# loop replaces them via `merge_face_mapping` before counting, so their rows and
# columns stay all-zero and they still count towards len(all_categories) in the
# entropy normalisation. Confirm whether they should be listed here.
all_categories = [
    "Background", "Building", "TaskBuilding_Public", "TaskBuilding_Residential",
    "Global_Landmark",  "Passive_Agent_Face", "Passive_Agent", "Active_Agent_Face", "Active_Agent"
]

# Coarser grouping of the raw labels (task buildings, agent bodies, agent faces).
# NOTE(review): `mapping` is not referenced by any other visible cell - confirm
# whether it is still needed or is left over from an earlier analysis variant.
mapping = {
    "TaskBuilding_Public": "Task_Building",
    "TaskBuilding_Residential": "Task_Building",
    "Active_Agent": "Agent_Body",
    "Passive_Agent": "Agent_Body",
    "Active_Agent_Face": "Agent_Face",
    "Passive_Agent_Face": "Agent_Face"
}
# Folds each `_Face` label into its agent category (face and body are not distinguished)
merge_face_mapping = {
    "Passive_Agent_Face": "Passive_Agent",
    "Active_Agent_Face": "Active_Agent"
}

# Accumulators filled by the processing loop:
#   overall_raw_matrix - transition counts summed over all processed files
#   entropy_results    - one dict of entropy measures per successfully processed file
#   failed_files       - one {'filename', 'error'} dict per skipped or failed file
overall_raw_matrix = pd.DataFrame(0, index=all_categories, columns=all_categories, dtype=float)
entropy_results = []
failed_files = []

# Function to calculate transition entropy
def calculate_transition_entropy(matrix, stationary_distribution):
    """Compute per-row Shannon entropies of a transition matrix and their weighted sum.

    Parameters
    ----------
    matrix : pandas.DataFrame
        Transition probabilities; each row is iterated via ``iterrows`` and its
        strictly positive entries contribute ``-p * log2(p)``.
    stationary_distribution : mapping
        Weight for each row label, looked up with ``.get(label, 0)`` so that
        labels without an entry contribute nothing to the total.

    Returns
    -------
    tuple
        ``(total_entropy, category_entropies)`` where ``category_entropies`` maps
        each row label to its entropy in bits and ``total_entropy`` is the sum of
        those entropies weighted by ``stationary_distribution``.
    """
    category_entropies = {}
    total_entropy = 0
    for label, probabilities in matrix.iterrows():
        entropy_bits = 0
        for p in probabilities:
            # Zero (or negative) entries are skipped: log2 is undefined there
            if p > 0:
                entropy_bits += -p * log2(p)
        category_entropies[label] = entropy_bits
        weight = stationary_distribution.get(label, 0)
        total_entropy += entropy_bits * weight
    return total_entropy, category_entropies

# Process each file: count category-to-category gaze transitions, save the raw
# count matrix, and derive transition / stationary entropies for the entropy log.
# Any exception for a single file is logged to `failed_files` and the loop continues.

# Sorted so the entropy log and console output have a reproducible order
# (glob returns matches in arbitrary, filesystem-dependent order).
files = sorted(glob.glob(path + "/*.csv"))

# Both columns are read below, so both must be present
required_columns = ('Collider_CategoricalN', 'events')

# Entropies are divided by log2(number of categories), the maximum entropy of a
# distribution over that many outcomes; with fewer than two categories they are 0.
num_categories = len(all_categories)
max_entropy = log2(num_categories) if num_categories > 1 else None

for filename in files:
    try:
        print(f"Processing file: {filename}")
        file_basename = os.path.basename(filename)
        # splitext removes only the trailing extension; str.replace('.csv', ...)
        # would also rewrite a '.csv' occurring earlier in the name
        file_stem = os.path.splitext(file_basename)[0]

        # Read participant data
        One_participant = pd.read_csv(filename)
        missing_columns = [col for col in required_columns if col not in One_participant.columns]
        if missing_columns:
            print(f"Skipping {filename} due to missing required columns.")
            failed_files.append({'filename': file_basename, 'error': 'Missing required columns'})
            continue

        # Filter for the desired gaze events
        # NOTE(review): this keeps events == -2, while the single-file exploration
        # cell further down filters on events == 2 - confirm which code is intended.
        data_Reduced = One_participant[One_participant['events'] == -2].copy()
        if data_Reduced.empty:
            print(f"No valid events for {filename}, skipping.")
            failed_files.append({'filename': file_basename, 'error': 'No valid events in filtered data'})
            continue

        # Fold the *_Face labels into their agent categories
        data_Reduced['Collider_CategoricalN'] = data_Reduced['Collider_CategoricalN'].replace(merge_face_mapping)

        # Build raw transition matrix for this file.
        # Consecutive label pairs are tallied with a Counter and written into the
        # matrix once per distinct pair, instead of one DataFrame .loc update per
        # sample (which is very slow for thousands of rows per file).
        raw_transition_matrix = pd.DataFrame(0, index=all_categories, columns=all_categories, dtype=float)
        gaze_sequence = data_Reduced['Collider_CategoricalN'].reset_index(drop=True)
        gaze_labels = gaze_sequence.tolist()
        transition_counts = Counter(
            (current_category, next_category)
            for current_category, next_category in zip(gaze_labels, gaze_labels[1:])
            # Pairs involving a label outside all_categories (including NaN) are dropped
            if current_category in all_categories and next_category in all_categories
        )
        for (current_category, next_category), count in transition_counts.items():
            raw_transition_matrix.loc[current_category, next_category] = count

        # Normalize each row to probabilities; rows without outgoing transitions
        # are divided by 1 and therefore stay all-zero
        row_sums = raw_transition_matrix.sum(axis=1)
        normalized_matrix = raw_transition_matrix.div(row_sums.replace(0, 1), axis=0).fillna(0)

        # Accumulate raw transitions into the overall matrix.
        # This happens before the entropy computation, so a file whose entropy
        # step fails below still contributes its counts here.
        overall_raw_matrix += raw_transition_matrix

        # Save the individual raw transition matrix
        save_path = os.path.join(save_matrix_path, f"{file_stem}_transition_matrix.csv")
        raw_transition_matrix.to_csv(save_path)
        print(f"Saved individual raw transition matrix: {save_path}")

        # Calculate stationary distribution: the left eigenvector of the transition
        # matrix for eigenvalue 1, rescaled to sum to 1.
        eigvals, eigvecs = np.linalg.eig(normalized_matrix.T.to_numpy())
        unit_eigen_idx = np.flatnonzero(np.isclose(eigvals, 1))
        if len(unit_eigen_idx) != 1:
            # Zero matches: no stationary vector (e.g. a visited category without
            # outgoing transitions). Several matches: the chain splits into
            # disconnected parts and the stationary distribution is not unique.
            # Flattening all matching eigenvectors, as a boolean-mask selection
            # would, yields a vector of the wrong length, so fail explicitly and
            # let the except-branch log this file.
            raise ValueError(
                f"Expected exactly one eigenvalue equal to 1, found {len(unit_eigen_idx)}; "
                "stationary distribution is undefined or not unique"
            )
        stationary_distribution = np.real(eigvecs[:, unit_eigen_idx[0]])
        # Dividing by the sum also fixes the arbitrary sign of the eigenvector
        stationary_distribution = stationary_distribution / stationary_distribution.sum()

        stationary_distribution_dict = {
            category: stationary_distribution[idx] for idx, category in enumerate(all_categories)
        }

        # Calculate transition entropy
        overall_transition_entropy, transition_entropy_per_category = calculate_transition_entropy(normalized_matrix, stationary_distribution_dict)

        # Calculate stationary entropy: each category's -p*log2(p) contribution
        stationary_entropy_per_category = {
            category: (-stationary_distribution_dict[category] * log2(stationary_distribution_dict[category]))
            if stationary_distribution_dict[category] > 0 else 0
            for category in all_categories
        }

        # Normalize entropies
        normalized_overall_transition_entropy = overall_transition_entropy / max_entropy if max_entropy else 0

        # Extract session identifier ("<digits>_<digits>") from the filename,
        # falling back to the name without its extension
        digits_digits_match = re.search(r'\d+_\d+', file_basename)
        session_identifier = digits_digits_match.group(0) if digits_digits_match else file_stem
        print(f"Extracted session: {session_identifier}")

        # Add entropy results
        result = {
            'Filename': file_basename,
            'SessionNr': session_identifier,  # Include the session identifier
            'Overall_Transition_Entropy': normalized_overall_transition_entropy
        }
        for category in all_categories:
            result[f'Transition_Entropy_{category}'] = transition_entropy_per_category.get(category, 0) / max_entropy if max_entropy else 0
            result[f'Stationary_Entropy_{category}'] = stationary_entropy_per_category.get(category, 0) / max_entropy if max_entropy else 0

        entropy_results.append(result)

    except Exception as e:
        # Deliberately broad: one bad file must not abort the whole batch
        print(f"Error processing {filename}: {e}")
        failed_files.append({'filename': os.path.basename(filename), 'error': str(e)})

# Row-normalise the pooled transition counts and write them to disk.
# Rows that sum to zero are divided by 1 instead, so they remain all-zero.
row_sums = overall_raw_matrix.sum(axis=1)
safe_divisors = row_sums.replace(0, 1)
overall_normalized_matrix = overall_raw_matrix.div(safe_divisors, axis=0)
overall_normalized_matrix = overall_normalized_matrix.fillna(0)
overall_normalized_matrix.to_csv(final_matrix_path)
print(f"Final transition matrix saved to: {final_matrix_path}")

# One row per successfully processed file
entropy_df = pd.DataFrame(entropy_results)
entropy_df.to_csv(entropy_log_path, index=False)
print(f"Entropy log saved to: {entropy_log_path}")

# Write the failure log only when at least one file was skipped or raised
if not failed_files:
    print("No failures detected.")
else:
    failed_files_path = "/Volumes/TwoTeras/1_Experiment_2/Entropy_Results/failed_files_log.csv"
    failed_df = pd.DataFrame(failed_files)
    failed_df.to_csv(failed_files_path, index=False)
    print(f"Failures logged to: {failed_files_path}")


Processing file: /Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/05_Debbies_gaze/1031_1.csv (Agent)
Valid data rows after filtering: 5987
Saved matrix: /Volumes/TwoTeras/1_Experiment_2/Entropy_Results/Matrices/Agent/1031_1_Agent_transition_matrix.csv
Processing file: /Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/05_Debbies_gaze/1031_1.csv (Face_Separated)
Valid data rows after filtering: 5987
Saved matrix: /Volumes/TwoTeras/1_Experiment_2/Entropy_Results/Matrices/Face_Separated/1031_1_Face_Separated_transition_matrix.csv
Processing file: /Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/05_Debbies_gaze/1031_2.csv (Agent)
Valid data rows after filtering: 5865
Saved matrix: /Volumes/TwoTeras/1_Experiment_2/Entropy_Results/Matrices/Agent/1031_2_Agent_transition_matrix.csv
Processing file: /Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/05_Debbies_gaze/1031_2.csv (Face_Separated)
Valid data rows after filtering: 5865
Saved matrix: /Volumes/Tw

Valid data rows after filtering: 5557
Saved matrix: /Volumes/TwoTeras/1_Experiment_2/Entropy_Results/Matrices/Agent/1843_1_Agent_transition_matrix.csv
Processing file: /Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/05_Debbies_gaze/1843_1.csv (Face_Separated)
Valid data rows after filtering: 5557
Saved matrix: /Volumes/TwoTeras/1_Experiment_2/Entropy_Results/Matrices/Face_Separated/1843_1_Face_Separated_transition_matrix.csv
Processing file: /Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/05_Debbies_gaze/1843_2.csv (Agent)
Valid data rows after filtering: 5279
Saved matrix: /Volumes/TwoTeras/1_Experiment_2/Entropy_Results/Matrices/Agent/1843_2_Agent_transition_matrix.csv
Processing file: /Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/05_Debbies_gaze/1843_2.csv (Face_Separated)
Valid data rows after filtering: 5279
Saved matrix: /Volumes/TwoTeras/1_Experiment_2/Entropy_Results/Matrices/Face_Separated/1843_2_Face_Separated_transition_matrix.csv
Proces

Valid data rows after filtering: 5766
Saved matrix: /Volumes/TwoTeras/1_Experiment_2/Entropy_Results/Matrices/Agent/3540_1_Agent_transition_matrix.csv
Processing file: /Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/05_Debbies_gaze/3540_1.csv (Face_Separated)
Valid data rows after filtering: 5766
Saved matrix: /Volumes/TwoTeras/1_Experiment_2/Entropy_Results/Matrices/Face_Separated/3540_1_Face_Separated_transition_matrix.csv
Processing file: /Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/05_Debbies_gaze/3540_2.csv (Agent)
Valid data rows after filtering: 5695
Saved matrix: /Volumes/TwoTeras/1_Experiment_2/Entropy_Results/Matrices/Agent/3540_2_Agent_transition_matrix.csv
Processing file: /Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/05_Debbies_gaze/3540_2.csv (Face_Separated)
Valid data rows after filtering: 5695
Saved matrix: /Volumes/TwoTeras/1_Experiment_2/Entropy_Results/Matrices/Face_Separated/3540_2_Face_Separated_transition_matrix.csv
Proces

Valid data rows after filtering: 5905
Saved matrix: /Volumes/TwoTeras/1_Experiment_2/Entropy_Results/Matrices/Agent/4847_1_Agent_transition_matrix.csv
Processing file: /Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/05_Debbies_gaze/4847_1.csv (Face_Separated)
Valid data rows after filtering: 5905
Saved matrix: /Volumes/TwoTeras/1_Experiment_2/Entropy_Results/Matrices/Face_Separated/4847_1_Face_Separated_transition_matrix.csv
Processing file: /Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/05_Debbies_gaze/4847_2.csv (Agent)
Valid data rows after filtering: 5817
Saved matrix: /Volumes/TwoTeras/1_Experiment_2/Entropy_Results/Matrices/Agent/4847_2_Agent_transition_matrix.csv
Processing file: /Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/05_Debbies_gaze/4847_2.csv (Face_Separated)
Valid data rows after filtering: 5817
Saved matrix: /Volumes/TwoTeras/1_Experiment_2/Entropy_Results/Matrices/Face_Separated/4847_2_Face_Separated_transition_matrix.csv
Proces

Valid data rows after filtering: 5741
Saved matrix: /Volumes/TwoTeras/1_Experiment_2/Entropy_Results/Matrices/Agent/5189_1_Agent_transition_matrix.csv
Processing file: /Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/05_Debbies_gaze/5189_1.csv (Face_Separated)
Valid data rows after filtering: 5741
Saved matrix: /Volumes/TwoTeras/1_Experiment_2/Entropy_Results/Matrices/Face_Separated/5189_1_Face_Separated_transition_matrix.csv
Processing file: /Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/05_Debbies_gaze/5189_2.csv (Agent)
Valid data rows after filtering: 5421
Saved matrix: /Volumes/TwoTeras/1_Experiment_2/Entropy_Results/Matrices/Agent/5189_2_Agent_transition_matrix.csv
Processing file: /Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/05_Debbies_gaze/5189_2.csv (Face_Separated)
Valid data rows after filtering: 5421
Saved matrix: /Volumes/TwoTeras/1_Experiment_2/Entropy_Results/Matrices/Face_Separated/5189_2_Face_Separated_transition_matrix.csv
Proces


KeyboardInterrupt



In [3]:
# Load one gaze file for manual inspection and add a datetime version of the
# sample end timestamp (the raw value is interpreted as seconds since the epoch).
data = pd.read_csv(
    "/Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/05_Debbies_gaze/1031_1.csv"
).assign(
    date_seconds=lambda frame: pd.to_datetime(frame['timeStampDataPointEnd'], unit='s')
)

In [4]:
# Distinct values of the `names` column in the loaded file
data['names'].unique()

array(['pavement_O.002', 'terrain_O.001', 'Wall', 'building01_LOD1',
       'TaskBuilding_27', 'Building_100', 'Building_97',
       'maraz_cafe_collider', 'barbwire0', 'CollisionObject1',
       'Building_161', 'TaskBuilding_35', '23_Cma', 'Graffity_35',
       'Building_166', 'Fence_5', 'road_base_network.004',
       'pavement_Vb.003', 'Building_215', 'Crane_59', 'cyclone0',
       'crane_1', 'Fence_34', 'Building_162', 'Building_99', '20_Cma',
       'Complete_fence.002', 'CollisionObject2', 'Building_94',
       'Building_157', 'building01_LOD0', 'CollisionObject0',
       'terrain_E.001', 'Wall_9', 'Fence_9', 'Fence_10', 'Building_98',
       'Maraz_cafe_place.001', 'Building_148', 'TaskBuilding_5', '05_Cma',
       'pileOfClay_LOD0', 'Building_101', 'Wall_3', 'Body', 'Fence_12',
       'Hedge_9', 'Building_102', 'Lamppost_v1 (15)', 'pavement_E.008',
       'Cypress_v1_2 (1)', 'TaskBuilding_30', 'Graffity_30',
       'Lamppost_v1 (56)', 'Lamppost_v1 (11)', 'road.009', 'Building_1

In [5]:
# Keep only rows whose event code is 2.
# NOTE(review): the batch-processing loop earlier in this notebook filters on
# events == -2 and also assigns `data_Reduced`; confirm which event code is
# intended and whether reusing the name here is deliberate.
is_selected_event = data['events'] == 2
data_Reduced = data.loc[is_selected_event]

In [6]:
# First 30 filtered rows: timestamp, object name and its category label
preview_columns = ['date_seconds', 'names', 'Collider_CategoricalN']
data_Reduced[preview_columns].head(30)

Unnamed: 0,date_seconds,names,Collider_CategoricalN
1,2022-11-14 14:27:48.847371008,pavement_O.002,Background
4,2022-11-14 14:27:48.901434368,pavement_O.002,Background
32,2022-11-14 14:27:49.403387904,Wall,Background
41,2022-11-14 14:27:49.525403136,building01_LOD1,Background
58,2022-11-14 14:27:49.815069184,TaskBuilding_27,TaskBuilding_Public
89,2022-11-14 14:27:50.229724160,Building_100,Building
102,2022-11-14 14:27:50.382988288,Building_97,Building
141,2022-11-14 14:27:50.858653184,barbwire0,Background
203,2022-11-14 14:27:51.735582464,Building_161,Building
214,2022-11-14 14:27:51.957295104,TaskBuilding_35,TaskBuilding_Residential


In [7]:
# Category labels that actually occur in the filtered rows
data_Reduced['Collider_CategoricalN'].unique()

array(['Background', 'TaskBuilding_Public', 'Building',
       'TaskBuilding_Residential', 'Active_Agent', 'Global_Landmark',
       'Active_Agent_Face', 'Passive_Agent', 'Passive_Agent_Face'],
      dtype=object)

In [8]:
# First five rows of the per-file entropy table
entropy_df.head(n=5)

Unnamed: 0,Filename,SessionNr,Overall_Transition_Entropy,Transition_Entropy_Background,Stationary_Entropy_Background,Transition_Entropy_Building,Stationary_Entropy_Building,Transition_Entropy_TaskBuilding_Public,Stationary_Entropy_TaskBuilding_Public,Transition_Entropy_TaskBuilding_Residential,...,Transition_Entropy_Global_Landmark,Stationary_Entropy_Global_Landmark,Transition_Entropy_Passive_Agent_Face,Stationary_Entropy_Passive_Agent_Face,Transition_Entropy_Passive_Agent,Stationary_Entropy_Passive_Agent,Transition_Entropy_Active_Agent_Face,Stationary_Entropy_Active_Agent_Face,Transition_Entropy_Active_Agent,Stationary_Entropy_Active_Agent
0,1031_1.csv,1031_1,0.389249,0.335206,0.139582,0.438203,0.152295,0.475571,0.090454,0.506264,...,0.455617,0.037203,0.608887,0.00683,0.828389,0.013998,0.45668,0.012404,0.681446,0.025486
1,1031_2.csv,1031_2,0.378364,0.317822,0.12455,0.464861,0.146222,0.511491,0.070284,0.560104,...,0.410918,0.024851,0.582487,0.004947,0.682804,0.015484,0.509265,0.006166,0.657728,0.021338
2,1031_3.csv,1031_3,0.442325,0.376536,0.154199,0.436753,0.153293,0.556428,0.08045,0.645078,...,0.478353,0.01839,0.539006,0.029476,0.682909,0.027405,0.515783,0.045352,0.579004,0.063003
3,1031_4.csv,1031_4,0.471595,0.378546,0.149628,0.464841,0.139937,0.694692,0.079991,0.735646,...,0.560158,0.020855,0.643397,0.048051,0.675854,0.05055,0.635429,0.041567,0.584689,0.06943
4,1031_5.csv,1031_5,0.391307,0.3143,0.122028,0.471721,0.131888,0.635757,0.057363,0.650421,...,0.460003,0.019106,0.659951,0.02256,0.730833,0.03566,0.494206,0.015423,0.526395,0.053301


In [9]:
# Summary statistics of the normalised overall transition entropy across files
entropy_df['Overall_Transition_Entropy'].describe()

count    145.000000
mean       0.396558
std        0.046218
min        0.269395
25%        0.373971
50%        0.396238
75%        0.432131
max        0.491865
Name: Overall_Transition_Entropy, dtype: float64

In [10]:
# NOTE(review): `trials_df` is not defined in any visible cell of this notebook and
# the recorded output of this cell is a NameError - define/load it first or remove
# this cell so the notebook survives Restart & Run All.
trials_df.head()

NameError: name 'trials_df' is not defined