In [1]:
import pandas as pd
import numpy as np
from math import log2
from collections import Counter
import glob
import os

In [2]:
# Input folder with the per-participant gaze CSVs, plus the two output targets
path = "/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/05_Debbies_gaze"
save_matrix_path = "/Volumes/TwoTeras/0_Experiment_1/Entropy_Results/Matrices/"
entropy_log_path = "/Volumes/TwoTeras/0_Experiment_1/Entropy_Results/entropy_log.csv"

# Create the matrix output folder up front (no error if it is already there)
os.makedirs(save_matrix_path, exist_ok=True)

# Every CSV file located directly inside the input folder
files = glob.glob(f"{path}/*.csv")

# Collapses fine-grained collider labels into three broader groups
mapping = {
    **dict.fromkeys(("TaskBuilding_Public", "TaskBuilding_Residential"), "Task_Building"),
    **dict.fromkeys(("Active_Agent", "Passive_Agent"), "Agent_Body"),
    **dict.fromkeys(("Active_Agent_Face", "Passive_Agent_Face"), "Agent_Face"),
}

# Accumulators: one result dict per processed file, one record per failed file
entropy_results, failed_files = [], []

# Row-wise conditional entropy H(X|Y=y), combined into one weighted total
def calculate_transition_entropy(matrix, stationary_distribution):
    """Compute the transition entropy of a row-normalised transition matrix.

    Args:
        matrix: DataFrame whose rows hold the outgoing transition
            probabilities of each category (row label = source category).
        stationary_distribution: Mapping from row label to its weight;
            labels missing from the mapping contribute with weight 0.

    Returns:
        A tuple ``(total, per_category)`` where ``per_category`` maps each
        row label to its Shannon entropy in bits and ``total`` is the sum of
        those entropies weighted by ``stationary_distribution``.
    """
    def _shannon_bits(probabilities):
        # Zero (and negative) entries are skipped, so 0 * log2(0) counts as 0.
        return sum(-prob * log2(prob) for prob in probabilities if prob > 0)

    row_entropies = [
        (label, _shannon_bits(probabilities))
        for label, probabilities in matrix.iterrows()
    ]

    weighted_total = 0
    for label, bits in row_entropies:
        weighted_total += bits * stationary_distribution.get(label, 0)

    return weighted_total, dict(row_entropies)

# Columns the processing loop reads from every participant file
_REQUIRED_COLUMNS = ('timeStampDataPointEnd', 'Collider_CategoricalN', 'events')


def _participant_id(filename):
    """Return the file's base name without its extension (e.g. "0479_2").

    Unlike slicing a fixed number of characters off the end of the path, this
    stays correct for base names of any length.
    """
    return os.path.splitext(os.path.basename(filename))[0]


def _build_transition_matrix(gaze_sequence):
    """Return the row-normalised first-order transition matrix of a sequence.

    Args:
        gaze_sequence: Series of category labels in temporal order.

    Returns:
        Square DataFrame indexed (rows and columns) by the unique labels in
        order of first appearance. Entry (a, b) is the fraction of transitions
        leaving ``a`` that go to ``b``; a label with no outgoing transition
        gets an all-zero row.
    """
    categories = gaze_sequence.unique()
    matrix = pd.DataFrame(0, index=categories, columns=categories, dtype=float)

    # Count every consecutive (current, next) pair in one pass instead of
    # updating the DataFrame once per sample.
    labels = gaze_sequence.tolist()
    pair_counts = Counter(zip(labels, labels[1:]))
    for (current_category, next_category), count in pair_counts.items():
        matrix.loc[current_category, next_category] += count

    # Row normalisation only; rows that sum to 0 become NaN and are zeroed.
    return matrix.div(matrix.sum(axis=1), axis=0).fillna(0)


def _stationary_distribution(transition_matrix):
    """Return the stationary distribution of a row-stochastic matrix.

    The distribution is the left eigenvector for eigenvalue 1, scaled to sum
    to 1. When several eigenvalues are numerically close to 1, the one closest
    to 1 is used so that exactly one vector is returned.

    Raises:
        ValueError: If no eigenvalue is close to 1 (e.g. a category without
            outgoing transitions makes the matrix non-stochastic) or the
            selected eigenvector cannot be normalised.
    """
    eigvals, eigvecs = np.linalg.eig(transition_matrix.to_numpy().T)
    unit_candidates = np.flatnonzero(np.isclose(eigvals, 1))
    if unit_candidates.size == 0:
        raise ValueError("Transition matrix has no eigenvalue 1; stationary distribution is undefined")

    closest = unit_candidates[np.argmin(np.abs(eigvals[unit_candidates] - 1))]
    vector = np.real(eigvecs[:, closest])
    total = vector.sum()
    if np.isclose(total, 0):
        raise ValueError("Stationary eigenvector sums to zero and cannot be normalised")
    return vector / total


# Loop through all files: build the transition matrix, save it, and collect
# the normalised entropy measures. Any failure is recorded in `failed_files`
# and the loop moves on to the next file.
for filename in files:
    participant_id = _participant_id(filename)
    try:
        print(f"Processing file: {filename}")  # Debugging statement

        # Read participant data and make sure every column used below exists
        One_participant = pd.read_csv(filename)
        if any(column not in One_participant.columns for column in _REQUIRED_COLUMNS):
            print(f"Skipping {filename} due to missing columns.")
            failed_files.append({'filename': participant_id, 'error': 'Missing required columns'})
            continue

        # Convert timestamps; the result is not used further down, but a file
        # whose timestamps cannot be parsed is skipped.
        try:
            One_participant['date_seconds'] = pd.to_datetime(One_participant['timeStampDataPointEnd'], unit='s')
        except Exception as e:
            print(f"Timestamp conversion error in {filename}: {e}")
            failed_files.append({'filename': participant_id, 'error': str(e)})
            continue

        # Keep only rows whose `events` value is -2 (the desired gaze events)
        data_Reduced = One_participant[One_participant['events'] == -2]

        # Nothing left to build a transition matrix from
        if data_Reduced.empty:
            print(f"No valid events for {participant_id}, skipping.")
            failed_files.append({'filename': participant_id, 'error': 'No valid events in filtered data'})
            continue

        # Gaze sequence for category transitions.
        # NOTE: the raw `Collider_CategoricalN` labels are used; the `mapping`
        # collapse is not applied here.
        gaze_sequence = data_Reduced['Collider_CategoricalN'].reset_index(drop=True)
        categories = gaze_sequence.unique()

        # Row-normalised transition matrix (rows/columns follow `categories`)
        transition_matrix = _build_transition_matrix(gaze_sequence)

        # Save the normalized transition matrix
        matrix_filename = os.path.join(save_matrix_path, f"{participant_id}_transition_matrix.csv")
        transition_matrix.to_csv(matrix_filename, index=True)
        print(f"Saved transition matrix for {participant_id}")  # Debugging statement

        # Stationary distribution, in the same order as `categories`
        stationary_distribution = _stationary_distribution(transition_matrix)
        stationary_distribution_dict = dict(zip(categories, stationary_distribution))

        # Overall transition entropy and per-category entropies, weighted by
        # the stationary distribution
        overall_transition_entropy, transition_entropy_per_category = calculate_transition_entropy(
            transition_matrix, stationary_distribution_dict
        )

        # Each category's term -p * log2(p) of the stationary entropy
        stationary_entropy_per_category = {
            category: (-stationary_distribution_dict[category] * log2(stationary_distribution_dict[category]))
            if stationary_distribution_dict[category] > 0 else 0
            for category in categories
        }

        # Normalise by the maximum entropy log2(#categories); with a single
        # category that maximum is 0, so every normalised value is reported as 0.
        num_categories = len(transition_matrix)
        max_entropy = log2(num_categories) if num_categories > 1 else 0
        normalized_overall_transition_entropy = overall_transition_entropy / max_entropy if max_entropy else 0

        # Add entropy values to results
        result = {
            'Filename': participant_id,
            'Overall_Transition_Entropy': normalized_overall_transition_entropy
        }

        # Add each category's transition and stationary entropy to the result
        for category in categories:
            result[f'Transition_Entropy_{category}'] = (
                transition_entropy_per_category.get(category, 0) / max_entropy if max_entropy else 0
            )
            result[f'Stationary_Entropy_{category}'] = (
                stationary_entropy_per_category.get(category, 0) / max_entropy if max_entropy else 0
            )

        entropy_results.append(result)

    except Exception as e:
        # Top-level boundary for one file: log the failure and keep going
        print(f"Error processing {filename}: {e}")  # Debugging statement
        failed_files.append({'filename': participant_id, 'error': str(e)})

# Save entropy results to a CSV (one row per successfully processed file)
entropy_df = pd.DataFrame(entropy_results)
entropy_df.to_csv(entropy_log_path, index=False)

# Log any failed files if they exist
if failed_files:
    failed_log_path = "/Volumes/TwoTeras/0_Experiment_1/Proxemics/failed_files_log_reduced.csv"
    # The failure log lives in a different folder than the other outputs;
    # create it if needed so the log is not lost to a missing directory.
    os.makedirs(os.path.dirname(failed_log_path), exist_ok=True)
    failed_df = pd.DataFrame(failed_files)
    failed_df.to_csv(failed_log_path, index=False)
    print(f"Failures logged for {len(failed_files)} files.")
else:
    print("No failures detected.")


Processing file: /Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/05_Debbies_gaze/0479_2.csv
Saved transition matrix for 0479_2
Processing file: /Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/05_Debbies_gaze/0479_3.csv
Saved transition matrix for 0479_3
Processing file: /Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/05_Debbies_gaze/0479_5.csv
Saved transition matrix for 0479_5
Processing file: /Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/05_Debbies_gaze/1754_1.csv
Saved transition matrix for 1754_1
Processing file: /Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/05_Debbies_gaze/1754_2.csv
Saved transition matrix for 1754_2
Processing file: /Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/05_Debbies_gaze/1754_3.csv
Saved transition matrix for 1754_3
Processing file: /Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/05_Debbies_gaze/1754_4.csv
Saved transition matrix for 1754_4
Processing file: /Volumes/T

Saved transition matrix for 5531_1
Processing file: /Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/05_Debbies_gaze/5531_3.csv
Saved transition matrix for 5531_3
Processing file: /Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/05_Debbies_gaze/5531_4.csv
Saved transition matrix for 5531_4
Processing file: /Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/05_Debbies_gaze/5531_5.csv
Saved transition matrix for 5531_5
Processing file: /Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/05_Debbies_gaze/5741_1.csv
Saved transition matrix for 5741_1
Processing file: /Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/05_Debbies_gaze/5741_2.csv
Saved transition matrix for 5741_2
Processing file: /Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/05_Debbies_gaze/5741_3.csv
Saved transition matrix for 5741_3
Processing file: /Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/05_Debbies_gaze/5741_4.csv
Saved transition matrix for

Saved transition matrix for 9502_2
Processing file: /Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/05_Debbies_gaze/9502_3.csv
Saved transition matrix for 9502_3
Processing file: /Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/05_Debbies_gaze/9502_5.csv
Saved transition matrix for 9502_5
Processing file: /Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/05_Debbies_gaze/9601_1.csv
Saved transition matrix for 9601_1
Processing file: /Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/05_Debbies_gaze/9601_2.csv
Saved transition matrix for 9601_2
Processing file: /Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/05_Debbies_gaze/9601_3.csv
Saved transition matrix for 9601_3
Processing file: /Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/05_Debbies_gaze/9601_4.csv
Saved transition matrix for 9601_4
Processing file: /Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/05_Debbies_gaze/9601_5.csv
Saved transition matrix for