## Importing Libraries

In [1]:
#! pip install pandas
#! pip install pathlib
#! pip install numpy
#! pip install matplotlib

In [2]:
# Importing relevant libraries
import pandas as pd
import numpy as np
from pathlib import Path
import os
import matplotlib
import matplotlib.pyplot as plt
import math

## Defining Functions

In [3]:
def remove_first(x):
  x =  x[1:]
  x = float(x)
  return x

def remove_last(x):
  x = x[:(len(x)-1)]
  x = float(x)
  return x

def revalue(x):
  if x >180:
    x= -(360-x)
  return x

In [4]:
def process_data(filepath):
  
  b = pd.read_csv(filepath)

  b = b.iloc[3:]
  b = b.reset_index(drop=True)

  b.rename(columns={'Lapsed Time': 'Time'}, inplace=True)
  b.rename(columns={'Target Name': 'Target_Name'}, inplace=True)

  b['Time_Diff'] = b['Time'].diff()
  b['Time_Diff'] = b['Time_Diff'].apply(lambda x: max(0, x))

  b['X Euler Angle'] = b['X Euler Angle'].apply(remove_first)
  b['Z Euler Angle'] = b['Z Euler Angle'].apply(remove_last)

  b['X_A'] = b['X Euler Angle']
  b['Y_A'] = b['Y Euler Angle']
  b['Z_A'] = b['Z Euler Angle']

  b['X_A_Rev'] = b['X Euler Angle'].apply(revalue)
  b['Y_A_Rev'] = b['Y Euler Angle'].apply(revalue)
  b['Z_A_Rev'] = b['Z Euler Angle'].apply(revalue)

  b['X_A_Rev_Diff'] = abs(b['X_A_Rev'].diff())
  b['Y_A_Rev_Diff'] = abs(b['Y_A_Rev'].diff())
  b['Z_A_Rev_Diff'] = abs(b['Z_A_Rev'].diff())

  # Removing all rows that say Mission Complete
  label_to_remove = 'Mission complete'
  b = b[b['Target_Name'] != label_to_remove]

  b.fillna(0, inplace=True)

  b_df = b[['Target_Name', 'X', 'Z', 'X_A', 'X_A_Rev', 'X_A_Rev_Diff', 'Y_A', 'Y_A_Rev', 'Y_A_Rev_Diff', 'Z_A', 'Z_A_Rev', 'Z_A_Rev_Diff', 'Time', 'Time_Diff']].copy()
  
  return b_df 

In [22]:
data = process_data('/Users/yasminebassil/Desktop/Data/BNC01/Saved_data_BNC01_t1.csv')
auto = data[data['Target_Name'] == 'Automobile shop']

pos_grouped = auto.groupby(['X', 'Z'])

last_times = pos_grouped['Time'].apply(lambda x: x.iloc[-1])

mean_time_spent = last_times.mean()

In [5]:
def get_ct(data):
    
    grouped = data.groupby('Target_Name')
    results = pd.DataFrame(columns=['Target_Name', 'Total_Time'])

    # Iterate through each group
    for label, group_data in grouped:
        sum = group_data['Time_Diff'].sum()

        # Store in a df with this iteration's results
        iteration_df = pd.DataFrame({'Target_Name': [label], 'Total_Time': [sum]})

        # Add to total results df
        results = pd.concat([results, iteration_df], ignore_index=True)
    
    results = results.set_index('Target_Name', drop=True)
    return(results)

In [6]:
def get_ori_ct(data):
    
    grouped = data.groupby('Target_Name')
    results = pd.DataFrame(columns=['Target_Name', 'Orientation_Time'])

    # Iterate through each group
    for label, group_data in grouped:
        # Initialize a count variable
        count = 0

        # Iterate through the DataFrame
        for index, row in group_data.iterrows():
            if row['X'] == 0 and row['Z'] == -4.1:
                count += row['Time_Diff']

        # Store in a df with this iteration's results
        iteration_df = pd.DataFrame({'Target_Name': [label], 'Orientation_Time': [count]})

        # Add to total results df
        results = pd.concat([results, iteration_df], ignore_index=True)
    
    results = results.set_index('Target_Name', drop=True)
    return(results)


In [7]:
def get_nav_ct(data):
    
    grouped = data.groupby('Target_Name')
    results = pd.DataFrame(columns=['Target_Name', 'Navigation_Time'])

    # Iterate through each group
    for label, group_data in grouped:
        # Initialize a count variable
        count = 0

        # Iterate through the DataFrame
        for index, row in group_data.iterrows():
            if row['X'] == 0 and row['Z'] == -4.1:
                continue
            else:
                count += row['Time_Diff']

        # Store in a df with this iteration's results
        iteration_df = pd.DataFrame({'Target_Name': [label], 'Navigation_Time': [count]})

        # Add to total results df
        results = pd.concat([results, iteration_df], ignore_index=True)
    
    results = results.set_index('Target_Name', drop=True)
    return(results)

In [8]:
def get_dt(data):
  
  # Selecting columns
  df = data.drop_duplicates(subset = ['X','Z','Target_Name'],keep = ('first'))

  # Creating columns that we need
  df['X_Diff'] = df['X'].diff()
  df['Z_Diff'] = df['Z'].diff()
  df = df[['Target_Name', 'X_Diff', 'Z_Diff','Time_Diff']].copy()

  df['Distance'] = np.sqrt((df['X_Diff']**2) + (df['Z_Diff']**2))
  dt = df.groupby(['Target_Name'])['Distance'].sum()

  dt = pd.DataFrame(dt)

  return(dt)

In [9]:
def get_dwell(data):
    # Group the DataFrame by 'Label'
    grouped = data.groupby('Target_Name')
            
    results = pd.DataFrame(columns=['Target_Name', 'Mean_Dwell'])

    # Iterate through each group
    for label, group_data in grouped:

        # Group the DataFrame by 'X' and 'Y' coordinates
        pos_grouped = group_data.groupby(['X', 'Z'])

        last_times = pos_grouped['Time'].apply(lambda x: x.iloc[-1])
        mean_time_spent = last_times.mean()

        # Store in a df with this iteration's results
        iteration_df = pd.DataFrame({'Target_Name': [label], 'Mean_Dwell': [mean_time_spent]})

        # Add to total results df
        results = pd.concat([results, iteration_df], ignore_index=True)

    results = results.set_index('Target_Name', drop=True)
    return(results)

In [10]:
def get_hmvar(data):
    
    target_names = set(data['Target_Name'])

    # Column name for which you want to calculate the sum
    x_name = 'X_A_Rev_Diff'
    y_name = 'Y_A_Rev_Diff'
    z_name = 'Z_A_Rev_Diff'
    time_name = 'Time_Diff'

    # Initialize a dictionary to store the sum for each label
    label_xsums = {}
    label_ysums = {}
    label_zsums = {}
    time_sums = {}

    # Iterate through the DataFrame
    for index, row in data.iterrows():
        label = row['Target_Name']
        value_x = row[x_name]
        value_y = row[y_name]
        value_z = row[z_name]
        time = row[time_name]
        
        # Add the value to the sum for the corresponding label
        if label in label_xsums:
            label_xsums[label] += abs(value_x)
            label_ysums[label] += abs(value_y)
            label_zsums[label] += abs(value_z)
            time_sums[label] += time
        else:
            label_xsums[label] = abs(value_x)
            label_ysums[label] = abs(value_y)
            label_zsums[label] = abs(value_z)
            time_sums[label] = time

    var_x = {}
    var_y = {}
    var_z = {}

    for key in label_xsums:
        if key in time_sums:
            var_x[key] = label_xsums[key] / time_sums[key]

    for key in label_ysums:
        if key in time_sums:
            var_y[key] = label_ysums[key] / time_sums[key]

    for key in label_zsums:
        if key in time_sums:
            var_z[key] = label_zsums[key] / time_sums[key]

    var_x = pd.DataFrame(var_x.items(), columns=['Target_Name', 'Var_X'])
    var_x = var_x.set_index('Target_Name', drop=True)
    var_y = pd.DataFrame(var_y.items(), columns=['Target_Name', 'Var_Y'])
    var_y = var_y.set_index('Target_Name', drop=True)
    var_z = pd.DataFrame(var_z.items(), columns=['Target_Name', 'Var_Z'])
    var_z = var_z.set_index('Target_Name', drop=True)

    results = pd.concat([var_x, var_y, var_z], axis=1)
  
    return(results)

In [11]:
def get_teleport(data):
    grouped = data.groupby('Target_Name')
    results = pd.DataFrame(columns=['Target_Name', 'Teleportations'])

    # Iterate through each group
    for label, group_data in grouped:
    # Create an empty set to store unique positions
        unique_positions = set()

        # Iterate through the DataFrame
        for index, row in group_data.iterrows():
            x, y = row['X'], row['Z']
            
            # Add the (X, Y) tuple to the set
            unique_positions.add((x, y))

        # Calculate the number of unique positions
        num_unique_positions = len(unique_positions)

        # Store in a df with this iteration's results
        iteration_df = pd.DataFrame({'Target_Name': [label], 'Teleportations': [num_unique_positions]})

        # Add to total results df
        results = pd.concat([results, iteration_df], ignore_index=True)

    results = results.set_index('Target_Name', drop=True)
    return(results)


## Getting All Data

In [12]:
# Setting filepath to all data folders
fp_data = '/Users/yasminebassil/Desktop/Data/'

# Reading & storing demographic information
demographics = pd.read_csv(fp_data + 'demographics.csv')

sub_folders = []
for i in os.listdir(fp_data):
    if os.path.isdir(os.path.join(fp_data,i)) and 'BNC' in i:
        sub_folders.append(i)

num_subjects = len(sub_folders)
sub_folders = sorted(sub_folders)
print(sub_folders)

['BNC01', 'BNC02', 'BNC03', 'BNC04', 'BNC05', 'BNC07', 'BNC08', 'BNC09', 'BNC11', 'BNC12', 'BNC23', 'BNC26', 'BNC27', 'BNC28', 'BNC29', 'BNC30', 'BNC31', 'BNC32', 'BNC33', 'BNC34', 'BNC35', 'BNC36', 'BNC37']


In [13]:
# Iterating through participant folders and conducting analyses
for PID in sub_folders:
    
    # Filepath to each participant folder
    fp_PID = '/Users/yasminebassil/Desktop/Data/' + PID

    for i in range(1,4):
        fp_data = fp_PID +'/Saved_data_'+ PID + '_t' + str(i) + '.csv'

        data = process_data(fp_data)

        ct_results = get_ct(data)
        ori_ct_results = get_ori_ct(data)
        nav_ct_results = get_nav_ct(data)
        dt_results = get_dt(data)
        dwell_results = get_dwell(data)
        var_results = get_hmvar(data)
        teleport_results = get_teleport(data)
        speed_results = dt_results['Distance'] / ct_results['Total_Time']
        speed_results = speed_results.rename('Speed')

        final = pd.concat([ct_results, ori_ct_results, nav_ct_results, dt_results, speed_results, dwell_results, var_results, teleport_results], axis=1)

        # Define the desired order of indices
        new_order = ['Automobile shop', 'Police station ', 'Fire Station', 'Bank', 'Pawn Shop', 'Pizzeria', 'Quattroki Restaurant', 'High School']

        # Reorganize the DataFrame based on the new index order
        final = final.reindex(new_order)
        
        block = f"b{i}"
        final.to_csv(fp_PID + '/' + block + '_results.csv', index=True)
    
    # Filepath to each participant folder
    control = fp_PID + '/Saved_control_data_' + PID + '.csv'

    control = process_data(control)

    ct_results = get_ct(control)
    ori_ct_results = get_ori_ct(control)
    nav_ct_results = get_nav_ct(control)
    dt_results = get_dt(control)
    dwell_results = get_dwell(control)
    var_results = get_hmvar(control)
    teleport_results = get_teleport(control)
    speed_results = dt_results['Distance'] / ct_results['Total_Time']
    speed_results = speed_results.rename('Speed')

    control_final = pd.concat([ct_results, ori_ct_results, nav_ct_results, dt_results, speed_results, dwell_results, var_results, teleport_results], axis=1)

    # Define the desired order of indices
    new_order = ['Automobile shop', 'Police station ', 'Fire Station', 'Bank', 'Pawn Shop', 'Pizzeria', 'Quattroki Restaurant', 'High School']

    # Reorganize the DataFrame based on the new index order
    control_final = control_final.reindex(new_order)

    # Save results
    control_final.to_csv(fp_PID + '/control_final_results.csv', index=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['X_Diff'] = df['X'].diff()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Z_Diff'] = df['Z'].diff()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['X_Diff'] = df['X'].diff()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value 