# PathFinding Data PreProcessing
* find subject data
* find experiment and eyetracking files
* combine experiment files for single subject to consolidate
* combine eyetracking files for single subject and pre-process
* performance analysis of PathFinding experiments

## Configuration

In [1]:
# General configuration
import os

# data_directory: str
#     Path to a directory to store data.
data_directory = '.'

# install_missing_packages: bool
#     A flag indicating if missing packages should be automatically installed
install_missing_packages = True

# use_conda: bool
#     A flag indicating if conda should be used for software installation.
#     If False, pip will be used. The default is to use conda if jupyter
#     is run in a conda environment.
use_conda = 'CONDA_EXE' in os.environ

## Checking for missing packages

In [2]:
import importlib

def check_package(package, pip_pkg: str = None, conda_pkg: str = None):
    """Check if a given package is installed. If missing install
    it (if global flag `install_missing_packages` is True) either with
    pip or with conda (depending on `use_conda`).
    """
    if importlib.util.find_spec(package) is not None:
        return  # ok, package is already installed

    if not install_missing_packages:
        raise RuntimeError(f"{package} is not installed!")

    if use_conda:
        import conda.cli
        conda.cli.main('conda', 'install',  '-y', conda_pkg or package)
    else:
        import subprocess
        import sys            
        subprocess.check_call([sys.executable, '-m', 'pip', 'install', pip_pkg or package])
        
# This is to exit cells without error tracebacks (cosmetic purpose)
class StopExecution(Exception):
    def _render_traceback_(self):
        pass

## Creating the required environment (skip if already done)

Running the following cell will create a file graphs.yml that can be used to setup a conda environment containing the required packages. If you already downloaded the file from my GitHub, skip the next cell and create the env directly from it.

In [3]:
%%writefile graphs.yml
name: graphs
channels:
  - conda-forge
  - defaults
dependencies:
  - python=3.6
  - jupyter
  - imageio
  - imageio-ffmpeg
  - matplotlib
  - scikit-image
  - opencv
  - networkx
  - pandas
  - statsmodels

Writing graphs.yml


### Environment Creation
To create the environment, open the terminal, go to the directory where you stored the graphs.yml file (the directory of the notebook) and type
conda env create -f graphs.yml
After running this command you have to activate the environment (Linux/MacOS: conda activate graphs, Windows: activate graphs) and then reopen the notebook in that environment.

## Main Part

### Imports and directory information

In [1]:
import os
import cv2
import json
import numpy as np
import re
import matplotlib.pyplot as plt
import pandas as pd
import networkx as nx
import glob
import scipy.cluster.vq as clusters
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

#from sklearn.preprocessing import normalize
from pandas.plotting import autocorrelation_plot as AC_plot 
from statsmodels.graphics import tsaplots
from statsmodels.tsa.stattools import acf
from skimage.filters import gaussian
from mpl_toolkits.mplot3d import Axes3D 
from matplotlib.colors import LinearSegmentedColormap
import time
import math

In [2]:
OG_DATA_PATH = './'

condition = 'Dyadic' # Single, Dyadic, SingleC
DATA_PATH = './Data {}/'.format(condition)
LAB_DATA_PATH = "D:/StefanSpaRe/DataBackUp/seahavenlab/"
LAB_DATA_PATH += condition.lower() + '/';
DATA_PATH = LAB_DATA_PATH # Comment out when not in lab

# print(LAB_DATA_PATH);
RESSOURCES_PATH = './Ressources/'
#houselist 
house_file = RESSOURCES_PATH + 'building_collider_list.csv'
try:
    houselist = pd.read_csv(house_file)
except:
    print('HouseList could not be loaded!')

PROCESSED_DATA_PATH = './Results/' + condition + '/'
PROCESSED_DATA_FOLDER = sorted([f for f in os.listdir(PROCESSED_DATA_PATH) if not f.startswith('.')], key=str.lower)

# Extracting all subject IDs from the data folder
# Getting the Folder without hidden files in ascending order 
DATA_FOLDER = sorted([f for f in os.listdir(DATA_PATH) if not f.startswith('.')], key=str.lower)

subIDs = []
for sub in DATA_FOLDER:
    if sub[0].isdigit():
        subIDs.append(int(sub[0:4]))
    else:
        pass
subIDs = np.unique(subIDs)


#subIDs = [1023] # remove to do for all subIDs
print(subIDs)

[1004 1005 1008 1011 1013 1017 1018 1021 1022 1023 1054 1055 1057 1058
 1069 1074]


In [4]:
# Store Time limits for each path condition
path_time_limits_A = [300, 305, 234, 332, 268, 394, 152, 383, 332];
target_house_names_A = ['windmill', 'book', 'shark', 'bear', 'no graffiti', \
                        'banque strechnitz', 'foxes', 'johny patisserie', \
                       'gorilla', 'tailor']

target_house_names_B = target_house_names_A.copy();
target_house_names_B.reverse();
path_time_limits_B = path_time_limits_A.copy();
path_time_limits_B.reverse();

path_time_limits_A = [15] + path_time_limits_A
path_time_limits_B = [15] + path_time_limits_B


building_coords = {}

building_coords["windmill"] = (300.0, -190.7) # TB10 not so straigtforward path
building_coords["book"] = (-142.43, -62.66) # TB41 not so straigtforward path
building_coords["shark"] = (-69.552, 66.1) # TB39 straightforward path
building_coords["bear"] = (-192.46, 173.510) # TB7 not so straightforward path
building_coords["no graffiti"] = (-262.52, -115.752) # B186 mostly sraightforward
building_coords["banque strechnitz"] = (32.923, -158.66) # TB29 not so straightforward
building_coords["foxes"] = (255.062, 18.984) # TB1 mostly straightforward
building_coords["johny patisserie"] = (375.813, -120.97) # TB9  mostly straightforward
building_coords["gorilla"] = (45.867, 122.198) # TB4 not so straightforward path
building_coords["tailor"] = (-142.43, -62.66) # TB53

path_dists_A = []

for i in range(len(target_house_names_A)-1):
    h1_coord = np.array(building_coords[target_house_names_A[i]])
    h2_coord = np.array(building_coords[target_house_names_A[i + 1]])
    print(target_house_names_A[i], target_house_names_A[i + 1], np.linalg.norm(h1_coord - h2_coord))
    path_dists_A.append(np.linalg.norm(h1_coord - h2_coord))

path_dists_B = path_dists_A.copy();
path_dists_B.reverse();

windmill book 460.5850046408372
book shark 147.9538457898273
shark bear 163.2277077091999
bear no graffiti 297.6254495905886
no graffiti banque strechnitz 298.54256432374933
banque strechnitz foxes 284.4347483290324
foxes johny patisserie 184.84568189979444
johny patisserie gorilla 409.8719789641639
gorilla tailor 263.87163616614805


In [3]:
import random



# Leader : follower map
dyadic_pair_map = {
    '1021' : '1022',
    '1023' : '1013',
    '1005' : '1055',
    '1074' : '1069',
    '1008' : '1058',
    '1054' : '1004',
    '1011' : '1017',
    '1018' : '1057'
}

# Generate random session IDs to group leader and follower from same session easily in analysis
dyadic_session_ids = {}
for leader in dyadic_pair_map.keys():
    _id = random.randint(1000,9999);
    dyadic_session_ids[leader] = _id;
    dyadic_session_ids[dyadic_pair_map[leader]] = _id;
    
print(dyadic_session_ids);

{'1021': 6904, '1022': 6904, '1023': 8049, '1013': 8049, '1005': 3709, '1055': 3709, '1074': 8351, '1069': 8351, '1008': 9829, '1058': 9829, '1054': 1768, '1004': 1768, '1011': 4920, '1017': 4920, '1018': 7882, '1057': 7882}


<img src='Pics/paths.png'>

# Performance Analysis

## Calculating Distances for Each Path from Eyetracking File

In [11]:
# Function to take subject ID and get distance walked for each path from subject's eyetracking files

def get_covered_distances_2(subID, condition):
    # TODO: have to think about no-hits for this?
#     return [0] * 10;
    
    # Get the consolidated hitpoints file which has HMD position data for each hitpoint
    # If hitpoint file not found, return 0 distance for all paths
    PROCESSED_DATA_PATH = './Results/' + condition + '/'
    try:
        filename = PROCESSED_DATA_PATH + str(subID) + '_' + condition +"_CompleteHitpoints.csv";
        data = pd.read_csv(filename);
    except:
        print('Could not read ' + filename);
        return[0] * 10;

    # Column names for intermediate DF and final DF respectively
    condense_col_names = ['PathNumber', 'xsqDiff', 'zsqDiff', 'eucDist']
    covered_dist_col_names = ['eucDist'];
    
    # Remove all second ordinal hits (HMD position is NA in consolidated hitpoint file)
    data = data.dropna(subset=['hmdPosition.x','hmdPosition.y','hmdPosition.z']);
    data = data.reset_index(drop=True)
    data['hmdPosition.z'] = data['hmdPosition.z'].astype(float);
    
    test_2df = data.copy()
    # Create intermediate and Final DF respectively
    condense_df = pd.DataFrame(index=range(len(data)), columns=condense_col_names)
    cov_dist_df = pd.DataFrame(index=range(10), columns=covered_dist_col_names);
    
    # Transfer over path number as is
    condense_df.PathNumber = data.PathNumber
    
    # Get the difference in the X- and Z-coordinates between each consecutive point 
    # Y-coordinate corresponds to HMD height and is ignored for map-distance covered
    condense_df.xsqDiff = data['hmdPosition.x'] - data['hmdPosition.x'].shift();
    condense_df.zsqDiff = data['hmdPosition.z'] - data['hmdPosition.z'].shift();
    
    test_df = condense_df.copy()
    
    # Square the x- and z- differences between each consecutive point
    condense_df.xsqDiff = condense_df.xsqDiff ** 2;
    condense_df.zsqDiff = condense_df.zsqDiff ** 2;
 
    # add the squared distances and take square root to get euclidean distance
    #    between each consecutive point
    condense_df.eucDist = (condense_df.xsqDiff + condense_df.zsqDiff) ** 0.5;
    
    # sum up the total euclidean distance grouped by path number to get distance 
    #    covered for each path
    cov_dist_df['eucDist'] = condense_df.groupby(['PathNumber'])['eucDist'].sum()
    
    # Return distances as list
    return list(cov_dist_df.eucDist)

get_covered_distances_2('1023', 'Single')


[20.246097486375024,
 1319.0949930328525,
 300.2472684556124,
 172.2245698054245,
 1919.3240861635531,
 346.1269282883433,
 520.6334534541306,
 214.3287610941289,
 489.09540160587665,
 442.6263057041419]

In [5]:
# Function to take subject ID and get distance walked for each path from subject's eyetracking files
# This function separates the path data before calculating distance between points, thereby removing 
# calculation of differences between two data points which belong to different paths (distance between red zone and 
#  subsequent green zone)
def get_covered_distances_3(subID, condition):
    # TODO: have to think about no-hits for this?
#     return [0] * 10;
    
    # Get the consolidated hitpoints file which has HMD position data for each hitpoint
    # If hitpoint file not found, return 0 distance for all paths
    PROCESSED_DATA_PATH = './Results/' + condition + '/'
    try:
        filename = PROCESSED_DATA_PATH + str(subID) + '_' + condition +"_CompleteHitpoints.csv";
        data = pd.read_csv(filename);
    except:
        print('Could not read ' + filename);
        return[0] * 10;

    # Column names for intermediate DF and final DF respectively
    condense_col_names = ['PathNumber', 'xsqDiff', 'zsqDiff', 'eucDist']
    covered_dist_col_names = ['eucDist'];
    
    # Remove all second ordinal hits (HMD position is NA in consolidated hitpoint file)
    data = data.dropna(subset=['hmdPosition.x','hmdPosition.y','hmdPosition.z']);
    data = data.reset_index(drop=True)
    data['hmdPosition.z'] = data['hmdPosition.z'].astype(float);
    
    test_2df = data.copy()
    # Create intermediate and Final DF respectively
    condense_df = pd.DataFrame(columns=condense_col_names)
    cov_dist_df = pd.DataFrame(index=range(10), columns=covered_dist_col_names);
    
    
    # Get the difference in the X- and Z-coordinates between each consecutive point 
    # Y-coordinate corresponds to HMD height and is ignored for map-distance covered
    #print(data[data.PathNumber==1])
    for i in range(10):
        
        filtered_df = data[data.PathNumber==i].reset_index();
        temp_df = pd.DataFrame(index=range(len(filtered_df)), columns=condense_col_names)
        temp_df.PathNumber = filtered_df.PathNumber
        temp_df.xsqDiff = filtered_df['hmdPosition.x'] - filtered_df['hmdPosition.x'].shift();
        temp_df.zsqDiff = filtered_df['hmdPosition.z'] - filtered_df['hmdPosition.z'].shift();
        temp_df = temp_df.dropna(subset=["xsqDiff", "zsqDiff"])
        condense_df = condense_df.append(temp_df, ignore_index=True)
    
    test_df = condense_df.copy()
    
    # Square the x- and z- differences between each consecutive point
    condense_df.xsqDiff = condense_df.xsqDiff ** 2;
    condense_df.zsqDiff = condense_df.zsqDiff ** 2;
 
    # add the squared distances and take square root to get euclidean distance
    #    between each consecutive point
    condense_df.eucDist = (condense_df.xsqDiff + condense_df.zsqDiff) ** 0.5;
    
    # sum up the total euclidean distance grouped by path number to get distance 
    #    covered for each path
    cov_dist_df['eucDist'] = condense_df.groupby(['PathNumber'])['eucDist'].sum()
    
    # Return distances as list
    return list(cov_dist_df.eucDist)

get_covered_distances_3('1023', 'Single')


[20.246097486375024,
 1313.6518207494814,
 296.1762350141909,
 168.4622204448431,
 1916.1847935678902,
 342.51937901707055,
 514.9431120922344,
 208.6479978623132,
 486.3522752343006,
 437.92203958766544]

In [5]:
# Function to take subject ID and get distance walked for each path from subject's eyetracking files
# This function separates the path data before calculating distance between points, thereby removing 
# calculation of differences between two data points which belong to different paths (distance between red zone and 
#  subsequent green zone)
# This function also returns the distance covered after the guide appeared for the participant, should the participant
#   have failed a certain path
def get_covered_distances(subID, condition, path_time_limits):
    
    # Get the consolidated hitpoints file which has HMD position data for each hitpoint
    # If hitpoint file not found, return 0 distance for all paths
    PROCESSED_DATA_PATH = './Results/' + condition + '/'
    try:
        filename = PROCESSED_DATA_PATH + str(subID) + '_' + condition +"_CompleteHitpoints.csv";
        data = pd.read_csv(filename);
    except:
        print('Could not read ' + filename);
        return[0] * 10;

    # Column names for intermediate DF and final DF respectively
    condense_col_names = ['PathNumber', 'xsqDiff', 'zsqDiff', 'eucDist']
    covered_dist_col_names = ['eucDist'];
    
    # Remove all second ordinal hits (HMD position is NA in consolidated hitpoint file)
    data = data.dropna(subset=['hmdPosition.x','hmdPosition.y','hmdPosition.z']);
    data = data.reset_index(drop=True)
    data['hmdPosition.z'] = data['hmdPosition.z'].astype(float);
    
    test_2df = data.copy()
    # Create intermediate and Final DF respectively
    condense_df = pd.DataFrame(columns=condense_col_names)
    late_df = pd.DataFrame(columns=condense_col_names)
    cov_dist_df = pd.DataFrame(index=range(10), columns=covered_dist_col_names);
    late_dist_df = pd.DataFrame(index=range(10), columns=covered_dist_col_names);
    
    
    # Get the difference in the X- and Z-coordinates between each consecutive point 
    # Y-coordinate corresponds to HMD height and is ignored for map-distance covered
    #print(data[data.PathNumber==1])
    for i in range(10):
        
        filtered_df = data[data.PathNumber==i].reset_index();
        
        temp_df = pd.DataFrame(index=range(len(filtered_df)), columns=condense_col_names)
        temp_df.PathNumber = filtered_df.PathNumber
        temp_df.xsqDiff = filtered_df['hmdPosition.x'] - filtered_df['hmdPosition.x'].shift();
        temp_df.zsqDiff = filtered_df['hmdPosition.z'] - filtered_df['hmdPosition.z'].shift();
        temp_df = temp_df.dropna(subset=["xsqDiff", "zsqDiff"])
        condense_df = condense_df.append(temp_df, ignore_index=True)
        
        # Calculate distance covered (approximately) after guide appeared
        start_time = filtered_df["timeStampDataPointStart"][0];
        
        
        late_path_df = filtered_df[filtered_df["timeStampDataPointStart"] - start_time > path_time_limits[i]];
        late_path_df = late_path_df.reset_index(drop=True)
        
        # If path time limit is not überschritten worden
        if len(late_path_df) == 0: 
            late_df = late_df.append({ "PathNumber" : i, "xsqDiff" : 0, "zsqDiff" : 0}, ignore_index=True);
            continue; 
            
        late_temp_df = pd.DataFrame(index=range(len(late_path_df)), columns=condense_col_names)
        late_temp_df.PathNumber = late_path_df.PathNumber
        late_temp_df.xsqDiff = late_path_df['hmdPosition.x'] - late_path_df['hmdPosition.x'].shift();
        late_temp_df.zsqDiff = late_path_df['hmdPosition.z'] - late_path_df['hmdPosition.z'].shift();
        late_temp_df = late_temp_df.dropna(subset=["xsqDiff", "zsqDiff"])
        late_df = late_df.append(late_temp_df, ignore_index=True)
    
    # Square the x- and z- differences between each consecutive point
    condense_df.xsqDiff = condense_df.xsqDiff ** 2;
    condense_df.zsqDiff = condense_df.zsqDiff ** 2;
    
    late_df.xsqDiff = late_df.xsqDiff ** 2;
    late_df.zsqDiff = late_df.zsqDiff ** 2;
 
    # add the squared distances and take square root to get euclidean distance
    #    between each consecutive point
    condense_df.eucDist = (condense_df.xsqDiff + condense_df.zsqDiff) ** 0.5;
    late_df.eucDist = (late_df.xsqDiff + late_df.zsqDiff) ** 0.5;
    
    # sum up the total euclidean distance grouped by path number to get distance 
    #    covered for each path
    cov_dist_df['eucDist'] = condense_df.groupby(['PathNumber'])['eucDist'].sum()
    late_dist_df['eucDist'] = late_df.groupby(['PathNumber'])['eucDist'].sum()
    
    # Return distances as list
    return list(cov_dist_df.eucDist), list(late_dist_df.eucDist)

get_covered_distances('1004', 'Single', path_time_limits_B)


([23.21802271614962,
  1755.0379098316687,
  1966.572535284299,
  1081.5331969032616,
  552.5488132809445,
  1209.9983231336776,
  2052.430426263223,
  173.840104625947,
  1770.585235977393,
  496.3669776068633],
 [19.805844963532945,
  478.15989301341455,
  506.2488888805683,
  431.41465521512816,
  0.0,
  123.50999041026809,
  523.8054285726294,
  0.0,
  409.925404333586,
  0.0])

## Condensing Each Subject Experiment Files into Performance Table on Each Path

In [6]:
# Columns for consolidated data from all experiment files
pA_col_names = ['SubjectID', 'Condition', 'Dyadic?', 'Leader?', 'SessionID'];
for i in range(1,10):
    path_prefix = "P"+str(i);
    
    pA_col_names.append(path_prefix + ":Success")
    pA_col_names.append(path_prefix + ":TimeSec")
    pA_col_names.append(path_prefix + ":Time After Guide")
    pA_col_names.append(path_prefix + ":Distance Covered")
    pA_col_names.append(path_prefix + ":Dist After Fail")

pA_col_names.append("#Success");


In [7]:
# Create analysis data frame
perf_anal_df = pd.DataFrame(columns=pA_col_names);

# Loop through all subjects
for subject in subIDs:
    
    
    
    # Read the experiment data for the subject
    sub_expt_data = {}
    sub_expt_file_name = PROCESSED_DATA_PATH + str(subject) + "_"+condition+"Pathfinding_Final.json";
    try:
        with open(sub_expt_file_name, 'r') as fp:
            sub_expt_data = json.load(fp);
    except:
        print("Could not read experiment file for subject " + str(subject));
        continue;
    
    print("Running for subject " + str(subject))
    # Create the initial dictionary to store the experiment performance information for current subject 
    sub_per_data = {'SubjectID' : sub_expt_data['ParticipantID'],
                    'Condition' : 'B' if sub_expt_data['PathsReversed'] else 'A',
                    'Dyadic?' : sub_expt_data['IsDyadic'],
                    'Leader?' : sub_expt_data['IsLeader']
                   };
    
    # Store randomly generated (see cell somewher above) session ID if dyadic
    if sub_per_data['Dyadic?']: 
        sub_per_data['SessionID'] = dyadic_session_ids[sub_per_data['SubjectID']];
    else:
        sub_per_data['SessionID'] = 'NaN';
    
    # Get the path time limits for the current condition
    path_time_limits = path_time_limits_B if sub_expt_data['PathsReversed'] else path_time_limits_A;
    num_success = 0;
    
    # Get distance covered per path for given subject
    covered_distances, late_distances = get_covered_distances(subject, condition, path_time_limits);
    
    # for each path (ignoring the tutorial path 0)
    for i in range(1,10):
        # Get the trial data from the experiment file for the current path
        current_trial_data = sub_expt_data['TrialData'][i];
        
        # Get the time for the path by subtracting start time from end time
        sub_per_data["P"+str(i)+":TimeSec"] = current_trial_data['EndMachineTimeStamp'] \
                                                - current_trial_data['StartMachineTimeStamp'];
        
        # Path was a success for subject if they did not use the path guide
        sub_per_data["P"+str(i)+":Success"] = not current_trial_data['GuideUsed'];
        
        # Count number of successes
        if sub_per_data["P"+str(i)+":Success"]: 
            num_success += 1;
        
        # Get the time after the guide appeared
        sub_per_data["P"+str(i)+":Time After Guide"] = sub_per_data["P"+str(i)+":TimeSec"] - path_time_limits[i];
        
        # Store the distance covered for the path
        sub_per_data["P"+str(i)+":Distance Covered"] = covered_distances[i];
        
        sub_per_data["P"+str(i)+":Dist After Fail"] = late_distances[i];
        
        
    
    # Store the number of successes
    sub_per_data['#Success'] = num_success;
    
    # Normalize collected data and append to dataframe
    sub_df = pd.json_normalize(sub_per_data);
    perf_anal_df = perf_anal_df.append(sub_df);

print('Analysis Complete');

Running for subject 1004
Running for subject 1005
Running for subject 1008
Running for subject 1011
Running for subject 1013
Running for subject 1017
Running for subject 1018
Running for subject 1021
Running for subject 1022
Running for subject 1023
Running for subject 1054
Running for subject 1055
Running for subject 1057
Running for subject 1058
Running for subject 1069
Running for subject 1074
Analysis Complete


In [122]:
# perf_anal_df

In [92]:
# Save to Single
perf_anal_df.to_csv(PROCESSED_DATA_PATH + 'Single_Performance_Analysis.csv', index=False);


In [8]:
# Save to Dyadic
perf_anal_df.to_csv(PROCESSED_DATA_PATH + 'Dyadic_Performance_Analysis.csv', index=False);


In [46]:
# Save to SingleC
perf_anal_df.to_csv(PROCESSED_DATA_PATH + 'SingleC_Performance_Analysis.csv', index=False);


In [3]:
# Display Collected Single Experiment Data
anal = pd.read_csv('./Results/Single/Single_Performance_Analysis.csv');
anal

Unnamed: 0,SubjectID,Condition,Dyadic?,Leader?,SessionID,P1:Success,P1:TimeSec,P1:Time After Guide,P1:Distance Covered,P1:Dist After Fail,...,P8:TimeSec,P8:Time After Guide,P8:Distance Covered,P8:Dist After Fail,P9:Success,P9:TimeSec,P9:Time After Guide,P9:Distance Covered,P9:Dist After Fail,#Success
0,1004,B,False,False,,False,449.037666,117.037666,1755.03791,478.159893,...,390.436826,85.436826,1770.585236,409.925404,True,113.507971,-186.492029,496.366978,0.0,3
1,1005,A,False,False,,True,158.458649,-141.541351,470.272979,0.0,...,182.991541,-200.008459,713.965772,0.0,False,485.03952,153.03952,1822.081722,721.341452,6
2,1008,B,False,False,,True,89.310862,-242.689138,442.416783,0.0,...,90.587071,-214.412929,445.694201,0.0,True,106.655506,-193.344494,529.413491,0.0,8
3,1010,B,False,False,,False,602.11131,270.11131,1660.381992,48.622561,...,162.011336,-142.988664,766.886526,0.0,True,196.642525,-103.357475,935.244486,0.0,7
4,1011,B,False,False,,True,102.925068,-229.074932,474.958253,0.0,...,368.540978,63.540978,1647.21178,315.342526,True,217.615655,-82.384345,980.786808,0.0,7
5,1013,A,False,False,,True,125.361833,-174.638167,561.43704,0.0,...,136.801561,-246.198439,638.763385,0.0,False,438.497586,106.497586,1847.357822,497.463189,8
6,1017,B,False,False,,True,261.689323,-70.310677,1154.83516,0.0,...,432.588424,127.588424,1944.986397,557.092471,True,289.393871,-10.606129,1384.185154,0.0,4
7,1018,B,False,False,,True,165.25444,-166.74556,749.457232,0.0,...,407.066795,102.066795,1995.233412,510.7769,True,273.094684,-26.905316,1320.094456,0.0,7
8,1019,A,False,False,,True,100.74939,-199.25061,484.189099,0.0,...,114.624974,-268.375026,564.016549,0.0,False,470.427692,138.427692,2318.70726,692.95219,7
9,1021,A,False,False,,False,398.568231,98.568231,1851.263347,486.417396,...,118.187025,-264.812975,571.066503,0.0,False,447.441651,115.441651,2196.429637,567.590842,6


In [4]:
import pandas as pd
# Display Collected Dyadic Experiment Data
anal = pd.read_csv('./Results/Dyadic/Dyadic_Performance_Analysis.csv');
anal[anal["Leader?"]]

Unnamed: 0,SubjectID,Condition,Dyadic?,Leader?,SessionID,P1:Success,P1:TimeSec,P1:Time After Guide,P1:Distance Covered,P1:Dist After Fail,...,P8:TimeSec,P8:Time After Guide,P8:Distance Covered,P8:Dist After Fail,P9:Success,P9:TimeSec,P9:Time After Guide,P9:Distance Covered,P9:Dist After Fail,#Success
1,1005,B,True,True,3709,True,159.901736,-172.098264,468.431516,0.0,...,105.293208,-199.706792,325.758144,0.0,True,173.905779,-126.094221,486.779248,0.0,8
2,1008,A,True,True,9829,True,117.755503,-182.244497,580.952496,0.0,...,117.429136,-265.570864,580.853947,0.0,True,258.906364,-73.093636,1279.019076,0.0,8
3,1011,A,True,True,4920,True,134.858309,-165.141691,606.083715,0.0,...,119.176291,-263.823709,577.076636,0.0,True,290.754437,-41.245563,1430.229198,0.0,7
6,1018,A,True,True,7882,True,263.787934,-36.212066,1241.986397,0.0,...,332.989794,-50.010206,1624.277866,0.0,True,111.840953,-220.159047,492.752037,0.0,9
7,1021,B,True,True,6904,True,101.885863,-230.114137,470.318873,0.0,...,62.674078,-242.325922,300.016425,0.0,True,100.308087,-199.691913,491.61978,0.0,9
9,1023,B,True,True,8049,True,96.119958,-235.880042,462.392495,0.0,...,103.161679,-201.838321,506.234923,0.0,True,95.697365,-204.302635,472.818372,0.0,9
10,1054,A,True,True,1768,True,188.245622,-111.754378,781.676181,0.0,...,107.084548,-275.915452,508.893059,0.0,True,116.14251,-215.85749,537.107423,0.0,8
15,1074,B,True,True,8351,True,148.087081,-183.912919,656.698526,0.0,...,166.638451,-138.361549,763.259155,0.0,True,145.070412,-154.929588,651.380744,0.0,9


In [5]:
# Display Collected Single Control Experiment Data
anal = pd.read_csv('./Results/SingleC/SingleC_Performance_Analysis.csv');
anal

Unnamed: 0,SubjectID,Condition,Dyadic?,Leader?,SessionID,P1:Success,P1:TimeSec,P1:Time After Guide,P1:Distance Covered,P1:Dist After Fail,...,P8:TimeSec,P8:Time After Guide,P8:Distance Covered,P8:Dist After Fail,P9:Success,P9:TimeSec,P9:Time After Guide,P9:Distance Covered,P9:Dist After Fail,#Success
0,1010,A,False,False,,True,103.283452,-196.716548,476.096039,0.0,...,149.935175,-233.064825,641.026161,0.0,False,349.954884,17.954884,1619.421569,91.207877,7
1,1019,B,False,False,,True,121.585427,-210.414573,588.410275,0.0,...,62.756872,-242.243128,304.284589,0.0,True,126.389681,-173.610319,626.570717,0.0,9
2,1056,B,False,False,,True,99.185621,-232.814379,474.406991,0.0,...,376.036466,71.036466,1713.066832,356.673271,True,101.787637,-198.212363,490.77452,0.0,7
3,1068,A,False,False,,True,129.268349,-170.731651,574.518955,0.0,...,117.311292,-265.688708,553.495537,0.0,False,474.19772,142.19772,2198.983763,698.27376,7
4,1072,A,False,False,,True,106.815908,-193.184092,528.177114,0.0,...,113.280268,-269.719732,562.24821,0.0,True,150.568494,-181.431506,670.313338,0.0,8
5,1073,A,False,False,,False,352.014228,52.014228,1698.387879,255.775648,...,175.417123,-207.582877,829.897009,0.0,False,454.078002,122.078002,2196.108475,600.500869,5
6,1075,B,False,False,,True,96.997154,-235.002846,450.548992,0.0,...,58.880095,-246.119905,284.709058,0.0,True,97.31509,-202.68491,476.694398,0.0,9
7,1077,A,False,False,,True,102.892725,-197.107275,480.416954,0.0,...,211.720585,-171.279415,976.365778,0.0,True,100.524325,-231.475675,467.020851,0.0,7
8,1079,B,False,False,,True,294.374525,-37.625475,1409.264233,0.0,...,345.073664,40.073664,1600.080064,195.790279,True,274.738379,-25.261621,1269.599908,0.0,7
9,1080,A,False,False,,True,137.255427,-162.744573,659.431672,0.0,...,125.162962,-257.837038,555.606218,0.0,False,432.332893,100.332893,1953.778391,484.897133,6


## Analysis of Performance per Path

### Collect Path Averages and SDs for Each Condition

In [8]:

# Per Condition, Per Path: Average Time, SD Time, Average Success, Average Time after Fail
# Per Condition Success Average

def get_path_stats(condition1, condition2):
    # Columns for the descriptive statistics that we want to consider
    path_anal_cols = ['PathNum', 'Target', '+ Rate', 'Limit', 'Avg T (+)', 'SD T (+)', 'Avg T (after -)', \
                      'SD T (after -)', 'Avg D (+)', 'SD D (+)', 'PathEucDist', 'Avg D (after -)', 'SD D (after -)', \
                      'Avg S (+)', 'SD S (+)', 'Max S (+)', 'Min S (+)', 'Avg S (after -)', 'SD S (after -)']
    
    target_house_names = target_house_names_A if condition2 == "A" else target_house_names_B
    path_time_limits = path_time_limits_A if condition2 == "A" else path_time_limits_B
    path_dists = path_dists_A if condition2 == "A" else path_dists_B
    
    PROCESSED_DATA_PATH = './Results/' + condition1 + '/'
    # Read single pathfinding performance data
    anal = pd.read_csv(PROCESSED_DATA_PATH + condition1 + '_Performance_Analysis.csv');
    

    # Separate into Conditions A and B
    cond_anal = anal[anal.Condition == condition2]
    
    num_subs = len(cond_anal);
    

    # Get average number of total successes and std across participants for both conditions
    succ_avg = cond_anal['#Success'].mean() 
    anal_succ_std  = cond_anal['#Success'].std()


    # Starting extraction of statistics for condition A

    # Filter information from the dataframe that pertains to path performance (1-9)
    cond_paths = cond_anal.filter(regex="P.*")
    cond_path_stats = pd.DataFrame(columns = path_anal_cols);

    curr_row = {};
    successed = [];
    fails = []
    times = []
    times_after = []

    # Iterate through all the columns containing path information
    # Each path has 5 columns
    num_cols = 5
    for idx, column in enumerate(cond_paths):
        coltype = idx % num_cols;
        # Store the path number and the corresponding time limit
        curr_row['PathNum'] = int(idx / num_cols) + 1;
        curr_row['Target'] = target_house_names[curr_row['PathNum']];
        curr_row['Limit'] = path_time_limits[curr_row['PathNum']]
        curr_row['PathEucDist'] = path_dists[curr_row['PathNum']-1]


        # First column describes path success or not. Calculate success rate.
        if coltype == 0:
            successes = cond_paths[column];
            fails = ~successes;
            curr_row['+ Rate'] = successes.sum() / successes.count();

        # Second column describes time taken. Compute average and SD time for all succesful paths
        elif coltype == 1:
            times = cond_paths[column]
            curr_row['Avg T (+)'] = cond_paths[column][successes].mean();
            curr_row['SD T (+)'] = cond_paths[column][successes].std();

        # Third column describes time taken after path time limit. Compute avg and SD for all failed paths
        elif coltype == 2:
            times_after = cond_paths[column]
            curr_row['Avg T (after -)'] = cond_paths[column][fails].mean();
            curr_row['SD T (after -)'] = cond_paths[column][fails].std();

        # Fourth column describes distance covered. Compute avg and SD for all successes
        elif coltype == 3:
            curr_row['Avg D (+)'] = cond_paths[column][successes].mean();
            curr_row['SD D (+)'] = cond_paths[column][successes].std();

            curr_row['Avg S (+)'] = (cond_paths[column][successes] / times[successes]).mean();
            curr_row['SD S (+)'] = (cond_paths[column][successes] / times[successes]).std();
            
            curr_row['Max S (+)'] = (cond_paths[column][successes] / times[successes]).max();
            curr_row['Min S (+)'] = (cond_paths[column][successes] / times[successes]).min();

            
            
        # Fifth column describes distance covered after failure
        elif coltype == 4:
            curr_row["Avg D (after -)"] = cond_paths[column][fails].mean();
            curr_row["SD D (after -)"] = cond_paths[column][fails].std();
            
            curr_row['Avg S (after -)'] = (cond_paths[column][fails] / times_after[fails]).mean();
            curr_row['SD S (after -)'] = (cond_paths[column][fails] / times_after[fails]).std();
            
            cond_path_stats = cond_path_stats.append(curr_row, ignore_index=True)
            curr_row = {}

    cond_path_stats["Limit"] = cond_path_stats["Limit"].astype(float)
    cond_path_stats["PathNum"] = cond_path_stats["PathNum"].astype(float)
    
    return num_subs, {"avg": succ_avg, "std": anal_succ_std}, cond_path_stats;
    
    
AS_num_subs, AS_succ_stats, AS_path_stats = get_path_stats("Single", "A");
AS_path_stats.to_csv('./Results/Single/A_Path_Stats.csv');

BS_num_subs, BS_succ_stats, BS_path_stats = get_path_stats("Single", "B");
BS_path_stats.to_csv('./Results/Single/B_Path_Stats.csv');

AC_num_subs, AC_succ_stats, AC_path_stats = get_path_stats("SingleC", "A");
AC_path_stats.to_csv('./Results/SingleC/A_Path_Stats.csv');

BC_num_subs, BC_succ_stats, BC_path_stats = get_path_stats("SingleC", "B");
BC_path_stats.to_csv('./Results/SingleC/B_Path_Stats.csv');

AD_num_subs, AD_succ_stats, AD_path_stats = get_path_stats("Dyadic", "A");
AD_path_stats.to_csv('./Results/Dyadic/A_Path_Stats.csv');

BD_num_subs, BD_succ_stats, BD_path_stats = get_path_stats("Dyadic", "B");
BD_path_stats.to_csv('./Results/Dyadic/B_Path_Stats.csv');

print("Done")

Done


In [6]:
# Read

AS_path_stats = pd.read_csv('./Results/Single/A_Path_Stats.csv');

BS_path_stats = pd.read_csv('./Results/Single/B_Path_Stats.csv');

AC_path_stats = pd.read_csv('./Results/SingleC/A_Path_Stats.csv');

BC_path_stats = pd.read_csv('./Results/SingleC/B_Path_Stats.csv');

AD_path_stats = pd.read_csv('./Results/Dyadic/A_Path_Stats.csv');

BD_path_stats = pd.read_csv('./Results/Dyadic/B_Path_Stats.csv');

### Single A vs. Single B

Investigating the effect of change of direction on path performance

<ul>
    <li> 
        Low correlation between success rate and path time limit (0.1 and 0.212)
        <ul>
            <li>
                Path time limit may not have been a good indicator of path difficulty
            </li>
        </ul>
    </li>
    <li> 
        Relatively high positive correlation between avg. distance on successes and path time limit (0.816 and 0.899)
        <ul>
            <li>
                Path time limit may have been a good indicator of path length
            </li>
        </ul>
    </li>
    <li> 
        Single A has overall 19 lesser successes over all paths than single B 
        <ul>
            <li>
                8 of these are accounted for by the last house being different in either condition
            </li>
            <li>
                Remainder 11 likely because of two extra participants in B condition
            </li>
        </ul>
    </li>
    <li> 
        Low to moderate positive correlation between path number and average speed (0.53 and 0.27)
        <ul>
            <li>
                Some suggestion that participants were speeding up over time?
            </li>
        </ul>
    </li>
    <li> 
        Some paths have high deviation in distance covered, whereas others do not 
        <ul>
            <li>
                Banque Strechnitz, Foxes > Johny Patisserie, and the Tailor may have had relatively straightforward paths that all successful participants recognized in condition A
            </li>
            <li>
                Johny Patisserie > Foxes exhibits low deviation in distance covered in B as well
            </li>
        </ul>
    </li>
    <li> 
        Lower success rate correlates with distance from the target: (-0.66 and -0.25)
        <ul>
            <li>
                Suggests that harder paths lead to worse failures
            </li>
        </ul>
    </li>
    <li> 
        Condition A shows negative correlation between avg. distance covered and success: (-0.28) but B doesn't (0.05)
        <ul>
            <li>
                Suggestion that targets that required more walking on average were more difficult
            </li>
        </ul>
    </li>
</ul>

#### Single A Stats

In [17]:
print('Condition A:')
print("\tNumber of subjects: " + str(AS_num_subs))
print('\tAvg Successes: ' + str(AS_succ_stats["avg"]));
print('\tSD Successes: ' + str(AS_succ_stats["std"]));

corr_ = AS_path_stats['+ Rate'].corr(AS_path_stats['Limit']);
print('\nCorrelation between Success and Path Time Limit: ' + str(corr_))
print('\nCorrelation between Avg D and Path Time Limit: ' + str(AS_path_stats['Avg D (+)'].corr(AS_path_stats['Limit'])))
print('\nCorrelation between Avg D and Success: ' + str(AS_path_stats['Avg D (+)'].corr(AS_path_stats['+ Rate'])))
print('\nCorrelation between Avg S and Success: ' + str(AS_path_stats['Avg S (+)'].corr(AS_path_stats['+ Rate'])))
print('\nCorrelation between Avg S and PathNum: ' + str(AS_path_stats['Avg S (+)'].corr(AS_path_stats['PathNum'])))
print('\nCorrelation between Success and Avg D after failure: ' + str(AS_path_stats['+ Rate'].corr(AS_path_stats['Avg D (after -)'])))
print('\nCorrelation between Avg D and Euc Distance between buildings: ' + str(AS_path_stats['Avg D (+)'].corr(AS_path_stats['PathEucDist'])))
print('\nCorrelation between Path Time Limit and Euc Distance between buildings: ' + str(AS_path_stats['Limit'].corr(AS_path_stats['PathEucDist'])))


# Descriptive statistics per path for condition A
AS_path_stats


Condition A:
	Number of subjects: 12
	Avg Successes: 5.666666666666667
	SD Successes: 2.0150945537631877

Correlation between Success and Path Time Limit: -0.10093502226664555

Correlation between Avg D and Path Time Limit: 0.8162445933275249

Correlation between Avg D and Success: -0.2815923195280629

Correlation between Avg S and Success: 0.05441611833482798

Correlation between Avg S and PathNum: 0.5317052672025421

Correlation between Success and Avg D after failure: -0.662582474218787

Correlation between Avg D and Euc Distance between buildings: 0.7175701890959737

Correlation between Path Time Limit and Euc Distance between buildings: 0.49746557586592327


Unnamed: 0,PathNum,Target,+ Rate,Limit,Avg T (+),SD T (+),Avg T (after -),SD T (after -),Avg D (+),SD D (+),PathEucDist,Avg D (after -),SD D (after -),Avg S (+),SD S (+),Max S (+),Min S (+),Avg S (after -),SD S (after -)
0,1.0,book,0.583333,300.0,161.914439,54.236144,100.005741,38.805747,693.41046,298.965982,460.585005,433.144523,200.427157,4.247128,0.68952,4.850474,2.967796,4.365133,0.944474
1,2.0,shark,0.916667,305.0,103.495294,41.733701,60.849056,,444.105754,190.533413,147.953846,194.608491,,4.321187,0.585604,4.688475,2.680841,3.198217,
2,3.0,bear,0.833333,234.0,52.072322,35.897732,93.972566,103.42906,230.757404,162.8521,163.227708,460.52721,518.144234,4.446598,0.41387,4.815188,3.448452,4.733218,0.304259
3,4.0,no graffiti,0.166667,332.0,196.542249,58.004837,120.673807,41.780004,803.89413,264.803545,297.62545,552.726081,195.10093,4.068556,0.146572,4.172198,3.964914,4.639149,0.554686
4,5.0,banque strechnitz,0.5,268.0,79.653422,9.573732,96.395639,65.235597,353.295889,12.240751,298.542564,419.154351,270.293689,4.483926,0.499164,4.860903,3.626136,4.537901,0.615201
5,6.0,foxes,0.833333,394.0,155.129403,71.633193,62.425426,9.019406,681.017139,293.940689,284.434748,310.534263,44.78727,4.452967,0.416271,4.906345,3.628856,4.974577,0.001289
6,7.0,johny patisserie,0.75,152.0,57.555982,15.655423,150.006501,98.169669,224.525669,13.613748,184.845682,621.035775,296.566702,4.101904,0.850133,4.873735,2.226607,4.443928,0.72946
7,8.0,gorilla,0.833333,383.0,159.703784,63.684079,48.994737,56.295677,739.239353,303.403762,409.871979,237.557307,273.978794,4.641856,0.3257,4.920538,3.901633,4.813373,0.061368
8,9.0,tailor,0.25,332.0,98.339506,17.564382,132.52293,77.269232,466.609018,77.798523,263.871636,602.018447,279.809454,4.751275,0.102767,4.869043,4.679776,4.686659,0.335736


#### Single B Stats

In [18]:
print('Condition B:')
print("\tNumber of subjects: " + str(BS_num_subs))
print('\tAvg Successes: ' + str(BS_succ_stats["avg"]));
print('\tSD Successes: ' + str(BS_succ_stats["std"]));

corr_ = BS_path_stats['+ Rate'].corr(BS_path_stats['Limit']);
print('\nCorrelation between Success and Path Time Limit: ' + str(corr_))
print('\nCorrelation between Avg D and Path Time Limit: ' + str(BS_path_stats['Avg D (+)'].corr(BS_path_stats['Limit'])))
print('\nCorrelation between Avg D and Success: ' + str(BS_path_stats['Avg D (+)'].corr(BS_path_stats['+ Rate'])))
print('\nCorrelation between Avg S and Success: ' + str(BS_path_stats['Avg S (+)'].corr(BS_path_stats['+ Rate'])))
print('\nCorrelation between Avg S and PathNum: ' + str(BS_path_stats['Avg S (+)'].corr(BS_path_stats['PathNum'])))
print('\nCorrelation between Success and Average D after failure: ' + str(BS_path_stats['+ Rate'].corr(BS_path_stats['Avg D (after -)'])))
print('\nCorrelation between Avg and Euc Distance between buildings: ' + str(BS_path_stats['Avg D (+)'].corr(BS_path_stats['PathEucDist'])))
print('\nCorrelation between Path Time Limit and Euc Distance between buildings: ' + str(BS_path_stats['Limit'].corr(BS_path_stats['PathEucDist'])))


BS_path_stats

Condition B:
	Number of subjects: 14
	Avg Successes: 6.214285714285714
	SD Successes: 1.5281246137553164

Correlation between Success and Path Time Limit: 0.2128210725066101

Correlation between Avg D and Path Time Limit: 0.8993478532672612

Correlation between Avg D and Success: 0.05690320834427762

Correlation between Avg S and Success: -0.21544193356062857

Correlation between Avg S and PathNum: 0.275464971436879

Correlation between Success and Average D after failure: -0.252978105773198

Correlation between Avg and Euc Distance between buildings: 0.42001625000554343

Correlation between Path Time Limit and Euc Distance between buildings: 0.49746557586592344


Unnamed: 0,PathNum,Target,+ Rate,Limit,Avg T (+),SD T (+),Avg T (after -),SD T (after -),Avg D (+),SD D (+),PathEucDist,Avg D (after -),SD D (after -),Avg S (+),SD S (+),Max S (+),Min S (+),Avg S (after -),SD S (after -)
0,1.0,gorilla,0.714286,332.0,146.107746,62.410763,153.37046,89.679451,663.505988,275.909481,263.871636,393.084864,306.422292,4.592152,0.371846,4.953673,3.642966,3.389322,2.156248
1,2.0,johny patisserie,0.642857,383.0,161.914168,85.031437,123.339809,46.85477,740.411634,413.917311,409.871979,596.319188,239.184907,4.570293,0.348531,4.920069,3.76602,4.817433,0.19244
2,3.0,foxes,0.571429,152.0,49.831135,3.748735,129.493369,57.54961,228.44646,8.489905,184.845682,608.782317,284.375564,4.601245,0.301619,4.934554,4.119062,4.693901,0.304671
3,4.0,banque strechnitz,0.928571,394.0,206.320719,97.617372,79.476636,,948.218003,485.278432,284.434748,390.528839,,4.530608,0.376079,4.942151,3.732818,4.913757,
4,5.0,no graffiti,0.0,268.0,,,102.886259,74.88821,,,298.542564,478.072324,364.095955,,,,,4.583992,0.42951
5,6.0,bear,0.714286,332.0,167.674285,58.569429,95.827939,12.060102,774.04209,257.125562,297.62545,473.418147,56.003872,4.64018,0.219616,4.917234,4.209214,4.944031,0.063657
6,7.0,shark,1.0,234.0,62.683658,51.683104,,,279.478977,229.227745,163.227708,,,4.500475,0.377363,4.853031,3.563823,,
7,8.0,book,0.642857,305.0,177.997099,78.36798,98.827377,25.200365,821.06934,363.898623,147.953846,472.445361,107.894133,4.597477,0.221969,4.920064,4.236526,4.811699,0.260669
8,9.0,windmill,1.0,300.0,150.240288,67.216606,,,704.234993,325.006458,460.585005,,,4.672957,0.185618,4.963771,4.342491,,


#### Single A vs Single B - Comparing Paths

<ul>
    <li> 
        Relatively high positive correlation between average distances covered on the same path (0.81)
        <ul>
            <li>
                Suggests that similar paths might have been taken in both directions
            </li>
            <li>
                Some paths have significant differences, such as Shark <> Book (A2B8), Foxes <> Strechnitz
            </li>
        </ul>
    </li>
    <li> 
        Relatively high positive correlation between average times taken on the same path (0.77)
        <ul>
            <li>
                Suggests that similar paths might have been taken in both directions
            </li>
        </ul>
    </li>
    <li> 
        moderate to high positive correlation between success rates for the same targets (0.71)
        <ul>
            <li>
                Suggests the same target may have been equally difficult to find regardless of the direction
            </li>
        </ul>
    </li>
</ul>

What does this mean though in terms of correlation's power to "explain variance"? Here it's just a similarity measure it seems

In [39]:
column_subset = ["PathNum", "Target", "+ Rate", "Avg T (+)", "SD T (+)", "Avg D (+)", "SD D (+)", "Avg S (+)", "SD S (+)"]
AS_path_stats_ren = AS_path_stats[column_subset].rename(columns=lambda n: n + " A");
BS_path_stats_ren = BS_path_stats[column_subset].rename(columns=lambda n: n + " B");

BS_path_stats_reversed = BS_path_stats_ren.reindex(index=BS_path_stats_ren.index[::-1])
BS_path_stats_reversed = BS_path_stats_reversed.reset_index(drop=True)
ASBS_joined = pd.concat([AS_path_stats_ren, BS_path_stats_reversed], axis=1)

new_columns = ['PathNum A', 'Target A',  '+ Rate A', 'Target B', '+ Rate B', 'Avg D (+) A', 'SD D (+) A', \
               'Avg D (+) B', 'SD D (+) B',  'Avg T (+) A', 'SD T (+) A', 'Avg T (+) B', \
               'SD T (+) B', 'Avg S (+) A', 'SD S (+) A', 'Avg S (+) B', 'SD S (+) B']

ASBS = ASBS_joined.reindex(sorted(ASBS_joined.columns), axis=1)[new_columns]


print('\nCorrelation between Avg Ds: ' + str(ASBS['Avg D (+) A'].corr(ASBS['Avg D (+) B'])))
print('\nCorrelation between Avg Ts: ' + str(ASBS['Avg T (+) A'].corr(ASBS['Avg T (+) B'])))

ASBS




Correlation between Avg Ds: 0.8113336495301381

Correlation between Avg Ts: 0.775533358923393


Unnamed: 0,PathNum A,Target A,+ Rate A,Target B,+ Rate B,Avg D (+) A,SD D (+) A,Avg D (+) B,SD D (+) B,Avg T (+) A,SD T (+) A,Avg T (+) B,SD T (+) B,Avg S (+) A,SD S (+) A,Avg S (+) B,SD S (+) B
0,1.0,book,0.583333,windmill,1.0,693.41046,298.965982,704.234993,325.006458,161.914439,54.236144,150.240288,67.216606,4.247128,0.68952,4.672957,0.185618
1,2.0,shark,0.916667,book,0.642857,444.105754,190.533413,821.06934,363.898623,103.495294,41.733701,177.997099,78.36798,4.321187,0.585604,4.597477,0.221969
2,3.0,bear,0.833333,shark,1.0,230.757404,162.8521,279.478977,229.227745,52.072322,35.897732,62.683658,51.683104,4.446598,0.41387,4.500475,0.377363
3,4.0,no graffiti,0.166667,bear,0.714286,803.89413,264.803545,774.04209,257.125562,196.542249,58.004837,167.674285,58.569429,4.068556,0.146572,4.64018,0.219616
4,5.0,banque strechnitz,0.5,no graffiti,0.0,353.295889,12.240751,,,79.653422,9.573732,,,4.483926,0.499164,,
5,6.0,foxes,0.833333,banque strechnitz,0.928571,681.017139,293.940689,948.218003,485.278432,155.129403,71.633193,206.320719,97.617372,4.452967,0.416271,4.530608,0.376079
6,7.0,johny patisserie,0.75,foxes,0.571429,224.525669,13.613748,228.44646,8.489905,57.555982,15.655423,49.831135,3.748735,4.101904,0.850133,4.601245,0.301619
7,8.0,gorilla,0.833333,johny patisserie,0.642857,739.239353,303.403762,740.411634,413.917311,159.703784,63.684079,161.914168,85.031437,4.641856,0.3257,4.570293,0.348531
8,9.0,tailor,0.25,gorilla,0.714286,466.609018,77.798523,663.505988,275.909481,98.339506,17.564382,146.107746,62.410763,4.751275,0.102767,4.592152,0.371846


#### Single A vs Single B - Comparing Targets

In [40]:
column_subset = ["PathNum", "Target", "+ Rate", "Avg T (+)", "SD T (+)", "Avg D (+)", "SD D (+)", "Avg S (+)", "SD S (+)"]
AS_path_stats_ren = AS_path_stats[column_subset].rename(columns=lambda n: n + " A");
AS_new = pd.DataFrame(index=range(1), columns=AS_path_stats_ren.columns).append(AS_path_stats_ren).reset_index(drop=True);
BS_path_stats_ren = BS_path_stats[column_subset].rename(columns=lambda n: n + " B");
BS_new = pd.DataFrame(index=range(1), columns=BS_path_stats_ren.columns).append(BS_path_stats_ren).reset_index(drop=True);

BS_path_stats_reversed = BS_new.reindex(index=BS_new.index[::-1])
BS_path_stats_reversed = BS_path_stats_reversed.reset_index(drop=True)
ASBS_joined = pd.concat([AS_new, BS_path_stats_reversed], axis=1)

new_columns = ['PathNum A', 'Target A',  '+ Rate A', 'Target B', '+ Rate B', 'Avg D (+) A', 'SD D (+) A', \
               'Avg D (+) B', 'SD D (+) B',  'Avg T (+) A', 'SD T (+) A', 'Avg T (+) B', \
               'SD T (+) B', 'Avg S (+) A', 'SD S (+) A', 'Avg S (+) B', 'SD S (+) B']

ASBS_targ = ASBS_joined.reindex(sorted(ASBS_joined.columns), axis=1)[new_columns]


print('\nCorrelation between Success Rates: ' + str(ASBS_targ['+ Rate A'].corr(ASBS_targ['+ Rate B'])))


ASBS_targ





Correlation between Success Rates: 0.7095798077055908


Unnamed: 0,PathNum A,Target A,+ Rate A,Target B,+ Rate B,Avg D (+) A,SD D (+) A,Avg D (+) B,SD D (+) B,Avg T (+) A,SD T (+) A,Avg T (+) B,SD T (+) B,Avg S (+) A,SD S (+) A,Avg S (+) B,SD S (+) B
0,,,,windmill,1.0,,,704.234993,325.006458,,,150.240288,67.216606,,,4.672957,0.185618
1,1.0,book,0.583333,book,0.642857,693.41046,298.965982,821.06934,363.898623,161.914439,54.236144,177.997099,78.36798,4.247128,0.68952,4.597477,0.221969
2,2.0,shark,0.916667,shark,1.0,444.105754,190.533413,279.478977,229.227745,103.495294,41.733701,62.683658,51.683104,4.321187,0.585604,4.500475,0.377363
3,3.0,bear,0.833333,bear,0.714286,230.757404,162.8521,774.04209,257.125562,52.072322,35.897732,167.674285,58.569429,4.446598,0.41387,4.64018,0.219616
4,4.0,no graffiti,0.166667,no graffiti,0.0,803.89413,264.803545,,,196.542249,58.004837,,,4.068556,0.146572,,
5,5.0,banque strechnitz,0.5,banque strechnitz,0.928571,353.295889,12.240751,948.218003,485.278432,79.653422,9.573732,206.320719,97.617372,4.483926,0.499164,4.530608,0.376079
6,6.0,foxes,0.833333,foxes,0.571429,681.017139,293.940689,228.44646,8.489905,155.129403,71.633193,49.831135,3.748735,4.452967,0.416271,4.601245,0.301619
7,7.0,johny patisserie,0.75,johny patisserie,0.642857,224.525669,13.613748,740.411634,413.917311,57.555982,15.655423,161.914168,85.031437,4.101904,0.850133,4.570293,0.348531
8,8.0,gorilla,0.833333,gorilla,0.714286,739.239353,303.403762,663.505988,275.909481,159.703784,63.684079,146.107746,62.410763,4.641856,0.3257,4.592152,0.371846
9,9.0,tailor,0.25,,,466.609018,77.798523,,,98.339506,17.564382,,,4.751275,0.102767,,


In [41]:
from scipy import stats

# Paired t-test to see whether mean difference between average time and distance for each path is zero

D_a = np.array(ASBS['Avg D (+) A'])
D_b = np.array(ASBS['Avg D (+) B'])
T_a = np.array(ASBS['Avg T (+) A'])
T_b = np.array(ASBS['Avg T (+) B'])
S_a = np.array(ASBS_targ['+ Rate A'])
S_b = np.array(ASBS_targ['+ Rate B'])

s_d, p_d = stats.ttest_rel(D_a, D_b, nan_policy='omit')
s_t, p_t = stats.ttest_rel(T_a, T_b, nan_policy='omit')
s_s, p_s = stats.ttest_rel(S_a, S_b, nan_policy='omit')
print("P-value for average distances: {0:0.5f}".format(p_d))
print("P-value for average times: {0:0.5f}".format(p_t))
print("P-value for average success rate: {0:0.5f}".format(p_s))

P-value for average distances: 0.07957
P-value for average times: 0.22068
P-value for average success rate: 0.74964


<img src="./Pics/paths.png">


#### Path Difficulties per Condition by Success Rate

In [13]:
# Path difficulties on Average

def pretty_print_diffs(df, col):
    
    for idx, row in df.iterrows():
        print("\t{0:0.0f} \t {1:0.3f} \t {2}".format(row['PathNum'], row[col], row['Target']))
    print('\n')

def show_tops_and_bottoms(df, col, str_):
    top = df.sort_values(by=col, ascending=True)[["PathNum", col, "Target"]][0:3]
    bot = df.sort_values(by=col, ascending=False)[["PathNum", col, "Target"]][0:3]
    
    print(str_)
    print("\tTop: ")
    pretty_print_diffs(top, col);
    print("\tBottom: ")
    pretty_print_diffs(bot, col);
    
show_tops_and_bottoms(AS_path_stats, "+ Rate", "Difficulty in Single A:")
show_tops_and_bottoms(BS_path_stats, "+ Rate", "Difficulty in Single B:")

#show_tops_and_bottoms(AS_path_stats, "Avg D (+)", "Distance in Single A:")
#show_tops_and_bottoms(BS_path_stats, "Avg D (+)", "Distance in Single B:")

#show_tops_and_bottoms(AC_path_stats, "+ Rate", "Difficulty in SingleC A:")
#show_tops_and_bottoms(BC_path_stats, "+ Rate", "Difficulty in SingleC B:")


Difficulty in Single A:
	Top: 
	4 	 0.167 	 no graffiti
	9 	 0.250 	 tailor
	5 	 0.500 	 banque strechnitz


	Bottom: 
	2 	 0.917 	 shark
	3 	 0.833 	 bear
	6 	 0.833 	 foxes


Difficulty in Single B:
	Top: 
	5 	 0.000 	 no graffiti
	3 	 0.571 	 foxes
	2 	 0.643 	 johny patisserie


	Bottom: 
	7 	 1.000 	 shark
	9 	 1.000 	 windmill
	4 	 0.929 	 banque strechnitz




### Dyadic A vs. SingleC A

Investigating the effect of presence of follower on path performance

<ul>
    <li> 
        Low to moderate negative correlation between path number and average speed (-0.49)
        <ul>
            <li>
                Some suggestion that participants were slowing down over time, possibly due to tiredness
            </li>
            <li>
                Analysis of questionnaire answers might be helpful here
            </li>
        </ul>
    </li>
    <li> 
        Higher success rate correlates with distance from the target: (0.40)
        <ul>
            <li>
                Suggests that easier paths lead to worse failures?
            </li>
        </ul>
    </li>
    <li> 
        SingleC A shows negative correlation between avg. distance covered and success: (-0.12)
        <ul>
            <li>
                Suggestion that targets that required more walking on average were more difficult
            </li>
        </ul>
    </li>
</ul>

#### SingleC A Stats

In [19]:
print('Condition Control A:')
df = AC_path_stats
print("\tNumber of subjects: " + str(AC_num_subs))
print('\tAvg Successes: ' + str(AC_succ_stats["avg"]));
print('\tSD Successes: ' + str(AC_succ_stats["std"]));

corr_ = df['+ Rate'].corr(df['Limit']);
print('\nCorrelation between Success and Path Time Limit: ' + str(corr_))
print('\nCorrelation between Avg D and Path Time Limit: ' + str(df['Avg D (+)'].corr(df['Limit'])))
print('\nCorrelation between Avg D and Success: ' + str(df['Avg D (+)'].corr(df['+ Rate'])))
print('\nCorrelation between Avg S and Success: ' + str(df['Avg S (+)'].corr(df['+ Rate'])))
print('\nCorrelation between Avg S and PathNum: ' + str(df['Avg S (+)'].corr(df['PathNum'])))
print('\nCorrelation between Success and Avg D after failure: ' + str(df['+ Rate'].corr(df['Avg D (after -)'])))
print('\nCorrelation between Avg D and Euc Distance between buildings: ' + str(df['Avg D (+)'].corr(df['PathEucDist'])))


# Descriptive statistics per path for condition A
df


Condition Control A:
	Number of subjects: 6
	Avg Successes: 6.666666666666667
	SD Successes: 1.0327955589886444

Correlation between Success and Path Time Limit: 0.22622350379529738

Correlation between Avg D and Path Time Limit: 0.6568293220982226

Correlation between Avg D and Success: -0.12422926448484124

Correlation between Avg S and Success: 0.13286302865717167

Correlation between Avg S and PathNum: -0.4964338677610769

Correlation between Success and Avg D after failure: 0.4026118879897676

Correlation between Avg D and Euc Distance between buildings: 0.4349476403582053


Unnamed: 0,PathNum,Target,+ Rate,Limit,Avg T (+),SD T (+),Avg T (after -),SD T (after -),Avg D (+),SD D (+),PathEucDist,Avg D (after -),SD D (after -),Avg S (+),SD S (+),Max S (+),Min S (+),Avg S (after -),SD S (after -)
0,1.0,book,0.833333,300.0,115.903172,16.168288,52.014228,,543.728147,76.091431,460.585005,255.775648,,4.694451,0.190466,4.944742,4.44439,4.917417,
1,2.0,shark,1.0,305.0,137.367246,103.71476,,,643.02887,501.789916,147.953846,,,4.621776,0.271291,4.897546,4.233887,,
2,3.0,bear,0.666667,234.0,41.075203,8.961059,75.797261,0.011193,193.66742,47.190497,163.227708,375.31508,0.390817,4.701002,0.187117,4.862755,4.523539,4.951566,0.005887
3,4.0,no graffiti,0.666667,332.0,145.089852,17.997202,117.1709,46.538083,683.775682,115.618481,297.62545,553.589818,205.766163,4.696297,0.306848,4.920914,4.244751,4.750597,0.130728
4,5.0,banque strechnitz,0.333333,268.0,195.090066,6.402295,59.302238,43.244797,906.953039,27.648354,298.542564,291.288707,209.310055,4.653725,0.294443,4.861928,4.445523,4.969174,0.147021
5,6.0,foxes,1.0,394.0,152.276301,84.136355,,,725.653223,405.674905,284.434748,,,4.752809,0.122419,4.948621,4.57437,,
6,7.0,johny patisserie,0.833333,152.0,57.851791,15.430754,223.047942,,249.594794,31.093436,184.845682,1112.474763,,4.42286,0.53066,4.950977,3.598301,4.987604,
7,8.0,gorilla,1.0,383.0,148.804567,38.679864,,,686.439819,177.152249,409.871979,,,4.623084,0.2414,4.963338,4.275355,,
8,9.0,tailor,0.333333,332.0,125.546409,35.386571,95.640875,54.539251,568.667095,143.749496,263.871636,468.719909,266.357047,4.548866,0.137155,4.645849,4.451883,4.935574,0.103693


#### Dyadic A Stats

<ul>
    <li> 
        Higher number of average successes compared to Control A (8.0 compared to 6.67) (4 and 6 subjects respectively)
        <ul>
            <li>
                Presence of follower helped with performance? (or is that not significant)
            </li>
            <li>
                What else could have affected this?
            </li>
        </ul>
    </li>
    <li> 
        Low to moderate positive correlation between path number and average speed (0.48)
        <ul>
            <li>
                Participants were speeding up over time?
            </li>
            <li>
                Analysis of questionnaire answers might be helpful here
            </li>
        </ul>
    </li>
    <li> 
        Higher success rate correlates with distance from the target: (0.51)
        <ul>
            <li>
                Suggests that easier paths lead to worse failures?
            </li>
        </ul>
    </li>
    <li> 
        Negative correlation between avg. distance covered and success: (-0.37)
        <ul>
            <li>
                Suggestion that targets that required more walking on average were more difficult
            </li>
        </ul>
    </li>
    <li> 
        Strong negative correlation between avg. Speed and Success covered and success: (-0.72)
        <ul>
            <li>
                With the follower present, going slower might have helped on successes
            </li>
        </ul>
    </li>
</ul>

In [20]:
print('Condition Dyadic A:')
df = AD_path_stats
print("\tNumber of subjects: " + str(AD_num_subs))
print('\tAvg Successes: ' + str(AD_succ_stats["avg"]));
print('\tSD Successes: ' + str(AD_succ_stats["std"]));

corr_ = df['+ Rate'].corr(df['Limit']);
print('\nCorrelation between Success and Path Time Limit: ' + str(corr_))
print('\nCorrelation between Avg D and Path Time Limit: ' + str(df['Avg D (+)'].corr(df['Limit'])))
print('\nCorrelation between Avg D and Success: ' + str(df['Avg D (+)'].corr(df['+ Rate'])))
print('\nCorrelation between Avg S and Success: ' + str(df['Avg S (+)'].corr(df['+ Rate'])))
print('\nCorrelation between Avg S and PathNum: ' + str(df['Avg S (+)'].corr(df['PathNum'])))
print('\nCorrelation between Success and Avg D after failure: ' + str(df['+ Rate'].corr(df['Avg D (after -)'])))
print('\nCorrelation between Avg D and Euc Distance between buildings: ' + str(df['Avg D (+)'].corr(df['PathEucDist'])))


# Descriptive statistics per path for condition A
df


Condition Dyadic A:
	Number of subjects: 4
	Avg Successes: 8.0
	SD Successes: 0.816496580927726

Correlation between Success and Path Time Limit: -0.288800814806714

Correlation between Avg D and Path Time Limit: 0.7666936088311468

Correlation between Avg D and Success: -0.3723152079559006

Correlation between Avg S and Success: -0.7237510143721457

Correlation between Avg S and PathNum: 0.4846803327425519

Correlation between Success and Avg D after failure: 0.5101236102533833

Correlation between Avg D and Euc Distance between buildings: 0.6132597786571402


Unnamed: 0,PathNum,Target,+ Rate,Limit,Avg T (+),SD T (+),Avg T (after -),SD T (after -),Avg D (+),SD D (+),PathEucDist,Avg D (after -),SD D (after -),Avg S (+),SD S (+),Max S (+),Min S (+),Avg S (after -),SD S (after -)
0,1.0,book,1.0,300.0,176.161842,65.680114,,,802.674697,306.1832,460.585005,,,4.572119,0.332355,4.933549,4.152427,,
1,2.0,shark,1.0,305.0,98.244219,37.557791,,,458.461711,183.501587,147.953846,,,4.638136,0.151208,4.855884,4.520017,,
2,3.0,bear,1.0,234.0,52.923102,34.054835,,,242.568942,141.992764,163.227708,,,4.681004,0.227742,4.934125,4.380342,,
3,4.0,no graffiti,0.5,332.0,210.459869,122.152931,85.646875,91.338685,1034.320024,612.367175,297.62545,406.70518,447.708466,4.89461,0.068784,4.943247,4.845972,4.546913,0.378291
4,5.0,banque strechnitz,0.75,268.0,73.856586,1.508718,83.41419,,355.416099,5.027275,298.542564,416.470318,,4.813532,0.117078,4.948537,4.739907,4.992799,
5,6.0,foxes,0.75,394.0,151.696835,76.58963,226.390118,,733.680457,374.213586,284.434748,1132.531338,,4.829968,0.030382,4.85836,4.797926,5.002565,
6,7.0,johny patisserie,1.0,152.0,50.030412,1.47809,,,233.808928,7.264667,184.845682,,,4.673497,0.060357,4.733554,4.612049,,
7,8.0,gorilla,1.0,383.0,169.169942,109.34352,,,822.775377,535.357264,409.871979,,,4.854687,0.080829,4.946421,4.752255,,
8,9.0,tailor,1.0,332.0,194.411066,93.782513,,,934.776933,489.047594,263.871636,,,4.722374,0.255499,4.940084,4.405828,,


#### Dyadic A vs. SingleC A - Comparing Paths and Targets

<ul>
    <li> 
        Low correlation between success rates for the same target (0.1 and 0.212)
        <ul>
            <li>
                Not very much expected to be the same, rather should compare between Single and Control
            </li>
        </ul>
   </li>
    <li>
        Slightly better success rate on average for Dyadic on 5 paths, same on 2, worse on 2
        <ul>
            <li>
                Mean difference of -0.148, standard deviation of 0.29
            </li>
        </ul>         
    </li>
    <li>
        Slightly higher speed on average for Dyadic on 7 paths, lower on 2
        <ul>
            <li>
                Mean difference of -0.107, standard deviation of 0.127
            </li>
        </ul> 
    </li>
    <li>
        Higher distance on average for Dyadic on 7 paths, lower on 2
        <ul>
            <li>
                Mean difference of -46, standard deviation of 288.14
            </li>
            <li>
                Longer paths taken on average in Dyadic?
            </li>
        </ul> 
    </li>
    <li>
        Don't know how significant these are with so few subjects
    </li>
    <li>
        Correlations mean data vary similarly, doesn't have anything to actually do with the similarity of data
    </li>
</ul>

In [16]:
column_subset = ["PathNum", "Target", "+ Rate", "Avg T (+)", "SD T (+)", "Avg D (+)", "SD D (+)", "Avg S (+)", "SD S (+)"]
AC_path_stats_ren = AC_path_stats[column_subset].rename(columns=lambda n: n + " A");
AD_path_stats_ren = AD_path_stats[column_subset].rename(columns=lambda n: n + " B");

ASBS_joined = pd.concat([AC_path_stats_ren, AD_path_stats_ren], axis=1)

new_columns = ['PathNum A', 'Target A',  '+ Rate A', 'Target B', '+ Rate B', 'Avg D (+) A', 'SD D (+) A', \
               'Avg D (+) B', 'SD D (+) B',  'Avg T (+) A', 'SD T (+) A', 'Avg T (+) B', \
               'SD T (+) B', 'Avg S (+) A', 'SD S (+) A', 'Avg S (+) B', 'SD S (+) B']

ASBS = ASBS_joined.reindex(sorted(ASBS_joined.columns), axis=1)[new_columns]


print('\nCorrelation between Success Rates: ' + str(ASBS['+ Rate A'].corr(ASBS['+ Rate B'])))
print('\nCorrelation between Avg Ds: ' + str(ASBS['Avg D (+) A'].corr(ASBS['Avg D (+) B'])))
print('\nCorrelation between Avg Ts: ' + str(ASBS['Avg T (+) A'].corr(ASBS['Avg T (+) B'])))


(ASBS['Avg S (+) A'] - ASBS['Avg S (+) B']) * 100 / 4.5 # scale with relation to max and min speed

#(ASBS['+ Rate A'] - ASBS['+ Rate B']).std()

(ASBS['Avg D (+) A'] - ASBS['Avg D (+) B']) # scale with average and standard deviation

#ASBS




Correlation between Success Rates: 0.19239442809755278

Correlation between Avg Ds: 0.44588883127710716

Correlation between Avg Ts: 0.4282119785175122


0   -258.946551
1    184.567159
2    -48.901522
3   -350.544342
4    551.536940
5     -8.027234
6     15.785866
7   -136.335558
8   -366.109839
dtype: float64

### Dyadic B vs. SingleC B

Investigating the effect of presence of follower on path performance

<ul>
    <li> 
        Low to moderate negative correlation between path number and average speed (-0.49)
        <ul>
            <li>
                Some suggestion that participants were slowing down over time, possibly due to tiredness
            </li>
            <li>
                Analysis of questionnaire answers might be helpful here
            </li>
        </ul>
    </li>
    <li> 
        Higher success rate correlates with distance from the target: (0.40)
        <ul>
            <li>
                Suggests that easier paths lead to worse failures?
            </li>
        </ul>
    </li>
    <li> 
        SingleC A shows negative correlation between avg. distance covered and success: (-0.12)
        <ul>
            <li>
                Suggestion that targets that required more walking on average were more difficult
            </li>
        </ul>
    </li>
</ul>

#### SingleC B Stats

In [9]:
print('Condition Control A:')
df = BC_path_stats
print("\tNumber of subjects: " + str(BC_num_subs))
print('\tAvg Successes: ' + str(BC_succ_stats["avg"]));
print('\tSD Successes: ' + str(BC_succ_stats["std"]));

corr_ = df['+ Rate'].corr(df['Limit']);
print('\nCorrelation between Success and Path Time Limit: ' + str(corr_))
print('\nCorrelation between Avg D and Path Time Limit: ' + str(df['Avg D (+)'].corr(df['Limit'])))
print('\nCorrelation between Avg D and Success: ' + str(df['Avg D (+)'].corr(df['+ Rate'])))
print('\nCorrelation between Avg S and Success: ' + str(df['Avg S (+)'].corr(df['+ Rate'])))
print('\nCorrelation between Avg S and PathNum: ' + str(df['Avg S (+)'].corr(df['PathNum'])))
print('\nCorrelation between Success and Avg D after failure: ' + str(df['+ Rate'].corr(df['Avg D (after -)'])))
print('\nCorrelation between Avg D and Euc Distance between buildings: ' + str(df['Avg D (+)'].corr(df['PathEucDist'])))


# Descriptive statistics per path for condition A
df


Condition Control A:
	Number of subjects: 4
	Avg Successes: 8.0
	SD Successes: 1.1547005383792515

Correlation between Success and Path Time Limit: 0.20170215637294314

Correlation between Avg D and Path Time Limit: 0.6121613563267827

Correlation between Avg D and Success: 0.5020629163174875

Correlation between Avg S and Success: 0.0875403846917943

Correlation between Avg S and PathNum: -0.04629253088161311

Correlation between Success and Avg D after failure: 0.8540768210894227

Correlation between Avg D and Euc Distance between buildings: 0.8206832904983387


Unnamed: 0,PathNum,Target,+ Rate,Limit,Avg T (+),SD T (+),Avg T (after -),SD T (after -),Avg D (+),SD D (+),PathEucDist,Avg D (after -),SD D (after -),Avg S (+),SD S (+),Max S (+),Min S (+),Avg S (after -),SD S (after -)
0,1.0,gorilla,1.0,332.0,153.035682,94.878752,,,730.657623,456.386704,263.871636,,,4.763698,0.083207,4.83948,4.644971,,
1,2.0,johny patisserie,1.0,383.0,141.154249,62.85957,,,665.50393,277.387134,409.871979,,,4.752207,0.131022,4.882476,4.599541,,
2,3.0,foxes,1.0,152.0,48.626087,1.873098,,,233.062643,4.335299,184.845682,,,4.795756,0.101222,4.902145,4.658162,,
3,4.0,banque strechnitz,1.0,394.0,110.104158,24.92473,,,485.78756,81.32804,284.434748,,,4.463247,0.35064,4.816737,4.044215,,
4,5.0,no graffiti,0.75,268.0,116.582121,75.653676,96.68253,,551.751231,337.171079,298.542564,477.144781,,4.803572,0.163942,4.906795,4.614535,4.935171,
5,6.0,bear,1.0,332.0,110.224433,12.127923,,,537.42496,56.514821,297.62545,,,4.877903,0.057625,4.935224,4.807565,,
6,7.0,shark,0.75,234.0,85.265145,70.235353,76.71227,,368.97932,342.079493,163.227708,372.61183,,4.221457,1.016664,4.979312,3.066097,4.857265,
7,8.0,book,0.5,305.0,60.818483,2.741296,55.555065,21.894007,294.496823,13.841991,147.953846,276.231775,113.761455,4.842015,0.009349,4.848626,4.835404,4.953374,0.095621
8,9.0,windmill,1.0,300.0,150.057697,84.09762,,,715.909886,375.261735,460.585005,,,4.824648,0.146649,4.957452,4.621123,,


#### Dyadic B Stats

<ul>
    <li> 
        Higher number of average successes compared to Control A (8.0 compared to 6.67) (4 and 6 subjects respectively)
        <ul>
            <li>
                Presence of follower helped with performance? (or is that not significant)
            </li>
            <li>
                What else could have affected this?
            </li>
        </ul>
    </li>
    <li> 
        Low to moderate positive correlation between path number and average speed (0.48)
        <ul>
            <li>
                Participants were speeding up over time?
            </li>
            <li>
                Analysis of questionnaire answers might be helpful here
            </li>
        </ul>
    </li>
    <li> 
        Higher success rate correlates with distance from the target: (0.51)
        <ul>
            <li>
                Suggests that easier paths lead to worse failures?
            </li>
        </ul>
    </li>
    <li> 
        Negative correlation between avg. distance covered and success: (-0.37)
        <ul>
            <li>
                Suggestion that targets that required more walking on average were more difficult
            </li>
        </ul>
    </li>
    <li> 
        Strong negative correlation between avg. Speed and Success covered and success: (-0.72)
        <ul>
            <li>
                With the follower present, going slower might have helped on successes
            </li>
        </ul>
    </li>
</ul>

In [10]:
print('Condition Dyadic A:')
df = BD_path_stats
print("\tNumber of subjects: " + str(BD_num_subs))
print('\tAvg Successes: ' + str(BD_succ_stats["avg"]));
print('\tSD Successes: ' + str(BD_succ_stats["std"]));

corr_ = df['+ Rate'].corr(df['Limit']);
print('\nCorrelation between Success and Path Time Limit: ' + str(corr_))
print('\nCorrelation between Avg D and Path Time Limit: ' + str(df['Avg D (+)'].corr(df['Limit'])))
print('\nCorrelation between Avg D and Success: ' + str(df['Avg D (+)'].corr(df['+ Rate'])))
print('\nCorrelation between Avg S and Success: ' + str(df['Avg S (+)'].corr(df['+ Rate'])))
print('\nCorrelation between Avg S and PathNum: ' + str(df['Avg S (+)'].corr(df['PathNum'])))
print('\nCorrelation between Success and Avg D after failure: ' + str(df['+ Rate'].corr(df['Avg D (after -)'])))
print('\nCorrelation between Avg D and Euc Distance between buildings: ' + str(df['Avg D (+)'].corr(df['PathEucDist'])))


# Descriptive statistics per path for condition A
df


Condition Dyadic A:
	Number of subjects: 4
	Avg Successes: 8.75
	SD Successes: 0.5

Correlation between Success and Path Time Limit: 0.3296997436214615

Correlation between Avg D and Path Time Limit: 0.9288444726396695

Correlation between Avg D and Success: 0.5385270580081479

Correlation between Avg S and Success: -0.6988597751824265

Correlation between Avg S and PathNum: 0.5473150620182063

Correlation between Success and Avg D after failure: nan

Correlation between Avg D and Euc Distance between buildings: 0.5399646949027526


  c = cov(x, y, rowvar)
  c *= np.true_divide(1, fact)


Unnamed: 0,PathNum,Target,+ Rate,Limit,Avg T (+),SD T (+),Avg T (after -),SD T (after -),Avg D (+),SD D (+),PathEucDist,Avg D (after -),SD D (after -),Avg S (+),SD S (+),Max S (+),Min S (+),Avg S (after -),SD S (after -)
0,1.0,gorilla,1.0,332.0,126.49866,32.199796,,,514.460353,94.885692,263.871636,,,4.197688,0.859291,4.810577,2.929496,,
1,2.0,johny patisserie,1.0,383.0,141.862251,39.613139,,,601.409105,178.248435,409.871979,,,4.307676,0.802132,4.887077,3.120467,,
2,3.0,foxes,1.0,152.0,56.05566,15.612703,,,220.157685,13.26349,184.845682,,,4.096985,0.823508,4.657071,2.90474,,
3,4.0,banque strechnitz,1.0,394.0,170.793262,109.045638,,,715.6007,561.92894,284.434748,,,4.171159,1.1162,4.807597,2.50079,,
4,5.0,no graffiti,1.0,268.0,88.305936,30.00749,,,350.272692,5.071917,298.542564,,,4.248465,1.109956,4.960095,2.599573,,
5,6.0,bear,1.0,332.0,152.843303,51.425179,,,621.846816,149.930388,297.62545,,,4.255335,0.916022,4.883303,2.907698,,
6,7.0,shark,0.75,234.0,51.58991,27.855672,76.389404,,220.270903,89.759165,163.227708,304.558122,,4.455342,0.523774,4.878264,3.869459,3.986916,
7,8.0,book,1.0,305.0,109.441854,42.877062,,,473.817162,213.66309,147.953846,,,4.34207,0.843044,4.907199,3.093819,,
8,9.0,windmill,1.0,300.0,128.745411,37.446937,,,525.649536,84.198955,460.585005,,,4.282766,1.009878,4.940767,2.799098,,


### Dyadic A vs. Dyadic B - Effect of Direction

#### Dyadic A vs Dyadic B - Comparing Paths

In [42]:
column_subset = ["PathNum", "Target", "+ Rate", "Avg T (+)", "SD T (+)", "Avg D (+)", "SD D (+)", "Avg S (+)", "SD S (+)"]
AS_path_stats_ren = AD_path_stats[column_subset].rename(columns=lambda n: n + " A");
BS_path_stats_ren = BD_path_stats[column_subset].rename(columns=lambda n: n + " B");

BS_path_stats_reversed = BS_path_stats_ren.reindex(index=BS_path_stats_ren.index[::-1])
BS_path_stats_reversed = BS_path_stats_reversed.reset_index(drop=True)
ASBS_joined = pd.concat([AS_path_stats_ren, BS_path_stats_reversed], axis=1)

new_columns = ['PathNum A', 'Target A',  '+ Rate A', 'Target B', '+ Rate B', 'Avg D (+) A', 'SD D (+) A', \
               'Avg D (+) B', 'SD D (+) B',  'Avg T (+) A', 'SD T (+) A', 'Avg T (+) B', \
               'SD T (+) B', 'Avg S (+) A', 'SD S (+) A', 'Avg S (+) B', 'SD S (+) B']

ASBS = ASBS_joined.reindex(sorted(ASBS_joined.columns), axis=1)[new_columns]


print('\nCorrelation between Avg Ds: ' + str(ASBS['Avg D (+) A'].corr(ASBS['Avg D (+) B'])))
print('\nCorrelation between Avg Ts: ' + str(ASBS['Avg T (+) A'].corr(ASBS['Avg T (+) B'])))

ASBS




Correlation between Avg Ds: 0.8504605870554395

Correlation between Avg Ts: 0.8645901747582545


Unnamed: 0,PathNum A,Target A,+ Rate A,Target B,+ Rate B,Avg D (+) A,SD D (+) A,Avg D (+) B,SD D (+) B,Avg T (+) A,SD T (+) A,Avg T (+) B,SD T (+) B,Avg S (+) A,SD S (+) A,Avg S (+) B,SD S (+) B
0,1.0,book,1.0,windmill,1.0,802.674697,306.1832,525.649536,84.198955,176.161842,65.680114,128.745411,37.446937,4.572119,0.332355,4.282766,1.009878
1,2.0,shark,1.0,book,1.0,458.461711,183.501587,473.817162,213.66309,98.244219,37.557791,109.441854,42.877062,4.638136,0.151208,4.34207,0.843044
2,3.0,bear,1.0,shark,0.75,242.568942,141.992764,220.270903,89.759165,52.923102,34.054835,51.58991,27.855672,4.681004,0.227742,4.455342,0.523774
3,4.0,no graffiti,0.5,bear,1.0,1034.320024,612.367175,621.846816,149.930388,210.459869,122.152931,152.843303,51.425179,4.89461,0.068784,4.255335,0.916022
4,5.0,banque strechnitz,0.75,no graffiti,1.0,355.416099,5.027275,350.272692,5.071917,73.856586,1.508718,88.305936,30.00749,4.813532,0.117078,4.248465,1.109956
5,6.0,foxes,0.75,banque strechnitz,1.0,733.680457,374.213586,715.6007,561.92894,151.696835,76.58963,170.793262,109.045638,4.829968,0.030382,4.171159,1.1162
6,7.0,johny patisserie,1.0,foxes,1.0,233.808928,7.264667,220.157685,13.26349,50.030412,1.47809,56.05566,15.612703,4.673497,0.060357,4.096985,0.823508
7,8.0,gorilla,1.0,johny patisserie,1.0,822.775377,535.357264,601.409105,178.248435,169.169942,109.34352,141.862251,39.613139,4.854687,0.080829,4.307676,0.802132
8,9.0,tailor,1.0,gorilla,1.0,934.776933,489.047594,514.460353,94.885692,194.411066,93.782513,126.49866,32.199796,4.722374,0.255499,4.197688,0.859291


#### Dyadic A vs Dyadic B - Comparing Targets

In [43]:
column_subset = ["PathNum", "Target", "+ Rate", "Avg T (+)", "SD T (+)", "Avg D (+)", "SD D (+)", "Avg S (+)", "SD S (+)"]
AS_path_stats_ren = AD_path_stats[column_subset].rename(columns=lambda n: n + " A");
AS_new = pd.DataFrame(index=range(1), columns=AS_path_stats_ren.columns).append(AS_path_stats_ren).reset_index(drop=True);
BS_path_stats_ren = BD_path_stats[column_subset].rename(columns=lambda n: n + " B");
BS_new = pd.DataFrame(index=range(1), columns=BS_path_stats_ren.columns).append(BS_path_stats_ren).reset_index(drop=True);

BS_path_stats_reversed = BS_new.reindex(index=BS_new.index[::-1])
BS_path_stats_reversed = BS_path_stats_reversed.reset_index(drop=True)
ASBS_joined = pd.concat([AS_new, BS_path_stats_reversed], axis=1)

new_columns = ['PathNum A', 'Target A',  '+ Rate A', 'Target B', '+ Rate B', 'Avg D (+) A', 'SD D (+) A', \
               'Avg D (+) B', 'SD D (+) B',  'Avg T (+) A', 'SD T (+) A', 'Avg T (+) B', \
               'SD T (+) B', 'Avg S (+) A', 'SD S (+) A', 'Avg S (+) B', 'SD S (+) B']

ASBS_targ = ASBS_joined.reindex(sorted(ASBS_joined.columns), axis=1)[new_columns]


print('\nCorrelation between Success Rates: ' + str(ASBS_targ['+ Rate A'].corr(ASBS_targ['+ Rate B'])))


ASBS_targ





Correlation between Success Rates: -0.2672612419124244


Unnamed: 0,PathNum A,Target A,+ Rate A,Target B,+ Rate B,Avg D (+) A,SD D (+) A,Avg D (+) B,SD D (+) B,Avg T (+) A,SD T (+) A,Avg T (+) B,SD T (+) B,Avg S (+) A,SD S (+) A,Avg S (+) B,SD S (+) B
0,,,,windmill,1.0,,,525.649536,84.198955,,,128.745411,37.446937,,,4.282766,1.009878
1,1.0,book,1.0,book,1.0,802.674697,306.1832,473.817162,213.66309,176.161842,65.680114,109.441854,42.877062,4.572119,0.332355,4.34207,0.843044
2,2.0,shark,1.0,shark,0.75,458.461711,183.501587,220.270903,89.759165,98.244219,37.557791,51.58991,27.855672,4.638136,0.151208,4.455342,0.523774
3,3.0,bear,1.0,bear,1.0,242.568942,141.992764,621.846816,149.930388,52.923102,34.054835,152.843303,51.425179,4.681004,0.227742,4.255335,0.916022
4,4.0,no graffiti,0.5,no graffiti,1.0,1034.320024,612.367175,350.272692,5.071917,210.459869,122.152931,88.305936,30.00749,4.89461,0.068784,4.248465,1.109956
5,5.0,banque strechnitz,0.75,banque strechnitz,1.0,355.416099,5.027275,715.6007,561.92894,73.856586,1.508718,170.793262,109.045638,4.813532,0.117078,4.171159,1.1162
6,6.0,foxes,0.75,foxes,1.0,733.680457,374.213586,220.157685,13.26349,151.696835,76.58963,56.05566,15.612703,4.829968,0.030382,4.096985,0.823508
7,7.0,johny patisserie,1.0,johny patisserie,1.0,233.808928,7.264667,601.409105,178.248435,50.030412,1.47809,141.862251,39.613139,4.673497,0.060357,4.307676,0.802132
8,8.0,gorilla,1.0,gorilla,1.0,822.775377,535.357264,514.460353,94.885692,169.169942,109.34352,126.49866,32.199796,4.854687,0.080829,4.197688,0.859291
9,9.0,tailor,1.0,,,934.776933,489.047594,,,194.411066,93.782513,,,4.722374,0.255499,,


In [44]:
from scipy import stats

# Paired t-test to see whether mean difference between average time and distance for each path is zero

D_a = np.array(ASBS['Avg D (+) A'])
D_b = np.array(ASBS['Avg D (+) B'])
T_a = np.array(ASBS['Avg T (+) A'])
T_b = np.array(ASBS['Avg T (+) B'])
S_a = np.array(ASBS_targ['+ Rate A'])
S_b = np.array(ASBS_targ['+ Rate B'])

s_d, p_d = stats.ttest_rel(D_a, D_b, nan_policy='omit')
s_t, p_t = stats.ttest_rel(T_a, T_b, nan_policy='omit')
s_s, p_s = stats.ttest_rel(S_a, S_b, nan_policy='omit')
print("P-value for average distances: {0:0.5f}".format(p_d))
print("P-value for average times: {0:0.5f}".format(p_t))
print("P-value for average success rate: {0:0.5f}".format(p_s))

P-value for average distances: 0.03557
P-value for average times: 0.17515
P-value for average success rate: 0.28493


### SingleC A vs. SingleC B - Effect of Direction

#### SingleC A vs SingleC B - Comparing Paths

In [55]:
column_subset = ["PathNum", "Target", "+ Rate", "Avg T (+)", "SD T (+)", "Avg D (+)", "SD D (+)", "Avg S (+)", "SD S (+)"]
AS_path_stats_ren = AC_path_stats[column_subset].rename(columns=lambda n: n + " A");
BS_path_stats_ren = BC_path_stats[column_subset].rename(columns=lambda n: n + " B");

BS_path_stats_reversed = BS_path_stats_ren.reindex(index=BS_path_stats_ren.index[::-1])
BS_path_stats_reversed = BS_path_stats_reversed.reset_index(drop=True)
ASBS_joined = pd.concat([AS_path_stats_ren, BS_path_stats_reversed], axis=1)

new_columns = ['PathNum A', 'Target A',  '+ Rate A', 'Target B', '+ Rate B', 'Avg D (+) A', 'SD D (+) A', \
               'Avg D (+) B', 'SD D (+) B',  'Avg T (+) A', 'SD T (+) A', 'Avg T (+) B', \
               'SD T (+) B', 'Avg S (+) A', 'SD S (+) A', 'Avg S (+) B', 'SD S (+) B']

ASBS = ASBS_joined.reindex(sorted(ASBS_joined.columns), axis=1)[new_columns]


print('\nCorrelation between Avg Ds: ' + str(ASBS['Avg D (+) A'].corr(ASBS['Avg D (+) B'])))
print('\nCorrelation between Avg Ts: ' + str(ASBS['Avg T (+) A'].corr(ASBS['Avg T (+) B'])))

ASBS




Correlation between Avg Ds: 0.46188615015516477

Correlation between Avg Ts: 0.44816832450594185


Unnamed: 0,PathNum A,Target A,+ Rate A,Target B,+ Rate B,Avg D (+) A,SD D (+) A,Avg D (+) B,SD D (+) B,Avg T (+) A,SD T (+) A,Avg T (+) B,SD T (+) B,Avg S (+) A,SD S (+) A,Avg S (+) B,SD S (+) B
0,1.0,book,0.833333,windmill,1.0,543.728147,76.091431,715.909886,375.261735,115.903172,16.168288,150.057697,84.09762,4.694451,0.190466,4.824648,0.146649
1,2.0,shark,1.0,book,0.5,643.02887,501.789916,294.496823,13.841991,137.367246,103.71476,60.818483,2.741296,4.621776,0.271291,4.842015,0.009349
2,3.0,bear,0.666667,shark,0.75,193.66742,47.190497,368.97932,342.079493,41.075203,8.961059,85.265145,70.235353,4.701002,0.187117,4.221457,1.016664
3,4.0,no graffiti,0.666667,bear,1.0,683.775682,115.618481,537.42496,56.514821,145.089852,17.997202,110.224433,12.127923,4.696297,0.306848,4.877903,0.057625
4,5.0,banque strechnitz,0.333333,no graffiti,0.75,906.953039,27.648354,551.751231,337.171079,195.090066,6.402295,116.582121,75.653676,4.653725,0.294443,4.803572,0.163942
5,6.0,foxes,1.0,banque strechnitz,1.0,725.653223,405.674905,485.78756,81.32804,152.276301,84.136355,110.104158,24.92473,4.752809,0.122419,4.463247,0.35064
6,7.0,johny patisserie,0.833333,foxes,1.0,249.594794,31.093436,233.062643,4.335299,57.851791,15.430754,48.626087,1.873098,4.42286,0.53066,4.795756,0.101222
7,8.0,gorilla,1.0,johny patisserie,1.0,686.439819,177.152249,665.50393,277.387134,148.804567,38.679864,141.154249,62.85957,4.623084,0.2414,4.752207,0.131022
8,9.0,tailor,0.333333,gorilla,1.0,568.667095,143.749496,730.657623,456.386704,125.546409,35.386571,153.035682,94.878752,4.548866,0.137155,4.763698,0.083207


#### SingleC A vs SingleC B - Comparing Targets

In [56]:
column_subset = ["PathNum", "Target", "+ Rate", "Avg T (+)", "SD T (+)", "Avg D (+)", "SD D (+)", "Avg S (+)", "SD S (+)"]
AS_path_stats_ren = AC_path_stats[column_subset].rename(columns=lambda n: n + " A");
AS_new = pd.DataFrame(index=range(1), columns=AS_path_stats_ren.columns).append(AS_path_stats_ren).reset_index(drop=True);
BS_path_stats_ren = BC_path_stats[column_subset].rename(columns=lambda n: n + " B");
BS_new = pd.DataFrame(index=range(1), columns=BS_path_stats_ren.columns).append(BS_path_stats_ren).reset_index(drop=True);

BS_path_stats_reversed = BS_new.reindex(index=BS_new.index[::-1])
BS_path_stats_reversed = BS_path_stats_reversed.reset_index(drop=True)
ASBS_joined = pd.concat([AS_new, BS_path_stats_reversed], axis=1)

new_columns = ['PathNum A', 'Target A',  '+ Rate A', 'Target B', '+ Rate B', 'Avg D (+) A', 'SD D (+) A', \
               'Avg D (+) B', 'SD D (+) B',  'Avg T (+) A', 'SD T (+) A', 'Avg T (+) B', \
               'SD T (+) B', 'Avg S (+) A', 'SD S (+) A', 'Avg S (+) B', 'SD S (+) B']

ASBS_targ = ASBS_joined.reindex(sorted(ASBS_joined.columns), axis=1)[new_columns]


print('\nCorrelation between Success Rates: ' + str(ASBS_targ['+ Rate A'].corr(ASBS_targ['+ Rate B'])))


ASBS_targ





Correlation between Success Rates: -0.13608276348795437


Unnamed: 0,PathNum A,Target A,+ Rate A,Target B,+ Rate B,Avg D (+) A,SD D (+) A,Avg D (+) B,SD D (+) B,Avg T (+) A,SD T (+) A,Avg T (+) B,SD T (+) B,Avg S (+) A,SD S (+) A,Avg S (+) B,SD S (+) B
0,,,,windmill,1.0,,,715.909886,375.261735,,,150.057697,84.09762,,,4.824648,0.146649
1,1.0,book,0.833333,book,0.5,543.728147,76.091431,294.496823,13.841991,115.903172,16.168288,60.818483,2.741296,4.694451,0.190466,4.842015,0.009349
2,2.0,shark,1.0,shark,0.75,643.02887,501.789916,368.97932,342.079493,137.367246,103.71476,85.265145,70.235353,4.621776,0.271291,4.221457,1.016664
3,3.0,bear,0.666667,bear,1.0,193.66742,47.190497,537.42496,56.514821,41.075203,8.961059,110.224433,12.127923,4.701002,0.187117,4.877903,0.057625
4,4.0,no graffiti,0.666667,no graffiti,0.75,683.775682,115.618481,551.751231,337.171079,145.089852,17.997202,116.582121,75.653676,4.696297,0.306848,4.803572,0.163942
5,5.0,banque strechnitz,0.333333,banque strechnitz,1.0,906.953039,27.648354,485.78756,81.32804,195.090066,6.402295,110.104158,24.92473,4.653725,0.294443,4.463247,0.35064
6,6.0,foxes,1.0,foxes,1.0,725.653223,405.674905,233.062643,4.335299,152.276301,84.136355,48.626087,1.873098,4.752809,0.122419,4.795756,0.101222
7,7.0,johny patisserie,0.833333,johny patisserie,1.0,249.594794,31.093436,665.50393,277.387134,57.851791,15.430754,141.154249,62.85957,4.42286,0.53066,4.752207,0.131022
8,8.0,gorilla,1.0,gorilla,1.0,686.439819,177.152249,730.657623,456.386704,148.804567,38.679864,153.035682,94.878752,4.623084,0.2414,4.763698,0.083207
9,9.0,tailor,0.333333,,,568.667095,143.749496,,,125.546409,35.386571,,,4.548866,0.137155,,


In [63]:
from scipy import stats

# Paired t-test to see whether mean difference between average time and distance for each path is zero

D_a = np.flip(np.array(ASBS['Avg D (+) A']))
D_b = np.array(ASBS['Avg D (+) B'])
T_a = np.array(ASBS['Avg T (+) A'])
T_b = np.array(ASBS['Avg T (+) B'])
S_a = np.array(ASBS_targ['+ Rate A'])
S_b = np.array(ASBS_targ['+ Rate B'])

s_d, p_d = stats.ttest_rel(D_a, D_b, nan_policy='omit')
s_t, p_t = stats.ttest_rel(T_a, T_b, nan_policy='omit')
s_s, p_s = stats.ttest_rel(S_a, S_b, nan_policy='omit')
print("P-value for average distances: {0:0.5f}".format(p_d))
print("P-value for average times: {0:0.5f}".format(p_t))
print("P-value for average success rate: {0:0.5f}".format(p_s))

P-value for average distances: 0.37520
P-value for average times: 0.32769
P-value for average success rate: 0.48283


In [121]:
# Averages for each category and standard deviation
# Look for outliers
# Average speed? - Distance over time - Compare to final performance
#    for individual paths and for participants overall

# TODO - think about the relevance of individual analysis and path analysis
# TODO - time before guide appeared for success - individual subject and for each path?
# TODO - same performance for dyadic, stare at statistics and generate some observations

Other possible path analysis to apply:
- Look at the heatmap of movement for each path in either conditions. See if there can be identified a most used route, compare perhaps to the shortest route, and see if the most used route differs in either condition.

<img src='Pics/paths.png'>