# Supplementary analysis

This code compares on-ride self-reports with post-ride self-reports to:
1. compute the number of mismatched responses and their percentage
2. calculate the average on-ride and post-ride ratings for each participant
3. examine the correlation between route type (0 = busy route / Hengelosestraat,
   1 = peaceful route / F35) and participants' on-ride self-reported valence.


In [4]:
import numpy as np
import pandas as pd
from scipy.stats import pearsonr


## Number and percentage of mismatched responses

In [5]:
# Participant IDs 'P01' … 'P17' (two-digit, zero-padded).
PARTICIPANTS = [f'P{number:02d}' for number in range(1, 18)]

# Compact description of each participant's ride:
#   (route of the first segment, prompts in first segment, prompts in second segment)
# Route codes: 0 = busy route, 1 = peaceful route. Every participant answers a
# contiguous run of prompts on one route and then a contiguous run on the other.
_ROUTE_SEGMENTS = {
    'P01': (0, 4, 5),
    'P02': (0, 5, 5),
    'P03': (0, 4, 5),
    'P04': (1, 4, 5),
    'P05': (1, 5, 4),
    'P06': (0, 5, 4),
    'P07': (1, 6, 5),
    'P08': (1, 4, 5),
    'P09': (1, 4, 4),
    'P10': (0, 5, 4),
    'P11': (0, 5, 4),
    'P12': (0, 5, 4),
    'P13': (0, 6, 3),
    'P14': (0, 4, 4),
    'P15': (1, 5, 4),
    'P16': (1, 6, 5),
    'P17': (1, 6, 5),
}

# ROUTES maps each participant ID to a list of 0s (busy route) and 1s (peaceful route),
# one entry per self-report prompt, in prompt order.
ROUTES = {
    pid: [first_route] * n_first + [1 - first_route] * n_second
    for pid, (first_route, n_first, n_second) in _ROUTE_SEGMENTS.items()
}

In [6]:
# Per-participant mismatch statistics between the on-ride ratings (CSV row 0)
# and the post-ride ratings (CSV row 1); one entry per participant.
num_differences = []
percentage_mismatch = []

for participant_id in PARTICIPANTS:
    # The CSV has no header line: row 0 holds on-ride, row 1 post-ride ratings.
    self_reports_csv = pd.read_csv(f'data/self-reports/{participant_id}.csv', header=None)
    route_row = np.asarray(ROUTES[participant_id]).reshape(1, -1)

    # Stack the route labels under the ratings as row 2. The concatenation raises
    # ValueError if the route length differs from the number of prompts.
    self_reports = np.concatenate((self_reports_csv.values, route_row), axis=0)

    on_ride, post_ride = self_reports[0], self_reports[1]
    n_prompts = self_reports.shape[1]

    # Number of prompts whose on-ride and post-ride ratings disagree,
    # and the same figure as a percentage of all prompts.
    n_diff = (on_ride != post_ride).sum()
    perc_diff = n_diff / n_prompts * 100

    num_differences.append(n_diff)
    percentage_mismatch.append(perc_diff)

In [7]:
# Average the per-participant mismatch statistics over all participants.
mismatch_counts = np.asarray(num_differences)
print("Mean number of mismatches:", mismatch_counts.mean())
mismatch_percentages = np.asarray(percentage_mismatch)
print("Mean percentage mismatch:", mismatch_percentages.mean())

Mean number of mismatches: 2.9411764705882355
Mean percentage mismatch: 31.26262626262626


## Average on-ride and post-ride ratings

In [8]:
# Mean rating per participant, separately for the on-ride (row 0) and
# post-ride (row 1) self-reports; one entry per participant.
on_ride_average = []
post_ride_average = []

for participant_id in PARTICIPANTS:
    self_reports_csv = pd.read_csv(f'data/self-reports/{participant_id}.csv', header=None)
    route_row = np.asarray(ROUTES[participant_id]).reshape(1, -1)

    # Route labels become row 2; the concatenation raises ValueError if their
    # length differs from the number of prompts.
    self_reports = np.concatenate((self_reports_csv.values, route_row), axis=0)

    # Cast each rating row to float before taking its mean.
    on_ride_avg = np.mean(self_reports[0].astype(float))
    post_ride_avg = np.mean(self_reports[1].astype(float))

    on_ride_average.append(on_ride_avg)
    post_ride_average.append(post_ride_avg)

In [9]:
# Grand mean of the per-participant average ratings (each participant weighted equally).
on_ride_means = np.asarray(on_ride_average)
print("Mean on-ride rating:", on_ride_means.mean())
post_ride_means = np.asarray(post_ride_average)
print("Mean post-ride rating:", post_ride_means.mean())

Mean on-ride rating: 2.2298871063576944
Mean post-ride rating: 2.128921568627451


In [10]:
# Load a single participant (P16) as a worked example. This cell only builds the
# array: row 0 = on-ride ratings, row 1 = post-ride ratings, row 2 = route labels.
participant_id = 'P16'
self_reports_csv = pd.read_csv(f'data/self-reports/{participant_id}.csv', header=None)
route_row = np.asarray(ROUTES[participant_id]).reshape(1, -1)
self_reports = np.concatenate((self_reports_csv.values, route_row), axis=0)

In [11]:
# Show the stacked array for the example participant, followed by the mean and
# standard deviation of the on-ride ratings (row 0).
on_ride_ratings = self_reports[0]
print(f"\nDetailed example for {participant_id}:")
print("Self-reports array (first row on-ride, second row post-ride, third row route):")
print(self_reports)
print("Summary of on-ride:")
print(f"Mean: {on_ride_ratings.mean()}")
# ndarray.std() uses ddof=0 by default, i.e. the population standard deviation.
print(f"SD: {on_ride_ratings.std()}")


Detailed example for P16:
Self-reports array (first row on-ride, second row post-ride, third row route):
[[3 2 2 2 2 2 2 2 3 3 3]
 [3 2 2 3 2 2 2 2 3 3 2]
 [1 1 1 1 1 1 0 0 0 0 0]]
Summary of on-ride:
Mean: 2.3636363636363638
SD: 0.48104569292083466


In [12]:
# On-ride (row 0) vs. post-ride (row 1) mismatch for the example participant.
# The count is stored under its own name so that the per-participant
# `num_differences` list is not rebound to a scalar; otherwise re-running the
# "Mean number of mismatches" summary would silently report this one value.
example_n_prompts = self_reports.shape[1]
example_num_differences = np.sum(self_reports[0] != self_reports[1])
print("\nOn-ride vs. post-ride comparison:")
print(f"Prompts: {example_n_prompts}")
print(f"Differences: {example_num_differences}")
print(f"Percentage mismatch: {example_num_differences / example_n_prompts * 100:.2f}%")


On-ride vs. post-ride comparison:
Prompts: 11
Differences: 2
Percentage mismatch: 18.18%


## Correlation between route type and on-ride self-reports

In [13]:
# Pearson correlation, for the currently loaded participant, between the
# on-ride ratings (row 0) and the route label of each prompt (0 = busy, 1 = peaceful).
on_ride_ratings = self_reports[0]
route_labels = ROUTES[participant_id]
correlation_coefficient, p_value = pearsonr(on_ride_ratings, route_labels)

print("\nCorrelation between on-ride self-report and route type:")
print(f"Correlation coefficient: {correlation_coefficient}")
print(f"p-value: {p_value}")


Correlation between on-ride self-report and route type:
Correlation coefficient: -0.4485426135725303
p-value: 0.16643734614715577


In [14]:
# Pooled correlation between route type and on-ride rating over every prompt of
# every participant.
#
# Each participant's CSV is read once, and a single pass collects:
#   * the per-participant mean on-ride / post-ride ratings (recomputed so this
#     cell does not depend on the earlier averaging cell having been run), and
#   * the on-ride ratings and route labels of every prompt.
on_ride_average = []
post_ride_average = []
all_on_ride_self_reports = []
all_routes = []

for participant_id in PARTICIPANTS:
    self_reports_csv = pd.read_csv(f'data/self-reports/{participant_id}.csv', header=None)
    self_reports = self_reports_csv.values

    # Append the route labels as row 2. This also acts as a consistency check:
    # numpy raises ValueError if the route length differs from the number of prompts.
    self_reports = np.append(
        self_reports,
        np.asarray(ROUTES[participant_id]).reshape(1, -1),
        axis=0
    )

    # Row 0 = on-ride ratings, row 1 = post-ride ratings.
    on_ride_ratings = self_reports[0].astype(float)
    on_ride_average.append(on_ride_ratings.mean())
    post_ride_average.append(self_reports[1].astype(float).mean())

    all_on_ride_self_reports.append(on_ride_ratings)
    all_routes.append(ROUTES[participant_id])

# Flatten the per-participant sequences into one 1-D array each; both keep the
# same participant and prompt order, so entries stay aligned.
merged_all_on_ride_self_reports = np.concatenate(all_on_ride_self_reports)
merged_all_routes = np.concatenate(all_routes)

# Pearson correlation over all participants & prompts.
# NOTE(review): pooling treats every prompt as an independent observation even
# though prompts are nested within participants; confirm this is intended.
correlation_coefficient, p_value = pearsonr(merged_all_routes, merged_all_on_ride_self_reports)

In [15]:
# Report the pooled route-vs-rating correlation, one line per item.
print(
    "\nOverall correlation across all participants and prompts:",
    f"Correlation coefficient (route vs. on-ride rating): {correlation_coefficient}",
    f"p-value: {p_value}",
    sep="\n",
)


Overall correlation across all participants and prompts:
Correlation coefficient (route vs. on-ride rating): -0.05787695769034776
p-value: 0.47009083715885897
