In [27]:
import os
import pandas as pd
import plotly.graph_objs as go
from plotly.offline import plot

In [28]:
# Configuration shared by all cells below.

# Root directory; it contains one sub-folder per participant id.
BASE_FOLDER = 'probanten'

# Start of each participant's run, keyed by participant id.
# The values are parsed elsewhere with unit='ms', i.e. epoch milliseconds.
STARTING_TIMESTAMPS = {
    '01': 1720610204501,
    '02': 1720611503602,
    '03': 1720612727640,
    '04': 1720613776577,
    '05': 1720614956563,
    '06': 1720617727299,
    '07': 1720621234414,
    '08': 1720622424927,
    '09': 1720623626949,
    '10': 1720624816427,
    '11': 1720625936732,
}

# Participant ids in insertion order of the mapping above, so the id list and
# the timestamp table cannot drift apart.
PARTICIPANTS = list(STARTING_TIMESTAMPS)

# File names (inside a participant folder) of the x/y/z sensor recordings.
SENSOR_FILES = [
    'accelerometer_data',
    'gyroscope_data',
    'magnetometer_data',
    'user_accelerometer_data',
]

# File name (inside a participant folder) of the speed recording.
SPEED_FILE = 'speed_data'

In [29]:
def load_sensor_data(participant, sensor_file):
    """Load one sensor recording of a participant into a DataFrame.

    Parameters
    ----------
    participant : str
        Participant id; used as the sub-folder name below ``BASE_FOLDER``.
    sensor_file : str
        File name of the recording inside the participant folder.

    Returns
    -------
    pandas.DataFrame
        Columns ``time``, ``x``, ``y`` and ``z`` read from the headerless CSV,
        plus ``sensor`` holding the file name without its ``_data`` suffix.
    """
    file_path = os.path.join(BASE_FOLDER, participant, sensor_file)
    df = pd.read_csv(file_path, header=None, names=['time', 'x', 'y', 'z'])
    # Label every row with its sensor type (accelerometer, magnetometer, etc.).
    # Only the trailing "_data" is stripped: splitting on the first "_" would
    # shorten "user_accelerometer_data" to just "user".
    suffix = '_data'
    if sensor_file.endswith(suffix):
        df['sensor'] = sensor_file[:-len(suffix)]
    else:
        df['sensor'] = sensor_file
    return df

def load_all_sensor_data(participant):
    """Read every file listed in ``SENSOR_FILES`` for one participant.

    The per-file frames are stacked with ``pd.concat`` in the order of
    ``SENSOR_FILES``; each frame keeps its own row index.
    """
    frames = []
    for file_name in SENSOR_FILES:
        frames.append(load_sensor_data(participant, file_name))
    return pd.concat(frames)

def load_speed_data(participant):
    """Load the speed recording of one participant.

    Reads the headerless two-column CSV ``SPEED_FILE`` from the participant's
    folder and returns a DataFrame with

    * ``time``  - the raw millisecond values parsed into datetimes, and
    * ``speed`` - the raw values multiplied by 3.6 (m/s to km/h).
    """
    speed_path = os.path.join(BASE_FOLDER, participant, SPEED_FILE)
    raw = pd.read_csv(speed_path, header=None, names=['time', 'speed'])
    return raw.assign(
        time=pd.to_datetime(raw['time'], unit='ms'),
        speed=raw['speed'] * 3.6,
    )


In [34]:
# Clean up speed data
def clean_speed_data(df, upper_threshold, lower_theshold):
    """Keep only the rows whose speed lies inside the given bounds.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a numeric ``speed`` column.
    upper_threshold : float
        Largest accepted speed (inclusive).
    lower_theshold : float
        Smallest accepted speed (inclusive). The misspelled parameter name is
        kept on purpose so existing keyword callers keep working.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df`` with ``lower_theshold <= speed <= upper_threshold``,
        with their original index. Rows with a missing speed are dropped,
        because comparisons against NaN are False.
    """
    speed = df['speed']
    # Previously only the upper bound was applied and the lower one was ignored.
    within_bounds = (speed >= lower_theshold) & (speed <= upper_threshold)
    return df[within_bounds]

In [31]:
def plot_sensor_data(df, participant):
    """Plot the x, y and z readings of every sensor over time for one participant.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with columns ``time`` (milliseconds), ``x``, ``y``, ``z`` and
        ``sensor`` (label used to group the rows into traces).
    participant : str
        Participant id shown in the figure title.

    One line trace named ``<sensor>_<axis>`` is added per sensor and axis, the
    prompt markers are drawn on top, and the figure is shown.
    """
    axes = ['x', 'y', 'z']
    fig = go.Figure()
    for sensor in df['sensor'].unique():
        sensor_data = df[df['sensor'] == sensor]
        # Convert the millisecond timestamps once per sensor instead of once per axis.
        timestamps = pd.to_datetime(sensor_data['time'], unit='ms')
        for axis in axes:
            fig.add_trace(go.Scatter(x=timestamps, y=sensor_data[axis], mode='lines', name=f'{sensor}_{axis}'))
    # add_time_markers puts its labels at the maximum of the column it is given.
    # The 'sensor' column only holds text labels, so its "maximum" is a string
    # and not a position on the numeric y axis. Hand over the largest plotted
    # reading instead, wrapped in a one-row frame.
    peak_frame = pd.DataFrame({'peak': [df[axes].max().max()]})
    add_time_markers(peak_frame, fig, 'peak')
    fig.update_layout(title=f'Sensor Data for Participant {participant}', xaxis_title='Time', yaxis_title='Sensor Value', xaxis=dict(tickformat='%H:%M'))
    fig.show()

def plot_speed_data(df, participant):
    """Show the speed of one participant over time, with the prompt markers overlaid.

    ``df`` needs the columns ``time`` and ``speed``; ``participant`` is the id
    shown in the figure title.
    """
    speed_trace = go.Scatter(
        x=pd.to_datetime(df['time'], unit='ms'),
        y=df['speed'],
        mode='lines',
        name='Speed',
    )
    fig = go.Figure()
    fig.add_trace(speed_trace)
    add_time_markers(df, fig, 'speed')
    fig.update_layout(
        title=f'Speed Data for Participant {participant}',
        xaxis_title='Time',
        yaxis_title='Speed in km/h',
        xaxis={'tickformat': '%H:%M'},
    )
    fig.show()

def plot_average_speeds(averages, participant):
    """Plot the average speed before and after each marker for one participant.

    ``averages`` is a sequence of ``(marker, avg_before, avg_after)`` tuples;
    ``participant`` is the id shown in the figure title.
    """
    # zip(*averages) transposes the list of tuples:
    #   [(m1, b1, a1), (m2, b2, a2), ...]
    # is unpacked into separate arguments (m1, b1, a1), (m2, b2, a2), ...
    # and zip regroups them by position into
    #   (m1, m2, ...), (b1, b2, ...), (a1, a2, ...)
    markers, avg_before, avg_after = zip(*averages)

    fig = go.Figure()
    # One line per series, both drawn against the same marker times.
    series = (
        (avg_before, 'Average Speed Before Marker (km/h)'),
        (avg_after, 'Average Speed After Marker (km/h)'),
    )
    for values, label in series:
        fig.add_trace(go.Scatter(x=markers, y=values, mode='lines+markers', name=label))

    fig.update_layout(
        title=f'Average Speed Before and After Markers for Participant {participant}',
        xaxis_title='Time',
        yaxis_title='Average Speed (km/h)',
        xaxis={'tickformat': '%H:%M'},
    )
    fig.show()

def plot_percentage_changes(percentage_changes, participant):
    """Plot the percentage change in speed at each marker for one participant.

    ``percentage_changes`` is a sequence of ``(marker, percent_change)`` tuples;
    ``participant`` is the id shown in the figure title.
    """
    # Transpose the list of pairs into one tuple of markers and one of values.
    markers, percent_changes = zip(*percentage_changes)

    change_trace = go.Scatter(
        x=markers,
        y=percent_changes,
        mode='lines+markers',
        name='Percentage Change',
    )
    fig = go.Figure()
    fig.add_trace(change_trace)
    fig.update_layout(
        title=f'Percentage Change in Speed After Markers for Participant {participant}',
        xaxis_title='Time',
        yaxis_title='Percentage Change (%)',
        xaxis={'tickformat': '%H:%M'},
    )
    fig.show()

# Draws the vertical dashed lines that mark when the audio prompts were played to the participant
def add_time_markers(df, fig, data, participant=None):
    """Add start, prompt and end markers to a figure.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose column ``data`` determines the height of the labels.
    fig : plotly.graph_objs.Figure
        Figure to draw on; it is modified in place.
    data : str
        Name of the column in ``df`` whose maximum is used as label height.
    participant : str, optional
        Participant id used to look up the run start in ``STARTING_TIMESTAMPS``.
        When omitted, the module-level variable ``participant`` is used, which
        is what callers relied on before this parameter existed.

    Raises
    ------
    KeyError
        If no starting timestamp is known for the participant.

    Markers are drawn at the start (red), every two minutes for six prompts
    (blue) and 15 minutes after the start (red).
    """
    if participant is None:
        # Backward compatibility: fall back to the global loop variable.
        participant = globals().get('participant')
    if participant not in STARTING_TIMESTAMPS:
        raise KeyError(f'No starting timestamp known for participant {participant!r}')
    starting_time = pd.to_datetime(STARTING_TIMESTAMPS[participant], unit='ms')

    # Computed once; Series.max also skips missing values.
    label_height = df[data].max()

    def mark(moment, text, color):
        # Dashed vertical line plus an arrow label in the same colour.
        fig.add_vline(x=moment, line=dict(color=color, dash='dash'))
        fig.add_annotation(x=moment, y=label_height, text=text, showarrow=True, arrowhead=1, ax=-10, ay=-30, arrowcolor=color)

    # Start Dash
    mark(starting_time, 'Start', 'red')
    # Prompt Dashes
    for i in range(1, 7):
        mark(starting_time + pd.Timedelta(minutes=2 * i), f'+{2 * i}min', 'blue')
    # End Dash (the arrow is red to match its line; it used to be blue)
    mark(starting_time + pd.Timedelta(minutes=15), 'End', 'red')


In [32]:
# Average speed in the minute before and the minute after every audio prompt
# starting_time is the timestamp at which the run began
def calculate_average_speeds(df, starting_time):
    """Return ``(marker, avg_before, avg_after)`` for each of the six prompts.

    Prompts are placed every two minutes after ``starting_time``. For each one,
    ``avg_before`` is the mean of ``df['speed']`` over the rows with ``time`` in
    ``[marker - 1 min, marker)`` and ``avg_after`` the mean over
    ``[marker, marker + 1 min)``. A window without rows yields NaN.
    """
    one_minute = pd.Timedelta(minutes=1)
    timestamps = df['time']
    speeds = df['speed']

    def mean_speed_between(window_start, window_end):
        # Half-open window: start included, end excluded.
        in_window = (timestamps >= window_start) & (timestamps < window_end)
        return speeds[in_window].mean()

    results = []
    for prompt_number in range(1, 7):
        prompt_time = starting_time + pd.Timedelta(minutes=2 * prompt_number)
        results.append((
            prompt_time,
            mean_speed_between(prompt_time - one_minute, prompt_time),
            mean_speed_between(prompt_time, prompt_time + one_minute),
        ))
    return results

# Relative change from the average before a marker to the average after it
# Takes the list produced by calculate_average_speeds
def calculate_percentage_changes(averages):
    """Return ``(marker, percent_change)`` for every entry of ``averages``.

    ``percent_change`` is ``(avg_after - avg_before) / avg_before * 100``.
    When ``avg_before`` equals 0 the change is reported as 0 to avoid a
    division by zero.
    """
    return [
        (
            marker,
            0 if avg_before == 0 else ((avg_after - avg_before) / avg_before) * 100,
        )
        for marker, avg_before, avg_after in averages
    ]

In [33]:
# Produce all four figures for every participant.
# NOTE: the loop variable must stay a module-level name called `participant`,
# because add_time_markers looks it up when it is not passed one explicitly.
for participant in PARTICIPANTS:
    # Sensor data
    sensor_df = load_all_sensor_data(participant)
    plot_sensor_data(sensor_df, participant)

    # Speed data: drop readings outside 0..15 (speeds are in km/h after loading)
    speed_df = load_speed_data(participant)
    cleaned_speed_df = clean_speed_data(speed_df, 15, 0)
    plot_speed_data(cleaned_speed_df, participant)

    # Average speeds, computed on the cleaned data so that the outliers removed
    # from the speed plot do not distort the averages either
    starting_time = pd.to_datetime(STARTING_TIMESTAMPS[participant], unit='ms')
    averages = calculate_average_speeds(cleaned_speed_df, starting_time)
    plot_average_speeds(averages, participant)

    # Percentage changes
    percentage_changes = calculate_percentage_changes(averages)
    plot_percentage_changes(percentage_changes, participant)

NameError: name 'threshold' is not defined