In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm

In [2]:
# Load the raw results table; later cells read its 'task', 'duration' and
# 'participant' columns.
data = pd.read_csv(filepath_or_buffer='results.csv')

In [3]:
def calculate_statistics(group):
    """Summarise the ``duration`` column of ``group``.

    Parameters
    ----------
    group : pandas.DataFrame
        Any frame exposing a numeric ``duration`` column (the full data set
        or a single ``groupby`` partition).

    Returns
    -------
    pandas.Series
        Entries ``mean``, ``median``, ``std_dev``, ``variance`` and
        ``conf_interval``. The last one is a ``(lower, upper)`` tuple holding
        a 95% normal-approximation confidence interval for the mean. It
        collapses to ``(mean, mean)`` when the interval cannot be estimated:
        fewer than two non-missing values, or zero spread.
    """
    durations = group['duration']
    stats = {
        'mean': durations.mean(),
        'median': durations.median(),
        'std_dev': durations.std(),
        'variance': durations.var(),
    }

    # mean()/std() skip NaN, so the sample size behind the standard error has
    # to skip them as well; len(group) would count missing rows and make the
    # interval too narrow.
    n = durations.count()
    se = stats['std_dev'] / np.sqrt(n) if n > 1 else 0.0

    # norm.interval yields (nan, nan) for scale == 0, so identical durations
    # are reported as a zero-width interval around the mean instead.
    if n > 1 and se > 0:
        stats['conf_interval'] = norm.interval(0.95, loc=stats['mean'], scale=se)
    else:
        stats['conf_interval'] = (stats['mean'], stats['mean'])
    return pd.Series(stats)

In [None]:
# Per-task summary: one row per task, one column per statistic.
# Selecting [['duration']] hands the helper only the column it reads and keeps
# the grouping column out of apply(), which pandas >= 2.2 deprecates.
task_stats = data.groupby('task')[['duration']].apply(calculate_statistics)
print(task_stats)

In [None]:
# Same summary, computed over every row regardless of task.
overall_stats = data.pipe(calculate_statistics)
print(overall_stats)

In [None]:
# Mean duration per task with 95% confidence-interval error bars.
means = task_stats['mean'].astype(float).to_numpy()
ci_lower = np.array([ci[0] for ci in task_stats['conf_interval']], dtype=float)
ci_upper = np.array([ci[1] for ci in task_stats['conf_interval']], dtype=float)

fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(task_stats.index, means, color='green', label='Mean Duration')
# Anchor the error bars to the same x values as the bars: a positional offset
# such as ``i + 1`` only lines up when the tasks happen to be labelled 1..N.
ax.errorbar(
    task_stats.index,
    means,
    yerr=[means - ci_lower, ci_upper - means],  # asymmetric form: [below, above]
    fmt='o',
    color='black',
    capsize=5,
    label='95% CI',
)
ax.set_title('Task Performance Analysis')
ax.set_xlabel('Task')
ax.set_ylabel('Duration (seconds)')
ax.set_xticks(task_stats.index)
ax.legend()

plt.tight_layout()
plt.show()

In [7]:
# Distinct task labels, in order of first appearance in the data.
unique_tasks = pd.unique(data['task'])

In [None]:
# One figure per task: each participant's duration as a bar, with the task
# mean and its 95% confidence interval drawn as horizontal reference lines.
for task in unique_tasks:
    task_data = data[data['task'] == task]

    # Reuse the shared helper rather than recomputing the interval here, so
    # these figures always agree with the summary tables.
    task_summary = calculate_statistics(task_data)
    mean_duration = task_summary['mean']
    ci = task_summary['conf_interval']

    positions = list(range(len(task_data)))

    fig, ax = plt.subplots(figsize=(12, 6), dpi=100)
    bars = ax.bar(positions, task_data['duration'], color='lightblue', alpha=0.85)
    ax.axhline(mean_duration, color='blue', linestyle='-', linewidth=2, label='Mean')
    # Only the lower bound carries a label so the legend shows a single CI entry.
    ax.axhline(ci[0], color='red', linestyle='--', linewidth=1, label='95% CI')
    ax.axhline(ci[1], color='red', linestyle='--', linewidth=1)

    # Annotate every bar with its value, centred just above the bar top.
    for bar in bars:
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width() / 2., height, f'{height:.2f}s', ha='center', va='bottom')

    ax.set_title(f'Task {task} - User Performance Duration\nMean: {mean_duration:.2f}s (95% CI: {ci[0]:.2f}s - {ci[1]:.2f}s)', pad=20)
    ax.set_xlabel('Participant Number')
    ax.set_ylabel('Duration (seconds)')
    ax.set_xticks(positions)
    ax.set_xticklabels(task_data['participant'], rotation=45, ha='right')
    ax.grid(True, axis='y', linestyle='--', alpha=0.3)
    ax.legend(loc='upper right')
    fig.tight_layout()
    plt.show()
    # Release the figure explicitly; figures created in a loop otherwise
    # accumulate on backends that do not close them on show().
    plt.close(fig)