In [1]:
import numpy as np
from datetime import datetime, timedelta
import random
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
from scipy import stats


In [2]:
m_start = ('7:45 AM', '8:15 AM')
m_stop = ('9:00 AM', '9:30 AM')

e_start = ('3:30 PM', '4:00 PM')
e_stop = ('4:30 PM', '5:00 PM')

In [3]:
#data_format = {
    #session_id = #int,
    #start_time = #HHMM,
    #stop_time = #HHMM,
    #rain = #True/False,
    #mode = #Car/Transit,
    #path = #Freeway/Backway/None,
    #lane = #left/middle/right/multi
#}

In [4]:
def generate_time(start_time_str, end_time_str):
    start_time = datetime.strptime(start_time_str, "%I:%M %p")
    end_time = datetime.strptime(end_time_str, "%I:%M %p")
    time_range_seconds = int((end_time - start_time).total_seconds())
    random_time = start_time + timedelta(seconds=random.randint(0, time_range_seconds))
    return random_time


In [5]:
# Function to generate random demographics
def random_demographics():
    mode_of_transport = random.choice(['Transit', 'Car'])
    if mode_of_transport == 'Car':
        road_type = random.choice(['Freeway', 'Backroad'])
        if road_type == 'Freeway':
            lane = random.choice(['Left', 'Center', 'Right', 'Multi-lane'])
        else:
            lane = None
    else:
        road_type = None
        lane = None
    
    return {
        'mode_of_transport': mode_of_transport,
        'road_type': road_type,
        'lane': lane
    }

In [6]:

def generate_commute_data(
    start_range,
    stop_range,
    num_users=5,
    num_days=7
):
    user_data = []
    session_id = 0
    
    for user_id in range(num_users + 1):
        for day in range(num_days + 1):
            start = generate_time(start_range[0], start_range[1])
            stop = generate_time(stop_range[0], stop_range[1])
            
            if start > stop:
                start, stop = stop, start
                
            time = 'AM' if start.hour < 12 else 'PM'
            rain = random.random() < 0.5
            
            length = int((stop - start).total_seconds() / 60)            
            demographics = random_demographics()
            
            user_data.append({
                # 'session_id': session_id,  # Unique session ID
                'user_id': user_id,        # User ID
                'day': day,                # Day of commute
                'start': start,            # Start time
                'stop': stop,              # Stop time
                'length':length,           # commute time
                'time':time,               # part of day
                'rain': rain,              # Rain flag
                **demographics             # Unpack demographics
            })
            
            session_id += 1  # Increment session ID for uniqueness
    
    return user_data


In [7]:
data = generate_commute_data(m_start, m_stop, 20, 7 )

data.extend(generate_commute_data(e_start, e_stop, 20, 7 ))


In [8]:
df = pd.DataFrame(data)

In [9]:
ci_results = []
for day in df.day.unique():
    data = df[df.day == day]
    
    m = data[data.time == 'AM']
    e = data[data.time == 'PM']
    
    m_mean = m.length.mean()
    e_mean = e.length.mean()
    
    m_sem = stats.sem(m.length)
    e_sem = stats.sem(e.length)
    
    m_CI = m_sem * stats.t.ppf(0.975, len(m) - 1)
    e_CI = e_sem * stats.t.ppf(0.975, len(e) - 1)
    
    # print(m_CI)
    # print(e_CI)
    # print('')
    ci_results.append({
        'day': day,
        'm_mean':m_mean,
        'm_CI': m_CI,
        'e_mean':e_mean,
        'e_CI':e_CI
    })
    
for item in ci_results:
    print(item)

{'day': 0, 'm_mean': 75.14285714285714, 'm_CI': 4.770399094358057, 'e_mean': 59.666666666666664, 'e_CI': 4.483916555832479}
{'day': 1, 'm_mean': 78.33333333333333, 'm_CI': 5.046983389488675, 'e_mean': 61.57142857142857, 'e_CI': 5.879893337391866}
{'day': 2, 'm_mean': 71.19047619047619, 'm_CI': 6.291933126475879, 'e_mean': 57.61904761904762, 'e_CI': 5.529210822930468}
{'day': 3, 'm_mean': 75.33333333333333, 'm_CI': 5.8777114652975015, 'e_mean': 58.333333333333336, 'e_CI': 5.0592847890513974}
{'day': 4, 'm_mean': 75.57142857142857, 'm_CI': 4.562338008710182, 'e_mean': 56.857142857142854, 'e_CI': 6.083766106996954}
{'day': 5, 'm_mean': 73.0952380952381, 'm_CI': 6.362806883369735, 'e_mean': 60.095238095238095, 'e_CI': 5.445056757376525}
{'day': 6, 'm_mean': 75.28571428571429, 'm_CI': 6.1613618254693865, 'e_mean': 59.61904761904762, 'e_CI': 5.51795714748304}
{'day': 7, 'm_mean': 73.33333333333333, 'm_CI': 6.1835013498454225, 'e_mean': 60.76190476190476, 'e_CI': 5.608145960463188}


In [10]:
pd.DataFrame(ci_results)

Unnamed: 0,day,m_mean,m_CI,e_mean,e_CI
0,0,75.142857,4.770399,59.666667,4.483917
1,1,78.333333,5.046983,61.571429,5.879893
2,2,71.190476,6.291933,57.619048,5.529211
3,3,75.333333,5.877711,58.333333,5.059285
4,4,75.571429,4.562338,56.857143,6.083766
5,5,73.095238,6.362807,60.095238,5.445057
6,6,75.285714,6.161362,59.619048,5.517957
7,7,73.333333,6.183501,60.761905,5.608146


In [11]:
def compute_ci_and_plot(users_data):
    df = pd.DataFrame(users_data)
    
    # Calculate the total commute time for each day (morning + evening)
    df['morning_commute_time'] = (df['morning_end'] - df['morning_start']) / 100
    df['evening_commute_time'] = (df['evening_end'] - df['evening_start']) / 100
    df['total_commute_time'] = df['morning_commute_time'] + df['evening_commute_time']
    
    # Group by day and calculate the mean and CI for each day
    ci_results = []
    for day in df['day'].unique():
        day_data = df[df['day'] == day]
        mean_commute_time = day_data['total_commute_time'].mean()
        sem = stats.sem(day_data['total_commute_time'])  # Standard error of the mean
        ci = sem * stats.t.ppf(0.975, len(day_data) - 1)  # 95% Confidence Interval
        
        ci_results.append({
            'day': day,
            'mean_commute_time': mean_commute_time,
            'ci_lower': mean_commute_time - ci,
            'ci_upper': mean_commute_time + ci
        })
    
    # Convert CI results to DataFrame
    ci_df = pd.DataFrame(ci_results)
    
    # Plot with error bars
    plt.figure(figsize=(10, 6))
    plt.errorbar(ci_df['day'], ci_df['mean_commute_time'], yerr=[ci_df['mean_commute_time'] - ci_df['ci_lower'], ci_df['ci_upper'] - ci_df['mean_commute_time']], fmt='o', color='blue', label='Commute Time')
    plt.xlabel('Day')
    plt.ylabel('Mean Commute Time (Hours)')
    plt.title('Average Commute Time with 95% Confidence Interval')
    plt.grid(True)
    plt.tight_layout()
    plt.show()

# Generate data for multiple users over multiple days
users_data = generate_user_commute_data(num_users=10, num_days=30)

# Compute and plot the CI with error bars
compute_ci_and_plot(users_data)


NameError: name 'generate_user_commute_data' is not defined