In [1]:
import pandas as pd
import altair as alt

In [4]:
# Load only the columns this notebook uses: the cleaning step keeps just
# ID, Severity and Start_Time, so parsing any other CSV column is wasted
# time and memory.
df = pd.read_csv(
    'US_Accidents_March23.csv',
    usecols=['ID', 'Severity', 'Start_Time'],
)

In [5]:
# Clean: keep the three columns the analysis needs, drop incomplete rows,
# then parse the timestamp once and derive the time-based features from it.
kept_columns = ['ID', 'Severity', 'Start_Time']
df = df[kept_columns].dropna()

# format='mixed' lets pandas infer the format per element, so timestamps
# written in differing layouts are all parsed.
start_time = pd.to_datetime(df['Start_Time'], format='mixed')
df['Start_Time'] = start_time
df['Hour'] = start_time.dt.hour              # hour of day, 0-23
df['DayOfWeek'] = start_time.dt.day_name()   # 'Monday' ... 'Sunday'

In [6]:
# Calendar date of each accident (time of day dropped); used later to count
# how many distinct days contribute to each group.
accident_dates = df['Start_Time'].dt.date
df['Date'] = accident_dates

In [7]:
# Label every row as 'Weekday' (Monday-Friday) or 'Weekend' (anything else).
weekdays = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']
is_weekday = df['DayOfWeek'].isin(weekdays)
df['DayType'] = is_weekday.map({True: 'Weekday', False: 'Weekend'})

In [8]:
def aggregate_hourly_data(data, groupby_cols, day_filter_value):
    """Summarise accidents per group into per-day averages and severity stats.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns 'ID', 'Severity' and 'Date' plus every
        column named in ``groupby_cols``.
    groupby_cols : list of str
        One or two grouping columns. The first is reported as 'Hour'. With
        two columns, the second is reported as 'day_filter'.
    day_filter_value : object
        Constant written to the 'day_filter' column when a single grouping
        column is given. Ignored when two grouping columns are given,
        because the second column's values become 'day_filter' instead.

    Returns
    -------
    pandas.DataFrame
        One row per group with the columns
        ['Hour', 'Crash_Count', 'Avg_Severity', 'Pct_Severe', 'day_filter']
        (one grouping column) or
        ['Hour', 'day_filter', 'Crash_Count', 'Avg_Severity', 'Pct_Severe']
        (two grouping columns).

        * Crash_Count  - number of rows in the group divided by the number
          of distinct 'Date' values in that group. Dates with no rows in the
          group are therefore not counted in the denominator.
        * Avg_Severity - mean of 'Severity'.
        * Pct_Severe   - percentage of rows whose 'Severity' equals 4.

    Raises
    ------
    ValueError
        If ``groupby_cols`` does not contain exactly one or two columns.
    """
    n_keys = len(groupby_cols)
    if n_keys not in (1, 2):
        raise ValueError(
            f'groupby_cols must contain one or two column names, got {n_keys}'
        )

    # Named aggregation yields flat, explicitly named columns, so nothing
    # depends on the positional order of a MultiIndex header.
    summary = data.groupby(groupby_cols).agg(
        crashes=('ID', 'count'),
        Avg_Severity=('Severity', 'mean'),
        Pct_Severe=('Severity', lambda sev: (sev == 4).mean() * 100),
        active_days=('Date', 'nunique'),
    ).reset_index()

    # Average number of crashes per day on which the group had any crash.
    summary['Crash_Count'] = summary['crashes'] / summary['active_days']

    metric_cols = ['Crash_Count', 'Avg_Severity', 'Pct_Severe']
    result = summary[list(groupby_cols) + metric_cols].copy()

    if n_keys == 1:
        result.columns = ['Hour'] + metric_cols
        result['day_filter'] = day_filter_value
    else:
        # The second grouping column doubles as the filter label.
        result.columns = ['Hour', 'day_filter'] + metric_cols

    return result

In [9]:
# Build one hourly summary per dropdown option: the full data set, the
# weekday and weekend subsets, and each individual day of the week.
day_type = df['DayType']
weekday_rows = df[day_type == 'Weekday']
weekend_rows = df[day_type == 'Weekend']

hourly_data_all = aggregate_hourly_data(df, ['Hour'], 'All')
hourly_data_weekdays = aggregate_hourly_data(weekday_rows, ['Hour'], 'Weekdays')
hourly_data_weekends = aggregate_hourly_data(weekend_rows, ['Hour'], 'Weekends')
# With two grouping columns the day name itself becomes the filter label,
# so no constant label is supplied.
hourly_data_days = aggregate_hourly_data(df, ['Hour', 'DayOfWeek'], None)

In [10]:
# Stack every summary into one long table keyed by 'day_filter'.
# concat aligns columns by name, so the differing column order of the
# per-day summary does not matter.
hourly_frames = [
    hourly_data_all,
    hourly_data_weekdays,
    hourly_data_weekends,
    hourly_data_days,
]
combined_data = pd.concat(hourly_frames, ignore_index=True)

In [19]:
def _caption_record(option):
    """Build the caption row for a single 'day_filter' value.

    The daily total is the sum of the hourly per-day averages; the severity
    figure is the mean of the hourly severities weighted by those averages.
    """
    rows = combined_data[combined_data['day_filter'] == option]
    daily_total = rows['Crash_Count'].sum()
    severity = (rows['Avg_Severity'] * rows['Crash_Count']).sum() / daily_total
    return {
        'day_filter': option,
        'stat_text': f'Avg Daily Accident Count: {int(daily_total):,}  |  Avg Daily Severity: {severity:.3f}'
    }


# One caption per filter option, in order of first appearance.
stats_data = [
    _caption_record(option)
    for option in combined_data['day_filter'].unique()
]
stats_df = pd.DataFrame(stats_data)

In [20]:
# Attach each filter's caption text to its rows so the chart can read it
# from the same data source.
# A 'stat_text' column left by a previous run of this cell is discarded
# first. Otherwise the merge would produce 'stat_text_x' / 'stat_text_y'
# and the 'stat_text' field used by the chart would no longer exist.
carried_over = [col for col in combined_data.columns if col != 'stat_text']
combined_data = combined_data[carried_over].merge(stats_df, on='day_filter', how='left')

In [21]:
# Layout constants shared by the layered chart and the caption beneath it.
CHART_WIDTH = 950
CHART_HEIGHT = 420

# Dropdown bound to a point selection on 'day_filter'; the same selection
# filters the data, so only the chosen option's rows are drawn.
day_dropdown = alt.binding_select(
    options=['All', 'Weekdays', 'Weekends', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'],
    name='Day of Week: '
)
day_selection = alt.selection_point(fields=['day_filter'], bind=day_dropdown, value='All')

base = alt.Chart(combined_data).add_params(day_selection).transform_filter(day_selection)


def _value_axis():
    """Return the axis styling shared by both y-axes."""
    return alt.Axis(
        labelFontSize=11,
        titleFontSize=13,
        titlePadding=10,
        gridOpacity=0.3
    )


def _hover_tooltip():
    """Return the tooltip fields shared by the bar and line layers."""
    return [
        alt.Tooltip('Hour:O', title='Hour'),
        alt.Tooltip('Crash_Count:Q', title='Accident Count', format='.1f'),
        alt.Tooltip('Avg_Severity:Q', title='Severity', format='.2f')
    ]


# Bars: average number of accidents per day for each hour.
bars = base.mark_bar(
    opacity=0.85,
    color='#cc4878',
    cornerRadiusTopLeft=3,
    cornerRadiusTopRight=3
).encode(
    x=alt.X('Hour:O',
            title='Hour of Day',
            axis=alt.Axis(
                labelAngle=0,
                labelFontSize=11,
                titleFontSize=13,
                titlePadding=10,
                grid=False
            )),
    y=alt.Y('Crash_Count:Q',
            title='Average Accident Count',
            axis=_value_axis()),
    tooltip=_hover_tooltip()
)

# Line: mean severity for each hour, drawn on its own y-axis.
# NOTE(review): the severity domain is a fixed literal, so it is not re-fit
# when the dropdown changes - confirm that every filter's hourly means fall
# inside [2.16, 2.345].
line = base.mark_line(
    color='#5b039f',
    strokeWidth=2.5,
    point=alt.OverlayMarkDef(filled=True, size=60, color='#5b039f', opacity=0.9)
).encode(
    x=alt.X('Hour:O'),
    y=alt.Y('Avg_Severity:Q',
            title='Average Severity',
            scale=alt.Scale(domain=[2.16, 2.345]),
            axis=_value_axis()),
    tooltip=_hover_tooltip()
)

# Independent y scales keep the count axis and the severity axis separate.
main_chart = alt.layer(bars, line).resolve_scale(
    y='independent'
).properties(
    width=CHART_WIDTH,
    height=CHART_HEIGHT,
    title={
        "text": "US Accidents by Hour of Day: Severity And Frequency",
        "fontSize": 18,
        "fontWeight": 600,
        "anchor": "middle"
    }
)

# Caption under the chart: the rows of the selected filter all carry the
# same 'stat_text', so the aggregate collapses them into a single label.
# The label is centred horizontally on the main chart.
stats_text = base.mark_text(
    align='center',
    baseline='top',
    fontSize=14,
    fontWeight=500,
    color='#34495e',
    dy=10
).encode(
    x=alt.value(CHART_WIDTH // 2),
    y=alt.value(0),
    text='stat_text:N'
).transform_aggregate(
    stat_text='max(stat_text)',
    groupby=['day_filter']
)

final_chart = alt.vconcat(
    main_chart,
    stats_text.properties(height=39)
).configure_view(
    strokeWidth=0
).configure_axis(
    labelColor='#555',
    titleColor='#333'
)

final_chart

In [22]:
# Optional: uncomment the next line to export the interactive chart to an HTML file.
# final_chart.save('bar_line_plot.html')