# Exploring the percentage breakdown of the Traffic Crash Dataset based on the Crash Type Column 

In [1]:
import pandas as pd
import altair as alt


# Read in the dataset
df = pd.read_csv('Traffic_Crash_Data.csv')

# Display the length of the dataset
print("Dataset length: " , len(df))

# Create a dataframe for pedestrian crashes only.
# .copy() makes this an independent frame, so columns can be added to it later
# without pandas raising SettingWithCopyWarning.
pedestrian_df = df[df['FIRST_CRASH_TYPE'] == 'PEDESTRIAN'].copy()

# Print the length of the pedestrian dataframe
print("Pedestrian dataset length: " , len(pedestrian_df))

# Create a dataframe for pedalcyclist crashes only (independent copy, same reason as above)
pedalcyclist_df = df[df['FIRST_CRASH_TYPE'] == 'PEDALCYCLIST'].copy()

# Print the length of the pedalcyclist dataframe
print("Pedalcyclist dataset length: " , len(pedalcyclist_df))

# "Pedestrian involved" here means the PEDESTRIAN and PEDALCYCLIST crash types combined
pedestrain_involved_crashes = df[df['FIRST_CRASH_TYPE'].isin(['PEDESTRIAN', 'PEDALCYCLIST'])]

# Print the length of the pedestrian involved crashes dataframe and its share of all crashes
print("Pedestrian involved crashes dataset length: " , len(pedestrain_involved_crashes))
print("Percentage of pedestrian involved crashes: " , len(pedestrain_involved_crashes) / len(df) * 100)

# Share of each crash type as a percentage of all rows with a crash type
# (value_counts sorts from most to least frequent)
crash_type_percentage_breakdown = df['FIRST_CRASH_TYPE'].value_counts(normalize=True) * 100

# Print results in a concise format
for crash_type, percentage in crash_type_percentage_breakdown.items():
    print(f"{crash_type}: {percentage:.2f}%")

# Turn the percentage Series into a two-column DataFrame for plotting
data = (
    crash_type_percentage_breakdown
    .rename_axis('Crash_Type')
    .reset_index(name='Percentage')
)

# Create a donut-style pie chart using Altair
chart = alt.Chart(data).mark_arc(innerRadius=50).encode(
    theta=alt.Theta(field="Percentage", type="quantitative"), # Arc size follows the percentage
    color=alt.Color(field="Crash_Type", type="nominal", scale=alt.Scale(scheme='category20')), # One colour per crash type
    tooltip=[alt.Tooltip("Crash_Type:N"), alt.Tooltip("Percentage:Q", format=".2f")]  # Round percentage to 2 decimal places when hovering
).properties(
    title="Percentage Breakdown of First Crash Types" # Title of the chart
)

# Display the chart
chart.show()


Dataset length:  929861
Pedestrian dataset length:  22146
Pedalcyclist dataset length:  14598
Pedestrian involved crashes dataset length:  36744
Percentage of pedestrian involved crashes:  3.951558351194426
PARKED MOTOR VEHICLE: 23.14%
REAR END: 22.03%
SIDESWIPE SAME DIRECTION: 15.37%
TURNING: 14.42%
ANGLE: 10.90%
FIXED OBJECT: 4.64%
PEDESTRIAN: 2.38%
PEDALCYCLIST: 1.57%
SIDESWIPE OPPOSITE DIRECTION: 1.39%
REAR TO FRONT: 1.05%
OTHER OBJECT: 1.00%
HEAD ON: 0.85%
REAR TO SIDE: 0.62%
OTHER NONCOLLISION: 0.30%
REAR TO REAR: 0.22%
ANIMAL: 0.07%
OVERTURNED: 0.06%
TRAIN: 0.01%


## Visual: Comparing the Number of Pedestrian and Pedalcyclist Crashes by Lighting Condition (Bar Chart)

In [43]:
# Visual one 
import altair as alt

# Label the type of crash in each DataFrame.
# .assign() builds a new frame instead of writing into the filtered slices, which
# avoids SettingWithCopyWarning and keeps this cell safe to re-run. The names are
# rebound so both frames still carry the 'Type' column afterwards.
pedestrian_df = pedestrian_df.assign(Type='Pedestrian')
pedalcyclist_df = pedalcyclist_df.assign(Type='Pedalcyclist')

# Combine both DataFrames into one that contains all pedestrian related accidents
all_pedastrian_df = pd.concat([pedestrian_df, pedalcyclist_df], ignore_index=True)

# Count crashes for every (crash type, lighting condition) pair
grouped = all_pedastrian_df.groupby(['Type', 'LIGHTING_CONDITION']).size().reset_index(name='Count')

# Create a grouped bar chart: one panel per crash type, bars ordered by descending count
bar_chart = alt.Chart(grouped).mark_bar().encode(
    x=alt.X('LIGHTING_CONDITION:N', title='Lighting Condition', sort='-y'),
    y=alt.Y('Count:Q', title='Number of Crashes'),
    color=alt.Color('Type:N', title='Crash Type'),
    column=alt.Column('Type:N', title='Crash Type'), # Create separate columns for each crash type
    tooltip=['Type:N', 'LIGHTING_CONDITION:N', 'Count:Q']
).properties(
    title='Pedestrian vs. Pedalcyclist Crashes by Lighting Condition'
).configure_axisX(labelAngle=-45)

bar_chart.show()


## Visual: Comparing the Number of Pedestrian and Pedalcyclist Crashes by Lighting Condition (Line Chart)

In [44]:
import altair as alt

# Line plot of the per-type, per-lighting-condition counts held in `grouped`.
# Encodings are collected first so the chart pipeline below reads top to bottom.
line_encodings = {
    'x': alt.X('LIGHTING_CONDITION:N', title='Lighting Condition', sort='-y'),
    'y': alt.Y('Count:Q', title='Number of Crashes'),
    'color': alt.Color('Type:N', title='Crash Type'),
    'tooltip': ['Type:N', 'LIGHTING_CONDITION:N', 'Count:Q'],
}

line_chart = (
    alt.Chart(grouped)
    .mark_line(point=True)  # draw a marker at every data point on the line
    .encode(**line_encodings)
    .properties(title='Pedestrian vs. Pedalcyclist Crashes by Lighting Condition (Line Plot)')
    .configure_axisX(labelAngle=-45)  # tilt x labels so long condition names stay readable
)

line_chart.show()

## Visual: Daily Traffic Crashes in 2023 with Seasonal Averages

In [42]:
import pandas as pd
import altair as alt

# Load the crash records and parse the crash timestamp into a datetime column
df = pd.read_csv('Traffic_Crash_Data.csv')
df['CRASH_DATE'] = pd.to_datetime(df['CRASH_DATE'])

# Keep only crashes dated in 2023 and tag each with its day-of-year number.
# .assign() returns a new frame, so the filtered selection is never written to in place.
df_2023 = (
    df.loc[lambda frame: frame['CRASH_DATE'].dt.year == 2023]
    .assign(DayOfYear=lambda frame: frame['CRASH_DATE'].dt.dayofyear)
)

# Number of crashes recorded on each day of the year
daily_counts = (
    df_2023
    .groupby('DayOfYear')
    .size()
    .reset_index(name='Accident_Count')
)

# Season boundaries as (first day, last day, season) day-of-year ranges for a
# non-leap year such as 2023: day 80 = 21 Mar, 173 = 22 Jun, 266 = 23 Sep, 355 = 21 Dec.
# NOTE(review): these are northern-hemisphere astronomical seasons - confirm that
# matches where this dataset was recorded.
# Winter appears twice because it wraps around the turn of the year.
SEASON_RANGES = [
    (1, 79, 'Winter'),
    (80, 172, 'Spring'),
    (173, 265, 'Summer'),
    (266, 354, 'Fall'),
    (355, 365, 'Winter'),
]

# Legend order and the background colour used for each season
SEASON_COLORS = {
    'Spring': 'lightgreen',
    'Summer': 'lightyellow',
    'Fall': 'lightcoral',
    'Winter': 'lightblue',
}


def get_season(day):
    """Return the season name for a day-of-year number.

    Parameters
    ----------
    day : int
        Day of the year, 1-based (1 = 1 January).

    Returns
    -------
    str
        One of 'Winter', 'Spring', 'Summer' or 'Fall', looked up in SEASON_RANGES.
        Any day outside the listed ranges (e.g. day 366 of a leap year) is
        treated as 'Winter', since it falls at the very end of December.
    """
    for first_day, last_day, season in SEASON_RANGES:
        if first_day <= day <= last_day:
            return season
    return 'Winter'


daily_counts['Season'] = daily_counts['DayOfYear'].apply(get_season)

# Compute seasonal averages (mean crashes per day, rounded to one decimal)
season_avg = daily_counts.groupby('Season')['Accident_Count'].mean().round(1).to_dict()

# Legend text for each season, e.g. "Spring (avg: 301.2)". Built once and reused for
# the per-day label, the colour scale and the background bands so they cannot drift apart.
# A season with no recorded days is shown as "n/a" instead of raising KeyError.
season_labels = {
    season: f"{season} (avg: {season_avg.get(season, 'n/a')})"
    for season in SEASON_COLORS
}

# Update season names with average for the legend
daily_counts['Season Label'] = daily_counts['Season'].map(season_labels)

# Season color scale: legend label -> colour
season_colors = {season_labels[season]: color for season, color in SEASON_COLORS.items()}

# Background season ranges, one row per band in SEASON_RANGES
seasons_df = pd.DataFrame([
    {"start": first_day, "end": last_day, "season": season_labels[season]}
    for first_day, last_day, season in SEASON_RANGES
])

# Background rects shading each season's span of days
background = alt.Chart(seasons_df).mark_rect().encode(
    x=alt.X('start:O'),
    x2='end:O',
    color=alt.Color('season:N', scale=alt.Scale(domain=list(season_colors.keys()), range=list(season_colors.values())),
                    legend=alt.Legend(title="Season (Average Accidents)")),
    opacity=alt.value(0.3)
)

# Line + points chart; x-axis ticks are placed on the last day of each season band
season_end_days = [last_day for _, last_day, _ in SEASON_RANGES]
base = alt.Chart(daily_counts).encode(
    x=alt.X('DayOfYear:O', title='Day of Year', axis=alt.Axis(values=season_end_days, labelAngle=-45)),
    y=alt.Y('Accident_Count:Q', title='Number of Accidents'),
    tooltip=['DayOfYear', 'Accident_Count', 'Season']
)

line = base.mark_line(interpolate='basis')
points = base.mark_point(filled=True, size=60)

# Combine the shaded seasons, the smoothed line and the daily points into one layered chart
chart = (background + line + points).properties(
    width=1000,
    height=400,
    title='2023 Daily Traffic Accidents with Seasonal Averages'
)

chart