# Main Document

## EDA

In [21]:
import pandas as pd

# Load the raw crash records from the CSV export
df = pd.read_csv('Traffic_Crash_Data.csv')

# Total number of records; reused as the denominator for the shares printed below
total_crashes = len(df)
print("Dataset length: " , total_crashes)

# The crash-type column drives every filter and breakdown in this cell
first_crash_type = df['FIRST_CRASH_TYPE']

# Subset: rows whose first crash type is exactly PEDESTRIAN
is_pedestrian = first_crash_type.eq('PEDESTRIAN')
pedestrian_df = df[is_pedestrian]

# Size of the pedestrian subset and its share of all records
pedestrian_count = len(pedestrian_df)
print("Pedestrian dataset length: " , pedestrian_count)
print("Percentage of pedestrian crashes: " , pedestrian_count / total_crashes * 100)

# Share of every crash type, expressed as a percentage.
# Note: value_counts() leaves out missing values by default, so these shares are
# relative to the rows that have a crash type recorded.
crash_type_percentage_breakdown = first_crash_type.value_counts(normalize=True).mul(100)

# One line per crash type, rounded to two decimal places
for crash_type, percentage in crash_type_percentage_breakdown.items():
    print(f"{crash_type}: {percentage:.2f}%")

# Based on the breakdown above, PEDESTRIAN and PEDALCYCLIST are the two categories
# grouped together here as "pedestrian involved" crashes.
# (The variable keeps its existing spelling because other cells may refer to it.)
is_pedestrian_involved = first_crash_type.isin(['PEDESTRIAN', 'PEDALCYCLIST'])
pedestrain_involved_crashes = df[is_pedestrian_involved]

# Size of the combined subset and its share of all records
pedestrian_involved_count = len(pedestrain_involved_crashes)
print("Pedestrian involved crashes dataset length: " , pedestrian_involved_count)
print("Percentage of pedestrian involved crashes: " , pedestrian_involved_count / total_crashes * 100)




Dataset length:  929861
Pedestrian dataset length:  22146
Percentage of pedestrian crashes:  2.3816462890690113
PARKED MOTOR VEHICLE: 23.14%
REAR END: 22.03%
SIDESWIPE SAME DIRECTION: 15.37%
TURNING: 14.42%
ANGLE: 10.90%
FIXED OBJECT: 4.64%
PEDESTRIAN: 2.38%
PEDALCYCLIST: 1.57%
SIDESWIPE OPPOSITE DIRECTION: 1.39%
REAR TO FRONT: 1.05%
OTHER OBJECT: 1.00%
HEAD ON: 0.85%
REAR TO SIDE: 0.62%
OTHER NONCOLLISION: 0.30%
REAR TO REAR: 0.22%
ANIMAL: 0.07%
OVERTURNED: 0.06%
TRAIN: 0.01%
Pedestrian involved crashes dataset length:  36744
Percentage of pedestrian involved crashes:  3.951558351194426


## Visuals 

In [None]:
import altair as alt
import pandas as pd

# Turn the crash-type percentage breakdown (a Series indexed by crash type) into a
# two-column DataFrame for Altair: the index becomes 'Crash_Type' and the values
# become 'Percentage'.
data = (
    crash_type_percentage_breakdown
    .rename_axis('Crash_Type')
    .reset_index(name='Percentage')
)

# Build a donut chart (an arc mark with an inner radius) using Altair
chart = alt.Chart(data).mark_arc(innerRadius=50).encode(
    theta=alt.Theta(field="Percentage", type="quantitative"),  # Arc angle is driven by the percentage
    color=alt.Color(field="Crash_Type", type="nominal", scale=alt.Scale(scheme='category20')),  # One colour per crash type
    tooltip=[alt.Tooltip("Crash_Type:N"), alt.Tooltip("Percentage:Q", format=".2f")]  # Show the percentage with 2 decimals on hover
).properties(
    title="Percentage Breakdown of First Crash Types"  # Title of the chart
)

# Display the chart by leaving it as the last expression of the cell, so the
# notebook renders it inline. chart.show() behaves differently across Altair
# versions (older releases need the separate altair_viewer package and open an
# external window), so it is avoided here.
chart


## ML 

## Reflection