In [None]:
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt

# Read the two source tables from CSV files (paths are relative to the
# current working directory).
collision_events = pd.read_csv('collision_events.csv')
involved_persons = pd.read_csv('involved_persons.csv')

# Attach every involved-person row to its collision record through the shared
# 'collision_no' key. An inner join (the pandas default) keeps only the
# collision numbers that appear in both tables.
merged_data = collision_events.merge(involved_persons, how='inner', on='collision_no')

# Keep only the rows whose involved_class is 'PEDESTRIAN' or 'PEDESTRIAN - NOT HIT'.
# .copy() makes this an independent frame, so the column assignment below does
# not write into a slice of merged_data (avoids pandas' SettingWithCopyWarning).
pedestrian_data = merged_data.loc[
    merged_data['involved_class'].isin(['PEDESTRIAN', 'PEDESTRIAN - NOT HIT'])
].copy()

# Convert the accdate_x column to a datetime format
pedestrian_data['accdate_x'] = pd.to_datetime(pedestrian_data['accdate_x'])

# The 10-year window ends at the most recent pedestrian record and starts
# exactly ten years before it.
ped_window_end = pedestrian_data['accdate_x'].max()
ped_window_start = ped_window_end - pd.DateOffset(years=10)
last_10_years = pedestrian_data.loc[pedestrian_data['accdate_x'] >= ped_window_start]

# Count the distinct collisions in each calendar year of the window
pedestrian_counts = last_10_years.groupby(last_10_years['accdate_x'].dt.year)['collision_no'].nunique()

if pedestrian_counts.empty:
    # No pedestrian rows at all: there is nothing to chart and no first/last
    # year to compare, so report that instead of failing on .iloc / .plot.
    pedestrian_rates = pedestrian_counts.astype(float)
    pedestrian_rate_change = float('nan')
    print("No pedestrian collisions found in the last 10 years.")
else:
    # Because the window is anchored on the latest record rather than on
    # 1 January, the first and last calendar years are usually only partly
    # inside it. Divide each year's count by the number of days of that year
    # that the window actually covers, so partial years are not understated.
    first_day = ped_window_start.date()
    last_day = ped_window_end.date()
    days_covered = [
        (
            min(last_day, pd.Timestamp(year=int(year), month=12, day=31).date())
            - max(first_day, pd.Timestamp(year=int(year), month=1, day=1).date())
        ).days + 1
        for year in pedestrian_counts.index
    ]

    # Pedestrian collisions per day, one value per calendar year
    pedestrian_rates = pedestrian_counts / days_covered

    # Percentage change of the daily rate between the first and last year of the window
    first_rate = pedestrian_rates.iloc[0]
    last_rate = pedestrian_rates.iloc[-1]
    pedestrian_rate_change = (last_rate - first_rate) / first_rate * 100

    # Plot a bar chart of the pedestrian collision rates over the last 10 years
    pedestrian_rates.plot(kind='bar')
    plt.title('Pedestrian Collision Rates (last 10 years)')
    plt.xlabel('Year')
    plt.ylabel('Collisions per Day')
    plt.show()

    # Print the rate change percentage; an exactly-zero change is reported as
    # "not changed" rather than as a 0.00% decrease.
    if pedestrian_rate_change > 0:
        print(f"The rate of pedestrian collisions has increased by {abs(pedestrian_rate_change):.2f}% over the last 10 years.")
    elif pedestrian_rate_change < 0:
        print(f"The rate of pedestrian collisions has decreased by {abs(pedestrian_rate_change):.2f}% over the last 10 years.")
    else:
        print("The rate of pedestrian collisions has not changed over the last 10 years.")


# Keep only the rows whose involved_injury_class is one of the recorded injury
# levels ('FATAL', 'MAJOR', 'MINIMAL', 'MINOR'). isin() is already False for
# missing values, so no separate dropna() is needed. The explicit .copy() makes
# the result an independent frame, so the column assignment below never writes
# into a slice of merged_data.
injury_data = merged_data.loc[
    merged_data['involved_injury_class'].isin(['FATAL', 'MAJOR', 'MINIMAL', 'MINOR'])
].copy()

# Convert the accdate_x column to a datetime format
injury_data['accdate_x'] = pd.to_datetime(injury_data['accdate_x'])

# Keep the 10 years leading up to the most recent injury record.
# NOTE: this rebinds last_10_years, which earlier in the file held the
# pedestrian subset.
injury_window_start = injury_data['accdate_x'].max() - pd.DateOffset(years=10)
last_10_years = injury_data.loc[injury_data['accdate_x'] >= injury_window_start]

# Count the distinct collisions per (calendar year, injury class) pair
injury_counts = last_10_years.groupby([last_10_years['accdate_x'].dt.year, 'involved_injury_class'])['collision_no'].nunique()

# Reshape into a wide table: one row per year, one column per injury class,
# with 0 where a class did not occur in a year
injury_counts = injury_counts.unstack(level=-1, fill_value=0)

if injury_counts.empty:
    # An empty table cannot be plotted; report it instead of raising.
    print("No injury collisions found in the last 10 years.")
else:
    # Plot a stacked bar chart of the injury counts by year.
    # A collision involving people with different injury classes is counted
    # once in each of those classes, so a bar's total height can exceed the
    # number of distinct collisions in that year.
    injury_counts.plot(kind='bar', stacked=True)
    plt.title('Injury Counts (last 10 years)')
    plt.xlabel('Year')
    plt.ylabel('Number of Collisions')
    plt.show()
