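## NYC bike accidents: data preparation
This notebook prepares New York City bike accident data for visualization: it ranks the contributing factors of accidents, builds the 2022 dataset used for the accident map, and computes how injuries and deaths developed over time relative to the number of bike rides.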

In [None]:
import pandas as pd

In [None]:
# Show up to 50 columns when a DataFrame is displayed, so wide frames are not truncated.
pd.options.display.max_columns = 50

# Load the raw bike accident records that the first two sections work on.
df = pd.read_csv(filepath_or_buffer='data/nyc_bike_accidents.csv')

In [None]:
# Parse the crash dates once, store them back as datetimes, and derive a
# four-digit year string ('%Y') that later cells filter and group on.
crash_dates = pd.to_datetime(df['CRASH DATE'])
df['CRASH DATE'] = crash_dates
df['YEAR'] = crash_dates.dt.strftime('%Y')

### Reasons for accidents

In [None]:
# Rank the contributing factors of the accidents.
# Only the count columns needed below are summed. Summing every column would
# also try to "sum" the datetime and text columns, which pandas 2.x no longer
# drops silently.
reason_col = 'CONTRIBUTING FACTOR VEHICLE 1'
count_cols = ['NUMBER OF PERSONS INJURED', 'NUMBER OF CYCLIST INJURED', 'NUMBER OF CYCLIST KILLED']

bike_df = (
    df.groupby(reason_col)[count_cols]
    .sum()
    # The ranking uses all injured persons, not only cyclists.
    .sort_values('NUMBER OF PERSONS INJURED', ascending=False)
    .reset_index()
)
bike_df = bike_df[[reason_col, 'NUMBER OF CYCLIST INJURED', 'NUMBER OF CYCLIST KILLED']]

# Drop the first row, because that is the cases where the reason was unspecified.
# NOTE(review): this relies on the unspecified category ranking first — confirm against the data.
bike_df = bike_df.drop(index=0)

# Get top 10 reasons
bike_df = bike_df.head(10)

In [None]:
# Export the ranked contributing factors (the DataFrame index is written as the first column).
bike_df.to_csv(path_or_buf='data/nyc_accidents_reason.csv')

Next, get the numbers for 2022 alone.

In [None]:
# Contributing factors for accidents in 2022 only ('YEAR' holds year strings).
reason_col = 'CONTRIBUTING FACTOR VEHICLE 1'
count_cols = ['NUMBER OF PERSONS INJURED', 'NUMBER OF CYCLIST INJURED', 'NUMBER OF CYCLIST KILLED']

bike_df_2022 = df[df['YEAR'] == '2022']

# Sum only the count columns: an unrestricted sum would also try to aggregate
# the datetime and text columns, which pandas 2.x no longer drops silently.
bike_df_2022 = (
    bike_df_2022.groupby(reason_col)[count_cols]
    .sum()
    # The ranking uses all injured persons, not only cyclists.
    .sort_values('NUMBER OF PERSONS INJURED', ascending=False)
    .reset_index()
)
bike_df_2022 = bike_df_2022[[reason_col, 'NUMBER OF CYCLIST INJURED', 'NUMBER OF CYCLIST KILLED']]

bike_df_2022.to_csv('data/nyc_accidents_reason_2022.csv')

# Kept as the last expression so the notebook actually displays the table;
# a bare expression in the middle of a cell produces no output.
bike_df_2022

### Prepare accident data for mapping

In this section of the notebook, I prepare the accident data to be mapped as dots on a Datawrapper map. For the mapping, I need the location of each accident and whether the cyclist was injured or killed (to color the dots accordingly).

In [None]:
# Yearly totals of all numeric columns.
# `sum()` does not take a column name: its first positional parameter is
# `numeric_only`, so the string passed here before only acted as a truthy flag.
# Spelling the flag out keeps the result and makes the intent explicit.
df.groupby('YEAR').sum(numeric_only=True)

In [None]:
# Columns needed for the map export
map_columns = ['CRASH DATE', 'CRASH TIME', 'BOROUGH', 'ZIP CODE', 'LATITUDE', 'LONGITUDE', 'NUMBER OF CYCLIST KILLED', 'NUMBER OF CYCLIST INJURED', 'ON STREET NAME', 'CROSS STREET NAME']

# Get only the data from 2022 and only the columns we need.
# .copy() gives an independent frame, so adding columns below does not
# trigger SettingWithCopyWarning on a slice of df.
df_2022 = df.loc[df['YEAR'] == '2022', map_columns].copy()

# And create new columns that will be used for visualization.
# Rows with neither an injured nor a killed cyclist keep the placeholder 0.
# The column is made object-typed up front so that writing the string labels
# into it is not an incompatible-dtype assignment on an integer column.
df_2022['ACCIDENT TYPE'] = 0
df_2022['ACCIDENT TYPE'] = df_2022['ACCIDENT TYPE'].astype(object)

# 'Fatal' is assigned last so it wins when a crash has both injured and killed cyclists.
df_2022.loc[df_2022['NUMBER OF CYCLIST INJURED'] > 0, 'ACCIDENT TYPE'] = 'Injured'
df_2022.loc[df_2022['NUMBER OF CYCLIST KILLED'] > 0, 'ACCIDENT TYPE'] = 'Fatal'

# Total number of affected cyclists per crash
df_2022['INJURED OR DEATH'] = df_2022['NUMBER OF CYCLIST KILLED'] + df_2022['NUMBER OF CYCLIST INJURED']

# Crashes without coordinates cannot be placed on the map
df_2022 = df_2022.dropna(subset=['LATITUDE', 'LONGITUDE'])

In [None]:
# Export the 2022 crashes prepared for mapping (the DataFrame index is written as the first column).
df_2022.to_csv(path_or_buf='data/nyc_bike_crashes_2022.csv')

Next, check whether any streets are particularly dangerous for cyclists.

In [None]:
# Rank streets by the total number of injured or killed cyclists and show the top 50.
# numeric_only=True restricts the sum to numeric columns; an unrestricted sum
# would also try to aggregate the datetime and text columns of df_2022.
(
    df_2022.groupby('ON STREET NAME')
    .sum(numeric_only=True)
    .sort_values('INJURED OR DEATH', ascending=False)
    .head(50)
)

### Development of accidents over time

In [11]:
# Accident records, loaded again under their own name for this section.
accidents = pd.read_csv(filepath_or_buffer='data/nyc_bike_accidents.csv')

# Bike ride counts, merged with the yearly accident totals further down.
bikers = pd.read_csv(filepath_or_buffer='data/nyc_bikerides_numbers.csv')

In [12]:
# Parse the crash dates and derive a four-digit year string to group on.
accidents['CRASH DATE'] = pd.to_datetime(accidents['CRASH DATE'])

accidents['YEAR'] = accidents['CRASH DATE'].dt.strftime('%Y')

# Yearly totals of injured and killed cyclists.
# The two columns are selected before summing: `sum()` does not accept a
# column name (its first positional parameter is `numeric_only`), so the
# previous call summed every numeric column and only worked by accident.
cyclist_cols = ['NUMBER OF CYCLIST INJURED', 'NUMBER OF CYCLIST KILLED']
accidents_year = accidents.groupby('YEAR')[cyclist_cols].sum()
accidents_year

Unnamed: 0_level_0,NUMBER OF CYCLIST INJURED,NUMBER OF CYCLIST KILLED
YEAR,Unnamed: 1_level_1,Unnamed: 2_level_1
2012,2210,6
2013,4075,11
2014,4000,20
2015,4281,15
2016,4975,18
2017,4889,27
2018,4725,10
2019,4986,31
2020,5576,29
2021,4961,19


In [13]:
# Turn the YEAR index back into a regular column and switch to short lowercase names.
short_names = {"YEAR": "year", "NUMBER OF CYCLIST INJURED": "injured", "NUMBER OF CYCLIST KILLED": "killed"}
accidents_year = accidents_year.reset_index().rename(columns=short_names)

accidents_year

Unnamed: 0,year,injured,killed
0,2012,2210,6
1,2013,4075,11
2,2014,4000,20
3,2015,4281,15
4,2016,4975,18
5,2017,4889,27
6,2018,4725,10
7,2019,4986,31
8,2020,5576,29
9,2021,4961,19


In [14]:
# Expose the unnamed first CSV column under the name 'year' and keep only
# the two columns used in the rest of this section.
bikers = bikers.assign(year=bikers['Unnamed: 0'])[['year', 'Total Daily Cycling Trips']]
bikers

Unnamed: 0,year,Total Daily Cycling Trips
0,2008,240000.0
1,2009,240000.0
2,2010,250000.0
3,2011,270000.0
4,2012,320000.0
5,2013,380000.0
6,2014,420000.0
7,2015,450000.0
8,2016,460000.0
9,2017,490000.0


In [15]:
# Remove the row with index label 13 from the trip counts.
# NOTE(review): why this particular row is dropped is not visible here —
# presumably a non-year/footer row; confirm against the CSV.
# Reassigning instead of inplace=True avoids mutating a frame that was derived
# by column selection, and errors='ignore' keeps the cell re-runnable once the
# row is already gone.
bikers = bikers.drop(index=[13], errors='ignore')
bikers

Unnamed: 0,year,Total Daily Cycling Trips
0,2008,240000.0
1,2009,240000.0
2,2010,250000.0
3,2011,270000.0
4,2012,320000.0
5,2013,380000.0
6,2014,420000.0
7,2015,450000.0
8,2016,460000.0
9,2017,490000.0


In [31]:
# Attach the daily trip counts to every accident year. With a left join,
# years that have no trip count keep their accident totals and get NaN.
bikers_accidents = accidents_year.merge(bikers, on='year', how='left')

# Standardize the injuries and deaths per 1 million rides
# (daily trips * 365 approximates the rides in a year).
rides_per_year = bikers_accidents['Total Daily Cycling Trips'] * 365
bikers_accidents['injury_rate'] = bikers_accidents['injured'] / rides_per_year * 1000000
bikers_accidents['fatality_rate'] = bikers_accidents['killed'] / rides_per_year * 1000000

bikers_accidents = bikers_accidents.rename(columns={'Total Daily Cycling Trips': 'total_daily_bikerides'})

In [34]:
# Display the merged table for inspection before it is exported.
bikers_accidents

Unnamed: 0,year,injured,killed,total_daily_bikerides,injury_rate,fatality_rate
0,2012,2210,6,320000.0,18.921233,0.05137
1,2013,4075,11,380000.0,29.379957,0.079308
2,2014,4000,20,420000.0,26.092629,0.130463
3,2015,4281,15,450000.0,26.063927,0.091324
4,2016,4975,18,460000.0,29.630733,0.107207
5,2017,4889,27,490000.0,27.335756,0.150964
6,2018,4725,10,510000.0,25.382756,0.05372
7,2019,4986,31,530000.0,25.774102,0.160248
8,2020,5576,29,,,
9,2021,4961,19,550000.0,24.712329,0.094645


In [35]:
# Export the yearly accident counts and rates (the DataFrame index is written as the first column).
bikers_accidents.to_csv(path_or_buf='data/accidents_development.csv')