# Associations - Making Leeds Bike-Safe
--- 
Creation: 13.02.2021
Author: Jonas-Mika Senghaas (jsen@itu.dk)

## Required Libraries
---

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import textwrap
import json
import os

In [None]:
# TODO: import our own project library here

## Constants
---

In [2]:
# Relative paths of the project's data directories (each ends with a slash).
PATH = {
    "data_raw": "../data/raw/",
    "data_interim": "../data/interim/",
    "data_processed": "../data/processed/",
    "data_external": "../data/external/",
    "references": "../data/references/",
}

# File names that get appended to one of the PATH directories.
FILENAME = {
    "accidents": "Road Safety Data - Accidents 2019.csv",
    "casualties": "Road Safety Data - Casualties 2019.csv",
    # The missing space before the hyphen is a typo in the original dataset's
    # file name; it has to be reproduced exactly.
    "vehicles": "Road Safety Data- Vehicles 2019.csv",
    "variable_lookup": "variable lookup.xls",
}

# Starts out empty.
SUMMARY = {}

# Keys of FILENAME that identify the three data tables.
TABLENAMES = ["accidents", "casualties", "vehicles"]

## Loading Processed Leeds Data
---

In [4]:
# Load every processed Leeds table into a dict keyed by its table name.
DATA_LEEDS = {}
for dataset in TABLENAMES:
    DATA_LEEDS[dataset] = pd.read_csv(
        # os.path.join only inserts a separator when one is missing, so the
        # load no longer depends on PATH['data_processed'] ending with a slash.
        os.path.join(PATH['data_processed'], FILENAME[dataset]),
        # Accident_Index is the key the tables are matched on further down and
        # contains both purely numeric and alphanumeric ids; forcing it to str
        # keeps it comparable across tables regardless of dtype inference.
        dtype={'Accident_Index': str},
    )

In [8]:
# Show the dimensions (rows, columns) of the loaded vehicles table.
DATA_LEEDS['vehicles'].shape

(2688, 23)

## Filtering for Bike Accidents
---

In [30]:
# Start the bike-specific collection with the vehicle rows whose Vehicle_Type
# is 1 (the code this notebook treats as "bike").
BIKE_ACCIDENTS = {
    'vehicles': DATA_LEEDS['vehicles'].loc[
        lambda vehicles: vehicles['Vehicle_Type'].eq(1)
    ],
}

In [31]:
# Distinct Accident_Index values of the bike rows (Vehicle_Type == 1);
# the cell then displays how many such accidents there are.
bike_accidents_indexes = set(BIKE_ACCIDENTS['vehicles'].loc[:, 'Accident_Index'])
len(bike_accidents_indexes)

238

In [32]:
# Restrict the remaining tables to the rows belonging to a bike accident.
for dataset in TABLENAMES:
    # The vehicles entry was already filtered by Vehicle_Type; leave it alone.
    if dataset == 'vehicles':
        continue
    BIKE_ACCIDENTS[dataset] = DATA_LEEDS[dataset].loc[
        lambda table: table['Accident_Index'].isin(bike_accidents_indexes)
    ]

In [33]:
# Display the accidents table restricted to the accidents that involve a bike.
BIKE_ACCIDENTS['accidents']

Unnamed: 0,Accident_Index,Location_Easting_OSGR,Location_Northing_OSGR,Longitude,Latitude,Police_Force,Accident_Severity,Number_of_Vehicles,Number_of_Casualties,Date,...,Pedestrian_Crossing-Human_Control,Pedestrian_Crossing-Physical_Facilities,Light_Conditions,Weather_Conditions,Road_Surface_Conditions,Special_Conditions_at_Site,Carriageway_Hazards,Urban_or_Rural_Area,Did_Police_Officer_Attend_Scene_of_Accident,LSOA_of_Accident_Location
2,2019136111190,435904.0,425850.0,-1.457300,53.727837,13,3,2,1,1,...,0,0,1,1,1,0,0,2,1,E01011636
4,2019136111836,429149.0,431736.0,-1.559127,53.781158,13,2,2,1,1,...,0,0,4,1,1,0,0,1,1,E01011366
5,2019136120357,428760.0,432723.0,-1.564938,53.790050,13,2,2,1,1,...,0,0,1,1,1,0,0,1,1,E01033013
12,2019136170679,432347.0,430836.0,-1.510690,53.772881,13,3,2,1,1,...,0,0,6,1,1,0,0,1,2,E01011470
17,2019136180248,429558.0,431566.0,-1.552937,53.779607,13,3,2,1,1,...,0,1,1,1,1,0,0,1,1,E01011364
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1404,2019136CF0784,425916.0,435496.0,-1.607873,53.815122,13,2,2,1,12,...,0,0,1,1,2,0,0,1,1,E01011281
1416,2019136CI0859,439303.0,432396.0,-1.404956,53.786425,13,3,2,1,12,...,0,0,1,1,2,0,0,2,2,E01011402
1417,2019136CI1657,431062.0,430364.0,-1.530234,53.768716,13,3,2,1,12,...,0,0,4,2,2,0,0,1,1,E01011472
1425,2019136CK2067,430765.0,433846.0,-1.534394,53.800029,13,3,2,1,12,...,2,4,4,1,1,0,0,1,1,E01033031


In [25]:
# All vehicle rows (bikes included) that belong to an accident with a bike ...
accidents_with_bikes = DATA_LEEDS['vehicles'].loc[
    lambda vehicles: vehicles['Accident_Index'].isin(bike_accidents_indexes)
]
# ... and of those, only the vehicles that are not bikes themselves.
other_vehicles = accidents_with_bikes.loc[
    lambda vehicles: vehicles['Vehicle_Type'].ne(1)
]

In [26]:
# Display the non-bike vehicles that took part in accidents involving a bike.
other_vehicles

Unnamed: 0,Accident_Index,Vehicle_Reference,Vehicle_Type,Towing_and_Articulation,Vehicle_Manoeuvre,Vehicle_Location-Restricted_Lane,Junction_Location,Skidding_and_Overturning,Hit_Object_in_Carriageway,Vehicle_Leaving_Carriageway,...,Journey_Purpose_of_Driver,Sex_of_Driver,Age_of_Driver,Age_Band_of_Driver,Engine_Capacity_(CC),Propulsion_Code,Age_of_Vehicle,Driver_IMD_Decile,Driver_Home_Area_Type,Vehicle_IMD_Decile
6,2019136111190,1,9,0,18,0,8,0,0,0,...,6,1,48,8,1686,2,15,7,1,7
9,2019136111836,1,9,0,9,0,1,0,0,0,...,1,1,51,8,1598,2,7,-1,-1,-1
11,2019136120357,1,19,0,5,0,4,0,0,0,...,1,1,33,6,-1,3,3,1,1,1
25,2019136170679,1,9,0,4,0,1,0,0,0,...,1,1,52,8,1998,2,11,9,1,9
32,2019136180248,1,9,0,18,0,8,0,0,0,...,2,1,22,5,1995,2,14,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2601,2019136CF0784,1,9,0,9,0,8,0,0,0,...,6,1,68,10,1461,2,7,2,1,2
2625,2019136CI0859,1,9,0,18,0,1,0,0,0,...,6,3,-1,-1,-1,-1,-1,-1,-1,-1
2627,2019136CI1657,1,9,0,5,0,6,0,0,0,...,6,1,21,5,998,1,1,6,1,6
2641,2019136CK2067,1,9,0,17,0,0,0,0,0,...,6,1,56,9,1339,1,15,1,1,1


In [None]:
# TODO: reuse the exact same functions that we used for exploring the whole Leeds dataset