# Maryland Crash Data Exploration
Data cleaning done to update bike stress data for the City of College Park.

In [118]:
# Import modules
import pandas as pd
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt

## Loading raw data

In [119]:
non_motorists_raw = pd.read_csv('data/Crash_NonMotorists_data_raw.csv') #Crash data for non-motorists
general_crash_data = pd.read_csv('data/Crash_Reports_data_raw.csv') #Crash data for general cases
non_motorists_raw.head(5)

Unnamed: 0,Crash Severity Description,Reportnumber,Type NM Description,PersonID NM,ActionPriorToCrash NM,ActionPriorToCrash NM Description,Age NM,City NM,ContactPoint NM,ContactPoint NM Description,...,Location NM,Location NM Description,OriginDestination NM,OriginDestination NM Description,ReportNumber NM,State NM,StruckByVehID NM,Type NM,Unknown NM,ZipCode NM
0,Fatal Crashes,MSP5121000P,Pedestrian,02180e97-d5ec-42e4-ba4e-a6c7800dc4e2,0,,33,RIVERDALE,3,Right,...,6,Shoulder/Roadside,97,Not Applicable,MSP5121000P,MD,bbdd958f-b482-420a-a0fa-577fc684ea97,6,N,20737
1,Fatal Crashes,DA36550097,Pedestrian,af7f440b-b3ad-4a72-afe0-ee8c8abb8890,100,Cross/Enter at Intersection,74,BLADENSBURG,9,Left,...,1,Intersection – Marked Crosswalk,97,Not Applicable,DA36550097,MD,79108605-d5b5-4094-8da0-b0b39620a0d9,6,N,20710
2,Fatal Crashes,MSP556300B5,Pedestrian,b70c203f-7e57-42d8-9fb7-5c34666a8883,101,Cross/Enter Not at Intersection,23,WASHINGTON DC,99,Unknown,...,7,Travel Lane – Other Location,97,Not Applicable,MSP556300B5,DC,dae1b869-ac45-4a9f-a36e-54c59b1f8afd,6,N,20002
3,Injury Crashes,DA4401000X,Scooter (electric),2c9274ab-5c85-49b1-a34d-f942e33eca52,100,Cross/Enter at Intersection,47,SUITLAND,3,Right,...,2,Intersection – Unmarked Crosswalk,97,Not Applicable,DA4401000X,MD,19f78585-43a9-4e22-8f37-c21e7a740e6b,102,N,20746
4,Injury Crashes,MSP718600HV,Occupant of Motor Vehicle Not in Transport,36bac7ee-97ab-404f-8d44-a40dc64e321e,98,Other,36,DISTRICT HEIGHTS,3,Right,...,98,Other,97,Not Applicable,MSP718600HV,MD,bac96c16-5899-41a4-acf4-6c41b70b4acb,103,N,20747


In [120]:
general_crash_data.head(5) #Will get the lat/lon data from this dataset

Unnamed: 0,Crash Severity Description,Reportnumber,Agencyname,CollisionImpact Description,Crash County Description,FunctionalClass Description,ImpairedCrash,Large Vehicle Involved,Motorcycle Crash,Non-Motorist Crash,...,Surface,Surface Description,TrafficControl,Trafficcontrol Description,WZLawEnforcementPresent,WZLocation,WZLocation Description,WZType,WZType Description,WZWorkersPresent
0,Property Damage Crashes,DA37150090,Prince George's County Police,Other,Prince George's,,No,No,No,No,...,1.0,Dry,,,,,,,,
1,Injury Crashes,MSP757700KS,Maryland State Police,"Sideswipe, Same Direction",Prince George's,Interstate,No,No,No,No,...,1.0,Dry,,,,,,,,
2,Property Damage Crashes,DA4354003S,Prince George's County Police,Angle,Prince George's,Local,No,No,No,No,...,1.0,Dry,,,,,,,,
3,Property Damage Crashes,CBPD00920043,Bowie Police Department,Front to Rear,Prince George's,Minor Collector,No,No,No,No,...,1.0,Dry,,,,,,,,
4,Property Damage Crashes,DA4370001R,Prince George's County Police,Single Vehicle,Prince George's,,No,No,No,No,...,1.0,Dry,,,,,,,,


In [121]:
general_crash_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7485 entries, 0 to 7484
Data columns (total 72 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   Crash Severity Description      7485 non-null   object 
 1   Reportnumber                    7485 non-null   object 
 2   Agencyname                      7485 non-null   object 
 3   CollisionImpact Description     7485 non-null   object 
 4   Crash County Description        7485 non-null   object 
 5   FunctionalClass Description     6264 non-null   object 
 6   ImpairedCrash                   7485 non-null   object 
 7   Large Vehicle Involved          7485 non-null   object 
 8   Motorcycle Crash                7485 non-null   object 
 9   Non-Motorist Crash              7485 non-null   object 
 10  OffRoad                         7485 non-null   object 
 11  RoadName                        6247 non-null   object 
 12  Route Number                    68

## Cleaning data

In [122]:
non_motorist_tidy = non_motorists_raw.merge(general_crash_data, on='Reportnumber', how='left')
non_motorist_tidy.head(5)

Unnamed: 0,Crash Severity Description_x,Reportnumber,Type NM Description,PersonID NM,ActionPriorToCrash NM,ActionPriorToCrash NM Description,Age NM,City NM,ContactPoint NM,ContactPoint NM Description,...,Surface,Surface Description,TrafficControl,Trafficcontrol Description,WZLawEnforcementPresent,WZLocation,WZLocation Description,WZType,WZType Description,WZWorkersPresent
0,Fatal Crashes,MSP5121000P,Pedestrian,02180e97-d5ec-42e4-ba4e-a6c7800dc4e2,0,,33,RIVERDALE,3,Right,...,1.0,Dry,,,,,,,,
1,Fatal Crashes,DA36550097,Pedestrian,af7f440b-b3ad-4a72-afe0-ee8c8abb8890,100,Cross/Enter at Intersection,74,BLADENSBURG,9,Left,...,1.0,Dry,,,,,,,,
2,Fatal Crashes,MSP556300B5,Pedestrian,b70c203f-7e57-42d8-9fb7-5c34666a8883,101,Cross/Enter Not at Intersection,23,WASHINGTON DC,99,Unknown,...,1.0,Dry,,,,,,,,
3,Injury Crashes,DA4401000X,Scooter (electric),2c9274ab-5c85-49b1-a34d-f942e33eca52,100,Cross/Enter at Intersection,47,SUITLAND,3,Right,...,1.0,Dry,,,,,,,,
4,Injury Crashes,MSP718600HV,Occupant of Motor Vehicle Not in Transport,36bac7ee-97ab-404f-8d44-a40dc64e321e,98,Other,36,DISTRICT HEIGHTS,3,Right,...,1.0,Dry,,,,,,,,


In [123]:
non_motorist_tidy.columns =non_motorist_tidy.columns.str.strip()
non_motorist_tidy.columns =non_motorist_tidy.columns.str.lower().str.replace(" ", "_")

In [124]:
non_motorist_tidy.info(verbose = True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 205 entries, 0 to 204
Data columns (total 100 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   crash_severity_description_x       205 non-null    object 
 1   reportnumber                       205 non-null    object 
 2   type_nm_description                205 non-null    object 
 3   personid_nm                        205 non-null    object 
 4   actionpriortocrash_nm              205 non-null    int64  
 5   actionpriortocrash_nm_description  193 non-null    object 
 6   age_nm                             205 non-null    int64  
 7   city_nm                            205 non-null    object 
 8   contactpoint_nm                    205 non-null    int64  
 9   contactpoint_nm_description        205 non-null    object 
 10  distractedbyaction_nm              205 non-null    int64  
 11  distractedbyaction_nm_description  205 non-null    object

### Classes of non-motorists

In [125]:
for i in non_motorist_tidy["type_nm_description"].unique():
    print('*' + i + '\n')

*Pedestrian

*Scooter (electric)

*Occupant of Motor Vehicle Not in Transport

*Unknown Type Of Non-Motorist

*Cyclist (Electric)

*Cyclist (non-electric)

*Occupant Of a Non-Motor Vehicle Transportation Device

*Unknown

*Scooter (non-Electric)

*Other Pedestrian (person in a building, skater, personal conveyance, etc.)



We will use:
* Scooter (electric)
* Cyclist (Electric)
* Cyclist (non-electric)
* Scooter (non-Electric)

In [126]:
# Create list of desired classes
non_motorists_classes = ['Unknown Type Of Non-Motorist','Unknown'] #Values to exclude
#Use list to filter dataframe
non_motorist_tidy = non_motorist_tidy[~non_motorist_tidy['type_nm_description'].isin(non_motorists_classes)] #Filter by list values

In [127]:
non_motorist_tidy = non_motorist_tidy[non_motorist_tidy["city_nm"] == 'COLLEGE PARK']

## Finalize tidy data
Redefine column names and plot geographic data.

In [128]:
non_motorist_tidy.to_csv('data/Crash_non_motorists_data_tidy.csv')

In [129]:
non_motorist_tidy.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7 entries, 55 to 191
Data columns (total 100 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   crash_severity_description_x       7 non-null      object 
 1   reportnumber                       7 non-null      object 
 2   type_nm_description                7 non-null      object 
 3   personid_nm                        7 non-null      object 
 4   actionpriortocrash_nm              7 non-null      int64  
 5   actionpriortocrash_nm_description  7 non-null      object 
 6   age_nm                             7 non-null      int64  
 7   city_nm                            7 non-null      object 
 8   contactpoint_nm                    7 non-null      int64  
 9   contactpoint_nm_description        7 non-null      object 
 10  distractedbyaction_nm              7 non-null      int64  
 11  distractedbyaction_nm_description  7 non-null      object 
 12 