In [4]:
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st
import numpy as np
import geoviews as gv
import requests
import hvplot.pandas
from geoviews import opts

## Data Cleaning

In [5]:
# Load the crash report export. Every column is read as text (dtype=str);
# the columns that are needed as numbers are converted explicitly below.
raw_data_path = "../Resources/Crash_Reporting_-_Drivers_Data.csv"
raw_data_df = pd.read_csv(raw_data_path, dtype=str, header=0, low_memory=False, delimiter=',')

# Split 'Crash Date/Time' at the first space: the leading part becomes 'Date',
# everything after it (clock time plus AM/PM marker) becomes 'Time'.
split_data = raw_data_df['Crash Date/Time'].str.split(' ', n=1, expand=True)
raw_data_df['Date'] = split_data[0]
raw_data_df['Time'] = split_data[1]

# Ordinal encoding of the injury severity labels (0 = least severe, 4 = fatal).
severity_scale = {
    'NO APPARENT INJURY': 0,
    'POSSIBLE INJURY': 1,
    'SUSPECTED MINOR INJURY': 2,
    'SUSPECTED SERIOUS INJURY': 3,
    'FATAL INJURY': 4
}

# Normalise surrounding whitespace and letter case before the lookup so that a
# label such as 'Fatal Injury ' is scored 4 instead of falling through to the
# default below. The original 'Injury Severity' column is left untouched.
severity_labels = raw_data_df['Injury Severity'].str.strip().str.upper()

# Labels that are missing or not in severity_scale default to 0.
# NOTE(review): that makes unknown severities indistinguishable from
# 'NO APPARENT INJURY' -- confirm this is intended.
raw_data_df['Injury Severity Scale'] = severity_labels.map(severity_scale).fillna(0).astype(int)

# Convert the remaining numeric columns. errors='coerce' turns unparseable
# text into NaN, which is then replaced by 0.
# NOTE(review): a filled-in 0 cannot be told apart from a real 0, and for the
# coordinates it is a valid location far from the sample rows (~39, -77) --
# consider keeping NaN if later cells can handle it.
raw_data_df['Speed Limit'] = pd.to_numeric(raw_data_df['Speed Limit'], errors='coerce').fillna(0).astype(int)
for coordinate_column in ('Latitude', 'Longitude'):
    raw_data_df[coordinate_column] = (
        pd.to_numeric(raw_data_df[coordinate_column], errors='coerce')
        .fillna(0)
        .astype(float)
    )

# Display raw data
raw_data_df.head()

Unnamed: 0,Report Number,Local Case Number,Agency Name,ACRS Report Type,Crash Date/Time,Route Type,Road Name,Cross-Street Type,Cross-Street Name,Off-Road Description,...,Vehicle Year,Vehicle Make,Vehicle Model,Equipment Problems,Latitude,Longitude,Location,Date,Time,Injury Severity Scale
0,MCP3170003V,240000438,Montgomery County Police,Property Damage Crash,01/03/2024 02:55:00 PM,,,,,IN FRONT OF 18900 BIRDSEYE DR,...,2017,LEXUS,SUV,NO MISUSE,39.165005,-77.24931,"(39.16500483, -77.24931)",01/03/2024,02:55:00 PM,0
1,MCP3254003K,230072050,Montgomery County Police,Injury Crash,12/16/2023 12:36:00 PM,Maryland (State),GERMANTOWN RD,County,MIDDLEBROOK RD,,...,2010,TOYT,PRIUS,NO MISUSE,39.178776,-77.26719,"(39.17877577, -77.26718974)",12/16/2023,12:36:00 PM,0
2,EJ7887003Q,230074270,Gaithersburg Police Depar,Injury Crash,12/29/2023 12:00:00 PM,Maryland (State),GREAT SENECA HWY,Municipality,KENTLANDS BLVD,,...,2021,SUBARU,FORRESTER,NO MISUSE,39.123574,-77.231769,"(39.12357374, -77.231769)",12/29/2023,12:00:00 PM,2
3,MCP2674004J,230064598,Montgomery County Police,Property Damage Crash,11/05/2023 09:07:00 PM,Maryland (State),WOODFIELD RD,County,GLENDALOUGH RD,,...,2019,DODGE,CHARGER,NO MISUSE,39.211742,-77.171461,"(39.21174219, -77.17146065)",11/05/2023,09:07:00 PM,0
4,MCP25280008,230067019,Montgomery County Police,Property Damage Crash,11/18/2023 12:40:00 AM,Maryland (State),CLARKSBURG RD,Maryland (State),CLARKSBURG RD,,...,2014,NISSAN,ROGUE,,39.228915,-77.289091,"(39.22891483, -77.28909117)",11/18/2023,12:40:00 AM,0


In [6]:
# Sanity check: show the dtype of the converted 'Latitude' column.
raw_data_df['Latitude'].dtype

dtype('float64')

In [7]:
# Columns carried into the environment-focused subset: injury outcome,
# crash date/time, road conditions, fault flag and coordinates.
ENVIRONMENT_COLUMNS = [
    'Injury Severity',
    'Injury Severity Scale',
    'Date',
    'Time',
    'Speed Limit',
    'Light',
    'Traffic Control',
    'Driver At Fault',
    'Latitude',
    'Longitude',
]

# Keep every row, but only the columns listed above (in that order).
environment_data_df = raw_data_df.loc[:, ENVIRONMENT_COLUMNS]

environment_data_df.head()

Unnamed: 0,Injury Severity,Injury Severity Scale,Date,Time,Speed Limit,Light,Traffic Control,Driver At Fault,Latitude,Longitude
0,NO APPARENT INJURY,0,01/03/2024,02:55:00 PM,0,DAYLIGHT,NO CONTROLS,Yes,39.165005,-77.24931
1,NO APPARENT INJURY,0,12/16/2023,12:36:00 PM,35,DAYLIGHT,TRAFFIC SIGNAL,No,39.178776,-77.26719
2,SUSPECTED MINOR INJURY,2,12/29/2023,12:00:00 PM,35,DAYLIGHT,TRAFFIC SIGNAL,No,39.123574,-77.231769
3,NO APPARENT INJURY,0,11/05/2023,09:07:00 PM,40,DARK LIGHTS ON,NO CONTROLS,No,39.211742,-77.171461
4,NO APPARENT INJURY,0,11/18/2023,12:40:00 AM,20,DARK LIGHTS ON,YIELD SIGN,Yes,39.228915,-77.289091


In [8]:
# Write environment_data_df to its own CSV; index=False keeps the row index
# out of the file.
environment_csv_path = '../Resources/environment_data.csv'
environment_data_df.to_csv(environment_csv_path, index=False)

## Injury Severity Exploration

### Fatal Injuries

In [14]:
# Keep only the rows whose severity score is 4, the value assigned to
# 'FATAL INJURY' when the scale column was built.
is_fatal = environment_data_df['Injury Severity Scale'].eq(4)
fatal_injuries_df = environment_data_df.loc[is_fatal]

fatal_injuries_df.head()

Unnamed: 0,Injury Severity,Injury Severity Scale,Date,Time,Speed Limit,Light,Traffic Control,Driver At Fault,Latitude,Longitude
20,FATAL INJURY,4,10/28/2023,12:15:00 PM,30,DAYLIGHT,YIELD SIGN,Yes,39.122468,-76.926338
86,FATAL INJURY,4,05/21/2023,03:04:00 PM,50,DAYLIGHT,,Yes,39.15305,-77.141618
2540,FATAL INJURY,4,07/20/2023,09:32:00 PM,40,DARK -- UNKNOWN LIGHTING,NO CONTROLS,Yes,39.161663,-77.419153
4628,FATAL INJURY,4,07/29/2023,05:49:00 AM,45,DAWN,,Yes,39.071903,-77.109592
4793,FATAL INJURY,4,09/01/2023,01:09:00 AM,35,DARK LIGHTS ON,NO CONTROLS,Yes,38.994862,-77.043648
