# Name: Margaret Nguyen

# Data Aggregation: Massachusetts Person Level Crash Data and ACS Data

**Assignment: Retrieve person level crash data from 2017 to 2021 for Massachusetts. Try to find a way to organize the data by municipality so that we can later merge it with county subdivision data from the ACS 5-year estimates. You can access the Massachusetts crash data [here](https://apps.impact.dot.state.ma.us/cdp/home). Click on "Data Extraction" and search for crashes; they should be available by year.**

**Here is the link to the Massachusetts Law Enforcement Crash Report Data Dictionary: [link](https://www.umasstransportationcenter.org/images/umtc/UMassSafe/Massachusetts%20Crash%20Report%20Data%20Dictionary.pdf).**

## Credit:

The following code is based on the work of my supervisor, Mitch Shiles. The original code can be found at this link: [Mitch Shiles' GitHub](https://github.com/rmshiles/Carlisle-Local-Crash-Analysis/blob/main/1.%20Municupal%20Crash%20Data%20Aggregation%20.ipynb).

In [6]:
# Import necessary libraries 
import requests
import json, csv
import timeit
from pathlib import Path  

# Import data handling libraries 
import numpy as np
import pandas as pd
import requests
import json

# Import graphing libraries 
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

from pathlib import Path  

pd.set_option('display.max_rows', 1000)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
pd.options.display.float_format = '{:,.6f}'.format

## Set Global Variables

In [7]:
# Set Global Variables

# Set the time period in which to look at 
START_YEAR = 2017
END_YEAR = 2021

## Query ACS Data for Municipalities 

In [8]:
# Query ACS data for Municipalities 

# Census API
HOST = 'https://api.census.gov/data'

# Year to get data for 
YEAR = '{}'.format(END_YEAR)# 2005,2010,2015,2020  

# Survey to Access data from (ACS 5 year estimates)
DATA_SET = 'acs/acs5'
BASE_URL = '/'.join([HOST, YEAR, DATA_SET])

# Create an empty dictionary for predicates 
predicates = {}

# VARIABLES
# Population Estimate:  B01003_001E
# Population Margin of error: B01003_001M
# Bike to work Estimate: B08006_014E
# Bike to work Margin of error: B08006_014M
# Walk to work Estimate:B08006_015E
# Walk to work Margin of error:B08006_015M
# Drive to work alone Estimate: B08006_003E
# Drive to work alone Margin of error: B08006_003M
# Carpool to work Estimate:B08006_004E
# Carpool to work Margin: 
# Public transit to work Estimate: B08006_008E
# Public transit to work Margine of Error: B08006_008E
# Other to Work 
# Poverty Estimate:
# Poverty Margin of error:

get_vars = ['NAME',
            'B01001_001E',
            'B08006_014E',
            'B08006_014M',
            'B08006_015E',
            'B08006_015M',
            'B08006_003E',
            'B08006_003M',
            'B08006_004E',
            'B08006_004M',
            'B08006_008E',
            'B08006_008M']

predicates['get']=','.join(get_vars)

# Set sub geographies to get data for ('county','Place','county subdivision') * means get all 
predicates['for']='county subdivision:*'

# Set geography that contains sub geographies  (25 = Massachusetts)
predicates['in']='state:25'

# Assemble the API query 
r = requests.get(BASE_URL, params = predicates)

# Print the query URL 
print(BASE_URL,predicates)

https://api.census.gov/data/2021/acs/acs5 {'get': 'NAME,B01001_001E,B08006_014E,B08006_014M,B08006_015E,B08006_015M,B08006_003E,B08006_003M,B08006_004E,B08006_004M,B08006_008E,B08006_008M', 'for': 'county subdivision:*', 'in': 'state:25'}


In [9]:
# Print the length and first thousand characters to see what you got 
print(len(r.text))
print(r.text[0:1000])

47736
[["NAME","B01001_001E","B08006_014E","B08006_014M","B08006_015E","B08006_015M","B08006_003E","B08006_003M","B08006_004E","B08006_004M","B08006_008E","B08006_008M","state","county","county subdivision"],
["County subdivisions not defined, Barnstable County, Massachusetts","0","0","13","0","13","0","13","0","13","0","13","25","001","00000"],
["Barnstable Town city, Barnstable County, Massachusetts","48556","35","52","843","321","18901","926","2649","482","303","136","25","001","03690"],
["Bourne town, Barnstable County, Massachusetts","20364","0","25","182","64","8471","673","722","288","84","55","25","001","07175"],
["Brewster town, Barnstable County, Massachusetts","10282","66","70","1","3","3733","445","153","88","24","29","25","001","07980"],
["Chatham town, Barnstable County, Massachusetts","6554","0","19","124","74","1666","252","196","96","93","102","25","001","12995"],
["Dennis town, Barnstable County, Massachusetts","14664","8","16","147","108","4930","515","658","173","37

In [10]:
# Place the Queried ACS data into a data frame 

# Set the column names to the first row of data from the query 
column_names=r.json()[0:1][0]

# Set the data to everything after the first row and convert to an array to flatten it
ACS_DATA= r.json()[1:]
ACS_data = np.array(ACS_DATA)

# Create the pandas data frame 
ACS_MUNI_DF = pd.DataFrame(columns=column_names , data = ACS_data)

# Reset the index of the data frame
ACS_MUNI_DF.reset_index()

# Rename the columns 
ACS_MUNI_DF.rename(columns ={"B01001_001E":"POPULATION",
                    "B08006_014E":'BIKE_TO_WORK_EST',
                    "B08006_014M":"BIKE_TO_WORK_MARG",
                    "B08006_015E":"WALK_TO_WORK_EST",
                    "B08006_015M":"WALK_TO_WORK_MARG",
                    'B08006_003E':"DRIVE_SOLO_TO_WORK_EST",
                    'B08006_003M':"DRIVE_SOLO_TO_WORK_MARG",
                    'B08006_004E':"CARPOOL_TO_WORK_EST",
                    'B08006_004M':"CARPOOL_TO_WORK_MARG",
                    'B08006_008E':"PUBTRANS_TO_WORK_EST",
                    'B08006_008M':"PUBTRANS_TO_WORK_MARG",
                    "county subdivision":"county_subdivision"}, inplace=True)

# Convert the NAME column to strings 
ACS_MUNI_DF['NAME'] = ACS_MUNI_DF["NAME"].astype(str)

# Remove Massachusetts from NAME This is over redundant since all data will be from Massachusetts
ACS_MUNI_DF['NAME'] = ACS_MUNI_DF.NAME.replace({', Massachusetts':''}, regex=True)

# Create separate Name variables for county and municipality 
ACS_MUNI_DF[['MUNI_NAME','COUNTY_NAME']]=ACS_MUNI_DF['NAME'].str.split(',',expand=True)

# Convert the variables to integers
ACS_MUNI_DF[["POPULATION",
                "BIKE_TO_WORK_EST",
                "BIKE_TO_WORK_MARG",
                "WALK_TO_WORK_EST",
                "WALK_TO_WORK_MARG",
                "DRIVE_SOLO_TO_WORK_EST",
                "DRIVE_SOLO_TO_WORK_MARG",
                "CARPOOL_TO_WORK_EST",
                "CARPOOL_TO_WORK_MARG",
                "PUBTRANS_TO_WORK_EST",
                "PUBTRANS_TO_WORK_MARG",
                "state",                 
                "county",                
                "county_subdivision"]] = ACS_MUNI_DF[["POPULATION",
                                            "BIKE_TO_WORK_EST",
                                            "BIKE_TO_WORK_MARG",
                                            "WALK_TO_WORK_EST",
                                            "WALK_TO_WORK_MARG",
                                            "DRIVE_SOLO_TO_WORK_EST",
                                            "DRIVE_SOLO_TO_WORK_MARG",
                                            "CARPOOL_TO_WORK_EST",
                                            "CARPOOL_TO_WORK_MARG",
                                            "PUBTRANS_TO_WORK_EST",
                                            "PUBTRANS_TO_WORK_MARG",
                                            "state",                 
                                            "county",                
                                            "county_subdivision"]].astype(int)

# Show the length of an object referred to the dataframe
print(len(ACS_MUNI_DF))

# Show the dataframe
ACS_MUNI_DF.head()

357


Unnamed: 0,NAME,POPULATION,BIKE_TO_WORK_EST,BIKE_TO_WORK_MARG,WALK_TO_WORK_EST,WALK_TO_WORK_MARG,DRIVE_SOLO_TO_WORK_EST,DRIVE_SOLO_TO_WORK_MARG,CARPOOL_TO_WORK_EST,CARPOOL_TO_WORK_MARG,PUBTRANS_TO_WORK_EST,PUBTRANS_TO_WORK_MARG,state,county,county_subdivision,MUNI_NAME,COUNTY_NAME
0,"County subdivisions not defined, Barnstable Co...",0,0,13,0,13,0,13,0,13,0,13,25,1,0,County subdivisions not defined,Barnstable County
1,"Barnstable Town city, Barnstable County",48556,35,52,843,321,18901,926,2649,482,303,136,25,1,3690,Barnstable Town city,Barnstable County
2,"Bourne town, Barnstable County",20364,0,25,182,64,8471,673,722,288,84,55,25,1,7175,Bourne town,Barnstable County
3,"Brewster town, Barnstable County",10282,66,70,1,3,3733,445,153,88,24,29,25,1,7980,Brewster town,Barnstable County
4,"Chatham town, Barnstable County",6554,0,19,124,74,1666,252,196,96,93,102,25,1,12995,Chatham town,Barnstable County


## Importing Massachusetts Person Level Crash Data from 2017 to 2021

**The following is my code:**

In [6]:
# Import datasets for the years 2017 to 2021
for num in range(2017, 2022):
    if num == 2018 or num == 2019:
        filename = f'./data/{num}_Person_Level_Details.csv'
    else:
        filename = f'./data/{num}_Person_Level_Crash_Details.csv'
    
    # Read the CSV file into a DataFrame and assign it to a dynamically named variable
    var_name = f'crash_{num}'  # Create a variable name
    
    # globals() provides access to variables defined at the top level of your script or module
    globals()[var_name] = pd.read_csv(filename)
    
    # Can also use locals for this cell because locals() provides access to variables defined within the current function or scope
    # locals()[var_name] = pd.read_csv(filename)

  globals()[var_name] = pd.read_csv(filename)
  globals()[var_name] = pd.read_csv(filename)
  globals()[var_name] = pd.read_csv(filename)
  globals()[var_name] = pd.read_csv(filename)
  globals()[var_name] = pd.read_csv(filename)


## Data Cleaning

In [7]:
# Show the difference of variables between datasets
# Create a list of dataset names for the years 2018 to 2021
dataset_years = [2018, 2019, 2020, 2021]

# Loop through the list of dataset years
for year in dataset_years:
    # Generate the dataset variable names based on the year
    current_dataset_name = f'crash_{year}'
    
    # Find the columns that are unique to the current year's dataset compared to 2017
    columns_only_in_current = set(locals()[current_dataset_name].columns) - set(crash_2017.columns)
    
    if columns_only_in_current:
        print(f"\nColumns only in {current_dataset_name}:")
        for column in columns_only_in_current:
            print(column)


Columns only in crash_2018:
CRASH_TIME_2
CRASH_DATE
T_EXC_TIME
T_EXC_TYPE

Columns only in crash_2019:
CRASH_TIME_2
CRASH_DATE
T_EXC_TIME
T_EXC_TYPE

Columns only in crash_2020:
CRASH_TIME_2
CRASH_DATE
T_EXC_TIME
T_EXC_TYPE

Columns only in crash_2021:
CRASH_TIME_2
TRVL_DIRC_DESCR
T_EXC_TIME
SHAPE
T_EXC_TYPE


In [8]:
# Rename the columns
# Keep 'CRASH_DATE' and 'CRASH_TIME_2' since they contain the same type of information as 'CRASH_DATETIME' and 'CRASH_TIME' in 'crash_2017'
crash_2018.rename(columns={'CRASH_TIME_2': 'CRASH_TIME', 'CRASH_DATE': 'CRASH_DATETIME'}, inplace=True)
crash_2019.rename(columns={'CRASH_TIME_2': 'CRASH_TIME', 'CRASH_DATE': 'CRASH_DATETIME'}, inplace=True)
crash_2020.rename(columns={'CRASH_TIME_2': 'CRASH_TIME', 'CRASH_DATE': 'CRASH_DATETIME'}, inplace=True)
crash_2021.rename(columns={'CRASH_TIME_2': 'CRASH_TIME'}, inplace=True)

# Drop columns which aren't in crash_2017
crash_2018 = crash_2018.drop(columns = ['T_EXC_TYPE', 'T_EXC_TIME'])
crash_2019 = crash_2019.drop(columns = ['T_EXC_TYPE', 'T_EXC_TIME'])
crash_2020 = crash_2020.drop(columns = ['T_EXC_TYPE', 'T_EXC_TIME'])
crash_2021 = crash_2021.drop(columns = ['SHAPE', 'TRVL_DIRC_DESCR', 'T_EXC_TYPE', 'T_EXC_TIME'])

# List of dataset names for the years 2018 to 2021
dataset_years = [2018, 2019, 2020, 2021]

# Loop through the list of dataset years
for year in dataset_years:
    # Generate the dataset variable name based on the year
    current_dataset_name = f'crash_{year}'
    
    # Reorder the columns of the current dataset to match crash_2017
    locals()[current_dataset_name] = locals()[current_dataset_name][crash_2017.columns]

In [9]:
# Check the shape and columns (variables) of the datasets
# List of dataset names for the years 2017 to 2021
years = [2017, 2018, 2019, 2020, 2021]

# Loop through the list of dataset years
for year in years:
    # Generate the dataset variable name based on the year
    current_dataset_name = f'crash_{year}'
    
    # Check the shape of the current dataset
    dataset_shape = locals()[current_dataset_name].shape # locals() provides access to variables defined within the current function or scope
    
    # Get the columns of the current dataset
    dataset_columns = locals()[current_dataset_name].columns
    
    # Print the results
    print(f"Year {year}:")
    print(f"Shape: {dataset_shape}")
    print(f"Columns: {dataset_columns}")
    print("\n")

Year 2017:
Shape: (337569, 154)
Columns: Index(['OBJECTID', 'CRASH_NUMB', 'CITY_TOWN_NAME', 'CRASH_DATE_TEXT', 'CRASH_TIME', 'CRASH_DATETIME', 'CRASH_HOUR', 'CRASH_STATUS', 'CRASH_SEVERITY_DESCR', 'MAX_INJR_SVRTY_CL',
       ...
       'NON_MTRST_LOC_DESCR', 'NON_MTRST_TYPE_DESCR', 'PERS_ADDR_CITY', 'STATE_PRVN_CODE', 'PERS_TYPE', 'PRTC_SYS_USE_DESCR', 'SFTY_EQUP_DESC_1', 'SFTY_EQUP_DESC_2', 'SEX_DESCR', 'TRNSD_BY_DESCR'], dtype='object', length=154)


Year 2018:
Shape: (330805, 154)
Columns: Index(['OBJECTID', 'CRASH_NUMB', 'CITY_TOWN_NAME', 'CRASH_DATE_TEXT', 'CRASH_TIME', 'CRASH_DATETIME', 'CRASH_HOUR', 'CRASH_STATUS', 'CRASH_SEVERITY_DESCR', 'MAX_INJR_SVRTY_CL',
       ...
       'NON_MTRST_LOC_DESCR', 'NON_MTRST_TYPE_DESCR', 'PERS_ADDR_CITY', 'STATE_PRVN_CODE', 'PERS_TYPE', 'PRTC_SYS_USE_DESCR', 'SFTY_EQUP_DESC_1', 'SFTY_EQUP_DESC_2', 'SEX_DESCR', 'TRNSD_BY_DESCR'], dtype='object', length=154)


Year 2019:
Shape: (325406, 154)
Columns: Index(['OBJECTID', 'CRASH_NUMB', 'CITY_TOWN_N

In [10]:
# Define an empty dataframe that will be populated
mass_crash = pd.DataFrame()

# Create a list of these DataFrames
dataframes = [crash_2017, crash_2018, crash_2019, crash_2020, crash_2021]

# Concatenate them into a single DataFrame
mass_crash = pd.concat(dataframes, ignore_index=True)

# Reset the index
mass_crash = mass_crash.reset_index(drop=True)

# Print the length of mass_crash
print(len(mass_crash))

# Show mass_crash
mass_crash.head()

1498557


Unnamed: 0,OBJECTID,CRASH_NUMB,CITY_TOWN_NAME,CRASH_DATE_TEXT,CRASH_TIME,CRASH_DATETIME,CRASH_HOUR,CRASH_STATUS,CRASH_SEVERITY_DESCR,MAX_INJR_SVRTY_CL,NUMB_VEHC,NUMB_NONFATAL_INJR,NUMB_FATAL_INJR,POLC_AGNCY_TYPE_DESCR,MANR_COLL_DESCR,VEHC_MNVR_ACTN_CL,VEHC_TRVL_DIRC_CL,VEHC_SEQ_EVENTS_CL,AMBNT_LIGHT_DESCR,WEATH_COND_DESCR,ROAD_SURF_COND_DESCR,FIRST_HRMF_EVENT_DESCR,MOST_HRMFL_EVT_CL,DRVR_CNTRB_CIRC_CL,VEHC_CONFIG_CL,STREET_NUMB,RDWY,DIST_DIRC_FROM_INT,NEAR_INT_RDWY,MM_RTE,DIST_DIRC_MILEMARKER,MILEMARKER,EXIT_RTE,DIST_DIRC_EXIT,EXIT_NUMB,DIST_DIRC_LANDMARK,LANDMARK,RDWY_JNCT_TYPE_DESCR,TRAF_CNTRL_DEVC_TYPE_DESCR,TRAFY_DESCR_DESCR,JURISDICTN,FIRST_HRMF_EVENT_LOC_DESCR,NON_MTRST_TYPE_CL,NON_MTRST_ACTN_CL,NON_MTRST_LOC_CL,IS_GEOCODED,GEOCODING_METHOD_NAME,X,Y,LAT,LON,RMV_DOC_IDS,CRASH_RPT_IDS,YEAR,AGE_DRVR_YNGST,AGE_DRVR_OLDEST,AGE_NONMTRST_YNGST,AGE_NONMTRST_OLDEST,DRVR_DISTRACTED_CL,DISTRICT_NUM,RPA_ABBR,VEHC_EMER_USE_CL,VEHC_TOWED_FROM_SCENE_CL,CNTY_NAME,FMCSA_RPTBL_CL,FMCSA_RPTBL,HIT_RUN_DESCR,LCLTY_NAME,ROAD_CNTRB_DESCR,SCHL_BUS_RELD_DESCR,SPEED_LIMIT,TRAF_CNTRL_DEVC_FUNC_DESCR,WORK_ZONE_RELD_DESCR,AADT,AADT_YEAR,PK_PCT_SUT,AV_PCT_SUT,PK_PCT_CT,AV_PCT_CT,CURB,TRUCK_RTE,LT_SIDEWLK,RT_SIDEWLK,SHLDR_LT_W,SHLDR_LT_T,SURFACE_WD,SURFACE_TP,SHLDR_RT_W,SHLDR_RT_T,NUM_LANES,OPP_LANES,MED_WIDTH,MED_TYPE,URBAN_TYPE,F_CLASS,URBAN_AREA,FD_AID_RTE,FACILITY,OPERATION,CONTROL,PEAK_LANE,SPEED_LIM,STREETNAME,FROMSTREETNAME,TOSTREETNAME,CITY,STRUCT_CND,TERRAIN,URBAN_LOC_TYPE,AADT_DERIV,STATN_NUM,OP_DIR_SL,SHLDR_UL_T,SHLDR_UL_W,F_F_CLASS,VEHC_UNIT_NUMB,ALC_SUSPD_TYPE_DESCR,DRIVER_AGE,DRVR_CNTRB_CIRC_DESCR,DRIVER_DISTRACTED_TYPE_DESCR,DRVR_LCN_STATE,DRUG_SUSPD_TYPE_DESCR,EMERGENCY_USE_DESC,FMCSA_RPTBL_VL,HAZ_MAT_PLACARD_DESCR,MAX_INJR_SVRTY_VL,MOST_HRMF_EVENT,TOTAL_OCCPT_IN_VEHC,VEHC_MANR_ACT_DESCR,VEHC_CONFG_DESCR,VEHC_MOST_DMGD_AREA,OWNER_ADDR_CITY_TOWN,OWNER_ADDR_STATE,VEHC_REG_STATE,VEHC_REG_TYPE_CODE,VEHC_SEQ_EVENTS,VEHC_TOWED_FROM_SCENE,PERS_NUMB,AGE,EJCTN_DESCR,INJY_STAT_DESCR,MED_FACLY,NON_MTRST_ACT_DESCR,NON_MTRST_COND_DESCR,NON_MTRST_LOC_DESCR,NON_MTRST_TYPE_DESCR,PERS_ADDR_CITY,STATE_PRVN_CODE,PERS_TYPE,PRTC_SYS_USE_DESCR,SFTY_EQUP_DESC_1,SFTY_EQUP_DESC_2,SEX_DESCR,TRNSD_BY_DESCR
0,1,4309633,WILBRAHAM,01/02/2017,12:50 PM,2017/01/02 12:49:59+00,12:00PM to 12:59PM,Closed,Property damage only (none injured),No injury,2,0,0,Local police,Angle,V1: Entering traffic lane / V2: Travelling str...,V1: N / V2: W,V1:(Collision with motor vehicle in traffic) ...,Daylight,Cloudy,Dry,Collision with motor vehicle in traffic,V1:(Collision with motor vehicle in traffic) /...,D1: (Unknown) / D2: (Unknown),V1:(Passenger car) / V2:(Passenger car),2001.0,BOSTON RD,,,,,,,,,,HOME DEPOT EXIT/ENTRY,Four-way intersection,Traffic control signal,"Two-way, not divided",Massachusetts Department of Transportation,Roadway,,,,Yes,At Address,119450.357177,878131.459214,42.149468,-72.474569,PW201701201142,17-3-AC,2017,65-74,>84,,,,2,PVPC,V1:(No) / V2:(No),V1:(No) / V2:(No),HAMPDEN,,,No hit and run,,,"No, school bus not involved",,"Yes, device functioning",No,17338.0,2013.0,0.613,1016.0,0.121,237.0,Both sides,Designated truck route ONLY under State Author...,4.0,4.0,0.0,No Shoulder,36.0,Bituminous concrete road,0.0,No Shoulder,3.0,0.0,0.0,,Large Urbanized Area,Rural minor arterial or urban principal arterial,Springfield (MA-CT),,Mainline roadway,Two-way traffic,No control,,35.0,BOSTON ROAD,SPRINGFIELD CITY LINE,PALMER TOWN LINE,Wilbraham,Good,Level,Low density commercial,,,,,,Principal Arterial - Other,2.0,,86.0,"DCC1:Unknown, DCC2:Not reported",Unknown,MA,,No,,Not reported,No injury,Collision with motor vehicle in traffic,1.0,Travelling straight ahead,Passenger car,MDA1:Left side,SPRINGFIELD,MA,MA,PAN,SEQ1:Collision with motor vehicle in traffic,No,2,86.0,Not ejected,No injury,,,,,,SPRINGFIELD,MA,Driver,Unknown,,,Male,Not transported
1,2,4309634,FITCHBURG,01/10/2017,11:20 AM,2017/01/10 11:19:59+00,11:00AM to 11:59AM,Closed,Property damage only (none injured),No injury,2,0,0,Local police,Rear-end,V1: Slowing or stopped in traffic / V2: Travel...,V1: N / V2: N,V1:(Collision with motor vehicle in traffic) ...,Daylight,Clear,Dry,Collision with motor vehicle in traffic,V1:(Collision with motor vehicle in traffic) /...,D1: (No improper driving) / D2: (Unknown),V1:(Passenger car) / V2:(Passenger car),,ELECTRIC AVENUE / SOUTH STREET / OLD SOUTH ST...,,,,,,,,,,,Four-way intersection,Traffic control signal,"Two-way, not divided",City or Town accepted road,Roadway,,,,Yes,At Intersection,175605.930907,924309.65735,42.568967,-71.797099,PW201701200960,17-1083-AC,2017,25-34,45-54,,,,3,MRPC,V1:(No) / V2:(No),"V1:(No) / V2:(Yes, vehicle or trailer disabled)",WORCESTER,,,No hit and run,,,"No, school bus not involved",,"No, device not functioning",No,4608.0,2013.0,0.507,225.0,0.083,43.0,Both sides,Not a parkway - not on a designated truck route,,,,,40.0,Bituminous concrete road,0.0,No Shoulder,2.0,0.0,,,Small Urbanized Area,Urban minor arterial or rural major collector,Leominster-Fitchburg,,Mainline roadway,Two-way traffic,No control,1.0,30.0,ELECTRIC AVENUE,SOUTH STREET,FRANKLIN ROAD,Fitchburg,Good,Rolling,"Not applicable (i.e., not a principal arterial...",,,,,,Minor Arterial,1.0,,51.0,"DCC1:No improper driving, DCC2:Not reported",Not Distracted,RI,,No,,Not reported,No injury,Collision with motor vehicle in traffic,1.0,Slowing or stopped in traffic,Passenger car,MDA1:Center rear,COVENTRY,RI,RI,,SEQ1:Collision with motor vehicle in traffic,No,1,51.0,Not ejected,No injury,,,,,,COVENTRY,RI,Driver,Unknown,,,Male,Not transported
2,3,4309634,FITCHBURG,01/10/2017,11:20 AM,2017/01/10 11:19:59+00,11:00AM to 11:59AM,Closed,Property damage only (none injured),No injury,2,0,0,Local police,Rear-end,V1: Slowing or stopped in traffic / V2: Travel...,V1: N / V2: N,V1:(Collision with motor vehicle in traffic) ...,Daylight,Clear,Dry,Collision with motor vehicle in traffic,V1:(Collision with motor vehicle in traffic) /...,D1: (No improper driving) / D2: (Unknown),V1:(Passenger car) / V2:(Passenger car),,ELECTRIC AVENUE / SOUTH STREET / OLD SOUTH ST...,,,,,,,,,,,Four-way intersection,Traffic control signal,"Two-way, not divided",City or Town accepted road,Roadway,,,,Yes,At Intersection,175605.930907,924309.65735,42.568967,-71.797099,PW201701200960,17-1083-AC,2017,25-34,45-54,,,,3,MRPC,V1:(No) / V2:(No),"V1:(No) / V2:(Yes, vehicle or trailer disabled)",WORCESTER,,,No hit and run,,,"No, school bus not involved",,"No, device not functioning",No,4608.0,2013.0,0.507,225.0,0.083,43.0,Both sides,Not a parkway - not on a designated truck route,,,,,40.0,Bituminous concrete road,0.0,No Shoulder,2.0,0.0,,,Small Urbanized Area,Urban minor arterial or rural major collector,Leominster-Fitchburg,,Mainline roadway,Two-way traffic,No control,1.0,30.0,ELECTRIC AVENUE,SOUTH STREET,FRANKLIN ROAD,Fitchburg,Good,Rolling,"Not applicable (i.e., not a principal arterial...",,,,,,Minor Arterial,2.0,,32.0,"DCC1:Unknown, DCC2:Not reported",Not Distracted,MA,,No,,Not reported,No injury,Collision with motor vehicle in traffic,1.0,Travelling straight ahead,Passenger car,MDA1:Center front,ATLANTA,GA,MA,PAN,SEQ1:Collision with motor vehicle in traffic,"Yes, vehicle or trailer disabled",2,32.0,Not ejected,No injury,,,,,,LEOMINSTER,MA,Driver,Unknown,,,Female,Not transported
3,4,4309635,FITCHBURG,01/10/2017,6:27 PM,2017/01/10 18:27:00+00,06:00PM to 06:59PM,Closed,Property damage only (none injured),No injury,1,0,0,Local police,Single vehicle crash,V1: Travelling straight ahead,V1: E,"V1:(Ran off road right),(Collision with utili...",Dark - lighted roadway,"Rain/Sleet, hail (freezing rain or drizzle)",Slush,Collision with utility pole,V1:(Collision with utility pole),D1: (Driving too fast for conditions),V1:(Passenger car),54.0,NASHUA ST,,,,,,,,,,,Not at junction,No controls,"Two-way, not divided",City or Town accepted road,Roadside,,,,Yes,At Address,175689.117465,925279.469937,42.577692,-71.796129,PW201701201145,17-1133-AC,2017,25-34,25-34,,,,3,MRPC,V1:(No),"V1:(Yes, vehicle or trailer disabled)",WORCESTER,,,No hit and run,,"Road surface condition (wet, icy, snow, slush,...","No, school bus not involved",,"No, device not functioning",No,,,,,,,Both sides,Not a parkway - not on a designated truck route,6.0,6.0,,,33.0,Bituminous concrete road,0.0,No Shoulder,2.0,0.0,,,Small Urbanized Area,Local,Leominster-Fitchburg,,Mainline roadway,Two-way traffic,No control,,,NASHUA STREET,WATER STREET,SOUTH STREET,Fitchburg,Fair,Rolling,"Not applicable (i.e., not a principal arterial...",,,,,,Local,1.0,,29.0,"DCC1:Driving too fast for conditions, DCC2:Not...",Not Distracted,MA,,No,,Not reported,No injury,Collision with utility pole,1.0,Travelling straight ahead,Passenger car,"MDA1:Center front, MDA2:Right front",LEOMINSTER,MA,MA,PAN,"SEQ1:Ran off road right, SEQ2:Collision with u...","Yes, vehicle or trailer disabled",1,29.0,Not ejected,No injury,,,,,,FITCHBURG,MA,Driver,Shoulder and lap belt used,,,Male,Not transported
4,5,4309638,LUDLOW,01/09/2017,2:22 PM,2017/01/09 14:22:00+00,02:00PM to 02:59PM,Closed,Property damage only (none injured),No injury,2,0,0,Local police,Angle,V1: Backing / V2: Parked,V1: W / V2: Not Reported,V1:(Collision with parked motor vehicle) V2:(...,Daylight,Clear,Dry,Collision with parked motor vehicle,V1:(Collision with parked motor vehicle) / V2:...,D1: (No improper driving),V1:(Passenger car) / V2:(Passenger car),185.0,WEST AVE,,,,,,,,,,,Not at junction,No controls,"Two-way, not divided",City or Town accepted road,Outside roadway,,,,Yes,At Address,117945.327447,879927.408644,42.16548,-72.49303,PW201701201149,17-15-AC,2017,55-64,55-64,,,,2,PVPC,V1:(No) / V2:(No),V1:(No) / V2:(No),HAMPDEN,,,No hit and run,,,"No, school bus not involved",,"No, device not functioning",No,6149.0,2013.0,0.546,275.0,0.087,63.0,Left side only,Not a parkway - not on a designated truck route,5.0,,,,30.0,Bituminous concrete road,2.0,Unstable shoulder,2.0,0.0,,,Large Urbanized Area,Urban minor arterial or rural major collector,Springfield (MA-CT),,Mainline roadway,Two-way traffic,No control,,,WEST AVENUE,CENTER STREET,WEST STREET,Ludlow,Fair,Level,"Low density residential (less than 5,000 perso...",,,,,,Minor Arterial,2.0,,,DCC1:Not reported,Not reported,,,No,,Not reported,Not reported,Collision with motor vehicle in traffic,0.0,Parked,Passenger car,,WILBRAHAM,MA,MA,PAN,SEQ1:Collision with motor vehicle in traffic,No,2,,Not reported,Not reported,,,,,,,,Driver,Not reported,,,Not reported,


## Exploratory Data Analysis (EDA)

In [11]:
# Exclude the NaN from 'NON_MTRST_TYPE_CL' (Non-Motorist Type) and 'INJY_STAT_DESCR' (Injury Status) columns
df_analysis = mass_crash[(mass_crash['NON_MTRST_TYPE_CL'].notna()) & (mass_crash['INJY_STAT_DESCR'].notna())]

# Reset the index of df_analysis
df_analysis.reset_index(drop=True, inplace=True)

# Make a copy of it to fix the error
df_analysis = df_analysis.copy()

# Clean data
df_analysis['NON_MTRST_TYPE_CL'] = df_analysis['NON_MTRST_TYPE_CL'].str.split(':').str[1] # Non-Motorist Type
df_analysis['NON_MTRST_TYPE_CL'] = df_analysis['NON_MTRST_TYPE_CL'].str.strip()

# Show dataframe
df_analysis.head(3)

Unnamed: 0,OBJECTID,CRASH_NUMB,CITY_TOWN_NAME,CRASH_DATE_TEXT,CRASH_TIME,CRASH_DATETIME,CRASH_HOUR,CRASH_STATUS,CRASH_SEVERITY_DESCR,MAX_INJR_SVRTY_CL,NUMB_VEHC,NUMB_NONFATAL_INJR,NUMB_FATAL_INJR,POLC_AGNCY_TYPE_DESCR,MANR_COLL_DESCR,VEHC_MNVR_ACTN_CL,VEHC_TRVL_DIRC_CL,VEHC_SEQ_EVENTS_CL,AMBNT_LIGHT_DESCR,WEATH_COND_DESCR,ROAD_SURF_COND_DESCR,FIRST_HRMF_EVENT_DESCR,MOST_HRMFL_EVT_CL,DRVR_CNTRB_CIRC_CL,VEHC_CONFIG_CL,STREET_NUMB,RDWY,DIST_DIRC_FROM_INT,NEAR_INT_RDWY,MM_RTE,DIST_DIRC_MILEMARKER,MILEMARKER,EXIT_RTE,DIST_DIRC_EXIT,EXIT_NUMB,DIST_DIRC_LANDMARK,LANDMARK,RDWY_JNCT_TYPE_DESCR,TRAF_CNTRL_DEVC_TYPE_DESCR,TRAFY_DESCR_DESCR,JURISDICTN,FIRST_HRMF_EVENT_LOC_DESCR,NON_MTRST_TYPE_CL,NON_MTRST_ACTN_CL,NON_MTRST_LOC_CL,IS_GEOCODED,GEOCODING_METHOD_NAME,X,Y,LAT,LON,RMV_DOC_IDS,CRASH_RPT_IDS,YEAR,AGE_DRVR_YNGST,AGE_DRVR_OLDEST,AGE_NONMTRST_YNGST,AGE_NONMTRST_OLDEST,DRVR_DISTRACTED_CL,DISTRICT_NUM,RPA_ABBR,VEHC_EMER_USE_CL,VEHC_TOWED_FROM_SCENE_CL,CNTY_NAME,FMCSA_RPTBL_CL,FMCSA_RPTBL,HIT_RUN_DESCR,LCLTY_NAME,ROAD_CNTRB_DESCR,SCHL_BUS_RELD_DESCR,SPEED_LIMIT,TRAF_CNTRL_DEVC_FUNC_DESCR,WORK_ZONE_RELD_DESCR,AADT,AADT_YEAR,PK_PCT_SUT,AV_PCT_SUT,PK_PCT_CT,AV_PCT_CT,CURB,TRUCK_RTE,LT_SIDEWLK,RT_SIDEWLK,SHLDR_LT_W,SHLDR_LT_T,SURFACE_WD,SURFACE_TP,SHLDR_RT_W,SHLDR_RT_T,NUM_LANES,OPP_LANES,MED_WIDTH,MED_TYPE,URBAN_TYPE,F_CLASS,URBAN_AREA,FD_AID_RTE,FACILITY,OPERATION,CONTROL,PEAK_LANE,SPEED_LIM,STREETNAME,FROMSTREETNAME,TOSTREETNAME,CITY,STRUCT_CND,TERRAIN,URBAN_LOC_TYPE,AADT_DERIV,STATN_NUM,OP_DIR_SL,SHLDR_UL_T,SHLDR_UL_W,F_F_CLASS,VEHC_UNIT_NUMB,ALC_SUSPD_TYPE_DESCR,DRIVER_AGE,DRVR_CNTRB_CIRC_DESCR,DRIVER_DISTRACTED_TYPE_DESCR,DRVR_LCN_STATE,DRUG_SUSPD_TYPE_DESCR,EMERGENCY_USE_DESC,FMCSA_RPTBL_VL,HAZ_MAT_PLACARD_DESCR,MAX_INJR_SVRTY_VL,MOST_HRMF_EVENT,TOTAL_OCCPT_IN_VEHC,VEHC_MANR_ACT_DESCR,VEHC_CONFG_DESCR,VEHC_MOST_DMGD_AREA,OWNER_ADDR_CITY_TOWN,OWNER_ADDR_STATE,VEHC_REG_STATE,VEHC_REG_TYPE_CODE,VEHC_SEQ_EVENTS,VEHC_TOWED_FROM_SCENE,PERS_NUMB,AGE,EJCTN_DESCR,INJY_STAT_DESCR,MED_FACLY,NON_MTRST_ACT_DESCR,NON_MTRST_COND_DESCR,NON_MTRST_LOC_DESCR,NON_MTRST_TYPE_DESCR,PERS_ADDR_CITY,STATE_PRVN_CODE,PERS_TYPE,PRTC_SYS_USE_DESCR,SFTY_EQUP_DESC_1,SFTY_EQUP_DESC_2,SEX_DESCR,TRNSD_BY_DESCR
0,118,4323882,BOSTON,01/18/2017,6:38 PM,2017/01/18 18:37:59+00,06:00PM to 06:59PM,Closed,Property damage only (none injured),No injury,1,0,0,State police,Single vehicle crash,V1: Turning left,V1: W,V1:(Collision with pedestrian),Dark - lighted roadway,Cloudy,Dry,Collision with motor vehicle in traffic,V1:(Collision with pedestrian),D1: (Failed to yield right of way),V1:(Passenger car),,NEPONSET VALLEY PARKWAY Rte UNKNOW,,MILTON STREET,,,,,,,,,T-intersection,Stop signs,"Two-way, not divided",Department of Conservation and Recreation,Roadway,Pedestrian,P2: Entering or crossing specified location,P2: Marked crosswalk at intersection,Yes,Off Intersection,230226.968526,887751.687407,42.239645,-71.133772,PW201704401255,2017-0H7-000369,2017,35-44,35-44,35-44,35-44,,6,MAPC,V1:(No),,SUFFOLK,,,No hit and run,,,"No, school bus not involved",25.0,"Yes, device functioning",No,9342.0,2013.0,0.304,261.0,0.1,98.0,Both sides,Not a parkway - not on a designated truck route,,6.0,,,30.0,Bituminous concrete road,0.0,No Shoulder,2.0,0.0,,,Large Urbanized Area,Urban minor arterial or rural major collector,Boston (MA-NH-RI),,Mainline roadway,Two-way traffic,No control,,,NEPONSET VALLEY PARKWAY,RIVER STREET,MILTON STREET,Boston,Fair,Rolling,Low density commercial,,,,,,Minor Arterial,1.0,,43.0,"DCC1:Failed to yield right of way, DCC2:Not re...",Not Distracted,MA,,No,,Not reported,No injury,Collision with pedestrian,1.0,Turning left,Passenger car,MDA1:None,BROCKTON,MA,MA,PAN,SEQ1:Collision with pedestrian,Not reported,1,43.0,Not ejected,No injury,,,,,,BROCKTON,MA,Driver,Shoulder and lap belt used,,,Female,Not reported
1,262,4313601,SOUTHBOROUGH,01/15/2017,1:50 AM,2017/01/15 01:50:00+00,01:00AM to 01:59AM,Closed,Non-fatal injury,Non-fatal injury - Incapacitating,1,1,0,Local police,Head-on,V1: Travelling straight ahead,V1: W,V1:(Collision with pedestrian),Dark - lighted roadway,Clear/Other,Dry,Collision with pedestrian,V1:(Collision with pedestrian),D1: (No improper driving),V1:(Passenger car),,TURNPIKE RD Rte 9 W / CENTRAL ST,,,,,,,,,,,Four-way intersection,Traffic control signal,"Two-way, divided, positive median barrier",Massachusetts Department of Transportation,Roadway,Pedestrian,P2: Entering or crossing specified location,P2: Marked crosswalk at intersection,Yes,At Intersection,199556.468657,893790.187753,42.294589,-71.505384,PW201701908528,17SOH-23-AC,2017,21-24,21-24,25-34,25-34,,3,MAPC,V1:(No),V1:(No),WORCESTER,,,No hit and run,,,"No, school bus not involved",50.0,"Yes, device functioning",No,49200.0,2013.0,0.039,759.0,0.065,642.0,Right side only,Designated truck route ONLY under State Author...,0.0,4.0,2.0,Hardened bituminous mix or penetration,24.0,Bituminous concrete road,10.0,Hardened bituminous mix or penetration,2.0,2.0,8.0,Positive barrier - rigid,Large Urbanized Area,Rural or urban principal arterial,Boston (MA-NH-RI),4.0,Mainline roadway,Two-way traffic,Partial control,2.0,45.0,TURNPIKE ROAD,FRAMINGHAM TOWN LINE,WESTBOROUGH TOWN LINE,Southborough,Good,Rolling,Low density commercial,,,,,,Principal Arterial - Other,1.0,,23.0,"DCC1:No improper driving, DCC2:Not reported",Unknown,MA,,No,,Not reported,No injury,Collision with pedestrian,1.0,Travelling straight ahead,Passenger car,"MDA1:Right front, MDA2:Right side",MENDON,MA,MA,PAN,SEQ1:Collision with pedestrian,No,1,23.0,Not ejected,No injury,,,,,,MENDON,MA,Driver,Shoulder and lap belt used,,,Male,Not transported
2,263,4313601,SOUTHBOROUGH,01/15/2017,1:50 AM,2017/01/15 01:50:00+00,01:00AM to 01:59AM,Closed,Non-fatal injury,Non-fatal injury - Incapacitating,1,1,0,Local police,Head-on,V1: Travelling straight ahead,V1: W,V1:(Collision with pedestrian),Dark - lighted roadway,Clear/Other,Dry,Collision with pedestrian,V1:(Collision with pedestrian),D1: (No improper driving),V1:(Passenger car),,TURNPIKE RD Rte 9 W / CENTRAL ST,,,,,,,,,,,Four-way intersection,Traffic control signal,"Two-way, divided, positive median barrier",Massachusetts Department of Transportation,Roadway,Pedestrian,P2: Entering or crossing specified location,P2: Marked crosswalk at intersection,Yes,At Intersection,199556.468657,893790.187753,42.294589,-71.505384,PW201701908528,17SOH-23-AC,2017,21-24,21-24,25-34,25-34,,3,MAPC,V1:(No),V1:(No),WORCESTER,,,No hit and run,,,"No, school bus not involved",50.0,"Yes, device functioning",No,49200.0,2013.0,0.039,759.0,0.065,642.0,Right side only,Designated truck route ONLY under State Author...,0.0,4.0,2.0,Hardened bituminous mix or penetration,24.0,Bituminous concrete road,10.0,Hardened bituminous mix or penetration,2.0,2.0,8.0,Positive barrier - rigid,Large Urbanized Area,Rural or urban principal arterial,Boston (MA-NH-RI),4.0,Mainline roadway,Two-way traffic,Partial control,2.0,45.0,TURNPIKE ROAD,FRAMINGHAM TOWN LINE,WESTBOROUGH TOWN LINE,Southborough,Good,Rolling,Low density commercial,,,,,,Principal Arterial - Other,,,,,,,,,,,,,,,,,,,,,,,2,29.0,,Non-fatal injury - Incapacitating,U MASS MEMORIAL,Entering or crossing specified location,Unknown,Marked crosswalk at intersection,Pedestrian,FAYVILLE,MA,Non-motorist,,Not reported,Not reported,Female,EMS(Emergency Medical Service)


In [12]:
# Fatal - injuries that resulted in death 
# Incapacitating - serious injuries require immediate medical attention

# The number of fatal injuries to cyclists
num_fatal_cyclist = df_analysis[(df_analysis['INJY_STAT_DESCR'] == 'Fatal injury (K)') & (df_analysis['NON_MTRST_TYPE_CL'] == 'Cyclist')].shape[0]
print('The number of fatal injuries to cyclists: ' + str(num_fatal_cyclist))

# Include pedestrians and skaters together since PennDOT does not discriminate between the two

# The number of fatal injuries to pedestrians and skaters
num_fatal_pedestrian = df_analysis[(df_analysis['INJY_STAT_DESCR'] == 'Fatal injury (K)') & (df_analysis['NON_MTRST_TYPE_CL'] == 'Pedestrain')].shape[0]
num_fatal_skater = df_analysis[(df_analysis['INJY_STAT_DESCR'] == 'Fatal injury (K)') & (df_analysis['NON_MTRST_TYPE_CL'] == 'Skater')].shape[0]
fatal_pedestrian_skater = num_fatal_pedestrian + num_fatal_skater
print('The number of fatal injuries to pedestrians and skaters: ' + str(fatal_pedestrian_skater))

# The number of incapacitating injuries to cyclists
num_incapacitating_cyclist = df_analysis[(df_analysis['INJY_STAT_DESCR'] == 'Non-fatal injury - Incapacitating') & (df_analysis['NON_MTRST_TYPE_CL'] == 'Cyclist')].shape[0]
print('The number of incapacitating injuries to cyclists: ' + str(num_incapacitating_cyclist))

# The number of incapacitating injuries to pedestrians and skaters
num_incapacitating_pedestrian = df_analysis[(df_analysis['INJY_STAT_DESCR'] == 'Non-fatal injury - Incapacitating') & (df_analysis['NON_MTRST_TYPE_CL'] == 'Pedestrain')].shape[0]
num_incapacitating_skater = df_analysis[(df_analysis['INJY_STAT_DESCR'] == 'Non-fatal injury - Incapacitating') & (df_analysis['NON_MTRST_TYPE_CL'] == 'Skater')].shape[0]
incapacitating_pedestrian_skater = num_incapacitating_pedestrian + num_incapacitating_skater
print('The number of incapacitating injuries to pedestrians and skaters: ' + str(fatal_pedestrian_skater))

The number of fatal injuries to cyclists: 33
The number of fatal injuries to pedestrians and skaters: 0
The number of incapacitating injuries to cyclists: 217
The number of incapacitating injuries to pedestrians and skaters: 0


## Merge ACS Data and mass_crash Data (Massachusetts Personal Level Crash Data from 2017 to 2021)¶

In [13]:
# Clean ACS data
# Define an empty dataframe that will be populated
df_acs_muni = pd.DataFrame()

# Create a copy of ACS_MUNI_DF as df_acs_muni
df_acs_muni = ACS_MUNI_DF.copy()

# Create a boolean mask to identify rows where 'MUNI_NAME' is 'County subdivisions not defined'
mask = df_acs_muni['MUNI_NAME'] == 'County subdivisions not defined'

# Use the mask to drop rows from the DataFrame
df_acs_muni = df_acs_muni[~mask]

# Reset the index and use the drop parameter to remove the old index column
df_acs_muni.reset_index(drop=True, inplace=True)

# Print the shape of dataframe 
print(df_acs_muni.shape)

# Show dataframe
df_acs_muni.head()

(351, 17)


Unnamed: 0,NAME,POPULATION,BIKE_TO_WORK_EST,BIKE_TO_WORK_MARG,WALK_TO_WORK_EST,WALK_TO_WORK_MARG,DRIVE_SOLO_TO_WORK_EST,DRIVE_SOLO_TO_WORK_MARG,CARPOOL_TO_WORK_EST,CARPOOL_TO_WORK_MARG,PUBTRANS_TO_WORK_EST,PUBTRANS_TO_WORK_MARG,state,county,county_subdivision,MUNI_NAME,COUNTY_NAME
0,"Barnstable Town city, Barnstable County",48556,35,52,843,321,18901,926,2649,482,303,136,25,1,3690,Barnstable Town city,Barnstable County
1,"Bourne town, Barnstable County",20364,0,25,182,64,8471,673,722,288,84,55,25,1,7175,Bourne town,Barnstable County
2,"Brewster town, Barnstable County",10282,66,70,1,3,3733,445,153,88,24,29,25,1,7980,Brewster town,Barnstable County
3,"Chatham town, Barnstable County",6554,0,19,124,74,1666,252,196,96,93,102,25,1,12995,Chatham town,Barnstable County
4,"Dennis town, Barnstable County",14664,8,16,147,108,4930,515,658,173,37,47,25,1,16775,Dennis town,Barnstable County


In [14]:
# Lower the muni name from acs dataframe
df_acs_muni["muni"] = df_acs_muni["MUNI_NAME"].str.lower()

# Split the muni_name from the acs dataframe for merging
df_acs_muni["municipality_name"] = df_acs_muni["muni"].str.split(' ').str[0]
df_acs_muni["municipality_name_second"] = df_acs_muni["muni"].str.split(' ').str[1]

# Define a function to conditionally concatenate columns
def concatenate_municipalities(row):
    if "town" not in row["municipality_name_second"] and "city" not in row["municipality_name_second"]:
        return row["municipality_name"] + " " + row["municipality_name_second"]
    else:
        return row["municipality_name"]

# Apply the function to create a new column
df_acs_muni["muni_name"] = df_acs_muni.apply(concatenate_municipalities, axis=1)

# Drop unnecessary columns
df_acs_muni = df_acs_muni.drop(columns=["muni","municipality_name", "municipality_name_second"])

In [15]:
# Show dataframe
df_acs_muni.head(3)

Unnamed: 0,NAME,POPULATION,BIKE_TO_WORK_EST,BIKE_TO_WORK_MARG,WALK_TO_WORK_EST,WALK_TO_WORK_MARG,DRIVE_SOLO_TO_WORK_EST,DRIVE_SOLO_TO_WORK_MARG,CARPOOL_TO_WORK_EST,CARPOOL_TO_WORK_MARG,PUBTRANS_TO_WORK_EST,PUBTRANS_TO_WORK_MARG,state,county,county_subdivision,MUNI_NAME,COUNTY_NAME,muni_name
0,"Barnstable Town city, Barnstable County",48556,35,52,843,321,18901,926,2649,482,303,136,25,1,3690,Barnstable Town city,Barnstable County,barnstable
1,"Bourne town, Barnstable County",20364,0,25,182,64,8471,673,722,288,84,55,25,1,7175,Bourne town,Barnstable County,bourne
2,"Brewster town, Barnstable County",10282,66,70,1,3,3733,445,153,88,24,29,25,1,7980,Brewster town,Barnstable County,brewster


In [16]:
# Make a copy of df_analysis which excludes NaN values of Non-Motorist Types and clean the values of that column
mass_crash_df = df_analysis.copy()

# Keep the rows where 'NON_MTRST_TYPE_CL' is in the list ['Cyclist', 'Pedestrian', 'Skater']
mass_crash_df = mass_crash_df[mass_crash_df['NON_MTRST_TYPE_CL'].isin(['Cyclist', 'Pedestrian', 'Skater'])]

# Reset index
mass_crash_df.reset_index(drop=True, inplace=True)

# Show mass_crash_df
mass_crash_df.head(3)

Unnamed: 0,OBJECTID,CRASH_NUMB,CITY_TOWN_NAME,CRASH_DATE_TEXT,CRASH_TIME,CRASH_DATETIME,CRASH_HOUR,CRASH_STATUS,CRASH_SEVERITY_DESCR,MAX_INJR_SVRTY_CL,NUMB_VEHC,NUMB_NONFATAL_INJR,NUMB_FATAL_INJR,POLC_AGNCY_TYPE_DESCR,MANR_COLL_DESCR,VEHC_MNVR_ACTN_CL,VEHC_TRVL_DIRC_CL,VEHC_SEQ_EVENTS_CL,AMBNT_LIGHT_DESCR,WEATH_COND_DESCR,ROAD_SURF_COND_DESCR,FIRST_HRMF_EVENT_DESCR,MOST_HRMFL_EVT_CL,DRVR_CNTRB_CIRC_CL,VEHC_CONFIG_CL,STREET_NUMB,RDWY,DIST_DIRC_FROM_INT,NEAR_INT_RDWY,MM_RTE,DIST_DIRC_MILEMARKER,MILEMARKER,EXIT_RTE,DIST_DIRC_EXIT,EXIT_NUMB,DIST_DIRC_LANDMARK,LANDMARK,RDWY_JNCT_TYPE_DESCR,TRAF_CNTRL_DEVC_TYPE_DESCR,TRAFY_DESCR_DESCR,JURISDICTN,FIRST_HRMF_EVENT_LOC_DESCR,NON_MTRST_TYPE_CL,NON_MTRST_ACTN_CL,NON_MTRST_LOC_CL,IS_GEOCODED,GEOCODING_METHOD_NAME,X,Y,LAT,LON,RMV_DOC_IDS,CRASH_RPT_IDS,YEAR,AGE_DRVR_YNGST,AGE_DRVR_OLDEST,AGE_NONMTRST_YNGST,AGE_NONMTRST_OLDEST,DRVR_DISTRACTED_CL,DISTRICT_NUM,RPA_ABBR,VEHC_EMER_USE_CL,VEHC_TOWED_FROM_SCENE_CL,CNTY_NAME,FMCSA_RPTBL_CL,FMCSA_RPTBL,HIT_RUN_DESCR,LCLTY_NAME,ROAD_CNTRB_DESCR,SCHL_BUS_RELD_DESCR,SPEED_LIMIT,TRAF_CNTRL_DEVC_FUNC_DESCR,WORK_ZONE_RELD_DESCR,AADT,AADT_YEAR,PK_PCT_SUT,AV_PCT_SUT,PK_PCT_CT,AV_PCT_CT,CURB,TRUCK_RTE,LT_SIDEWLK,RT_SIDEWLK,SHLDR_LT_W,SHLDR_LT_T,SURFACE_WD,SURFACE_TP,SHLDR_RT_W,SHLDR_RT_T,NUM_LANES,OPP_LANES,MED_WIDTH,MED_TYPE,URBAN_TYPE,F_CLASS,URBAN_AREA,FD_AID_RTE,FACILITY,OPERATION,CONTROL,PEAK_LANE,SPEED_LIM,STREETNAME,FROMSTREETNAME,TOSTREETNAME,CITY,STRUCT_CND,TERRAIN,URBAN_LOC_TYPE,AADT_DERIV,STATN_NUM,OP_DIR_SL,SHLDR_UL_T,SHLDR_UL_W,F_F_CLASS,VEHC_UNIT_NUMB,ALC_SUSPD_TYPE_DESCR,DRIVER_AGE,DRVR_CNTRB_CIRC_DESCR,DRIVER_DISTRACTED_TYPE_DESCR,DRVR_LCN_STATE,DRUG_SUSPD_TYPE_DESCR,EMERGENCY_USE_DESC,FMCSA_RPTBL_VL,HAZ_MAT_PLACARD_DESCR,MAX_INJR_SVRTY_VL,MOST_HRMF_EVENT,TOTAL_OCCPT_IN_VEHC,VEHC_MANR_ACT_DESCR,VEHC_CONFG_DESCR,VEHC_MOST_DMGD_AREA,OWNER_ADDR_CITY_TOWN,OWNER_ADDR_STATE,VEHC_REG_STATE,VEHC_REG_TYPE_CODE,VEHC_SEQ_EVENTS,VEHC_TOWED_FROM_SCENE,PERS_NUMB,AGE,EJCTN_DESCR,INJY_STAT_DESCR,MED_FACLY,NON_MTRST_ACT_DESCR,NON_MTRST_COND_DESCR,NON_MTRST_LOC_DESCR,NON_MTRST_TYPE_DESCR,PERS_ADDR_CITY,STATE_PRVN_CODE,PERS_TYPE,PRTC_SYS_USE_DESCR,SFTY_EQUP_DESC_1,SFTY_EQUP_DESC_2,SEX_DESCR,TRNSD_BY_DESCR
0,118,4323882,BOSTON,01/18/2017,6:38 PM,2017/01/18 18:37:59+00,06:00PM to 06:59PM,Closed,Property damage only (none injured),No injury,1,0,0,State police,Single vehicle crash,V1: Turning left,V1: W,V1:(Collision with pedestrian),Dark - lighted roadway,Cloudy,Dry,Collision with motor vehicle in traffic,V1:(Collision with pedestrian),D1: (Failed to yield right of way),V1:(Passenger car),,NEPONSET VALLEY PARKWAY Rte UNKNOW,,MILTON STREET,,,,,,,,,T-intersection,Stop signs,"Two-way, not divided",Department of Conservation and Recreation,Roadway,Pedestrian,P2: Entering or crossing specified location,P2: Marked crosswalk at intersection,Yes,Off Intersection,230226.968526,887751.687407,42.239645,-71.133772,PW201704401255,2017-0H7-000369,2017,35-44,35-44,35-44,35-44,,6,MAPC,V1:(No),,SUFFOLK,,,No hit and run,,,"No, school bus not involved",25.0,"Yes, device functioning",No,9342.0,2013.0,0.304,261.0,0.1,98.0,Both sides,Not a parkway - not on a designated truck route,,6.0,,,30.0,Bituminous concrete road,0.0,No Shoulder,2.0,0.0,,,Large Urbanized Area,Urban minor arterial or rural major collector,Boston (MA-NH-RI),,Mainline roadway,Two-way traffic,No control,,,NEPONSET VALLEY PARKWAY,RIVER STREET,MILTON STREET,Boston,Fair,Rolling,Low density commercial,,,,,,Minor Arterial,1.0,,43.0,"DCC1:Failed to yield right of way, DCC2:Not re...",Not Distracted,MA,,No,,Not reported,No injury,Collision with pedestrian,1.0,Turning left,Passenger car,MDA1:None,BROCKTON,MA,MA,PAN,SEQ1:Collision with pedestrian,Not reported,1,43.0,Not ejected,No injury,,,,,,BROCKTON,MA,Driver,Shoulder and lap belt used,,,Female,Not reported
1,262,4313601,SOUTHBOROUGH,01/15/2017,1:50 AM,2017/01/15 01:50:00+00,01:00AM to 01:59AM,Closed,Non-fatal injury,Non-fatal injury - Incapacitating,1,1,0,Local police,Head-on,V1: Travelling straight ahead,V1: W,V1:(Collision with pedestrian),Dark - lighted roadway,Clear/Other,Dry,Collision with pedestrian,V1:(Collision with pedestrian),D1: (No improper driving),V1:(Passenger car),,TURNPIKE RD Rte 9 W / CENTRAL ST,,,,,,,,,,,Four-way intersection,Traffic control signal,"Two-way, divided, positive median barrier",Massachusetts Department of Transportation,Roadway,Pedestrian,P2: Entering or crossing specified location,P2: Marked crosswalk at intersection,Yes,At Intersection,199556.468657,893790.187753,42.294589,-71.505384,PW201701908528,17SOH-23-AC,2017,21-24,21-24,25-34,25-34,,3,MAPC,V1:(No),V1:(No),WORCESTER,,,No hit and run,,,"No, school bus not involved",50.0,"Yes, device functioning",No,49200.0,2013.0,0.039,759.0,0.065,642.0,Right side only,Designated truck route ONLY under State Author...,0.0,4.0,2.0,Hardened bituminous mix or penetration,24.0,Bituminous concrete road,10.0,Hardened bituminous mix or penetration,2.0,2.0,8.0,Positive barrier - rigid,Large Urbanized Area,Rural or urban principal arterial,Boston (MA-NH-RI),4.0,Mainline roadway,Two-way traffic,Partial control,2.0,45.0,TURNPIKE ROAD,FRAMINGHAM TOWN LINE,WESTBOROUGH TOWN LINE,Southborough,Good,Rolling,Low density commercial,,,,,,Principal Arterial - Other,1.0,,23.0,"DCC1:No improper driving, DCC2:Not reported",Unknown,MA,,No,,Not reported,No injury,Collision with pedestrian,1.0,Travelling straight ahead,Passenger car,"MDA1:Right front, MDA2:Right side",MENDON,MA,MA,PAN,SEQ1:Collision with pedestrian,No,1,23.0,Not ejected,No injury,,,,,,MENDON,MA,Driver,Shoulder and lap belt used,,,Male,Not transported
2,263,4313601,SOUTHBOROUGH,01/15/2017,1:50 AM,2017/01/15 01:50:00+00,01:00AM to 01:59AM,Closed,Non-fatal injury,Non-fatal injury - Incapacitating,1,1,0,Local police,Head-on,V1: Travelling straight ahead,V1: W,V1:(Collision with pedestrian),Dark - lighted roadway,Clear/Other,Dry,Collision with pedestrian,V1:(Collision with pedestrian),D1: (No improper driving),V1:(Passenger car),,TURNPIKE RD Rte 9 W / CENTRAL ST,,,,,,,,,,,Four-way intersection,Traffic control signal,"Two-way, divided, positive median barrier",Massachusetts Department of Transportation,Roadway,Pedestrian,P2: Entering or crossing specified location,P2: Marked crosswalk at intersection,Yes,At Intersection,199556.468657,893790.187753,42.294589,-71.505384,PW201701908528,17SOH-23-AC,2017,21-24,21-24,25-34,25-34,,3,MAPC,V1:(No),V1:(No),WORCESTER,,,No hit and run,,,"No, school bus not involved",50.0,"Yes, device functioning",No,49200.0,2013.0,0.039,759.0,0.065,642.0,Right side only,Designated truck route ONLY under State Author...,0.0,4.0,2.0,Hardened bituminous mix or penetration,24.0,Bituminous concrete road,10.0,Hardened bituminous mix or penetration,2.0,2.0,8.0,Positive barrier - rigid,Large Urbanized Area,Rural or urban principal arterial,Boston (MA-NH-RI),4.0,Mainline roadway,Two-way traffic,Partial control,2.0,45.0,TURNPIKE ROAD,FRAMINGHAM TOWN LINE,WESTBOROUGH TOWN LINE,Southborough,Good,Rolling,Low density commercial,,,,,,Principal Arterial - Other,,,,,,,,,,,,,,,,,,,,,,,2,29.0,,Non-fatal injury - Incapacitating,U MASS MEMORIAL,Entering or crossing specified location,Unknown,Marked crosswalk at intersection,Pedestrian,FAYVILLE,MA,Non-motorist,,Not reported,Not reported,Female,EMS(Emergency Medical Service)


In [17]:
# Lower the CITY_TOWN_NAME name from mass_crash dataframe
mass_crash_df["muni_name"] = mass_crash_df["CITY_TOWN_NAME"].str.lower()

# Define an empty dataframe that will be populated
df_mass_acs = pd.DataFrame()

# Merge dataframes
df_mass_acs = mass_crash_df.merge(df_acs_muni, how="inner", on="muni_name")

# Drop the muni_name column
df_mass_acs = df_mass_acs.drop(columns=["muni_name"])

# Reset the index
df_mass_acs = df_mass_acs.reset_index(drop=True)

# Show dataframe
df_mass_acs.head()

Unnamed: 0,OBJECTID,CRASH_NUMB,CITY_TOWN_NAME,CRASH_DATE_TEXT,CRASH_TIME,CRASH_DATETIME,CRASH_HOUR,CRASH_STATUS,CRASH_SEVERITY_DESCR,MAX_INJR_SVRTY_CL,NUMB_VEHC,NUMB_NONFATAL_INJR,NUMB_FATAL_INJR,POLC_AGNCY_TYPE_DESCR,MANR_COLL_DESCR,VEHC_MNVR_ACTN_CL,VEHC_TRVL_DIRC_CL,VEHC_SEQ_EVENTS_CL,AMBNT_LIGHT_DESCR,WEATH_COND_DESCR,ROAD_SURF_COND_DESCR,FIRST_HRMF_EVENT_DESCR,MOST_HRMFL_EVT_CL,DRVR_CNTRB_CIRC_CL,VEHC_CONFIG_CL,STREET_NUMB,RDWY,DIST_DIRC_FROM_INT,NEAR_INT_RDWY,MM_RTE,DIST_DIRC_MILEMARKER,MILEMARKER,EXIT_RTE,DIST_DIRC_EXIT,EXIT_NUMB,DIST_DIRC_LANDMARK,LANDMARK,RDWY_JNCT_TYPE_DESCR,TRAF_CNTRL_DEVC_TYPE_DESCR,TRAFY_DESCR_DESCR,JURISDICTN,FIRST_HRMF_EVENT_LOC_DESCR,NON_MTRST_TYPE_CL,NON_MTRST_ACTN_CL,NON_MTRST_LOC_CL,IS_GEOCODED,GEOCODING_METHOD_NAME,X,Y,LAT,LON,RMV_DOC_IDS,CRASH_RPT_IDS,YEAR,AGE_DRVR_YNGST,AGE_DRVR_OLDEST,AGE_NONMTRST_YNGST,AGE_NONMTRST_OLDEST,DRVR_DISTRACTED_CL,DISTRICT_NUM,RPA_ABBR,VEHC_EMER_USE_CL,VEHC_TOWED_FROM_SCENE_CL,CNTY_NAME,FMCSA_RPTBL_CL,FMCSA_RPTBL,HIT_RUN_DESCR,LCLTY_NAME,ROAD_CNTRB_DESCR,SCHL_BUS_RELD_DESCR,SPEED_LIMIT,TRAF_CNTRL_DEVC_FUNC_DESCR,WORK_ZONE_RELD_DESCR,AADT,AADT_YEAR,PK_PCT_SUT,AV_PCT_SUT,PK_PCT_CT,AV_PCT_CT,CURB,TRUCK_RTE,LT_SIDEWLK,RT_SIDEWLK,SHLDR_LT_W,SHLDR_LT_T,SURFACE_WD,SURFACE_TP,SHLDR_RT_W,SHLDR_RT_T,NUM_LANES,OPP_LANES,MED_WIDTH,MED_TYPE,URBAN_TYPE,F_CLASS,URBAN_AREA,FD_AID_RTE,FACILITY,OPERATION,CONTROL,PEAK_LANE,SPEED_LIM,STREETNAME,FROMSTREETNAME,TOSTREETNAME,CITY,STRUCT_CND,TERRAIN,URBAN_LOC_TYPE,AADT_DERIV,STATN_NUM,OP_DIR_SL,SHLDR_UL_T,SHLDR_UL_W,F_F_CLASS,VEHC_UNIT_NUMB,ALC_SUSPD_TYPE_DESCR,DRIVER_AGE,DRVR_CNTRB_CIRC_DESCR,DRIVER_DISTRACTED_TYPE_DESCR,DRVR_LCN_STATE,DRUG_SUSPD_TYPE_DESCR,EMERGENCY_USE_DESC,FMCSA_RPTBL_VL,HAZ_MAT_PLACARD_DESCR,MAX_INJR_SVRTY_VL,MOST_HRMF_EVENT,TOTAL_OCCPT_IN_VEHC,VEHC_MANR_ACT_DESCR,VEHC_CONFG_DESCR,VEHC_MOST_DMGD_AREA,OWNER_ADDR_CITY_TOWN,OWNER_ADDR_STATE,VEHC_REG_STATE,VEHC_REG_TYPE_CODE,VEHC_SEQ_EVENTS,VEHC_TOWED_FROM_SCENE,PERS_NUMB,AGE,EJCTN_DESCR,INJY_STAT_DESCR,MED_FACLY,NON_MTRST_ACT_DESCR,NON_MTRST_COND_DESCR,NON_MTRST_LOC_DESCR,NON_MTRST_TYPE_DESCR,PERS_ADDR_CITY,STATE_PRVN_CODE,PERS_TYPE,PRTC_SYS_USE_DESCR,SFTY_EQUP_DESC_1,SFTY_EQUP_DESC_2,SEX_DESCR,TRNSD_BY_DESCR,NAME,POPULATION,BIKE_TO_WORK_EST,BIKE_TO_WORK_MARG,WALK_TO_WORK_EST,WALK_TO_WORK_MARG,DRIVE_SOLO_TO_WORK_EST,DRIVE_SOLO_TO_WORK_MARG,CARPOOL_TO_WORK_EST,CARPOOL_TO_WORK_MARG,PUBTRANS_TO_WORK_EST,PUBTRANS_TO_WORK_MARG,state,county,county_subdivision,MUNI_NAME,COUNTY_NAME
0,118,4323882,BOSTON,01/18/2017,6:38 PM,2017/01/18 18:37:59+00,06:00PM to 06:59PM,Closed,Property damage only (none injured),No injury,1,0,0,State police,Single vehicle crash,V1: Turning left,V1: W,V1:(Collision with pedestrian),Dark - lighted roadway,Cloudy,Dry,Collision with motor vehicle in traffic,V1:(Collision with pedestrian),D1: (Failed to yield right of way),V1:(Passenger car),,NEPONSET VALLEY PARKWAY Rte UNKNOW,,MILTON STREET,,,,,,,,,T-intersection,Stop signs,"Two-way, not divided",Department of Conservation and Recreation,Roadway,Pedestrian,P2: Entering or crossing specified location,P2: Marked crosswalk at intersection,Yes,Off Intersection,230226.968526,887751.687407,42.239645,-71.133772,PW201704401255,2017-0H7-000369,2017,35-44,35-44,35-44,35-44,,6,MAPC,V1:(No),,SUFFOLK,,,No hit and run,,,"No, school bus not involved",25.0,"Yes, device functioning",No,9342.0,2013.0,0.304,261.0,0.1,98.0,Both sides,Not a parkway - not on a designated truck route,,6.0,,,30.0,Bituminous concrete road,0.0,No Shoulder,2.0,0.0,,,Large Urbanized Area,Urban minor arterial or rural major collector,Boston (MA-NH-RI),,Mainline roadway,Two-way traffic,No control,,,NEPONSET VALLEY PARKWAY,RIVER STREET,MILTON STREET,Boston,Fair,Rolling,Low density commercial,,,,,,Minor Arterial,1.0,,43.0,"DCC1:Failed to yield right of way, DCC2:Not re...",Not Distracted,MA,,No,,Not reported,No injury,Collision with pedestrian,1.0,Turning left,Passenger car,MDA1:None,BROCKTON,MA,MA,PAN,SEQ1:Collision with pedestrian,Not reported,1,43.0,Not ejected,No injury,,,,,,BROCKTON,MA,Driver,Shoulder and lap belt used,,,Female,Not reported,"Boston city, Suffolk County",672814,7294,648,52636,1913,131883,2632,19973,1210,102376,2474,25,25,7000,Boston city,Suffolk County
1,1583,4323882,BOSTON,01/18/2017,6:38 PM,2017/01/18 18:37:59+00,06:00PM to 06:59PM,Closed,Property damage only (none injured),No injury,1,0,0,State police,Single vehicle crash,V1: Turning left,V1: W,V1:(Collision with pedestrian),Dark - lighted roadway,Cloudy,Dry,Collision with motor vehicle in traffic,V1:(Collision with pedestrian),D1: (Failed to yield right of way),V1:(Passenger car),,NEPONSET VALLEY PARKWAY Rte UNKNOW,,MILTON STREET,,,,,,,,,T-intersection,Stop signs,"Two-way, not divided",Department of Conservation and Recreation,Roadway,Pedestrian,P2: Entering or crossing specified location,P2: Marked crosswalk at intersection,Yes,Off Intersection,230226.968526,887751.687407,42.239645,-71.133772,PW201704401255,2017-0H7-000369,2017,35-44,35-44,35-44,35-44,,6,MAPC,V1:(No),,SUFFOLK,,,No hit and run,,,"No, school bus not involved",25.0,"Yes, device functioning",No,9342.0,2013.0,0.304,261.0,0.1,98.0,Both sides,Not a parkway - not on a designated truck route,,6.0,,,30.0,Bituminous concrete road,0.0,No Shoulder,2.0,0.0,,,Large Urbanized Area,Urban minor arterial or rural major collector,Boston (MA-NH-RI),,Mainline roadway,Two-way traffic,No control,,,NEPONSET VALLEY PARKWAY,RIVER STREET,MILTON STREET,Boston,Fair,Rolling,Low density commercial,,,,,,Minor Arterial,,,,,,,,,,,,,,,,,,,,,,,2,35.0,,No injury,,Entering or crossing specified location,Apparently normal,Marked crosswalk at intersection,Pedestrian,HYDE PARK,MA,Non-motorist,,Not reported,Not reported,Female,Not reported,"Boston city, Suffolk County",672814,7294,648,52636,1913,131883,2632,19973,1210,102376,2474,25,25,7000,Boston city,Suffolk County
2,2196,4313687,BOSTON,01/03/2017,5:32 PM,2017/01/03 17:32:00+00,05:00PM to 05:59PM,Closed,Non-fatal injury,Non-fatal injury - Non-incapacitating,1,1,0,State police,Single vehicle crash,V1: Travelling straight ahead,V1: E,V1:(Collision with pedestrian),Dark - lighted roadway,Rain,Wet,Collision with pedestrian,V1:(Collision with pedestrian),D1: (No improper driving),"V1:(Light truck(van, mini-van, pickup, sport u...",,MASSACHUSETTS AVENUE,,MELNEA CASS BOULEVARD,,,,,,,,MASS AVE EXT,Not at junction,Traffic control signal,"Two-way, divided, positive median barrier",City or Town accepted road,Roadway,Pedestrian,P2: Entering or crossing specified location,P2: In roadway,Yes,Operator Designated,235261.229621,898113.708157,42.332721,-71.072152,PW201701907928,2017-0E4-000066,2017,21-24,21-24,55-64,55-64,,6,MAPC,V1:(No),,SUFFOLK,,,No hit and run,,,"No, school bus not involved",35.0,"Yes, device functioning",No,18500.0,2009.0,3.0,4.0,0.0,1.0,Both sides,Not a parkway - not on a designated truck route,0.0,8.0,0.0,No Shoulder,22.0,Bituminous concrete road,0.0,No Shoulder,2.0,2.0,12.0,Curbed,Large Urbanized Area,Rural minor arterial or urban principal arterial,Boston (MA-NH-RI),59,Mainline roadway,One-way traffic,No control,,30.0,MASSACHUSETTS AVENUE,COLUMBIA ROAD,HUNTINGTON AVENUE,Boston,Good,Level,High density business/commercial center (exclu...,,,,,,Principal Arterial - Other,1.0,,22.0,"DCC1:No improper driving, DCC2:Not reported",Not reported,MA,,No,,Not reported,No injury,Collision with pedestrian,2.0,Travelling straight ahead,"Light truck(van, mini-van, pickup, sport utility)",MDA1:None,MANCHESTER,NH,NH,,SEQ1:Collision with pedestrian,Not reported,1,22.0,Not ejected,No injury,,,,,,MEDFORD,MA,Driver,Shoulder and lap belt used,,,Female,Not transported,"Boston city, Suffolk County",672814,7294,648,52636,1913,131883,2632,19973,1210,102376,2474,25,25,7000,Boston city,Suffolk County
3,2197,4313687,BOSTON,01/03/2017,5:32 PM,2017/01/03 17:32:00+00,05:00PM to 05:59PM,Closed,Non-fatal injury,Non-fatal injury - Non-incapacitating,1,1,0,State police,Single vehicle crash,V1: Travelling straight ahead,V1: E,V1:(Collision with pedestrian),Dark - lighted roadway,Rain,Wet,Collision with pedestrian,V1:(Collision with pedestrian),D1: (No improper driving),"V1:(Light truck(van, mini-van, pickup, sport u...",,MASSACHUSETTS AVENUE,,MELNEA CASS BOULEVARD,,,,,,,,MASS AVE EXT,Not at junction,Traffic control signal,"Two-way, divided, positive median barrier",City or Town accepted road,Roadway,Pedestrian,P2: Entering or crossing specified location,P2: In roadway,Yes,Operator Designated,235261.229621,898113.708157,42.332721,-71.072152,PW201701907928,2017-0E4-000066,2017,21-24,21-24,55-64,55-64,,6,MAPC,V1:(No),,SUFFOLK,,,No hit and run,,,"No, school bus not involved",35.0,"Yes, device functioning",No,18500.0,2009.0,3.0,4.0,0.0,1.0,Both sides,Not a parkway - not on a designated truck route,0.0,8.0,0.0,No Shoulder,22.0,Bituminous concrete road,0.0,No Shoulder,2.0,2.0,12.0,Curbed,Large Urbanized Area,Rural minor arterial or urban principal arterial,Boston (MA-NH-RI),59,Mainline roadway,One-way traffic,No control,,30.0,MASSACHUSETTS AVENUE,COLUMBIA ROAD,HUNTINGTON AVENUE,Boston,Good,Level,High density business/commercial center (exclu...,,,,,,Principal Arterial - Other,,,,,,,,,,,,,,,,,,,,,,,2,61.0,,Non-fatal injury - Non-incapacitating,TUFTS MEDICAL CENTER,Entering or crossing specified location,"Emotional (e.g., depression, angry, disturbed)",In roadway,Pedestrian,ALLSTON,MA,Non-motorist,,Not reported,Not reported,Male,EMS(Emergency Medical Service),"Boston city, Suffolk County",672814,7294,648,52636,1913,131883,2632,19973,1210,102376,2474,25,25,7000,Boston city,Suffolk County
4,5156,4322841,BOSTON,01/10/2017,7:48 AM,2017/01/10 07:47:59+00,07:00AM to 07:59AM,Closed,Non-fatal injury,Non-fatal injury - Incapacitating,2,1,0,State police,"Sideswipe, same direction",V1: Travelling straight ahead / V2: Not reported,V1: W / V2: Not Reported,V1:(Collision with pedestrian),Daylight,Clear,Dry,Collision with pedestrian,V1:(Collision with pedestrian),D1: (Failed to yield right of way),V1:(Passenger car) / V2:(Single-unit truck (2-...,,Rte 90 W,,,90.0,,132.0,,,,,UNDER COMMONWEALTH AVE BRIDGE,Not at junction,Not reported,"Two-way, divided, positive median barrier",Massachusetts Department of Transportation,Roadway,Pedestrian,P1: Approaching or leaving vehicle,P1: In roadway,Yes,Mile Marker,232572.218178,899870.488333,42.348647,-71.10468,PW201703901713,2017-0E4-000285,2017,45-54,45-54,55-64,55-64,,6,MAPC,V1:(No) / V2:(No),"V2:(Yes, vehicle or trailer disabled)",SUFFOLK,"V2:(Yes, federally reportable)","Yes, federally reportable",No hit and run,,,"No, school bus not involved",45.0,Not reported,No,132005.0,2013.0,0.305,3641.0,0.14,1674.0,Both sides,Designated truck route under Federal Authority...,0.0,0.0,1.0,Hardened bituminous mix or penetration,44.0,Bituminous concrete road,1.0,Hardened bituminous mix or penetration,4.0,4.0,6.0,Positive barrier - semi-rigid,Large Urbanized Area,Interstate,Boston (MA-NH-RI),I-90,Mainline roadway,Two-way traffic,Full control,4.0,55.0,MASSACHUSETTS TURNPIKE,,EAST BOSTON EXPRESSWAY,Boston,Good,Level,Low density commercial,,,,,,Interstate,,,,,,,,,,,,,,,,,,,,,,,1,56.0,,Non-fatal injury - Incapacitating,TUFTS MEDICAL CENTER,Approaching or leaving vehicle,Apparently normal,In roadway,Pedestrian,REVERE,MA,Non-motorist,,Not reported,Not reported,Male,EMS(Emergency Medical Service),"Boston city, Suffolk County",672814,7294,648,52636,1913,131883,2632,19973,1210,102376,2474,25,25,7000,Boston city,Suffolk County


In [22]:
# Print the amount of municipalities in df_mass_acs
unique_names = df_mass_acs['CITY_TOWN_NAME'].unique()
print(len(unique_names))

294


In [19]:
# Assuming 'data' is a subdirectory in your current working directory
folder_path = 'data/'
file_name = 'df_mass_acs.csv'

# Combine the folder path and file name to create the full file path
full_file_path = folder_path + file_name

# Export dataframe to csv file
df_mass_acs.to_csv(full_file_path, index=True)

## Compress the CSV file before uploading it to GitHub

In [20]:
import gzip
import shutil

# Path to the CSV file you want to compress
csv_file_path = 'data/df_mass_acs.csv'

# Path for the compressed file
compressed_file_path = 'data/df_mass_acs.csv.gz'

# Open the CSV file for reading
with open(csv_file_path, 'rb') as f_in:
    # Open the compressed file for writing
    with gzip.open(compressed_file_path, 'wb') as f_out:
        # Copy the contents of the CSV file to the compressed file
        shutil.copyfileobj(f_in, f_out)

print(f'File compressed to: {compressed_file_path}')

File compressed to: data/df_mass_acs.csv.gz
