# Name: Margaret Nguyen

# Data Manipulation: Merging BNA data and Pennsylvania municipality data


Assignment: Merge the 51 municipalities from Pennsylvania that you obtained the people for bikes BNA score for with the data from the CSV file. They should all be present in the CSV plus another 2530 some odd other municipalities. You will likely have to merge them using the name of the municipality, but be careful as there are municipalities in Pennsylvania that have very similar names, i.e (Lancaster township & Lancaster City).

In [94]:
# Import packages
import pandas as pd
import re

In [95]:
# Read the csv files 
crash_data = pd.read_csv("/Users/margaret06/Documents/GitHub/Carlisle_Borough_Transportation_Study/data/2017_TO_2021_MUNI_CRASH_DATA.csv", low_memory=False)
bna = pd.read_csv("/Users/margaret06/Documents/GitHub/Carlisle_Borough_Transportation_Study/data/BNA_score.csv", low_memory=False)

In [96]:
# Clean datasets
df_bna = bna.drop(columns = ['Unnamed: 0'])
df_crash = crash_data.drop(columns = ['Unnamed: 0'])

# Keep only Pennsylvania municipalities from the BNA dataframe
df_pa = df_bna[df_bna["State"]=="PA"]

# Drop Country columns
df_pa = df_pa.drop(["Country"], axis=1)

# Reset index
df_pa = df_pa.reset_index(drop=True)

# View Pennsylvania municipalities data
df_pa.head(10)

Unnamed: 0,City,State,BNA Score
0,Allentown,PA,29
1,Altoona,PA,36
2,Ardmore,PA,41
3,Athens,PA,23
4,Beaver,PA,36
5,Bellwood,PA,31
6,Bethlehem,PA,27
7,Birdsboro,PA,59
8,Camp Hill,PA,28
9,Carlisle,PA,38


In [97]:
# Lower the city name from PA dataframe
df_pa["City"] = df_pa["City"].str.lower()

# Lower the city name from crash dataframe
df_crash["City"] = df_crash["MUNI_NAME"].str.lower()

# Extract the city name from crash dataframe for matching names (merging)
df_crash["City"] = df_crash["City"].str.extract(r'^(.*?)\s')

# Merge dataframes
df_pa_crash = df_crash.merge(df_pa, how="inner", on="City")

# View dataframe
df_pa_crash

Unnamed: 0,NAME,PENN_DOT_MUNI_ID,state,county,county_subdivision,POPULATION,LAND_AREA,BIKE_TO_WORK_EST,BIKE_TO_WORK_MARG,WALK_TO_WORK_EST,...,PED_SUSP_SERIOUS_INJ_BY_AUTO_COUNT,BICYCLE_SOLO_COUNT,BICYCLE_DEATH_SOLO_COUNT,BICYCLE_SUSP_SERIOUS_INJ_SOLO_COUNT,PED_SOLO_COUNT,PED_DEATH_SOLO_COUNT,PED_SUSP_SERIOUS_INJ_SOLO_COUNT,City,State,BNA Score
0,"Philadelphia city, Philadelphia County",67301,42,101,60000,1596865,134.1,14172,940,54269,...,214.0,505.0,10.0,24.0,2655.0,83.0,227.0,philadelphia,PA,57
1,"Pittsburgh city, Allegheny County",2301,42,3,61000,303207,55.4,1920,290,15005,...,75.0,54.0,1.0,8.0,332.0,16.0,43.0,pittsburgh,PA,28
2,"Erie city, Erie County",25302,42,49,24000,95536,19.1,329,191,2003,...,10.0,52.0,0.0,1.0,94.0,5.0,12.0,erie,PA,41
3,"Carlisle borough, Cumberland County",21402,42,41,11272,19869,5.5,235,104,1052,...,3.0,12.0,0.0,2.0,25.0,1.0,3.0,carlisle,PA,38
4,"Lancaster city, Lancaster County",36301,42,71,41216,58034,7.2,198,114,3107,...,16.0,17.0,0.0,1.0,87.0,1.0,12.0,lancaster,PA,25
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
81,"Athens township, Bradford County",8204,42,15,3400,5195,43.6,0,16,123,...,,0.0,0.0,0.0,2.0,0.0,1.0,athens,PA,23
82,"Athens township, Crawford County",20201,42,39,3408,615,28.3,0,11,5,...,,,,,,,,athens,PA,23
83,"Bellwood borough, Blair County",7401,42,13,5384,1600,0.4,0,11,25,...,,,,,,,,bellwood,PA,31
84,"Verona borough, Allegheny County",2468,42,3,80032,2731,0.5,0,11,28,...,,0.0,0.0,0.0,1.0,0.0,0.0,verona,PA,26


In [98]:
# Assuming 'data' is a subdirectory in your current working directory
folder_path = 'data/'
file_name = 'df_pa_crash.csv'

# Combine the folder path and file name to create the full file path
full_file_path = folder_path + file_name

# Export dataframe to csv file
df_pa_crash.to_csv(full_file_path, index=True)