# Name: Margaret Nguyen

# Data Manipulation: Merging BNA data and Colorado Crash Data


**Assignment: Merge the BNA scores of the 85 Colorado municipalities in the PeopleForBikes dataset with the Colorado crash data sourced from [this CSV file](https://github.com/margaret613/Carlisle_Borough_Transportation_Study/blob/main/data/df_col_acs.csv).**

In [1]:
# Import packages
import pandas as pd
import re

In [2]:
# Load the two input tables from CSV.
# Per the original note, the municipality-level crash data covers 2017 to 2021.
crash_data = pd.read_csv(
    "/Users/margaret06/Documents/GitHub/Carlisle_Borough_Transportation_Study/data/df_col_acs.csv",
    low_memory=False,
)
bna = pd.read_csv(
    "/Users/margaret06/Documents/GitHub/Carlisle_Borough_Transportation_Study/data/BNA_score.csv",
    low_memory=False,
)

In [11]:
# Remove the 'Unnamed: 0' column from each input table
df_bna = bna.drop(columns="Unnamed: 0")
df_crash = crash_data.drop(columns="Unnamed: 0")

# Build the Colorado subset of the BNA table in one chain:
#   1. keep only the rows whose State is "CO"
#   2. drop the Country and State columns
#   3. renumber the rows from 0, discarding the old index
df_co = (
    df_bna.loc[df_bna["State"] == "CO"]
    .drop(columns=["Country", "State"])
    .reset_index(drop=True)
)

# Report the (rows, columns) shape of the Colorado subset
print(df_co.shape)

# Display the Colorado municipalities data
df_co

(85, 2)


Unnamed: 0,City,BNA Score
0,Arvada,22
1,Aspen,75
2,Aurora,36
3,Avon,26
4,Bennett,25
...,...,...
80,Timnath,17
81,Vail,34
82,Westminster,19
83,Wheat Ridge,13


In [14]:
# List the distinct city names in the crash data, to compare their spelling and
# casing against the BNA city names before merging
pd.unique(df_crash["City"])

array(['brighton', 'commerce city', 'alamosa', 'arboles',
       'pagosa springs', 'campo', 'springfield', 'walsh', 'las animas',
       'mcclave', 'boulder', 'longmont', 'broomfield', 'buena vista',
       'salida', 'cheyenne wells', 'kit carson', 'antonito', 'la jara',
       'manassa', 'blanca', 'san luis', 'ordway', 'sugar city',
       'westcliffe', 'cedaredge', 'delta', 'hotchkiss', 'paonia',
       'denver', 'dove creek', 'rico', 'castle rock', 'parker', 'sedalia',
       'basalt', 'agate', 'elizabeth', 'simla', 'cotopaxi', 'florence',
       'glenwood springs', 'new castle', 'rifle', 'central city',
       'granby', 'kremmling', 'crested butte', 'gunnison', 'lake city',
       'gardner', 'la veta', 'walsenburg', 'walden', 'golden', 'eads',
       'haswell', 'sheridan lake', 'leadville', 'berthoud', 'estes park',
       'fort collins', 'livermore', 'loveland', 'arriba', 'hugo', 'limon',
       'fleming', 'merino', 'peetz', 'sterling', 'clifton', 'collbran',
       'de beque', 'f

In [15]:
# List the distinct city names in the Colorado BNA subset, to compare their
# spelling and casing against the crash-data city names before merging
pd.unique(df_co["City"])

array(['Arvada', 'Aspen', 'Aurora', 'Avon', 'Bennett', 'Berthoud',
       'Black Hawk', 'Boulder', 'Bow Mar', 'Breckenridge', 'Brighton',
       'Broomfield', 'Buena Vista', 'Castle Pines', 'Castle Rock',
       'Cañon City', 'Centennial', 'Central City', 'Cherry Hills Village',
       'Colorado Springs', 'Colorado Springs Metro', 'Columbine Valley',
       'Commerce City', 'Crested Butte', 'Dacono', 'Deer Trail', 'Denver',
       'Durango', 'Eaton', 'Edgewater', 'Empire', 'Englewood', 'Erie',
       'Estes Park', 'Evans', 'Federal Heights', 'Firestone',
       'Fort Collins', 'Foxfield', 'Frederick', 'Fruita', 'Garden City',
       'Georgetown', 'Glendale', 'Golden', 'Grand Junction', 'Greeley',
       'Greenwood Village', 'Gunnison', 'Highlands Ranch',
       'Idaho Springs', 'Johnstown', 'La Salle', 'Lafayette', 'Lakewood',
       'Larkspur', 'Littleton', 'Lochbuie', 'Lone Tree', 'Longmont',
       'Louisville', 'Loveland', 'Lyons', 'Manitou Springs', 'Mead',
       'Milliken', 'Mon

In [9]:
# Lowercase the city names in df_co (the Colorado BNA subset) so they use the
# same casing as the crash-data city names, which appear in lowercase
df_co = df_co.assign(City=df_co["City"].str.lower())

# Inner join on City: only cities present in both tables are kept
df_merged = df_crash.merge(right=df_co, on="City", how="inner")

# Display the merged dataframe
df_merged

Unnamed: 0,NAME,POPULATION,BIKE_TO_WORK_EST,BIKE_TO_WORK_MARG,WALK_TO_WORK_EST,WALK_TO_WORK_MARG,DRIVE_SOLO_TO_WORK_EST,DRIVE_SOLO_TO_WORK_MARG,CARPOOL_TO_WORK_EST,CARPOOL_TO_WORK_MARG,...,COUNTY_NAME,City,County,BICYCLE_BY_AUTO_COUNT,BICYCLE_DEATH_BY_AUTO_COUNT,BICYCLE_SUSP_SERIOUS_INJ_BY_AUTO_COUNT,PED_BY_AUTO_COUNT,PED_DEATH_BY_AUTO_COUNT,PED_SUSP_SERIOUS_INJ_BY_AUTO_COUNT,BNA Score
0,"Brighton CCD, Adams County",109934,41,56,440,209,43804,1622,5040,679,...,Adams County,brighton,adams,0.0,0.0,0.0,0.0,0.0,0.0,17
1,"Commerce City CCD, Adams County",33395,17,30,149,80,10735,922,2272,591,...,Adams County,commerce city,adams,4.0,2.0,2.0,3.0,2.0,1.0,14
2,"Boulder CCD, Boulder County",123266,5247,551,5255,630,32118,1248,2904,399,...,Boulder County,boulder,boulder,42.0,2.0,40.0,4.0,1.0,3.0,68
3,"Longmont CCD, Boulder County",112640,428,185,840,242,42117,1452,5111,559,...,Boulder County,longmont,boulder,43.0,5.0,38.0,12.0,1.0,11.0,59
4,"Broomfield CCD, Broomfield County",72697,304,139,482,160,27371,778,2730,473,...,Broomfield County,broomfield,broomfield,2.0,0.0,2.0,5.0,1.0,4.0,47
5,"Buena Vista CCD, Chaffee County",8656,0,18,54,72,2696,436,337,186,...,Chaffee County,buena vista,chaffee,1.0,0.0,1.0,0.0,0.0,0.0,14
6,"Salida CCD, Chaffee County",10780,150,116,357,167,3278,391,387,171,...,Chaffee County,salida,chaffee,0.0,0.0,0.0,0.0,0.0,0.0,64
7,"Denver CCD, Denver County",706799,7963,738,17863,1063,252765,3315,27575,1743,...,Denver County,denver,denver,143.0,9.0,134.0,96.0,12.0,84.0,43
8,"Castle Rock CCD, Douglas County",56349,32,38,601,253,21644,1237,1371,351,...,Douglas County,castle rock,douglas,2.0,0.0,2.0,1.0,0.0,1.0,23
9,"Parker CCD, Douglas County",138900,59,59,766,228,52902,1707,5167,617,...,Douglas County,parker,douglas,1.0,0.0,1.0,1.0,0.0,1.0,20


In [13]:
# Write the merged crash + BNA table to a CSV file.
# Assumes 'data' is a subdirectory of the current working directory.
folder_path = 'data/'
file_name = 'df_col_bna.csv'

# Combine the folder path and file name to create the full file path
full_file_path = folder_path + file_name

# Export the merge result. The dataframe produced by the merge cell is named
# df_merged; no dataframe called df_col_bna is defined in this notebook, so
# referencing that name here would raise a NameError.
# index=True also writes the row index as the first column of the CSV.
df_merged.to_csv(full_file_path, index=True)