# Geocoding and Mapping of Fire and Police Departments: Colorado Focus

- Import and clean data from multiple sources.
- Merge datasets based on the common `agency_name` column to consolidate information.
- Identify and fill in missing data from the primary dataset using supplementary data sources.
- Geocode addresses to obtain latitude and longitude coordinates.
- Flag departments within 30 miles of the CSI360 office and plot them on an interactive map.
- Export the cleaned, organized, and geocoded data to CSV files for further analysis and reporting.

In [None]:
# Install required libraries with conda (python-dotenv provides the `dotenv` module imported below)
# !conda install -y pandas geopy folium python-dotenv -c conda-forge

In [None]:
# Dependencies and Setup
import pandas as pd
from geopy.geocoders import GoogleV3
from geopy.distance import geodesic
import folium
from folium import plugins
from dotenv import load_dotenv
import os

# Load variables from a local .env file (if one exists) into the process environment.
load_dotenv()

# Read the Google Maps API key from the environment instead of hard-coding it, so the
# credential never lives in the notebook source or its version history.
# Whitespace and wrapping quotes are stripped defensively in case the value was exported
# with them.
# NOTE(review): an earlier comment here said reading the key from the environment
# "causes problems" without saying why - confirm that load_dotenv() finds the .env file
# and that it defines GOOGLE_MAPS_API_KEY.
google_api_key = (os.environ.get('GOOGLE_MAPS_API_KEY') or '').strip().strip('\'"') or None

if google_api_key is None:
    # Warn instead of raising so the data-cleaning cells can still run without a key;
    # only the geocoding cells need it.
    print("WARNING: GOOGLE_MAPS_API_KEY is not set; the geocoding cells will not work.")

In [None]:
# Locations of the raw input files
police_data = 'resources/data/police_departments.csv'
fire_data = 'resources/data/usfa-registry-national.txt'
agency_addrs = 'resources/data/27681-0001-Data.txt'
co_leads = 'resources/data/co_leads.csv'

# The fire registry and agency address files are read as tab-delimited ISO-8859-1 text,
# so they share one set of parser options; the police and leads files are plain CSV.
tab_delimited_options = {'delimiter': '\t', 'encoding': 'ISO-8859-1'}

# Load each source (police, fire, agency addresses, Colorado leads) into its own DataFrame
police_data_df = pd.read_csv(police_data)
fire_data_df = pd.read_csv(fire_data, **tab_delimited_options)
agency_addrs_df = pd.read_csv(agency_addrs, **tab_delimited_options)
co_leads_df = pd.read_csv(co_leads)

In [None]:
# Preview the first rows of every loaded DataFrame, in the order they were read
for frame in (police_data_df, fire_data_df, agency_addrs_df, co_leads_df):
    display(frame.head())


In [None]:
# Column-name mappings, one per source, so that columns holding the same kind of data
# end up with the same name in every DataFrame.
police_rename_dict = {
    'name': 'agency_name',
    'sworn': 'sworn_active_persnl',
    'type': 'agency_type',
}

fire_rename_dict = {
    'Fire dept name': 'agency_name',
    'HQ addr1': 'hq_addr1',
    'HQ addr2': 'hq_addr2',
    'HQ city': 'hq_city',
    'HQ state': 'hq_state',
    'HQ zip': 'hq_zip',
    'Mail addr1': 'addr1',
    'Mail addr2': 'addr2',
    'Mail PO box': 'po_box',
    'Mail city': 'city',
    'Mail state': 'state',
    'Mail zip': 'zip',
    'HQ phone': 'hq_ph',
    'HQ fax': 'hq_fax',
    'County': 'county',
    'Dept Type': 'agency_type',
    'Organization Type': 'org_type',
    'Website': 'website',
    'Active Firefighters - Career': 'sworn_active_persnl',
}

addrs_rename_dict = {
    'AGCYNAME': 'agency_name',
    'ADDR1': 'addr1',
    'ADDR2': 'addr2',
    'CITY': 'city',
    'STATE': 'state',
    'ZIP': 'zip',
    'COUNTY': 'county',
}

# Apply each mapping to its DataFrame; columns not listed in a mapping keep their name
police_data_df = police_data_df.rename(columns=police_rename_dict)
fire_data_df = fire_data_df.rename(columns=fire_rename_dict)
agency_addrs_df = agency_addrs_df.rename(columns=addrs_rename_dict)

# Verify: show the first rows of each renamed DataFrame
display(police_data_df.head(), fire_data_df.head(), agency_addrs_df.head())

In [None]:
# Drop the columns that are not carried into the merged dataset
police_data_df = police_data_df.drop(columns=['ftciv', 'ptciv', 'ptsworn'])
fire_data_df = fire_data_df.drop(columns=[
    'FDID',
    'Number Of Stations',
    'Active Firefighters - Volunteer',
    'Active Firefighters - Paid per Call',
    'Non-Firefighting - Civilian',
    'Non-Firefighting - Volunteer',
    'Primary agency for emergency mgmt',
])

# Make sure the output folder exists: to_csv does not create missing directories
# and would otherwise fail on a fresh checkout.
os.makedirs('resources/output', exist_ok=True)

# Save the trimmed DataFrames to new files
police_data_df.to_csv('resources/output/police_data_clean_df.csv', index=False)
fire_data_df.to_csv('resources/output/fire_data_clean_df.csv', index=False)


In [None]:
# Columns of the agency address data that are needed for the fill-in step
columns_to_select = ['agency_name', 'addr1', 'addr2']

# Keep only those columns (all rows)
agency_addrs_df = agency_addrs_df.loc[:, columns_to_select]

print(agency_addrs_df.head())

In [None]:
# Fill police_data_df (df1) with address details from agency_addrs_df (df2), matched on
# the shared agency_name column.
#
# Every non-key df2 column is added to df1. A df1 row whose agency_name appears in df2
# receives the values of the FIRST matching df2 row; a df1 row with no match receives an
# empty string. The lookup table is built once, so df2 is not rescanned for each df1 row.

# Step 1: the df2 columns to copy over (everything except the join key)
address_columns = [column for column in agency_addrs_df.columns if column != 'agency_name']

# Step 2: build a one-row-per-agency lookup indexed by agency_name. Rows without a name
# are dropped (a missing name must never count as a match) and only the first row per
# name is kept.
address_lookup = (
    agency_addrs_df
    .dropna(subset=['agency_name'])
    .drop_duplicates(subset='agency_name', keep='first')
    .set_index('agency_name')
)

# True for each df1 row whose agency_name exists in df2
has_match = police_data_df['agency_name'].isin(address_lookup.index)

# Step 3: insert the corresponding data into df1, one column at a time
for column in address_columns:
    looked_up = police_data_df['agency_name'].map(address_lookup[column])
    # Matched rows keep df2's value as-is (including NaN when df2 has no value);
    # unmatched rows get "". The object cast keeps strings and "" in one column.
    police_data_df[column] = looked_up.astype(object).where(has_match, "")

# Display the updated df1
print(police_data_df.head())

In [None]:
# Alphabetical (A to Z) view of the police data, ordered by agency_name.
# sort_values returns a new DataFrame, so police_data_df itself keeps its row order.
police_data_sorted = police_data_df.sort_values('agency_name')

# Show the first rows of the sorted view
display(police_data_sorted.head())

In [None]:
# Stack the police rows on top of the fire rows with a fresh 0..n-1 index, then
# replace every NaN (including columns that exist in only one source) with a single space.
merged_df = pd.concat([police_data_df, fire_data_df], ignore_index=True).fillna(" ")

# Show the first rows of the merged DataFrame
print(merged_df.head())


In [None]:
# Final column layout of the merged data; columns not listed here are dropped
new_column_order = [
    'agency_name',
    'agency_type',
    'sworn_active_persnl',
    'hq_addr1',
    'hq_addr2',
    'hq_city',
    'hq_state',
    'hq_zip',
    'addr1',
    'addr2',
    'po_box',
    'city',
    'state',
    'county',
    'zip',
    'hq_ph',
    'hq_fax',
    'org_type',
    'website',
    'fips',
]

# Select the columns in that order (raises KeyError if any listed column is absent)
merged_df = merged_df.loc[:, new_column_order]

# Show the first rows to check the new order
print(merged_df.head())

In [None]:
# Save merged dataframe to a new file.
# Create the output folder first: to_csv does not create missing directories.
os.makedirs('resources/output', exist_ok=True)
merged_df.to_csv('resources/output/merged_data2.csv', index=False)


## Geocode Colorado Fire Department Addresses


In [None]:
# Debug google maps api: confirm a key is present WITHOUT echoing the secret itself.
# Cell output is stored with the notebook, so printing the key would leak it to
# anyone the notebook is shared with or committed for.
print('Google Maps API key loaded:', bool(google_api_key))


In [None]:
# Initialize geolocator.
# geopy's default request timeout is 1 second, which is easily exceeded when geocoding
# many rows in a row; a longer timeout avoids spurious timeouts that would otherwise
# leave departments without coordinates.
geolocator = GoogleV3(api_key=google_api_key, timeout=10)


In [None]:
# Function to get latitude and longitude from address
def get_lat_long(address):
    """Geocode one address with the notebook-level ``geolocator``.

    Args:
        address: Address text to look up. Anything that is not a non-blank string
            (``None``, NaN, ``""``, whitespace) is treated as "no address".

    Returns:
        A ``(latitude, longitude)`` tuple for the geocoder's result, or
        ``(None, None)`` when there is no address, the geocoder finds nothing,
        or the lookup raises.
    """
    # Don't spend an API request (or log an error) on a missing or blank address.
    if not isinstance(address, str) or not address.strip():
        return None, None

    try:
        location = geolocator.geocode(address)
    except Exception as e:
        # Deliberately broad: this runs once per row of a batch, and one failed lookup
        # should be reported and skipped rather than abort the whole run.
        print(f"Error geocoding {address}: {e}")
        return None, None

    if not location:
        return None, None
    return location.latitude, location.longitude

In [None]:
# Test with a known address
location = geolocator.geocode("1600 Pennsylvania Ave NW, Washington, DC 20500")

# geocode() returns None when nothing is found, so check before reading attributes
if location is None:
    print("No geocoding result returned for the test address.")
else:
    print(location.latitude, location.longitude)

In [None]:
# Combine address fields into a single string per department.
# A row with a missing address part gets NaN here (string + NaN is NaN).
co_leads_df['Full_Address'] = co_leads_df['hq_addr1'] + ', ' + co_leads_df['hq_city'] + ', ' + co_leads_df['hq_state'] + ' ' + co_leads_df['hq_zip'].astype(str)

# Geocode each address once, then split the (lat, long) pairs into two columns.
# Building the columns from a list also works when the frame has no rows, where
# unpacking zip(*...) into two names would raise ValueError.
coordinates = [get_lat_long(address) for address in co_leads_df['Full_Address']]
co_leads_df['Latitude'] = [latitude for latitude, _ in coordinates]
co_leads_df['Longitude'] = [longitude for _, longitude in coordinates]
print(co_leads_df.head())

In [None]:
# CSI360 location
csi360_location = (39.0911, -104.8726)  # Approximate lat/long for 590 Highway 105 Suite # 276, Monument, CO 80132

# Function to check if within 30 miles
def within_30_miles(row, radius_miles=30):
    """Report whether a department lies within ``radius_miles`` of ``csi360_location``.

    Args:
        row: A record with ``'Latitude'`` and ``'Longitude'`` entries, such as a
            DataFrame row passed by ``DataFrame.apply(..., axis=1)``.
        radius_miles: Radius of the search area in miles (inclusive). Defaults to 30.

    Returns:
        ``True`` when the geodesic distance is at most ``radius_miles``. ``False``
        when it is larger, or when the coordinates are missing or not usable.

    Raises:
        KeyError: If ``row`` has no ``'Latitude'`` or ``'Longitude'`` entry.
    """
    latitude, longitude = row['Latitude'], row['Longitude']

    # A department that could not be geocoded has no coordinates to measure from.
    if pd.isna(latitude) or pd.isna(longitude):
        return False

    try:
        distance_miles = geodesic(csi360_location, (latitude, longitude)).miles
    except (TypeError, ValueError):
        # Coordinates geopy cannot interpret (wrong type or out of range)
        return False
    return distance_miles <= radius_miles

# Apply the function to flag departments within and outside of 30 miles
co_leads_df['Within_30_Miles'] = co_leads_df.apply(within_30_miles, axis=1)


In [None]:
# Separate the dataframes for departments within and outside the 30-mile radius.
# The flag is cast to bool once so it can be used (and negated) directly as a row mask.
inside_radius = co_leads_df['Within_30_Miles'].astype(bool)
within_30_df = co_leads_df[inside_radius]
outside_30_df = co_leads_df[~inside_radius]

# Make sure the output folder exists: to_csv does not create missing directories
os.makedirs('resources/output', exist_ok=True)

# Save the filtered dataframes to CSV files
within_30_df.to_csv('resources/output/co_fd_within_30_miles.csv', index=False)
outside_30_df.to_csv('resources/output/co_fd_outside_30_miles.csv', index=False)

## Create Map with Overlays

In [None]:
# Create base map centered on CSI360 location
m = folium.Map(location=csi360_location, zoom_start=10)

# Add a circular overlay for within 30 miles
folium.Circle(
    location=csi360_location,
    radius=30*1609.34,  # 30 miles in meters
    color='blue',
    fill=True,
    fill_color='blue',
    fill_opacity=0.2,
    popup='Within 30 Miles'
).add_to(m)

# Add fire department markers: blue inside the radius, red outside.
# Each set goes into its own named FeatureGroup so the layer control has overlays to toggle.
for departments_df, marker_color, layer_name in (
    (within_30_df, 'blue', 'Within 30 Miles'),
    (outside_30_df, 'red', 'Outside 30 Miles'),
):
    marker_layer = folium.FeatureGroup(name=layer_name)
    marker_layer.add_to(m)

    # Departments that could not be geocoded have no coordinates (they are flagged as
    # not within 30 miles); folium rejects NaN locations, so leave them off the map.
    mappable_df = departments_df.dropna(subset=['Latitude', 'Longitude'])

    for _, row in mappable_df.iterrows():
        folium.Marker(
            location=(row['Latitude'], row['Longitude']),
            popup=f"{row['agency_name']}<br>{row['Full_Address']}",
            icon=folium.Icon(color=marker_color)
        ).add_to(marker_layer)

# Add a layer control to toggle overlays (added last so it lists both marker layers)
folium.LayerControl().add_to(m)

# Save the map to an HTML file (create the output folder first if it is missing)
os.makedirs('resources/output', exist_ok=True)
m.save('resources/output/colorado_fire_departments_map.html')
