# Geocoding Police and Fire Departments and Mapping to CSI360 Customer Locations

- Import data from the CSI360 Police and Fire List.
- Geocode addresses to obtain latitude and longitude coordinates.
- Insert latitude and longitude columns into data file.
- Check each department location and identify if it is within 100 miles of, and in the same state as, a customer location.
- Separate the dataframe for departments within 100 miles for each customer location.
- Export the data to CSV files for further analysis and reporting.

In [None]:
# Dependencies and Setup
import pandas as pd
from geopy.geocoders import GoogleV3
from geopy.distance import geodesic
# import folium # Use to create html map
# from folium import plugins # Use to create html map
from ratelimit import limits, sleep_and_retry
import time
import numpy as np # Use for Haversine formula

# Manually load the Google Maps API key from the project's .env file.
# Remove code associated with dotenv if this works.
google_api_key = None  # stays None when the .env file has no matching entry
with open('C:/Users/jchan/csi360_fire_police/lefd-custy-targets/.env', 'r') as f:
    for line in f:
        if line.startswith('GOOGLE_MAPS_API_KEY'):
            # Split on the first '=' only, so a key value that itself
            # contains '=' is kept whole instead of being truncated.
            google_api_key = line.split('=', 1)[1].strip()

# Fail here with a clear message rather than with a NameError in a later cell.
if not google_api_key:
    raise RuntimeError("GOOGLE_MAPS_API_KEY was not found in the .env file")

In [None]:
# Store each input filepath in a variable
lefd_1_data = 'resources/data/lefd_1_data.csv'
csi360_customers = 'resources/data/csi360_customers.csv'

# Read the department list and the CSI360 customer list into Pandas dataframes
lefd_1_data_df = pd.read_csv(lefd_1_data)
csi360_customers_df = pd.read_csv(csi360_customers)

# Normalize the 'zip' column to text: go through float -> int to drop any
# trailing ".0", then left-pad to 5 digits so ZIP codes that start with zero
# (e.g. 02134) are not shortened to "2134". Missing values become "".
csi360_customers_df['zip'] = csi360_customers_df['zip'].fillna("").apply(
    lambda x: str(int(float(x))).zfill(5) if x != "" else ""
)

display(lefd_1_data_df.head(), csi360_customers_df.head())

In [None]:
## Geocode CSI360 customer zip codes

In [None]:
# Initialize the GoogleV3 geocoder with the API key read from the .env file.
geolocator = GoogleV3(api_key=google_api_key)

In [None]:
# Geocode a single ZIP code into a (latitude, longitude) pair

def get_lat_long(zip_code):
    """Look up the coordinates of a ZIP code with the module-level geolocator.

    Returns ``(latitude, longitude)`` on success. Returns ``(None, None)``
    when the ZIP code is the empty string, when the geocoder finds no match
    (a message is printed), or when the lookup raises (the error is printed).
    """
    no_coords = (None, None)

    # Nothing to look up for a blank ZIP code
    if zip_code == "":
        return no_coords

    try:
        # The ZIP code itself is the query string
        match = geolocator.geocode(zip_code)
        if not match:
            print(f"Geocoding failed for ZIP code: {zip_code}")  # Log failed geocodes
            return no_coords
        return match.latitude, match.longitude
    except Exception as e:
        print(f"Error geocoding {zip_code}: {e}")
        return no_coords

# Geocode every customer ZIP code, then split the (lat, lon) pairs into two columns
customer_coords = csi360_customers_df['zip'].apply(get_lat_long)
csi360_customers_df['Latitude'], csi360_customers_df['Longitude'] = zip(*customer_coords)

# Replace missing coordinates (skipped or failed lookups) with empty strings
for coord_col in ('Latitude', 'Longitude'):
    csi360_customers_df[coord_col] = csi360_customers_df[coord_col].fillna("")

# Preview the first few rows to verify
print(csi360_customers_df.head())


## Geocode Colorado Fire Department Addresses


In [None]:
# Function to get latitude and longitude from address
# Set rate limit to 25 requests per second
# Google geocoding API allows around 50 queries per second, I've adjusted to 25 to be safe
RATE_LIMIT = 25  # requests per second
TIME_PERIOD = 1  # time period in seconds

# Function to get latitude and longitude from address with rate limiter
@sleep_and_retry
@limits(calls=RATE_LIMIT, period=TIME_PERIOD)
def get_lat_long(address):
    try:
        location = geolocator.geocode(address)
        if location:
            return location.latitude, location.longitude
        else:
            return None, None
    except Exception as e:
        print(f"Error geocoding {address}: {e}")
        return None, None

In [None]:
# Build one "street, city, state zip" string per department
zip_text = lefd_1_data_df['zip'].astype(str)
street_and_city = lefd_1_data_df['addr1'] + ', ' + lefd_1_data_df['city']
lefd_1_data_df['Full_Address'] = street_and_city + ', ' + lefd_1_data_df['state'] + ' ' + zip_text

# Geocode each full address and split the (lat, lon) pairs into two columns
department_coords = lefd_1_data_df['Full_Address'].apply(get_lat_long)
lefd_1_data_df['Latitude'], lefd_1_data_df['Longitude'] = zip(*department_coords)
print(lefd_1_data_df.head())

In [None]:
# Save the geocoded dataframes to CSV.
# Uses the relative resources/output/ folder (like the relative resources/data/
# inputs) so the files land where the later pd.read_csv('resources/output/...')
# calls look for them, instead of a user-specific absolute drive path.
csi360_customers_df.to_csv('resources/output/csi360_custy_latlong.csv', index=False)
lefd_1_data_df.to_csv('resources/output/lefd_latlong.csv', index=False)

## Use Haversine Formula to calculate distances between latitude and longitude pairs 
### Source: https://stackoverflow.com/questions/29545704/fast-haversine-approximation-python-pandas

In [None]:
# Read the two geocoded CSV files back in (customers first, then departments)
csi360_customers_df, lefd_latlong_df = map(
    pd.read_csv,
    ('resources/output/csi360_custy_latlong.csv', 'resources/output/lefd_latlong.csv'),
)

# Show the first two rows of each dataframe
print(csi360_customers_df.head(2), lefd_latlong_df.head(2))

In [None]:
# Haversine formula to calculate the distance between two lat/lon pairs
def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in miles between two points.

    All four arguments are in degrees and may be scalars or NumPy-compatible
    arrays (the math is done with NumPy ufuncs, so they broadcast).
    NaN inputs give a NaN result.
    """
    R = 3958.8  # Earth radius in miles
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = np.sin(dlat / 2) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2) ** 2
    # Rounding can push `a` a hair above 1 for (near-)antipodal points, which
    # would make arcsin return NaN; cap it at 1. NaN inputs still propagate.
    a = np.minimum(a, 1.0)
    c = 2 * np.arcsin(np.sqrt(a))
    return R * c

# Function to filter agencies within a radius (default 100 miles) AND in the same state using Haversine formula
def filter_agencies_within_100_miles_and_same_state(
    lefd_latlong_df,
    csi360_customers_df,
    *,
    max_miles=100,
    output_path='C:/Users/jchan/csi360_fire_police/lefd-custy-targets/resources/output/lefd_100mi_csicusty.csv',
):
    """Pair each customer with the same-state agencies that lie within range.

    Args:
        lefd_latlong_df: Agency dataframe; reads the 'state', 'Latitude' and
            'Longitude' columns ('agency_name' is used only in error messages).
        csi360_customers_df: Customer dataframe; reads the 'company', 'state',
            'Latitude' and 'Longitude' columns.
        max_miles: Maximum distance in miles (inclusive) for a match.
        output_path: CSV file the results are written to; pass None to skip
            writing.

    Returns:
        A dataframe with one row per (company, agency) match: a 'company'
        column plus every column of the matching agency row. Rows are ordered
        by customer, then by agency. Pairs whose distance is NaN (missing
        coordinates) never match.
    """
    results = []  # One dict per matching (company, agency) pair

    # Iterate over each company in csi360_customers_df
    for _, company_row in csi360_customers_df.iterrows():
        company_name = company_row['company']
        company_lat = company_row['Latitude']
        company_lon = company_row['Longitude']

        # Apply the same-state rule once per company rather than once per agency row
        same_state_agencies = lefd_latlong_df[lefd_latlong_df['state'] == company_row['state']]

        # Check each same-state agency
        for _, agency_row in same_state_agencies.iterrows():
            try:
                distance = haversine(company_lat, company_lon, agency_row['Latitude'], agency_row['Longitude'])
            except (TypeError, ValueError) as e:
                # Non-numeric coordinates: report and move on. .get() keeps the
                # handler itself from failing when there is no 'agency_name' column.
                print(f"Error calculating distance for {agency_row.get('agency_name', '<unknown agency>')}: {e}")
                continue

            # If distance is within range, store the result
            if distance <= max_miles:
                results.append({
                    'company': company_name,
                    **agency_row.to_dict(),  # Include all columns from lefd_latlong_df
                })

    # Convert results to a DataFrame
    results_df = pd.DataFrame(results)

    # Print the first 5 results for verification
    print(results_df.head(5))

    # Save the results to a CSV file (skipped when output_path is None)
    if output_path is not None:
        results_df.to_csv(output_path, index=False)

    return results_df

# Run the filter on the loaded dataframes; the function also prints a preview
# of the matches and writes them to its results CSV.
filter_agencies_within_100_miles_and_same_state(lefd_latlong_df, csi360_customers_df)