# SPAH Postcode Mapping - Retrieving Coordinates

In [49]:
import pandas as pd
import googlemaps
import os

### Import data

In [50]:
# define file pathways
raw_folder_pathway = r'C:\Users\scotth07\OneDrive - NHS Scotland\Documents\Networks & Projects\SPAH\Postcode Mapping\raw_data'
cleaned_folder_pathway = r'C:\Users\scotth07\OneDrive - NHS Scotland\Documents\Networks & Projects\SPAH\Postcode Mapping\cleaned_data'
patient_postcodes_pathway = cleaned_folder_pathway + r'\patient_postcodes_data.xlsx'
hospital_data_pathway = raw_folder_pathway + r'\hospital_postcodes.xlsx'

In [51]:
# import raw data
patient_postcodes_df = pd.read_excel(patient_postcodes_pathway, sheet_name='Sheet1')
hospital_df = pd.read_excel(hospital_data_pathway, sheet_name='Sheet1')

### Retrieve coordinates for each postcode

In [52]:
key=os.getenv('API_KEY')

In [53]:
# initialise client with API key
gmaps = googlemaps.Client(key=key)

In [54]:
# Function to retrieve latitude and longitude coordinates for a given postcode
# Arguments:
#   -postcode: A string containing the postcode for each patient.
# Returns:
#   -A tuple (lat, lng) representing the postcode.
#   -Returns None if no coordinates are found or if an error occurs.

def get_coordinates(postcode):
    try:
        # Use Google Maps Geocoding API to get location data for the given postcode
        location = gmaps.geocode(postcode)
        
        # check if a location is returned from the API
        if location:
            # extract latitude and longitude from the API response
            lat = location[0]['geometry']['location']['lat']
            lng = location[0]['geometry']['location']['lng']
            return (lat, lng)
        else:
            # if no coordinates are found for the postcode
            print(f"No coordinates found for postcode: {postcode}")
            return None
    except Exception as e:
        # if exception occurs during the API call
        print(f"Error occurred: {e}")
        return None


In [55]:
# # define a function to assign coordinates to postcode
# def get_coordinates(postcode):
#     try:
#         location = gmaps.geocode(postcode)
#         if location:
#             lat = location[0]['geometry']['location']['lat']
#             lng = location[0]['geometry']['location']['lng']
#             return (lat, lng)
#         else:
#             print(f"No coordinates found for postcode: {postcode}")
#             return None
#     except Exception as e:
#         print(f"Error occurred: {e}")
#         return None


In [56]:
# assign postcode coordinates to a new column for each patient
patient_postcodes_df['patient_postcode_coordinates'] = patient_postcodes_df['patient_postcode'].apply(get_coordinates)

No coordinates found for postcode: G31 1JB
No coordinates found for postcode: G31 1JB
No coordinates found for postcode: DD111BA


In [57]:
hospital_df['hospital_postcode_coordinates'] = hospital_df['hospital_postcode'].apply(get_coordinates)

In [58]:
# Split the coordinates into two separate columns
patient_postcodes_df[['patient_postcode_lat', 'patient_postcode_long']] = pd.DataFrame(
    patient_postcodes_df['patient_postcode_coordinates'].tolist(), 
    index=patient_postcodes_df.index
)

### Calculate closest hospital to each patient based on distance

In [59]:
#  Calculate the distances between a given patient and list of hospitals to return the hospital with shortest distance
# Params:
#    -patient_coords: A tuple containing latitude, longitude of the patient's postcode.
#    -hospitals: A dataframe containing hospital information
# Returns:
#   A tuple containing:
#   -closest_hospital: The name of the closest hospital
#   -min_distance: The driving distance in metres

def find_closest_hospital_distance(patient_coords, hospitals):
    # If no patient coordinates are provided, return None for both hospital and duration
    if patient_coords is None:
        return None, None
    
    # initialize variables to keep track of the closest hospital and minimum distance
    closest_hospital = None
    min_distance = float('inf')
    
    # iterate over each hospital
    for index, row in hospitals.iterrows():
        # extract coordinates from the current row
        hospital_coords = row['hospital_postcode_coordinates']
        
        # calculate driving distance between patient and hospital
        result = gmaps.distance_matrix(origins=[patient_coords], destinations=[hospital_coords], mode='driving')
        
        # extract driving distance from the json response
        distance = result['rows'][0]['elements'][0]['distance']['value']
        
        # update the closest hospital and minimum duration if the current hospital is closer
        if distance < min_distance:
            min_distance = distance
            closest_hospital = row['hospital']
            
    return closest_hospital, min_distance


In [61]:
# assigns the name of the closest hospital for each patient
patient_postcodes_df['closest_hospital_distance'] = patient_postcodes_df['patient_postcode_coordinates'].apply(
    lambda x: find_closest_hospital_distance(x, hospital_df)[0]
)


KeyboardInterrupt



In [None]:
# assigns the distance to the closest hospital for each patient
patient_postcodes_df['metres_to_closest_hospital'] = patient_postcodes_df['patient_postcode_coordinates'].apply(
    lambda x: find_closest_hospital_distance(x, hospital_df)[1]
)

In [None]:
# assigns the distance to the closest hospital for each patient
patient_postcodes_df['miles_to_closest_hospital'] = (patient_postcodes_df['metres_to_closest_hospital'] / 1609.34).round(2)

In [None]:
# drop the metres column
patient_postcodes_df = patient_postcodes_df.drop(['metres_to_closest_hospital'], axis=1)

### Calculate closest hospital to each patient based on journey duration

In [None]:
# Test coordinates
# Patient G22 7QZ (55.8977572, -4.249776)
# Hospital KW1 5NS (58.44132879999999, -3.0957442)

In [None]:
# # Calculate the distances between a given patient and list of hospitals to return the hospital with shortest driving duration.
# # Params:
# #    -patient_coords: A tuple containing latitude, longitude of the patient's postcode.
# #    -hospitals: A dataframe containing hospital information
# # Returns:
# #   A tuple containing:
# #   -closest_hospital: The name of the closest hospital
# #   -min_duration: The driving duration in seconds
# def find_closest_hospital_duration(patient_coords, hospitals):
#     # if no patient coordinates are provided, return None for both hospital and duration
#     if patient_coords is None:
#         return None, None
    
#     # initialize variables to keep track of the closest hospital and minimum duration
#     closest_hospital = None
#     min_duration = float('inf')  # Start with an infinitely large duration
    
#     # iterate over each hospital
#     for index, row in hospitals.iterrows():
#         # extract coordinates from the current row
#         hospital_coords = row['hospital_postcode_coordinates']
        
#         # calculate driving distance between patient and hospital
#         result = gmaps.distance_matrix(origins=[patient_coords], destinations=[hospital_coords], mode='driving')
        
#         # extract driving duration from the json response
#         duration = result['rows'][0]['elements'][0]['duration']['value']
        
#         # update the closest hospital and minimum duration if the current hospital is shorter duration
#         if duration < min_duration:
#             min_duration = duration
#             closest_hospital = row['hospital']
            
#     return closest_hospital, min_duration


In [None]:
# # assigns the name of the closest hospital for each patient
# patient_postcodes_df['closest_hospital_duration'] = patient_postcodes_df['patient_postcode_coordinates'].apply(
#     lambda x: find_closest_hospital_duration(x, hospital_df)[0]
# )

In [None]:
# # assigns the name of the closest hospital for each patient
# patient_postcodes_df['seconds_duration_to_closest_hospital'] = patient_postcodes_df['patient_postcode_coordinates'].apply(
#     lambda x: find_closest_hospital_duration(x, hospital_df)[1]
# )

In [None]:
# patient_postcodes_df['minutes_duration_to_closest_hospital'] = (patient_postcodes_df['seconds_duration_to_closest_hospital'] / 60).round(1)

### Export to Excel

In [None]:
patient_postcodes_df.to_excel(cleaned_folder_pathway + r'\nearest_hospitals.xlsx',
                             index=False)