# SPAH Postcode Mapping - Retrieving Coordinates

In [1]:
import pandas as pd
import googlemaps
import os

### Import data

In [4]:
# folders holding the raw inputs and the cleaned outputs of the postcode mapping work
raw_folder_pathway = (
    r'C:\Users\scotth07\OneDrive - NHS Scotland\Documents\Networks & Projects\SPAH\Postcode Mapping\raw_data'
)
cleaned_folder_pathway = (
    r'C:\Users\scotth07\OneDrive - NHS Scotland\Documents\Networks & Projects\SPAH\Postcode Mapping\cleaned_data'
)

# workbooks read by this notebook (raw f-strings keep the Windows backslashes literal)
patient_postcodes_pathway = rf'{cleaned_folder_pathway}\patient_postcodes_data.xlsx'
hospital_data_pathway = rf'{raw_folder_pathway}\hospital_postcodes.xlsx'

In [5]:
# load the patient postcodes (cleaned folder) and the hospital postcodes (raw folder);
# both workbooks are read from their 'Sheet1' worksheet
patient_postcodes_df = pd.read_excel(io=patient_postcodes_pathway, sheet_name='Sheet1')
hospital_df = pd.read_excel(io=hospital_data_pathway, sheet_name='Sheet1')

### Retrieve coordinates for each postcode

In [6]:
# initialise client with API key
# The key is read from the GOOGLE_MAPS_API_KEY environment variable so the credential
# is never stored in, or shared along with, this notebook.
# NOTE: the key that was previously hard-coded here should be treated as exposed and rotated.
google_maps_api_key = os.environ.get("GOOGLE_MAPS_API_KEY")
if not google_maps_api_key:
    raise RuntimeError(
        "Set the GOOGLE_MAPS_API_KEY environment variable to a Google Maps API key "
        "before running this notebook."
    )
gmaps = googlemaps.Client(key=google_maps_api_key)

In [7]:
# define a function to assign coordinates to postcode
def get_coordinates(postcode):
    """Geocode a postcode with the module-level ``gmaps`` client.

    Parameters
    ----------
    postcode : str
        Postcode text to look up. Missing values (None/NaN) and blank strings
        are accepted and treated as "not found".

    Returns
    -------
    tuple or None
        ``(lat, lng)`` of the first geocoding result, or None when the postcode
        is missing, has no result, or the lookup raised an error.
    """
    # Empty spreadsheet cells arrive as NaN/None; sending them to the API would
    # spend a request on a value that is not a postcode.
    if not isinstance(postcode, str) or not postcode.strip():
        print(f"No coordinates found for postcode: {postcode}")
        return None

    try:
        location = gmaps.geocode(postcode)
        if location:
            # only the first (best) match is used
            position = location[0]['geometry']['location']
            return (position['lat'], position['lng'])
        print(f"No coordinates found for postcode: {postcode}")
        return None
    except Exception as e:
        # deliberate best-effort: one failed lookup must not abort the whole column
        print(f"Error occurred for postcode {postcode}: {e}")
        return None


In [8]:
# geocode every patient postcode; each cell of the new column holds whatever
# get_coordinates returns for that row's postcode
patient_postcodes_df['patient_postcode_coordinates'] = (
    patient_postcodes_df['patient_postcode'].map(get_coordinates)
)

In [9]:
# geocode every hospital postcode in the same way as the patient postcodes
hospital_df['hospital_postcode_coordinates'] = (
    hospital_df['hospital_postcode'].map(get_coordinates)
)

In [10]:
# Split the coordinates into two separate columns
# Postcodes that could not be geocoded hold None rather than a (lat, lng) pair, which
# the DataFrame constructor cannot expand into two columns. Pad those rows with an
# empty pair so they end up with missing lat/long values instead of raising.
coordinate_pairs = [
    coords if isinstance(coords, (tuple, list)) else (None, None)
    for coords in patient_postcodes_df['patient_postcode_coordinates']
]
# Naming the columns explicitly keeps the two-column shape even when there are no rows.
patient_postcodes_df[['patient_postcode_lat', 'patient_postcode_long']] = pd.DataFrame(
    coordinate_pairs,
    index=patient_postcodes_df.index,
    columns=['patient_postcode_lat', 'patient_postcode_long'],
)

### Calculate closest hospital to each patient

In [11]:
# Calculate the distances and find the closest hospital
# Returns: closest hospital, minimum distance

# Driving distances already fetched, keyed by (origin, destination) coordinates.
# The notebook calls find_closest_hospital twice per patient (once for the hospital
# name, once for the distance), so without this every request would be sent twice.
_driving_distance_cache = {}


def _driving_distance_metres(origin, destination):
    """Return the driving distance in metres between two (lat, lng) pairs, or None if the response has none."""
    key = (tuple(origin), tuple(destination))
    if key not in _driving_distance_cache:
        result = gmaps.distance_matrix(origins=[origin], destinations=[destination], mode='driving')
        element = result['rows'][0]['elements'][0]
        # an element without a 'distance' entry means no route was returned for this pair
        distance = element.get('distance')
        _driving_distance_cache[key] = distance['value'] if distance else None
    return _driving_distance_cache[key]


def find_closest_hospital(patient_coords, hospitals):
    """Find the hospital with the shortest driving distance from a patient.

    Parameters
    ----------
    patient_coords : tuple or None
        ``(lat, lng)`` of the patient, or None when the postcode was not geocoded.
    hospitals : pandas.DataFrame
        One row per hospital with the columns ``'hospital'`` and
        ``'hospital_postcode_coordinates'``.

    Returns
    -------
    tuple
        ``(closest_hospital, min_distance)`` where the distance is the ``'value'``
        reported by the Distance Matrix API. ``(None, None)`` is returned when the
        patient has no coordinates or no hospital yields a distance.

    Errors raised by the API client are not caught: silently skipping a hospital
    could report the wrong hospital as the closest one.
    """
    if patient_coords is None:
        return None, None

    closest_hospital = None
    min_distance = float('inf')

    for _, row in hospitals.iterrows():
        hospital_coords = row['hospital_postcode_coordinates']
        # hospitals whose postcode was not geocoded cannot be compared
        if not isinstance(hospital_coords, (tuple, list)):
            continue
        distance = _driving_distance_metres(patient_coords, hospital_coords)
        if distance is not None and distance < min_distance:
            min_distance = distance
            closest_hospital = row['hospital']

    # nothing comparable was found: report "unknown" rather than an infinite distance
    if min_distance == float('inf'):
        return None, None
    return closest_hospital, min_distance

In [12]:
# records, for each patient, the first element returned by find_closest_hospital
# (the name of the closest hospital)
patient_postcodes_df['closest_hospital'] = patient_postcodes_df['patient_postcode_coordinates'].map(
    lambda coords: find_closest_hospital(coords, hospital_df)[0]
)

In [13]:
# records, for each patient, the second element returned by find_closest_hospital
# (the distance to the closest hospital)
patient_postcodes_df['metres_to_closest_hospital'] = patient_postcodes_df['patient_postcode_coordinates'].map(
    lambda coords: find_closest_hospital(coords, hospital_df)[1]
)

In [15]:
# converts the distance to the closest hospital from metres to miles
# (dividing by 1609.34), rounded to 2 decimal places
patient_postcodes_df['miles_to_closest_hospital'] = (
    patient_postcodes_df['metres_to_closest_hospital']
    .div(1609.34)
    .round(2)
)

In [16]:
# write the enriched patient table to the cleaned-data folder, without the index column
nearest_hospitals_pathway = cleaned_folder_pathway + r'\nearest_hospitals.xlsx'
patient_postcodes_df.to_excel(nearest_hospitals_pathway, index=False)