In [46]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from dotenv import load_dotenv
from sqlalchemy import create_engine
import os
import requests
import time

### Load data

In [22]:
load_dotenv()
DB_PW = os.getenv('DB_PW')

# Database connection parameters
username = 'root'
password = DB_PW
host = 'localhost'
port = '3306'
database = 'hospital_register'

# Create SQLAlchemy engine
engine = create_engine(f'mysql+pymysql://{username}:{password}@{host}:{port}/{database}')

query = """
    SELECT hospital_id, name, beds_number, latitude, longitude
    FROM hospital_locations
    """

# Read data from SQL database into a DataFrame
df_hospitals = pd.read_sql(query, engine)

In [23]:
df_places = pd.read_csv('../data/in/staging/atlas_places.csv')

### Calculate distance to nearest hospitals

In [24]:
# Calculate distance between two coordinates
def haversine(lat_a, lon_a, lat_b, lon_b):
    """
    Calculate the distance between two sets of latitude and longitude coordinates using the Haversine formula.
    """
    # Convert latitude and longitude to radians
    lat_a_rad = np.radians(lat_a)
    lon_a_rad = np.radians(lon_a)
    lat_b_rad = np.radians(lat_b)
    lon_b_rad = np.radians(lon_b)

    # Calculate the differences between the coordinates
    d_lat = lat_b_rad - lat_a_rad
    d_lon = lon_b_rad - lon_a_rad

    # Apply the Haversine formula
    a = np.sin(d_lat / 2) ** 2 + np.cos(lat_a_rad) * np.cos(lat_b_rad) * np.sin(d_lon / 2) ** 2
    c = 2 * np.arcsin(np.sqrt(a))
    r = 6371  # Radius of the Earth in kilometers

    # Calculate the distance
    distance = c * r

    return distance

# Calculate the distances to all hospitals for each place and store top 3 distances and hospital IDs
def calculate_top_3_distances(row):
    distances = haversine(row['latitude'], row['longitude'], df_hospitals['latitude'], df_hospitals['longitude'])
    sorted_indices = np.argsort(distances)
    top_3_distances = distances[sorted_indices][:3]
    top_3_hospital_ids = df_hospitals['hospital_id'].iloc[sorted_indices[:3]].values
    return pd.Series(
        list(top_3_distances) + list(top_3_hospital_ids),
        index=['nearest_distance_1', 'nearest_distance_2', 'nearest_distance_3', 'nearest_hospital_1', 'nearest_hospital_2', 'nearest_hospital_3']
    )

# Apply the function to each row in df_places
df_places[['nearest_distance_1', 'nearest_distance_2', 'nearest_distance_3', 'nearest_hospital_1', 'nearest_hospital_2', 'nearest_hospital_3']] = df_places.apply(calculate_top_3_distances, axis=1)

In [26]:
df_places.to_csv('../data/in/staging/atlas_places_with_distances_top_3.csv', index=False)

### Retrieve routes from API

In [74]:
df_places = pd.read_csv('../data/in/staging/atlas_places_with_distances_top_3.csv', dtype={'zip': str, 'nearest_hospital_1': str, 'nearest_hospital_2': str, 'nearest_hospital_3': str})
df_places = df_places.drop(columns=['zip', 'city_district', 'city', 'is_city', 'rural_district', 'nearest_distance_1', 'nearest_distance_2', 'nearest_distance_3'])

In [76]:
df_places

Unnamed: 0,latitude,longitude,nearest_hospital_1,nearest_hospital_2,nearest_hospital_3
0,54.78643,9.43339,772898,773287,771320
1,54.78643,9.43339,772898,773287,771320
2,54.79203,9.47724,772898,773287,771320
3,54.77985,9.41820,772898,773287,771320
4,54.77985,9.41820,772898,773287,771320
...,...,...,...,...,...
81944,51.00745,12.50337,773479,771016,772880
81945,51.00356,12.47453,773479,771016,772880
81946,50.99752,12.47565,773479,771016,772880
81947,51.01629,12.46924,773479,771016,773082


In [70]:
route_data = {}

for place_index in df_places[0:1000].index:
    route_data[place_index] = {'place_index': place_index}
    place = df_places.loc[place_index]
    lat_a = place['latitude']
    lon_a = place['longitude']
    k = 1
    for hospital_id in place[['nearest_hospital_1', 'nearest_hospital_2', 'nearest_hospital_3']]:
        print(k)
        hospital = df_hospitals[df_hospitals['hospital_id'] == hospital_id].iloc[0]
        lat_b = hospital['latitude']
        lon_b = hospital['longitude']
        request_url = f'https://router.project-osrm.org/route/v1/driving/{lon_a},{lat_a};{lon_b},{lat_b}?geometries=geojson&alternatives=false&steps=true&generate_hints=false'
        response = requests.get(request_url)
        duration = response.json()['routes'][0]['duration']
        distance = response.json()['routes'][0]['distance']
        time.sleep(1)
        route_data[place_index].update({f'hospital_{k}': hospital_id, f'route_duration_{k}': duration, f'route_distance_{k}': distance})
        
        k += 1
        


1
772898 - 107.6 sec - 880.6 m
2
773287 - 80.5 sec - 699.9 m
3
771320 - 142.1 sec - 1324.8 m
1
772898 - 107.6 sec - 880.6 m
2
773287 - 80.5 sec - 699.9 m
3
771320 - 142.1 sec - 1324.8 m


In [80]:
df_places_with_routes = pd.DataFrame(route_data).T

In [81]:
df_places_with_routes

Unnamed: 0,place_index,hospital_1,route_duration_1,route_distance_1,hospital_2,route_duration_2,route_distance_2,hospital_3,route_duration_3,route_distance_3
0,0,772898,107.6,880.6,773287,80.5,699.9,771320,142.1,1324.8
1,1,772898,107.6,880.6,773287,80.5,699.9,771320,142.1,1324.8
