# Generate hospital dataset

## Get mapping of existing hospitals from OpenStreetMap to cities

In [3]:
import requests
import json
import pandas as pd
import numpy as np

In [4]:
# spin-out from 'LK_data_clean.csv' & 'staedte_data.csv'
# Columns used in this notebook: 'stadt', 'BL', 'latitude', 'longitude'
# and 'population'.
full_data = pd.read_csv('../../data/data_gatherer/full_population_data.csv')

In [5]:
# JSON file with the existing hospitals (loaded into `hospitals` below)
datapath_hospitals = '../../data/hospitals/hospitals-realistic.json'

In [7]:
# helper function: coordinates to distance

from math import sin, asin, cos, sqrt, radians

# distance in kilometers between two positions
# position is given as spherical coordinates (lat, lon)

def get_distance(a, b):
    """Return the great-circle distance in kilometers between two positions.

    Uses the haversine formula on a sphere of radius 6371 km.

    Args:
        a: First position as a ``(lat, lon)`` pair in degrees.
        b: Second position as a ``(lat, lon)`` pair in degrees.

    Returns:
        The distance between ``a`` and ``b`` in kilometers.
    """
    lat1, lon1 = a
    lat2, lon2 = b
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    hav = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    # Rounding can push the haversine term marginally above 1 for (near-)
    # antipodal points, which would make asin() raise ValueError; clamp it.
    return 2 * 6371.0 * asin(min(1.0, sqrt(hav)))

In [33]:
import json
from pprint import pprint

# Load the existing hospitals. The encoding is pinned to UTF-8 (the standard
# JSON encoding) so the result does not depend on the platform's default
# locale encoding.
with open(datapath_hospitals, encoding='utf-8') as f:
    hospitals = json.load(f)

In [10]:
# Map every existing hospital to its nearest city (great-circle distance).
# Result: mapping_dict[hospital_id] = {"stadt": ..., "distance": km, "BL": ...}

# Extract the city rows once instead of re-running DataFrame.iterrows()
# for every single hospital.
city_rows = list(zip(
    full_data['latitude'],
    full_data['longitude'],
    full_data['stadt'],
    full_data['BL'],
))

mapping_dict = {}
for key, value in hospitals.items():
    hospital_pos = (value['position']['lat'], value['position']['lon'])
    min_dist = 1e10
    best_city = ''
    # Reset per hospital (like best_city) so a hospital without any matching
    # city cannot silently inherit the BL of the previous hospital.
    best_bl = ''
    for city_lat, city_lon, city_name, city_bl in city_rows:
        curr_dist = get_distance(hospital_pos, (city_lat, city_lon))
        if curr_dist < min_dist:
            min_dist = curr_dist
            best_city = city_name
            best_bl = city_bl
    mapping_dict[key] = {
        "stadt": best_city,
        "distance": min_dist,
        "BL": best_bl
    }

In [11]:
print(mapping_dict)

{'0': {'stadt': 'torgau', 'distance': 1.2044902731450768, 'BL': 'SN'}, '1': {'stadt': 'worms', 'distance': 8.882247469805518, 'BL': 'RP'}, '2': {'stadt': 'hof', 'distance': 26.52630913781155, 'BL': 'BY'}, '3': {'stadt': 'gera', 'distance': 2.272182457572733, 'BL': 'TH'}, '4': {'stadt': 'karlsruhe', 'distance': 4.0289634570543855, 'BL': 'BW'}, '5': {'stadt': 'donauworth', 'distance': 26.84089437254769, 'BL': 'BY'}, '6': {'stadt': 'saalfeld', 'distance': 18.874505691057763, 'BL': 'TH'}, '7': {'stadt': 'osnabruck', 'distance': 1.9866969063551654, 'BL': 'NI'}, '8': {'stadt': 'badtolz', 'distance': 19.15273309197755, 'BL': 'BY'}, '9': {'stadt': 'munchen', 'distance': 1.1424844684370898, 'BL': 'BY'}, '10': {'stadt': 'munchen', 'distance': 1.2948905427012425, 'BL': 'BY'}, '11': {'stadt': 'munchen', 'distance': 1.4203092748416233, 'BL': 'BY'}, '12': {'stadt': 'munchen', 'distance': 1.4111720725179087, 'BL': 'BY'}, '13': {'stadt': 'munchen', 'distance': 1.4283565387245956, 'BL': 'BY'}, '14': {'

## Check how many hospitals are assigned for each BL

In [13]:
# Tally how many existing hospitals were mapped to each BL.
kh_per_bl = {}
for assignment in mapping_dict.values():
    state = assignment['BL']
    kh_per_bl[state] = kh_per_bl.get(state, 0) + 1
print(kh_per_bl)

{'SN': 10, 'RP': 16, 'BY': 152, 'TH': 15, 'BW': 65, 'NI': 31, 'HE': 36, 'NW': 144, 'HH': 1, 'BB': 17, 'SH': 18, 'ST': 8, 'SL': 8, 'BE': 3, 'MV': 8, 'HB': 1}


## Get hospitals per city

In [15]:
# Tally how many existing hospitals were mapped to each city.
get_cities_kh = {}
for assignment in mapping_dict.values():
    city = assignment['stadt']
    get_cities_kh[city] = get_cities_kh.get(city, 0) + 1

# Get number of cities per BL & population (Einwohner) per BL

In [16]:
# Abbreviations of the 16 BL; this order fixes the order of all per-BL lists built below.
BL_list = ['BW','BY','BE','BB','HB','HH','HE','MV','NI','NW','RP','SL','SN','ST','SH','TH']

In [17]:
# Get mega dictionary with all relevant information per BL

# Per-BL accumulators; entry i of each list belongs to BL_list[i].
pop_list = []
overall_pop_list = []
ratio_list = []
cities = []
positions = []
nr_kh_per_city = []

for state in BL_list:
    state_rows = full_data.loc[full_data['BL'] == state]

    # Population of each city and its share of the BL total.
    state_pops = np.array(list(state_rows['population']))
    state_total = sum(state_pops)
    pop_list.append(state_pops)
    overall_pop_list.append(state_total)
    ratio_list.append(state_pops / state_total)

    # City names and their (latitude, longitude) positions.
    state_cities = np.array(list(state_rows['stadt']))
    cities.append(state_cities)
    lats = np.array(list(state_rows['latitude']))
    lons = np.array(list(state_rows['longitude']))
    positions.append(list(zip(lats, lons)))

    # Existing hospitals per city; cities without a mapped hospital get 0.
    nr_kh_per_city.append([get_cities_kh.get(name, 0) for name in state_cities])

In [18]:
# Bundle the per-BL lists into one tuple per BL, in BL_list order:
# (pop_list, ratio_list, cities, positions, nr_kh_per_city)
zpd_mega = list(zip(pop_list,ratio_list,cities,positions,nr_kh_per_city))

In [19]:
# Build one record per BL. The per-BL lists were all filled in BL_list order,
# so BL code, population total and bundled tuple are paired positionally.
# zip() gives every BL its own total (a loop that removes the consumed tuple
# and restarts at index 0 would always read overall_pop_list[0]) and leaves
# zpd_mega untouched, so this cell can be re-run.
mega_bl_dict = {}
for key, overall_pop, value in zip(BL_list, overall_pop_list, zpd_mega):
    mega_bl_dict[key] = {
        "pop_insges": overall_pop,
        "pop_list": value[0],
        "pop_ratio": value[1],
        "city": value[2],
        "nr_kh_per_city": value[4],
        "city_positions": value[3]
    }

In [34]:
print(mega_bl_dict)

{'BW': {'pop_insges': 8544640, 'pop_list': array([314002,  54802, 199742,  39164, 158397, 165383, 213920, 262795,
       117935, 216227, 257253, 132472, 464890, 729018, 285325,  11201,
       228639, 543984, 309626, 143535, 429479, 319271, 231018, 284285,
       286748, 139455, 195861, 130873, 591688, 132321, 316792, 212381,
       426158]), 'pop_ratio': array([0.03674842, 0.00641361, 0.02337629, 0.00458346, 0.01853759,
       0.01935517, 0.02503558, 0.03075554, 0.01380222, 0.02530557,
       0.03010694, 0.01550352, 0.05440721, 0.08531875, 0.03339228,
       0.00131088, 0.02675818, 0.06366377, 0.03623628, 0.01679825,
       0.05026297, 0.03736506, 0.0270366 , 0.03327056, 0.03355882,
       0.01632076, 0.02292209, 0.01531639, 0.06924669, 0.01548585,
       0.03707494, 0.02485546, 0.04987431]), 'city': array(['aalen', 'baden-baden', 'biberachanderriss', 'boblingen', 'calw',
       'emmendingen', 'freiburg', 'freiburgimbreisgau', 'freudenstadt',
       'friedrichshafen', 'goppingen', 'hei

# Generate new hospital locations based on population ratio

## Get hospital data

In [6]:
# Hospital statistics per BL; the 'KH_insgesamt' column is read below.
kh_capacity_data = pd.read_csv('../../data/data_gatherer/KH_BL_data.csv')
# source: https://www.destatis.de/DE/Themen/Gesellschaft-Umwelt/Gesundheit/Krankenhaeuser/_inhalt.html#sprg234206

In [21]:
# BL abbreviation -> full name, plus 'DL' for Germany as a whole.
# NOTE: the keys are attached as the 'BL' column of kh_capacity_data below,
# so the insertion order presumably has to mirror the row order of
# KH_BL_data.csv ('DL' first) — verify against the CSV.
BL_kuerzel = {
    'DL' : 'Deutschland',
    'BW' : 'Baden-Wurttemberg',
    'BY' : 'Bayern',
    'BE' : 'Berlin',
    'BB' : 'Brandenburg',
    'HB' : 'Bremen',
    'HH' : 'Hamburg',
    'HE' : 'Hessen',
    'MV' : 'Mecklenburg-Vorpommern',
    'NI' : 'Niedersachsen',
    'NW' : 'Nordrhein-Westfalen',
    'RP' : 'Rheinland-Pfalz',
    'SL' : 'Saarland',
    'SN' : 'Sachsen',
    'ST' : 'Sachsen-Anhalt',
    'SH' : 'Schleswig-Holstein',
    'TH' : 'Thuringen'
}

In [22]:
# Label every row with its BL abbreviation. The keys are passed as an explicit
# list: handing the dict itself to assign() is version-dependent (newer pandas
# aligns dict-like values on the index labels instead of using the keys by
# position, which yields NaN here).
# assumes the CSV rows are in BL_kuerzel order — TODO confirm
kh_capacity_data = kh_capacity_data.assign(BL=list(BL_kuerzel))

KH_insges = np.array(list(kh_capacity_data['KH_insgesamt']))

# Hospital count of each row relative to row 0
# (presumably the Germany-wide total 'DL' — verify against the CSV).
ratio_kh_per_BL = KH_insges/KH_insges[0]
kh_capacity_data = kh_capacity_data.assign(Ratio_KH_per_BL = ratio_kh_per_BL)

## See how many hospitals per BL still need to be scheduled

In [24]:
# Keep only the BL code, the total hospital count and its ratio.
df_kh = kh_capacity_data[['BL','KH_insgesamt','Ratio_KH_per_BL']]

In [25]:
# BL code -> total number of hospitals; the Germany-wide 'DL' entry is dropped.
bl = np.array(list(df_kh['BL']))
dictionary = {state: total for state, total in zip(bl, KH_insges)}
dictionary.pop('DL')
print(dictionary)

{'BW': 265, 'BY': 354, 'BE': 83, 'BB': 57, 'HB': 14, 'HH': 58, 'HE': 159, 'MV': 39, 'NI': 180, 'NW': 344, 'RP': 87, 'SL': 23, 'SN': 77, 'ST': 48, 'SH': 111, 'TH': 43}


In [26]:
# Hospitals still to be placed per BL: total count minus the hospitals
# already mapped to that BL (BLs without any mapped hospital keep the total).
missing_kh_per_bl = {
    state: (total - kh_per_bl[state]) if state in kh_per_bl else total
    for state, total in dictionary.items()
}
print(missing_kh_per_bl)

{'BW': 200, 'BY': 202, 'BE': 80, 'BB': 40, 'HB': 13, 'HH': 57, 'HE': 123, 'MV': 31, 'NI': 149, 'NW': 200, 'RP': 71, 'SL': 15, 'SN': 67, 'ST': 40, 'SH': 93, 'TH': 28}


# Core: Generate new hospitals so that each city's hospital count becomes proportional to its population

In [29]:
# Algorithm (per BL, repeated once for every hospital still missing):
# (1) compute hospitals per inhabitant for every city/LK
# (2) pick the city/LK with the fewest hospitals per inhabitant
# (3) place a hospital at a random point in a bounding box around that city
# (4) increment the hospital count of that city/LK
# (5) re-evaluate and loop until all hospitals are assigned


import random
new_hospitals = []

# fixed seed for reproducibility
random.seed(42)

for state, n_missing in missing_kh_per_bl.items():
    for _ in range(n_missing):
        state_info = mega_bl_dict[state]
        kh_counts = state_info['nr_kh_per_city']
        # hospitals per inhabitant; recomputed because kh_counts changes below
        density = kh_counts / state_info['pop_list']
        target = np.argmin(density)
        # coordinates of the city that should get the new hospital
        city_lat, city_lon = state_info['city_positions'][target]
        # shift each coordinate by a random offset from (-0.1, 0.1]
        new_lat = city_lat + 0.1 - random.random()/5
        new_lon = city_lon + 0.1 - random.random()/5
        new_hospitals.append((new_lat, new_lon))
        kh_counts[target] += 1

In [31]:
# (lat, lon) tuples of the hospitals that already exist
existing_hospitals = [
    (entry['position']['lat'], entry['position']['lon'])
    for entry in hospitals.values()
]

In [292]:
# merge into one big hospital list of (lat, lon) tuples:
# newly generated hospitals first, then the existing ones
all_hospitals = new_hospitals + existing_hospitals

In [295]:
# Dump the list to file as the text representation of [(lat, lon), ...].
# Coordinates are converted to built-in floats first: the generated positions
# are NumPy scalars, and NumPy >= 2 would otherwise write them as
# "np.float64(...)" instead of plain numbers.
with open('../../data/data_gatherer/all_hospitals.txt', 'w') as file:
    file.write(str([(float(lat), float(lon)) for lat, lon in all_hospitals]))