In [47]:
import pandas as pd
import numpy as np
import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler, MinMaxScaler
pd.set_option('display.max_columns', None)
# eye candy plots
plt.style.use('https://github.com/dhaitz/matplotlib-stylesheets/raw/master/pitayasmoothie-light.mplstyle')

In [48]:
df = pd.read_csv("../../data/processed/2_aggregated_pueblos.csv")
df.shape

(7840, 41)

In [49]:
df['province'].unique()
df[df.towns_in_vicinity.isnull()]

df = df.dropna(subset=['towns_in_vicinity'])


In [50]:
province_to_autonomous_community = {
    'Álava': 'País Vasco',
    'Albacete': 'Castilla-La Mancha',
    'Alicante': 'Comunidad Valenciana',
    'Almería': 'Andalucía',
    'Ávila': 'Castilla y León',
    'Badajoz': 'Extremadura',
    'Illes Balears': 'Illes Balears',
    'Barcelona': 'Cataluña',
    'Burgos': 'Castilla y León',
    'Cáceres': 'Extremadura',
    'Cádiz': 'Andalucía',
    'Castellón': 'Comunidad Valenciana',
    'Ciudad Real': 'Castilla-La Mancha',
    'Córdoba': 'Andalucía',
    'A Coruña': 'Galicia',
    'Cuenca': 'Castilla-La Mancha',
    'Girona': 'Cataluña',
    'Granada': 'Andalucía',
    'Guadalajara': 'Castilla-La Mancha',
    'Gipuzkoa': 'País Vasco',
    'Huelva': 'Andalucía',
    'Huesca': 'Aragón',
    'Jaén': 'Andalucía',
    'León': 'Castilla y León',
    'Lleida': 'Cataluña',
    'La Rioja': 'La Rioja',
    'Lugo': 'Galicia',
    'Madrid': 'Comunidad de Madrid',
    'Málaga': 'Andalucía',
    'Murcia': 'Murcia',
    'Navarra': 'Navarra',
    'Ourense': 'Galicia',
    'Asturias': 'Asturias',
    'Palencia': 'Castilla y León',
    'Pontevedra': 'Galicia',
    'Salamanca': 'Castilla y León',
    'Santa Cruz de Tenerife': 'Canarias',
    'Cantabria': 'Cantabria',
    'Segovia': 'Castilla y León',
    'Sevilla': 'Andalucía',
    'Soria': 'Castilla y León',
    'Tarragona': 'Cataluña',
    'Teruel': 'Aragón',
    'Toledo': 'Castilla-La Mancha',
    'Valencia': 'Comunidad Valenciana',
    'Valladolid': 'Castilla y León',
    'Bizkaia': 'País Vasco',
    'Zamora': 'Castilla y León',
    'Zaragoza': 'Aragón'
}

In [51]:
df['autonomous_community'] = df['province'].map(province_to_autonomous_community)

In [52]:
df = df.drop(columns=[col for col in df.columns if col.startswith('n_')])
df = df.sort_values(['latitude', 'longitude'], ascending=False)

## Scoring Hospitals

In [53]:
df.reset_index(drop=True, inplace=True)
df

Unnamed: 0,cmun,province,municipality,altitude,longitude,latitude,0-17,18-24,25-34,35-54,55+,male,female,total_population,nearest_airport,closest_distance_airport,nearest_train_station,closest_distance_train,closest_distance_hospital,nearest_hospital,closest_distance_school,nearest_school,koppen_climate,description,number_of_houses,town_size,connectivity_score,connectivity_category,lat_bin,lon_bin,towns_in_vicinity,economy_score,economy_score_area,autonomous_community
0,48067,Bizkaia,Muxika,18.95764,-2.692941,43.28967,302,96,101,463,584,810,736,1546,Aeropuerto de Bilbao,17.538797,Vitoria/Gasteiz,49.813235,12.530042,Hospital Galdakao-Usansolo,0.288898,Urretxindorra Hlhi,Cfb,Oceanic Climate - Mild temperatures and consis...,784.0,Mid,0.7180,Good,15,13,"1002, 1003, 1004, 1010, 1018, 1036, 1042, 1054...",4.10,16.823071,País Vasco
1,15064,A Coruña,Paderne,151.44100,-8.176355,43.28677,265,119,160,648,1199,1173,1218,2391,Aeropuerto de A Coruña,16.418949,Betanzos-Cidade,3.219046,18.060709,Hospital Maritimo De Oza,2.891490,CPR Plurilingüe Nuestra Señora del Carmen,Csb,"Mediterranean Warm Summer - Cooler, wet winter...",1277.0,Mid,0.8460,Excellent,15,2,"15001, 15003, 15006, 15009, 15010, 15021, 1502...",5.90,14.034524,Galicia
2,15039,A Coruña,Irixoa,362.87660,-8.058925,43.28474,119,61,95,364,694,666,667,1333,Aeropuerto de A Coruña,25.929988,Oza Dos Rios,12.729931,24.662261,Hospital Juan Cardona,2.337612,CPI Plurilingüe Virxe da Cela,Cfb,Oceanic Climate - Mild temperatures and consis...,788.0,Mid,0.6420,Good,15,2,"15001, 15003, 15009, 15010, 15021, 15024, 1502...",0.00,8.933750,Galicia
3,48062,Bizkaia,Mendata,107.88860,-2.633333,43.28333,58,28,32,113,151,201,181,382,Aeropuerto de Bilbao,22.420417,Vitoria/Gasteiz,49.185946,16.396774,Hospital Galdakao-Usansolo,3.845322,Munitibar Hlhi,Cfb,Oceanic Climate - Mild temperatures and consis...,213.0,Small,0.7410,Good,15,13,"1002, 1003, 1018, 1036, 1042, 1063, 20003, 200...",0.00,15.153360,País Vasco
4,48905,Bizkaia,Zamudio,44.55854,-2.866667,43.28333,528,226,291,1016,1251,1650,1662,3312,Aeropuerto de Bilbao,3.998229,Vitoria/Gasteiz,51.562825,4.828458,Imq Clínica Virgen Blanca,0.272147,Zamudio Hlhi,Cfb,Oceanic Climate - Mild temperatures and consis...,1497.0,Big,0.9425,Excellent,15,12,"1002, 1003, 1004, 1010, 1018, 1036, 1042, 1054...",35.25,31.005961,País Vasco
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7611,11007,Cádiz,Barbate,15.97273,-5.918903,36.19282,3720,1797,2572,6793,7843,11326,11399,22725,Helipuerto de Algeciras,43.604312,Los Barrios,41.263907,28.159006,Hospital Viamed Novo Sancti Petri,0.084531,C.E.I.P. Estrella del Mar,Csa,"Mediterranean Hot Summer - Hot, dry summers an...",11870.0,Big,0.9915,Excellent,1,6,"11001, 11014, 11015, 11025, 11031, 11035, 1103...",69.35,98.668750,Andalucía
7612,11022,Cádiz,"Línea de la Concepción, La",10.40490,-5.348256,36.16118,11924,4933,8008,19871,20251,32036,32951,64987,Helipuerto de Algeciras,9.068408,San Roque-La Linea,9.561508,1.236263,Hospital De La Línea De La Concepción,0.343316,C.D.P. Providencia del Sagrado Corazón,Csa,"Mediterranean Hot Summer - Hot, dry summers an...",29253.0,Big,0.9830,Excellent,1,7,"11004, 11013, 11021, 11033, 11035, 29041, 2905...",167.90,160.098442,Andalucía
7613,11004,Cádiz,Algeciras,22.40544,-5.447698,36.12978,24311,10234,13808,38778,37916,61554,63493,125047,Helipuerto de Algeciras,0.632320,Algeciras,0.383585,2.095711,Hospital Punta De Europa,0.187878,I.E.S. Levante,Csa,"Mediterranean Hot Summer - Hot, dry summers an...",54701.0,Big,0.9525,Excellent,1,7,"11013, 11021, 11022, 11033, 11035, 29041, 2906...",383.25,248.112500,Andalucía
7614,11035,Cádiz,Tarifa,14.66109,-5.602954,36.01271,2983,1261,2021,6330,6062,9325,9332,18657,Helipuerto de Algeciras,19.477945,Algeciras,18.758218,17.964609,Hospital Punta De Europa,0.239704,C.E.I.P. Guzmán El Bueno,Csa,"Mediterranean Hot Summer - Hot, dry summers an...",12100.0,Big,0.9335,Excellent,1,7,"11004, 11007, 11013, 11022, 11033, 51001",92.60,134.566667,Andalucía


In [54]:
# first step

# first step

# get summary statistics to decide on bin thresholds
print(df['closest_distance_hospital'].describe())
# define bins for the hospital distance (adjust thresholds as needed)
bins = [df['closest_distance_hospital'].min() - 0.001, 5, 10, 15, 20, 25, 30, df['closest_distance_hospital'].max()]
labels = ['<5', '5-10', '10-15', '15-20', '20-25', '25-30', '30+']

# bin the values into categories
df['closest_distance_hospital_bin'] = pd.cut(df['closest_distance_hospital'], bins=bins, labels=labels, include_lowest=True)
print(df['closest_distance_hospital_bin'].value_counts().sort_index())

# apply score based on the bins (10 is best)
score_mapping = {
    '<5': 10,
    '5-10': 8,
    '10-15': 6,
    '15-20': 4,
    '20-25': 3,
    '25-30': 2,
    '30+': 1
}

df['hospital_distance_score'] = df['closest_distance_hospital_bin'].map(score_mapping)

# second step
def combine_scores(row):
    base_score = row["hospital_distance_score"]
    vicinity = row["towns_in_vicinity"]
    if vicinity:
        neighbor_scores = df.loc[df["cmun"].isin([vicinity]), "hospital_distance_score"]
        if not neighbor_scores.empty:
            avg_neighbor_score = neighbor_scores.mean()
        else:
            avg_neighbor_score = 0
        return (base_score + avg_neighbor_score) / 2
    else:
        return base_score

df["hospital_score"] = df.apply(combine_scores, axis=1)

count    7616.000000
mean       25.076989
std        15.696044
min         0.010411
25%        12.547082
50%        22.991133
75%        34.921442
max        95.093729
Name: closest_distance_hospital, dtype: float64
closest_distance_hospital_bin
<5        554
5-10      832
10-15     963
15-20     932
20-25     867
25-30     814
30+      2654
Name: count, dtype: int64


In [55]:
df

Unnamed: 0,cmun,province,municipality,altitude,longitude,latitude,0-17,18-24,25-34,35-54,55+,male,female,total_population,nearest_airport,closest_distance_airport,nearest_train_station,closest_distance_train,closest_distance_hospital,nearest_hospital,closest_distance_school,nearest_school,koppen_climate,description,number_of_houses,town_size,connectivity_score,connectivity_category,lat_bin,lon_bin,towns_in_vicinity,economy_score,economy_score_area,autonomous_community,closest_distance_hospital_bin,hospital_distance_score,hospital_score
0,48067,Bizkaia,Muxika,18.95764,-2.692941,43.28967,302,96,101,463,584,810,736,1546,Aeropuerto de Bilbao,17.538797,Vitoria/Gasteiz,49.813235,12.530042,Hospital Galdakao-Usansolo,0.288898,Urretxindorra Hlhi,Cfb,Oceanic Climate - Mild temperatures and consis...,784.0,Mid,0.7180,Good,15,13,"1002, 1003, 1004, 1010, 1018, 1036, 1042, 1054...",4.10,16.823071,País Vasco,10-15,6,3.0
1,15064,A Coruña,Paderne,151.44100,-8.176355,43.28677,265,119,160,648,1199,1173,1218,2391,Aeropuerto de A Coruña,16.418949,Betanzos-Cidade,3.219046,18.060709,Hospital Maritimo De Oza,2.891490,CPR Plurilingüe Nuestra Señora del Carmen,Csb,"Mediterranean Warm Summer - Cooler, wet winter...",1277.0,Mid,0.8460,Excellent,15,2,"15001, 15003, 15006, 15009, 15010, 15021, 1502...",5.90,14.034524,Galicia,15-20,4,2.0
2,15039,A Coruña,Irixoa,362.87660,-8.058925,43.28474,119,61,95,364,694,666,667,1333,Aeropuerto de A Coruña,25.929988,Oza Dos Rios,12.729931,24.662261,Hospital Juan Cardona,2.337612,CPI Plurilingüe Virxe da Cela,Cfb,Oceanic Climate - Mild temperatures and consis...,788.0,Mid,0.6420,Good,15,2,"15001, 15003, 15009, 15010, 15021, 15024, 1502...",0.00,8.933750,Galicia,20-25,3,1.5
3,48062,Bizkaia,Mendata,107.88860,-2.633333,43.28333,58,28,32,113,151,201,181,382,Aeropuerto de Bilbao,22.420417,Vitoria/Gasteiz,49.185946,16.396774,Hospital Galdakao-Usansolo,3.845322,Munitibar Hlhi,Cfb,Oceanic Climate - Mild temperatures and consis...,213.0,Small,0.7410,Good,15,13,"1002, 1003, 1018, 1036, 1042, 1063, 20003, 200...",0.00,15.153360,País Vasco,15-20,4,2.0
4,48905,Bizkaia,Zamudio,44.55854,-2.866667,43.28333,528,226,291,1016,1251,1650,1662,3312,Aeropuerto de Bilbao,3.998229,Vitoria/Gasteiz,51.562825,4.828458,Imq Clínica Virgen Blanca,0.272147,Zamudio Hlhi,Cfb,Oceanic Climate - Mild temperatures and consis...,1497.0,Big,0.9425,Excellent,15,12,"1002, 1003, 1004, 1010, 1018, 1036, 1042, 1054...",35.25,31.005961,País Vasco,<5,10,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7611,11007,Cádiz,Barbate,15.97273,-5.918903,36.19282,3720,1797,2572,6793,7843,11326,11399,22725,Helipuerto de Algeciras,43.604312,Los Barrios,41.263907,28.159006,Hospital Viamed Novo Sancti Petri,0.084531,C.E.I.P. Estrella del Mar,Csa,"Mediterranean Hot Summer - Hot, dry summers an...",11870.0,Big,0.9915,Excellent,1,6,"11001, 11014, 11015, 11025, 11031, 11035, 1103...",69.35,98.668750,Andalucía,25-30,2,1.0
7612,11022,Cádiz,"Línea de la Concepción, La",10.40490,-5.348256,36.16118,11924,4933,8008,19871,20251,32036,32951,64987,Helipuerto de Algeciras,9.068408,San Roque-La Linea,9.561508,1.236263,Hospital De La Línea De La Concepción,0.343316,C.D.P. Providencia del Sagrado Corazón,Csa,"Mediterranean Hot Summer - Hot, dry summers an...",29253.0,Big,0.9830,Excellent,1,7,"11004, 11013, 11021, 11033, 11035, 29041, 2905...",167.90,160.098442,Andalucía,<5,10,5.0
7613,11004,Cádiz,Algeciras,22.40544,-5.447698,36.12978,24311,10234,13808,38778,37916,61554,63493,125047,Helipuerto de Algeciras,0.632320,Algeciras,0.383585,2.095711,Hospital Punta De Europa,0.187878,I.E.S. Levante,Csa,"Mediterranean Hot Summer - Hot, dry summers an...",54701.0,Big,0.9525,Excellent,1,7,"11013, 11021, 11022, 11033, 11035, 29041, 2906...",383.25,248.112500,Andalucía,<5,10,5.0
7614,11035,Cádiz,Tarifa,14.66109,-5.602954,36.01271,2983,1261,2021,6330,6062,9325,9332,18657,Helipuerto de Algeciras,19.477945,Algeciras,18.758218,17.964609,Hospital Punta De Europa,0.239704,C.E.I.P. Guzmán El Bueno,Csa,"Mediterranean Hot Summer - Hot, dry summers an...",12100.0,Big,0.9335,Excellent,1,7,"11004, 11007, 11013, 11022, 11033, 51001",92.60,134.566667,Andalucía,15-20,4,2.0
