In [77]:
import pandas as pd
import plotly.express as px
import numpy as np

In [78]:
df = pd.read_csv("../data/raw/hospitals_spain.csv")
df.columns

Index(['OBJECTID', 'CODCNH', 'NOMBRE', 'DIRECCION', 'TELEFONO', 'TELEFONO2',
       'TELEFAX', 'CODMU', 'MUNICIPIOS', 'CODPROV', 'PROVINCIAS', 'CODAUTO',
       'COMUNIDADES', 'CODPOSTAL', 'NCAMAS', 'CODFI', 'FINALIDAD_ASISITENCIAL',
       'CODPAT', 'DEPENDENCIA_PATRIMONIAL', 'CODFU', 'DEPENDENCIA_FUNCIONAL',
       'ACREDOCENT', 'ESCOMPLE', 'FORCOMPLE', 'CODIDCOM', 'ALTA', 'CERRADO',
       'CAPITAL', 'CIERREFECH', 'CONCIERTO', 'EMAIL', 'TAC', 'RM', 'GAM',
       'HEM', 'ASD', 'LIT', 'BCO', 'ALI', 'SPECT', 'PET', 'MAMOS', 'DO',
       'DIAL', 'X', 'Y', 'CalidadGeocodificacion'],
      dtype='object')

The column names in the hospitals dataset are:

- **Identification**
    - `OBJECTID`: Object identifier
    - `CODCNH`: Hospital code
    - `NOMBRE`: Hospital name
    - `CODIDCOM`: Community identifier code

- **Contact Information**
    - `DIRECCION`: Address
    - `TELEFONO`: Phone
    - `TELEFONO2`: Secondary phone
    - `TELEFAX`: Fax
    - `EMAIL`: Email address
    - `CODPOSTAL`: Postal code

- **Location**
    - `CODMU`: Municipality code
    - `MUNICIPIOS`: Municipality name 
    - `CODPROV`: Province code
    - `PROVINCIAS`: Province name
    - `CODAUTO`: Autonomous community code 
    - `COMUNIDADES`: Autonomous community name
    - `X`: Longitude coordinate
    - `Y`: Latitude coordinate
    - `CalidadGeocodificacion`: Geocoding quality
    - `CAPITAL`: Capital city indicator

- **Hospital Information**
    - `NCAMAS`: Number of beds
    - `CODFI`: Healthcare purpose code
    - `FINALIDAD_ASISITENCIAL`: Healthcare purpose description
    - `CODPAT`: Asset ownership code
    - `DEPENDENCIA_PATRIMONIAL`: Asset ownership description
    - `CODFU`: Functional dependency code
    - `DEPENDENCIA_FUNCIONAL`: Functional dependency description

- **Status & Certifications**
    - `ACREDOCENT`: Teaching accreditation
    - `ESCOMPLE`: Complementary center
    - `FORCOMPLE`: Complementary form
    - `ALTA`: Active status
    - `CERRADO`: Closed status
    - `CIERREFECH`: Closing date
    - `CONCIERTO`: Agreement status

- **Equipment & Services**
    - `TAC`: CT scanner
    - `RM`: MRI
    - `GAM`: Gamma camera
    - `HEM`: Hemodynamics
    - `ASD`: Assistant diagnostic systems
    - `LIT`: Lithotripsy
    - `BCO`: Blood bank
    - `ALI`: Other
    - `SPECT`: SPECT scanner
    - `PET`: PET scanner
    - `MAMOS`: Mammography
    - `DO`: Other diagnostic
    - `DIAL`: Dialysis

In [79]:
df.columns

Index(['OBJECTID', 'CODCNH', 'NOMBRE', 'DIRECCION', 'TELEFONO', 'TELEFONO2',
       'TELEFAX', 'CODMU', 'MUNICIPIOS', 'CODPROV', 'PROVINCIAS', 'CODAUTO',
       'COMUNIDADES', 'CODPOSTAL', 'NCAMAS', 'CODFI', 'FINALIDAD_ASISITENCIAL',
       'CODPAT', 'DEPENDENCIA_PATRIMONIAL', 'CODFU', 'DEPENDENCIA_FUNCIONAL',
       'ACREDOCENT', 'ESCOMPLE', 'FORCOMPLE', 'CODIDCOM', 'ALTA', 'CERRADO',
       'CAPITAL', 'CIERREFECH', 'CONCIERTO', 'EMAIL', 'TAC', 'RM', 'GAM',
       'HEM', 'ASD', 'LIT', 'BCO', 'ALI', 'SPECT', 'PET', 'MAMOS', 'DO',
       'DIAL', 'X', 'Y', 'CalidadGeocodificacion'],
      dtype='object')

In [80]:
df.rename(columns={"NOMBRE": "hospital_name", 
                   "CODMU": "cmun",
                   "MUNICIPIOS": "municipality", 
                   "NCAMAS": "n_beds",
                   "FINALIDAD_ASISITENCIAL": "type", 
                   "DEPENDENCIA_PATRIMONIAL": "management",  
                   "X": "latitude", 
                   "Y": "longitude"}, inplace=True)


In [81]:
hospital_columns = [
    'hospital_name',
    "municipality",
    'cmun',
    'n_beds',
    'type',
    'management',
    'latitude',
    'longitude'
]
df = df[hospital_columns]


In [82]:
df.sample(10)

Unnamed: 0,hospital_name,municipality,cmun,n_beds,type,management,latitude,longitude
611,HOSPITAL COMARCAL DEL NOROESTE,Caravaca de la Cruz,300158,103,GENERAL,SEGURIDAD SOCIAL,-1.867384,38.10379
663,"HOSPITAL BEGOÑA DE GIJÓN, S.L.",Gijón,330241,41,GENERAL,PRIVADO NO BENÉFICO,-5.647629,43.53523
833,HOSPITAL PROVINCIAL NUESTRA SEÑORA DE LA MISER...,Toledo,451685,0,GENERAL,DIPUTACIÓN O CABILDO,-4.014511,39.861311
632,CENTRO SAN FRANCISCO JAVIER,Pamplona/Iruña,312016,28,PSIQUIÁTRICO,COMUNIDAD AUTÓNOMA,-1.624813,42.833333
461,FUNDACIÓN HOSPITAL CALAHORRA,Calahorra,260368,80,GENERAL,SEGURIDAD SOCIAL,-1.98034,42.313527
608,HOSPITAL MESA DEL CASTILLO,Murcia,300308,76,MÉDICO-QUIRÚRGICO,PRIVADO NO BENÉFICO,-1.129509,37.968679
73,HOSPITAL PARQUE VEGAS ALTAS,Don Benito,60444,21,GENERAL,PRIVADO NO BENÉFICO,-5.856511,38.963341
390,ORGANIZACIÓN SANITARIA INTEGRADA BIDASOA (OSI ...,Hondarribia,200369,101,GENERAL,COMUNIDAD AUTÓNOMA,-1.814776,43.342131
532,CLINICA CEMTRO,Madrid,280796,94,MÉDICO-QUIRÚRGICO,PRIVADO NO BENÉFICO,-3.716977,40.493514
639,HOSPITAL GARCÍA ORCOYEN,Estella-Lizarra,310977,93,GENERAL,COMUNIDAD AUTÓNOMA,-2.038905,42.683316


In [83]:
df['hospital_name'] =df['hospital_name'].str.title()
df['municipality'] = df['municipality'].str.lower()
df['type'] = df['type'].str.capitalize()
df['management'] = df['management'].str.title()

In [84]:
df["type"].unique()

array(['Psiquiátrico', 'Médico-quirúrgico', 'General',
       'Geriatría y/o larga estancia', 'Rehabilitación psicofísica',
       'Materno-infantil', 'Quirúrgico',
       'Traumatología y/o rehabilitación', 'Otra finalidad', 'Infantil',
       'Otros monográficos', 'Oftálmico u orl', 'Oncológico', 'Maternal'],
      dtype=object)

In [85]:
translation_dict = {
    'Psiquiátrico': 'Psychiatric',
    'Médico-quirúrgico': 'Medical-surgical',
    'General': 'General',
    'Geriatría y/o larga estancia': 'Geriatrics and/or long-term care',
    'Rehabilitación psicofísica': 'Psychophysical rehabilitation',
    'Materno-infantil': 'Maternal-infant',
    'Quirúrgico': 'Surgical',
    'Traumatología y/o rehabilitación': 'Traumatology and/or rehabilitation',
    'Otra finalidad': 'Other purpose',
    'Infantil': 'Pediatric',
    'Otros monográficos': 'Other specialized',
    'Oftálmico u orl': 'Ophthalmologic or ENT',
    'Oncológico': 'Oncological',
    'Maternal': 'Maternity'
}


In [86]:
df['type'] = df['type'].map(translation_dict)

In [87]:
df["management"].unique()

array(['Comunidad Autónoma', 'Privado No Benéfico', 'Seguridad Social',
       'Diputación O Cabildo', 'Otro Privado Benéfico',
       'Ministerio De Interior', 'Entidades Públicas',
       'Privado-Benéfico (Iglesia)', 'Municipio', 'Matep',
       'Privado-Benéfico (Cruz Roja)', 'Otra Dependencia Patrimonial',
       'Ministerio De Defensa'], dtype=object)

In [88]:
management_translation_dict = {
    'Comunidad Autónoma': 'Autonomous Community',
    'Privado No Benéfico': 'Private Non-Profit',
    'Seguridad Social': 'Social Security',
    'Diputación O Cabildo': 'Provincial Council or Island Council',
    'Otro Privado Benéfico': 'Other Private Non-Profit',
    'Ministerio De Interior': 'Ministry of Interior',
    'Entidades Públicas': 'Public Entities',
    'Privado-Benéfico (Iglesia)': 'Private Non-Profit (Church)',
    'Municipio': 'Municipality',
    'Matep': 'Matep',  # Assuming Matep is a specific term or acronym.
    'Privado-Benéfico (Cruz Roja)': 'Private Non-Profit (Red Cross)',
    'Otra Dependencia Patrimonial': 'Other Patrimonial Dependency',
    'Ministerio De Defensa': 'Ministry of Defense'
}


In [89]:
df['management'] = df['management'].map(management_translation_dict)

In [90]:
df.head()

Unnamed: 0,hospital_name,municipality,cmun,n_beds,type,management,latitude,longitude
0,Red De Salud Mental De Araba (Hospital Psiquiá...,vitoria-gasteiz,10590,207,Psychiatric,Autonomous Community,-2.678612,42.835656
1,Hospital San José,vitoria-gasteiz,10590,63,Medical-surgical,Private Non-Profit,-2.67664,42.849661
2,Hospital Quirónsalud Vitoria,vitoria-gasteiz,10590,26,General,Private Non-Profit,-2.668035,42.849761
3,"Hospital De Cuidados San Onofre, S.L. (Hospita...",vitoria-gasteiz,10590,82,Geriatrics and/or long-term care,Private Non-Profit,-2.680386,42.837582
4,Hospital De Leza,laguardia,10318,63,General,Autonomous Community,-2.638635,42.575989


In [91]:
df

Unnamed: 0,hospital_name,municipality,cmun,n_beds,type,management,latitude,longitude
0,Red De Salud Mental De Araba (Hospital Psiquiá...,vitoria-gasteiz,10590,207,Psychiatric,Autonomous Community,-2.678612,42.835656
1,Hospital San José,vitoria-gasteiz,10590,63,Medical-surgical,Private Non-Profit,-2.676640,42.849661
2,Hospital Quirónsalud Vitoria,vitoria-gasteiz,10590,26,General,Private Non-Profit,-2.668035,42.849761
3,"Hospital De Cuidados San Onofre, S.L. (Hospita...",vitoria-gasteiz,10590,82,Geriatrics and/or long-term care,Private Non-Profit,-2.680386,42.837582
4,Hospital De Leza,laguardia,10318,63,General,Autonomous Community,-2.638635,42.575989
...,...,...,...,...,...,...,...,...
919,Hospital De Rehabilitacion Psiquiatrica Prisma,zaragoza,502973,50,Psychiatric,Private Non-Profit,-0.841290,41.711449
920,Centro Sanitario Cinco Villas,ejea de los caballeros,500956,32,Other purpose,Autonomous Community,-1.125290,42.126721
921,Hospital Viamed Montecanal,zaragoza,502973,68,General,Private Non-Profit,-0.948738,41.639042
922,Hospital Universitario De Ceuta,ceuta,510013,252,General,Social Security,-5.341853,35.880332


In [92]:
df["cmun"] = df["cmun"].apply(lambda x: int(str(x)[:-1]) if len(str(x)) > 5 else x)


In [93]:
df

Unnamed: 0,hospital_name,municipality,cmun,n_beds,type,management,latitude,longitude
0,Red De Salud Mental De Araba (Hospital Psiquiá...,vitoria-gasteiz,10590,207,Psychiatric,Autonomous Community,-2.678612,42.835656
1,Hospital San José,vitoria-gasteiz,10590,63,Medical-surgical,Private Non-Profit,-2.676640,42.849661
2,Hospital Quirónsalud Vitoria,vitoria-gasteiz,10590,26,General,Private Non-Profit,-2.668035,42.849761
3,"Hospital De Cuidados San Onofre, S.L. (Hospita...",vitoria-gasteiz,10590,82,Geriatrics and/or long-term care,Private Non-Profit,-2.680386,42.837582
4,Hospital De Leza,laguardia,10318,63,General,Autonomous Community,-2.638635,42.575989
...,...,...,...,...,...,...,...,...
919,Hospital De Rehabilitacion Psiquiatrica Prisma,zaragoza,50297,50,Psychiatric,Private Non-Profit,-0.841290,41.711449
920,Centro Sanitario Cinco Villas,ejea de los caballeros,50095,32,Other purpose,Autonomous Community,-1.125290,42.126721
921,Hospital Viamed Montecanal,zaragoza,50297,68,General,Private Non-Profit,-0.948738,41.639042
922,Hospital Universitario De Ceuta,ceuta,51001,252,General,Social Security,-5.341853,35.880332


In [94]:
df.isna().sum()

hospital_name    0
municipality     0
cmun             0
n_beds           0
type             0
management       0
latitude         0
longitude        0
dtype: int64

In [95]:
df.to_csv("../data/processed/filtered_hospitals.csv", index=False)