In [1]:
import pandas as pd
import plotly.express as px
import numpy as np

In [2]:
df = pd.read_csv("../data/raw/hospitals_spain.csv")
df.columns

Index(['OBJECTID', 'CODCNH', 'NOMBRE', 'DIRECCION', 'TELEFONO', 'TELEFONO2',
       'TELEFAX', 'CODMU', 'MUNICIPIOS', 'CODPROV', 'PROVINCIAS', 'CODAUTO',
       'COMUNIDADES', 'CODPOSTAL', 'NCAMAS', 'CODFI', 'FINALIDAD_ASISITENCIAL',
       'CODPAT', 'DEPENDENCIA_PATRIMONIAL', 'CODFU', 'DEPENDENCIA_FUNCIONAL',
       'ACREDOCENT', 'ESCOMPLE', 'FORCOMPLE', 'CODIDCOM', 'ALTA', 'CERRADO',
       'CAPITAL', 'CIERREFECH', 'CONCIERTO', 'EMAIL', 'TAC', 'RM', 'GAM',
       'HEM', 'ASD', 'LIT', 'BCO', 'ALI', 'SPECT', 'PET', 'MAMOS', 'DO',
       'DIAL', 'X', 'Y', 'CalidadGeocodificacion'],
      dtype='object')

The column names in the hospitals dataset are:

- **Identification**
    - `OBJECTID`: Object identifier
    - `CODCNH`: Hospital code
    - `NOMBRE`: Hospital name
    - `CODIDCOM`: Community identifier code

- **Contact Information**
    - `DIRECCION`: Address
    - `TELEFONO`: Phone
    - `TELEFONO2`: Secondary phone
    - `TELEFAX`: Fax
    - `EMAIL`: Email address
    - `CODPOSTAL`: Postal code

- **Location**
    - `CODMU`: Municipality code
    - `MUNICIPIOS`: Municipality name 
    - `CODPROV`: Province code
    - `PROVINCIAS`: Province name
    - `CODAUTO`: Autonomous community code 
    - `COMUNIDADES`: Autonomous community name
    - `X`: Longitude coordinate
    - `Y`: Latitude coordinate
    - `CalidadGeocodificacion`: Geocoding quality
    - `CAPITAL`: Capital city indicator

- **Hospital Information**
    - `NCAMAS`: Number of beds
    - `CODFI`: Healthcare purpose code
    - `FINALIDAD_ASISITENCIAL`: Healthcare purpose description
    - `CODPAT`: Asset ownership code
    - `DEPENDENCIA_PATRIMONIAL`: Asset ownership description
    - `CODFU`: Functional dependency code
    - `DEPENDENCIA_FUNCIONAL`: Functional dependency description

- **Status & Certifications**
    - `ACREDOCENT`: Teaching accreditation
    - `ESCOMPLE`: Complementary center
    - `FORCOMPLE`: Complementary form
    - `ALTA`: Active status
    - `CERRADO`: Closed status
    - `CIERREFECH`: Closing date
    - `CONCIERTO`: Agreement status

- **Equipment & Services**
    - `TAC`: CT scanner
    - `RM`: MRI
    - `GAM`: Gamma camera
    - `HEM`: Hemodynamics
    - `ASD`: Assistant diagnostic systems
    - `LIT`: Lithotripsy
    - `BCO`: Blood bank
    - `ALI`: Other
    - `SPECT`: SPECT scanner
    - `PET`: PET scanner
    - `MAMOS`: Mammography
    - `DO`: Other diagnostic
    - `DIAL`: Dialysis

In [3]:
df.columns

Index(['OBJECTID', 'CODCNH', 'NOMBRE', 'DIRECCION', 'TELEFONO', 'TELEFONO2',
       'TELEFAX', 'CODMU', 'MUNICIPIOS', 'CODPROV', 'PROVINCIAS', 'CODAUTO',
       'COMUNIDADES', 'CODPOSTAL', 'NCAMAS', 'CODFI', 'FINALIDAD_ASISITENCIAL',
       'CODPAT', 'DEPENDENCIA_PATRIMONIAL', 'CODFU', 'DEPENDENCIA_FUNCIONAL',
       'ACREDOCENT', 'ESCOMPLE', 'FORCOMPLE', 'CODIDCOM', 'ALTA', 'CERRADO',
       'CAPITAL', 'CIERREFECH', 'CONCIERTO', 'EMAIL', 'TAC', 'RM', 'GAM',
       'HEM', 'ASD', 'LIT', 'BCO', 'ALI', 'SPECT', 'PET', 'MAMOS', 'DO',
       'DIAL', 'X', 'Y', 'CalidadGeocodificacion'],
      dtype='object')

In [4]:
df.rename(columns={"NOMBRE": "hospital_name", 
                   "CODMU": "cmun",
                   "MUNICIPIOS": "municipality", 
                   "NCAMAS": "n_beds",
                   "FINALIDAD_ASISITENCIAL": "type", 
                   "DEPENDENCIA_PATRIMONIAL": "management",  
                   "X": "longitude", 
                   "Y": "latitude"}, inplace=True)


In [5]:
hospital_columns = [
    'hospital_name',
    "municipality",
    'cmun',
    'n_beds',
    'type',
    'management',
    'latitude',
    'longitude'
]
df = df[hospital_columns]


In [6]:
df.sample(10)

Unnamed: 0,hospital_name,municipality,cmun,n_beds,type,management,latitude,longitude
461,FUNDACIÓN HOSPITAL CALAHORRA,Calahorra,260368,80,GENERAL,SEGURIDAD SOCIAL,42.313527,-1.98034
800,COMPLEJO ASISTENCIAL DE SORIA,Soria,421736,319,GENERAL,ENTIDADES PÚBLICAS,41.771349,-2.472561
674,CENTRO TERAPÉUTICO VISTA ALEGRE,Gijón,330241,28,PSIQUIÁTRICO,PRIVADO NO BENÉFICO,43.532465,-5.628416
177,PRYTANIS SANT BOI CENTRE SOCIOSANITARI,Sant Boi de Llobregat,82009,122,GERIATRÍA Y/O LARGA ESTANCIA,PRIVADO NO BENÉFICO,41.348794,2.041203
480,HOSPITAL INFANTIL UNIVERSITARIO NIÑO JESUS,Madrid,280796,170,INFANTIL,COMUNIDAD AUTÓNOMA,40.414357,-3.676089
118,INSTITUT GUTTMANN,Badalona,80155,152,TRAUMATOLOGÍA Y/O REHABILITACIÓN,OTRO PRIVADO BENÉFICO,41.480006,2.239542
120,HESTIA PALAU.,Barcelona,80193,317,GERIATRÍA Y/O LARGA ESTANCIA,PRIVADO NO BENÉFICO,41.410048,2.172578
392,HOSPITAL AITA MENNI,Arrasate/Mondragón,200551,442,PSIQUIÁTRICO,PRIVADO-BENÉFICO (IGLESIA),43.063076,-2.529712
501,HOSPITAL RUBER JUAN BRAVO 39,Madrid,280796,70,GENERAL,PRIVADO NO BENÉFICO,40.432636,-3.677759
229,CENTRE GERONTOLÒGIC AMMA SANT CUGAT,Sant Cugat del Vallès,82055,4,GERIATRÍA Y/O LARGA ESTANCIA,PRIVADO NO BENÉFICO,41.484049,2.085441


In [7]:
df['hospital_name'] =df['hospital_name'].str.title()
df['municipality'] = df['municipality'].str.lower()
df['type'] = df['type'].str.capitalize()
df['management'] = df['management'].str.title()

In [8]:
df["type"].unique()

array(['Psiquiátrico', 'Médico-quirúrgico', 'General',
       'Geriatría y/o larga estancia', 'Rehabilitación psicofísica',
       'Materno-infantil', 'Quirúrgico',
       'Traumatología y/o rehabilitación', 'Otra finalidad', 'Infantil',
       'Otros monográficos', 'Oftálmico u orl', 'Oncológico', 'Maternal'],
      dtype=object)

In [9]:
translation_dict = {
    'Psiquiátrico': 'Psychiatric',
    'Médico-quirúrgico': 'Medical-surgical',
    'General': 'General',
    'Geriatría y/o larga estancia': 'Geriatrics and/or long-term care',
    'Rehabilitación psicofísica': 'Psychophysical rehabilitation',
    'Materno-infantil': 'Maternal-infant',
    'Quirúrgico': 'Surgical',
    'Traumatología y/o rehabilitación': 'Traumatology and/or rehabilitation',
    'Otra finalidad': 'Other purpose',
    'Infantil': 'Pediatric',
    'Otros monográficos': 'Other specialized',
    'Oftálmico u orl': 'Ophthalmologic or ENT',
    'Oncológico': 'Oncological',
    'Maternal': 'Maternity'
}


In [10]:
df['type'] = df['type'].map(translation_dict)

In [11]:
df["management"].unique()

array(['Comunidad Autónoma', 'Privado No Benéfico', 'Seguridad Social',
       'Diputación O Cabildo', 'Otro Privado Benéfico',
       'Ministerio De Interior', 'Entidades Públicas',
       'Privado-Benéfico (Iglesia)', 'Municipio', 'Matep',
       'Privado-Benéfico (Cruz Roja)', 'Otra Dependencia Patrimonial',
       'Ministerio De Defensa'], dtype=object)

In [12]:
management_translation_dict = {
    'Comunidad Autónoma': 'Autonomous Community',
    'Privado No Benéfico': 'Private Non-Profit',
    'Seguridad Social': 'Social Security',
    'Diputación O Cabildo': 'Provincial Council or Island Council',
    'Otro Privado Benéfico': 'Other Private Non-Profit',
    'Ministerio De Interior': 'Ministry of Interior',
    'Entidades Públicas': 'Public Entities',
    'Privado-Benéfico (Iglesia)': 'Private Non-Profit (Church)',
    'Municipio': 'Municipality',
    'Matep': 'Matep',  # Assuming Matep is a specific term or acronym.
    'Privado-Benéfico (Cruz Roja)': 'Private Non-Profit (Red Cross)',
    'Otra Dependencia Patrimonial': 'Other Patrimonial Dependency',
    'Ministerio De Defensa': 'Ministry of Defense'
}


In [13]:
df['management'] = df['management'].map(management_translation_dict)

In [14]:
df.head()

Unnamed: 0,hospital_name,municipality,cmun,n_beds,type,management,latitude,longitude
0,Red De Salud Mental De Araba (Hospital Psiquiá...,vitoria-gasteiz,10590,207,Psychiatric,Autonomous Community,42.835656,-2.678612
1,Hospital San José,vitoria-gasteiz,10590,63,Medical-surgical,Private Non-Profit,42.849661,-2.67664
2,Hospital Quirónsalud Vitoria,vitoria-gasteiz,10590,26,General,Private Non-Profit,42.849761,-2.668035
3,"Hospital De Cuidados San Onofre, S.L. (Hospita...",vitoria-gasteiz,10590,82,Geriatrics and/or long-term care,Private Non-Profit,42.837582,-2.680386
4,Hospital De Leza,laguardia,10318,63,General,Autonomous Community,42.575989,-2.638635


In [15]:
df

Unnamed: 0,hospital_name,municipality,cmun,n_beds,type,management,latitude,longitude
0,Red De Salud Mental De Araba (Hospital Psiquiá...,vitoria-gasteiz,10590,207,Psychiatric,Autonomous Community,42.835656,-2.678612
1,Hospital San José,vitoria-gasteiz,10590,63,Medical-surgical,Private Non-Profit,42.849661,-2.676640
2,Hospital Quirónsalud Vitoria,vitoria-gasteiz,10590,26,General,Private Non-Profit,42.849761,-2.668035
3,"Hospital De Cuidados San Onofre, S.L. (Hospita...",vitoria-gasteiz,10590,82,Geriatrics and/or long-term care,Private Non-Profit,42.837582,-2.680386
4,Hospital De Leza,laguardia,10318,63,General,Autonomous Community,42.575989,-2.638635
...,...,...,...,...,...,...,...,...
919,Hospital De Rehabilitacion Psiquiatrica Prisma,zaragoza,502973,50,Psychiatric,Private Non-Profit,41.711449,-0.841290
920,Centro Sanitario Cinco Villas,ejea de los caballeros,500956,32,Other purpose,Autonomous Community,42.126721,-1.125290
921,Hospital Viamed Montecanal,zaragoza,502973,68,General,Private Non-Profit,41.639042,-0.948738
922,Hospital Universitario De Ceuta,ceuta,510013,252,General,Social Security,35.880332,-5.341853


In [16]:
df["cmun"] = df["cmun"].apply(lambda x: int(str(x)[:-1]) if len(str(x)) > 5 else x)


In [17]:
df

Unnamed: 0,hospital_name,municipality,cmun,n_beds,type,management,latitude,longitude
0,Red De Salud Mental De Araba (Hospital Psiquiá...,vitoria-gasteiz,10590,207,Psychiatric,Autonomous Community,42.835656,-2.678612
1,Hospital San José,vitoria-gasteiz,10590,63,Medical-surgical,Private Non-Profit,42.849661,-2.676640
2,Hospital Quirónsalud Vitoria,vitoria-gasteiz,10590,26,General,Private Non-Profit,42.849761,-2.668035
3,"Hospital De Cuidados San Onofre, S.L. (Hospita...",vitoria-gasteiz,10590,82,Geriatrics and/or long-term care,Private Non-Profit,42.837582,-2.680386
4,Hospital De Leza,laguardia,10318,63,General,Autonomous Community,42.575989,-2.638635
...,...,...,...,...,...,...,...,...
919,Hospital De Rehabilitacion Psiquiatrica Prisma,zaragoza,50297,50,Psychiatric,Private Non-Profit,41.711449,-0.841290
920,Centro Sanitario Cinco Villas,ejea de los caballeros,50095,32,Other purpose,Autonomous Community,42.126721,-1.125290
921,Hospital Viamed Montecanal,zaragoza,50297,68,General,Private Non-Profit,41.639042,-0.948738
922,Hospital Universitario De Ceuta,ceuta,51001,252,General,Social Security,35.880332,-5.341853


In [19]:
df = df[["cmun", "hospital_name", "n_beds", "type", "management", "latitude", "longitude"]]

In [20]:
df.isna().sum()

cmun             0
hospital_name    0
n_beds           0
type             0
management       0
latitude         0
longitude        0
dtype: int64

In [21]:
df.to_csv("../data/processed/filtered_hospitals.csv", index=False)