In [48]:
import pandas as pd
import plotly.express as px
import numpy as np

In [49]:
df = pd.read_csv("../data/raw/hospitals_spain.csv")
df.columns

Index(['OBJECTID', 'CODCNH', 'NOMBRE', 'DIRECCION', 'TELEFONO', 'TELEFONO2',
       'TELEFAX', 'CODMU', 'MUNICIPIOS', 'CODPROV', 'PROVINCIAS', 'CODAUTO',
       'COMUNIDADES', 'CODPOSTAL', 'NCAMAS', 'CODFI', 'FINALIDAD_ASISITENCIAL',
       'CODPAT', 'DEPENDENCIA_PATRIMONIAL', 'CODFU', 'DEPENDENCIA_FUNCIONAL',
       'ACREDOCENT', 'ESCOMPLE', 'FORCOMPLE', 'CODIDCOM', 'ALTA', 'CERRADO',
       'CAPITAL', 'CIERREFECH', 'CONCIERTO', 'EMAIL', 'TAC', 'RM', 'GAM',
       'HEM', 'ASD', 'LIT', 'BCO', 'ALI', 'SPECT', 'PET', 'MAMOS', 'DO',
       'DIAL', 'X', 'Y', 'CalidadGeocodificacion'],
      dtype='object')

The column names in the hospitals dataset are:

- **Identification**
    - `OBJECTID`: Object identifier
    - `CODCNH`: Hospital code
    - `NOMBRE`: Hospital name
    - `CODIDCOM`: Community identifier code

- **Contact Information**
    - `DIRECCION`: Address
    - `TELEFONO`: Phone
    - `TELEFONO2`: Secondary phone
    - `TELEFAX`: Fax
    - `EMAIL`: Email address
    - `CODPOSTAL`: Postal code

- **Location**
    - `CODMU`: Municipality code
    - `MUNICIPIOS`: Municipality name 
    - `CODPROV`: Province code
    - `PROVINCIAS`: Province name
    - `CODAUTO`: Autonomous community code 
    - `COMUNIDADES`: Autonomous community name
    - `X`: Longitude coordinate
    - `Y`: Latitude coordinate
    - `CalidadGeocodificacion`: Geocoding quality
    - `CAPITAL`: Capital city indicator

- **Hospital Information**
    - `NCAMAS`: Number of beds
    - `CODFI`: Healthcare purpose code
    - `FINALIDAD_ASISITENCIAL`: Healthcare purpose description
    - `CODPAT`: Asset ownership code
    - `DEPENDENCIA_PATRIMONIAL`: Asset ownership description
    - `CODFU`: Functional dependency code
    - `DEPENDENCIA_FUNCIONAL`: Functional dependency description

- **Status & Certifications**
    - `ACREDOCENT`: Teaching accreditation
    - `ESCOMPLE`: Complementary center
    - `FORCOMPLE`: Complementary form
    - `ALTA`: Active status
    - `CERRADO`: Closed status
    - `CIERREFECH`: Closing date
    - `CONCIERTO`: Agreement status

- **Equipment & Services**
    - `TAC`: CT scanner
    - `RM`: MRI
    - `GAM`: Gamma camera
    - `HEM`: Hemodynamics
    - `ASD`: Assistant diagnostic systems
    - `LIT`: Lithotripsy
    - `BCO`: Blood bank
    - `ALI`: Other
    - `SPECT`: SPECT scanner
    - `PET`: PET scanner
    - `MAMOS`: Mammography
    - `DO`: Other diagnostic
    - `DIAL`: Dialysis

In [50]:
df.columns

Index(['OBJECTID', 'CODCNH', 'NOMBRE', 'DIRECCION', 'TELEFONO', 'TELEFONO2',
       'TELEFAX', 'CODMU', 'MUNICIPIOS', 'CODPROV', 'PROVINCIAS', 'CODAUTO',
       'COMUNIDADES', 'CODPOSTAL', 'NCAMAS', 'CODFI', 'FINALIDAD_ASISITENCIAL',
       'CODPAT', 'DEPENDENCIA_PATRIMONIAL', 'CODFU', 'DEPENDENCIA_FUNCIONAL',
       'ACREDOCENT', 'ESCOMPLE', 'FORCOMPLE', 'CODIDCOM', 'ALTA', 'CERRADO',
       'CAPITAL', 'CIERREFECH', 'CONCIERTO', 'EMAIL', 'TAC', 'RM', 'GAM',
       'HEM', 'ASD', 'LIT', 'BCO', 'ALI', 'SPECT', 'PET', 'MAMOS', 'DO',
       'DIAL', 'X', 'Y', 'CalidadGeocodificacion'],
      dtype='object')

In [51]:
df.rename(columns={"NOMBRE": "hospital_name", 
                   "CODMU": "cmun",
                   "MUNICIPIOS": "municipality_name", 
                   "NCAMAS": "n_beds",
                   "FINALIDAD_ASISITENCIAL": "type", 
                   "DEPENDENCIA_PATRIMONIAL": "management",  
                   "X": "latitude", 
                   "Y": "longitude"}, inplace=True)


In [53]:
hospital_columns = [
    'hospital_name',
    "municipality_name",
    'cmun',
    'n_beds',
    'type',
    'management',
    'latitude',
    'longitude'
]
df = df[hospital_columns]


In [61]:
df.sample(10)

Unnamed: 0,hospital_name,municipality_name,cmun,n_beds,type,management,latitude,longitude
292,Hospital Rey D. Jaime,castellón de la plana/castelló de la plana,120402,96,General,Privado No Benéfico,-0.036073,39.997317
75,Hospital Psiquiàtric,palma de mallorca,70407,168,Psiquiátrico,Diputación O Cabildo,2.629754,39.59557
757,Centro Hospitalario Padre Menni,santander,390759,403,Psiquiátrico,Privado-Benéfico (Iglesia),-3.79696,43.476898
140,Hospital Comarcal De Sant Bernabé,berga,80229,150,General,Entidades Públicas,1.852374,42.106033
398,Hospital Universitario Donostia-Donostia Unibe...,donostia/san sebastián,200697,1054,General,Comunidad Autónoma,-1.969039,43.29246
657,Hospital Carmen Y Severo Ochoa,cangas del narcea,330117,103,General,Seguridad Social,-6.547333,43.175275
755,Hospital Ramón Negrete,santander,390759,30,Traumatología y/o rehabilitación,Matep,-3.789342,43.483779
160,Parc Sanitari Sant Joan De Déu - Hospital General,sant boi de llobregat,82009,246,General,Privado-Benéfico (Iglesia),2.030522,41.356347
778,Hospital El Tomillar,alcalá de guadaíra,410042,0,General,Comunidad Autónoma,-5.900672,37.298018
717,Vithas Hospital Nosa Señora De Fatima,vigo,360577,200,General,Privado No Benéfico,-8.709221,42.234314


In [58]:
df['hospital_name'] =df['hospital_name'].str.title()
df['municipality_name'] = df['municipality_name'].str.lower()
df['type'] = df['type'].str.capitalize()
df['management'] = df['management'].str.title()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['hospital_name'] =df['hospital_name'].str.title()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['municipality_name'] = df['municipality_name'].str.lower()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['type'] = df['type'].str.capitalize()
A value is trying to be set on a copy of a slice 

In [62]:
df["type"].unique()

array(['Psiquiátrico', 'Médico-quirúrgico', 'General',
       'Geriatría y/o larga estancia', 'Rehabilitación psicofísica',
       'Materno-infantil', 'Quirúrgico',
       'Traumatología y/o rehabilitación', 'Otra finalidad', 'Infantil',
       'Otros monográficos', 'Oftálmico u orl', 'Oncológico', 'Maternal'],
      dtype=object)

In [65]:
translation_dict = {
    'Psiquiátrico': 'Psychiatric',
    'Médico-quirúrgico': 'Medical-surgical',
    'General': 'General',
    'Geriatría y/o larga estancia': 'Geriatrics and/or long-term care',
    'Rehabilitación psicofísica': 'Psychophysical rehabilitation',
    'Materno-infantil': 'Maternal-infant',
    'Quirúrgico': 'Surgical',
    'Traumatología y/o rehabilitación': 'Traumatology and/or rehabilitation',
    'Otra finalidad': 'Other purpose',
    'Infantil': 'Pediatric',
    'Otros monográficos': 'Other specialized',
    'Oftálmico u orl': 'Ophthalmologic or ENT',
    'Oncológico': 'Oncological',
    'Maternal': 'Maternity'
}


In [66]:
df['type'] = df['type'].map(translation_dict)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['type'] = df['type'].map(translation_dict)


In [63]:
df["management"].unique()

array(['Comunidad Autónoma', 'Privado No Benéfico', 'Seguridad Social',
       'Diputación O Cabildo', 'Otro Privado Benéfico',
       'Ministerio De Interior', 'Entidades Públicas',
       'Privado-Benéfico (Iglesia)', 'Municipio', 'Matep',
       'Privado-Benéfico (Cruz Roja)', 'Otra Dependencia Patrimonial',
       'Ministerio De Defensa'], dtype=object)

In [67]:
management_translation_dict = {
    'Comunidad Autónoma': 'Autonomous Community',
    'Privado No Benéfico': 'Private Non-Profit',
    'Seguridad Social': 'Social Security',
    'Diputación O Cabildo': 'Provincial Council or Island Council',
    'Otro Privado Benéfico': 'Other Private Non-Profit',
    'Ministerio De Interior': 'Ministry of Interior',
    'Entidades Públicas': 'Public Entities',
    'Privado-Benéfico (Iglesia)': 'Private Non-Profit (Church)',
    'Municipio': 'Municipality',
    'Matep': 'Matep',  # Assuming Matep is a specific term or acronym.
    'Privado-Benéfico (Cruz Roja)': 'Private Non-Profit (Red Cross)',
    'Otra Dependencia Patrimonial': 'Other Patrimonial Dependency',
    'Ministerio De Defensa': 'Ministry of Defense'
}


In [None]:
df['management'] = df['management'].map(management_translation_dict)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['management'] = df['management'].map(management_translation_dict)


In [69]:
df.head()

Unnamed: 0,hospital_name,municipality_name,cmun,n_beds,type,management,latitude,longitude
0,Red De Salud Mental De Araba (Hospital Psiquiá...,vitoria-gasteiz,10590,207,Psychiatric,Autonomous Community,-2.678612,42.835656
1,Hospital San José,vitoria-gasteiz,10590,63,Medical-surgical,Private Non-Profit,-2.67664,42.849661
2,Hospital Quirónsalud Vitoria,vitoria-gasteiz,10590,26,General,Private Non-Profit,-2.668035,42.849761
3,"Hospital De Cuidados San Onofre, S.L. (Hospita...",vitoria-gasteiz,10590,82,Geriatrics and/or long-term care,Private Non-Profit,-2.680386,42.837582
4,Hospital De Leza,laguardia,10318,63,General,Autonomous Community,-2.638635,42.575989


In [5]:
df

Unnamed: 0,NOMBRE,CODMU,MUNICIPIOS,CODAUTO,COMUNIDADES,NCAMAS,FINALIDAD_ASISITENCIAL,DEPENDENCIA_PATRIMONIAL,X,Y
0,RED DE SALUD MENTAL DE ARABA (HOSPITAL PSIQUIÁ...,10590,Vitoria-Gasteiz,16,PAÍS VASCO,207,PSIQUIÁTRICO,COMUNIDAD AUTÓNOMA,-2.678612,42.835656
1,HOSPITAL SAN JOSÉ,10590,Vitoria-Gasteiz,16,PAÍS VASCO,63,MÉDICO-QUIRÚRGICO,PRIVADO NO BENÉFICO,-2.676640,42.849661
2,HOSPITAL QUIRÓNSALUD VITORIA,10590,Vitoria-Gasteiz,16,PAÍS VASCO,26,GENERAL,PRIVADO NO BENÉFICO,-2.668035,42.849761
3,"HOSPITAL DE CUIDADOS SAN ONOFRE, S.L. (HOSPITA...",10590,Vitoria-Gasteiz,16,PAÍS VASCO,82,GERIATRÍA Y/O LARGA ESTANCIA,PRIVADO NO BENÉFICO,-2.680386,42.837582
4,HOSPITAL DE LEZA,10318,Laguardia,16,PAÍS VASCO,63,GENERAL,COMUNIDAD AUTÓNOMA,-2.638635,42.575989
...,...,...,...,...,...,...,...,...,...,...
919,HOSPITAL DE REHABILITACION PSIQUIATRICA PRISMA,502973,Zaragoza,2,ARAGÓN,50,PSIQUIÁTRICO,PRIVADO NO BENÉFICO,-0.841290,41.711449
920,CENTRO SANITARIO CINCO VILLAS,500956,Ejea de los Caballeros,2,ARAGÓN,32,OTRA FINALIDAD,COMUNIDAD AUTÓNOMA,-1.125290,42.126721
921,HOSPITAL VIAMED MONTECANAL,502973,Zaragoza,2,ARAGÓN,68,GENERAL,PRIVADO NO BENÉFICO,-0.948738,41.639042
922,HOSPITAL UNIVERSITARIO DE CEUTA,510013,Ceuta,18,CEUTA,252,GENERAL,SEGURIDAD SOCIAL,-5.341853,35.880332


In [70]:
df["cmun"] = df["cmun"].apply(lambda x: int(str(x)[:-1]) if len(str(x)) > 5 else x)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["cmun"] = df["cmun"].apply(lambda x: int(str(x)[:-1]) if len(str(x)) > 5 else x)


In [71]:
df

Unnamed: 0,hospital_name,municipality_name,cmun,n_beds,type,management,latitude,longitude
0,Red De Salud Mental De Araba (Hospital Psiquiá...,vitoria-gasteiz,10590,207,Psychiatric,Autonomous Community,-2.678612,42.835656
1,Hospital San José,vitoria-gasteiz,10590,63,Medical-surgical,Private Non-Profit,-2.676640,42.849661
2,Hospital Quirónsalud Vitoria,vitoria-gasteiz,10590,26,General,Private Non-Profit,-2.668035,42.849761
3,"Hospital De Cuidados San Onofre, S.L. (Hospita...",vitoria-gasteiz,10590,82,Geriatrics and/or long-term care,Private Non-Profit,-2.680386,42.837582
4,Hospital De Leza,laguardia,10318,63,General,Autonomous Community,-2.638635,42.575989
...,...,...,...,...,...,...,...,...
919,Hospital De Rehabilitacion Psiquiatrica Prisma,zaragoza,50297,50,Psychiatric,Private Non-Profit,-0.841290,41.711449
920,Centro Sanitario Cinco Villas,ejea de los caballeros,50095,32,Other purpose,Autonomous Community,-1.125290,42.126721
921,Hospital Viamed Montecanal,zaragoza,50297,68,General,Private Non-Profit,-0.948738,41.639042
922,Hospital Universitario De Ceuta,ceuta,51001,252,General,Social Security,-5.341853,35.880332


In [72]:
df.isna().sum()

hospital_name        0
municipality_name    0
cmun                 0
n_beds               0
type                 0
management           0
latitude             0
longitude            0
dtype: int64

In [73]:
df.to_csv("../data/processed/filtered_hospitals.csv", index=False)