In [None]:
import pandas as pd
import plotly.express as px
import numpy as np

In [None]:
# Read the raw hospitals CSV and show which columns it provides.
raw_hospitals_csv = "../data/raw/hospitals_spain.csv"
df = pd.read_csv(raw_hospitals_csv)
df.columns

The column names in the hospitals dataset are:

- **Identification**
    - `OBJECTID`: Object identifier
    - `CODCNH`: Hospital code
    - `NOMBRE`: Hospital name
    - `CODIDCOM`: Community identifier code

- **Contact Information**
    - `DIRECCION`: Address
    - `TELEFONO`: Phone
    - `TELEFONO2`: Secondary phone
    - `TELEFAX`: Fax
    - `EMAIL`: Email address
    - `CODPOSTAL`: Postal code

- **Location**
    - `CODMU`: Municipality code
    - `MUNICIPIOS`: Municipality name 
    - `CODPROV`: Province code
    - `PROVINCIAS`: Province name
    - `CODAUTO`: Autonomous community code 
    - `COMUNIDADES`: Autonomous community name
    - `X`: Longitude coordinate
    - `Y`: Latitude coordinate
    - `CalidadGeocodificacion`: Geocoding quality
    - `CAPITAL`: Capital city indicator

- **Hospital Information**
    - `NCAMAS`: Number of beds
    - `CODFI`: Healthcare purpose code
    - `FINALIDAD_ASISITENCIAL`: Healthcare purpose description
    - `CODPAT`: Asset ownership code
    - `DEPENDENCIA_PATRIMONIAL`: Asset ownership description
    - `CODFU`: Functional dependency code
    - `DEPENDENCIA_FUNCIONAL`: Functional dependency description

- **Status & Certifications**
    - `ACREDOCENT`: Teaching accreditation
    - `ESCOMPLE`: Whether the record is a hospital complex
    - `FORCOMPLE`: Whether the hospital forms part of a hospital complex
    - `ALTA`: Active status
    - `CERRADO`: Closed status
    - `CIERREFECH`: Closing date
    - `CONCIERTO`: Agreement status

- **Equipment & Services**
    - `TAC`: CT scanner
    - `RM`: MRI
    - `GAM`: Gamma camera
    - `HEM`: Hemodynamics
    - `ASD`: Digital subtraction angiography
    - `LIT`: Lithotripsy
    - `BCO`: Cobalt therapy unit
    - `ALI`: Linear (particle) accelerator
    - `SPECT`: SPECT scanner
    - `PET`: PET scanner
    - `MAMOS`: Mammography
    - `DO`: Bone densitometer
    - `DIAL`: Dialysis

In [None]:
# Show the raw column names again, right before they are renamed below.
df.columns

In [None]:
# Rename the Spanish source columns used in this notebook to short English
# names. The result is assigned back instead of using inplace=True, so the
# step is an explicit "new frame from old frame" operation and can be chained.
# Columns not listed here keep their original names.
df = df.rename(
    columns={
        "NOMBRE": "hospital_name",
        "CODMU": "cmun",
        "MUNICIPIOS": "municipality",
        "NCAMAS": "n_beds",
        "FINALIDAD_ASISITENCIAL": "type",
        "DEPENDENCIA_PATRIMONIAL": "management",
        "X": "longitude",
        "Y": "latitude",
    }
)


In [None]:
# Columns kept for the rest of the notebook (names as produced by the rename
# step above).
hospital_columns = [
    'hospital_name',
    "municipality",
    'cmun',
    'n_beds',
    'type',
    'management',
    'latitude',
    'longitude'
]
# Take an explicit copy: later cells assign into columns of this frame, and
# assigning into a plain column subset triggers pandas' SettingWithCopyWarning.
df = df[hospital_columns].copy()


In [None]:
# Preview ten random rows. The fixed random_state (arbitrary value) makes the
# same rows appear on every Restart & Run All.
df.sample(10, random_state=42)

In [None]:
# Normalise the letter case of the free-text columns, one rule per column:
# title case for names and management, lower case for municipality, and
# "first letter only" capitalisation for type.
text_normalizers = {
    'hospital_name': lambda col: col.str.title(),
    'municipality': lambda col: col.str.lower(),
    'type': lambda col: col.str.capitalize(),
    'management': lambda col: col.str.title(),
}
for column_name, normalize in text_normalizers.items():
    df[column_name] = normalize(df[column_name])

In [None]:
# List the distinct values of the "type" column; these are the spellings the
# translation dictionary below has to use as keys.
df["type"].unique()

In [None]:
# Spanish -> English labels for the hospital "type" column.
# Keys are spelled with only the first letter upper-case (e.g.
# "Oftálmico u orl"), matching the output of str.capitalize().
translation_dict = dict(
    [
        ("Psiquiátrico", "Psychiatric"),
        ("Médico-quirúrgico", "Medical-surgical"),
        ("General", "General"),
        ("Geriatría y/o larga estancia", "Geriatrics and/or long-term care"),
        ("Rehabilitación psicofísica", "Psychophysical rehabilitation"),
        ("Materno-infantil", "Maternal-infant"),
        ("Quirúrgico", "Surgical"),
        ("Traumatología y/o rehabilitación", "Traumatology and/or rehabilitation"),
        ("Otra finalidad", "Other purpose"),
        ("Infantil", "Pediatric"),
        ("Otros monográficos", "Other specialized"),
        ("Oftálmico u orl", "Ophthalmologic or ENT"),
        ("Oncológico", "Oncological"),
        ("Maternal", "Maternity"),
    ]
)


In [None]:
# Translate the hospital type to English. Series.map() on its own turns every
# value that is not a key of translation_dict into NaN, which silently drops
# any category missing from the dictionary and blanks most of the column if
# this cell is run a second time. Falling back to the current value keeps
# such entries intact and makes the cell safe to re-run.
df['type'] = df['type'].map(translation_dict).fillna(df['type'])

In [None]:
# List the distinct values of the "management" column; these are the spellings
# the management translation dictionary below has to use as keys.
df["management"].unique()

In [None]:
# Spanish -> English labels for the "management" (asset ownership) column.
# Keys are spelled in title case, matching the output of str.title().
management_translation_dict = {
    'Comunidad Autónoma': 'Autonomous Community',
    # "No Benéfico" means "not charitable", i.e. a for-profit private owner.
    # The previous label "Private Non-Profit" inverted the meaning.
    'Privado No Benéfico': 'Private For-Profit',
    'Seguridad Social': 'Social Security',
    'Diputación O Cabildo': 'Provincial Council or Island Council',
    'Otro Privado Benéfico': 'Other Private Non-Profit',
    'Ministerio De Interior': 'Ministry of Interior',
    'Entidades Públicas': 'Public Entities',
    'Privado-Benéfico (Iglesia)': 'Private Non-Profit (Church)',
    'Municipio': 'Municipality',
    # NOTE(review): presumably the acronym MATEP (mutual insurers for work
    # accidents and occupational diseases), title-cased by the earlier
    # str.title() step. Confirm and consider restoring the upper-case form.
    'Matep': 'Matep',
    'Privado-Benéfico (Cruz Roja)': 'Private Non-Profit (Red Cross)',
    'Otra Dependencia Patrimonial': 'Other Patrimonial Dependency',
    'Ministerio De Defensa': 'Ministry of Defense'
}


In [None]:
# Translate the management label to English. Series.map() on its own turns
# every value that is not a key of management_translation_dict into NaN, which
# silently drops unlisted categories and blanks the column if this cell is run
# a second time. Falling back to the current value keeps such entries intact
# and makes the cell safe to re-run.
df['management'] = df['management'].map(management_translation_dict).fillna(df['management'])

In [None]:
# Preview the first rows after both translation steps.
df.head()

In [None]:
# Display the frame as it stands before the cmun codes are shortened below.
df

In [None]:
def _trim_cmun(code):
    """Drop the last character of a municipality code longer than five characters.

    Codes of five characters or fewer, and missing values, are returned
    unchanged. Integer-valued floats are converted to int first so that the
    trailing ".0" of their text form is not counted as part of the code.
    Without that conversion a float such as 280796.0 becomes "280796." after
    trimming and int() raises ValueError.
    """
    if pd.isna(code):
        return code
    if isinstance(code, float) and code.is_integer():
        code = int(code)
    digits = str(code)
    return int(digits[:-1]) if len(digits) > 5 else code


# NOTE(review): presumably the sixth digit is a trailing control digit. If the
# CSV column was parsed as integers, six-digit codes with a leading zero arrive
# here as five digits and are NOT trimmed. Verify against the raw file.
df["cmun"] = df["cmun"].apply(_trim_cmun)


In [None]:
# Display the frame again to inspect the shortened cmun codes.
df

In [None]:
# Final column layout: cmun first, then the hospital attributes.
# "municipality" is not part of this list, so it is dropped here.
export_columns = [
    "cmun",
    "hospital_name",
    "n_beds",
    "type",
    "management",
    "latitude",
    "longitude",
]
df = df[export_columns]

In [None]:
# Count missing values per column as a last check before exporting.
df.isna().sum()

In [None]:
# Write the cleaned table to the processed-data folder without the index column.
processed_csv = "../data/processed/filtered_hospitals.csv"
df.to_csv(processed_csv, index=False)