## Basic EDA for first incoming data

In [None]:
import pandas as pd
import plotly.express as px
import numpy as np

In [None]:
# Population table: semicolon-separated CSV.
df_communities = pd.read_csv("../data/population_towns.csv", sep=";")

# Coordinates table: semicolon-separated, comma as decimal mark,
# read as ISO-8859-1 (a.k.a. "latin1").
df_coordinates = pd.read_csv(
    "../data/coordinates_towns_spain.csv",
    sep=";",
    decimal=",",
    encoding="ISO-8859-1",
)


In [None]:
# Hospital column names, each mapped to the placeholder value 1.
hospital_dict = dict.fromkeys(
    [
        'NOMBRE',
        'CODMU',
        'MUNICIPIOS',
        'CODAUTO',
        'COMUNIDADES',
        'NCAMAS',
        'FINALIDAD_ASISITENCIAL',
        'DEPENDENCIA_PATRIMONIAL',
        'X',
        'Y',
    ],
    1,
)

In [None]:
# NOTE(review): `df_hospitals` is not assigned in any cell visible here, so this
# lookup raises NameError unless it is defined elsewhere — TODO load it first.
df_hospitals

In [None]:
# English column labels for the population table, in column order.
columns_communities_english = [
    'province_code',
    'province',
    'municipality_code',
    'municipality_name',
    'population',
    'male',
    'female',
]

# English column labels for the coordinates table, in column order.
columns_coordinates_english = [
    'community',
    'province',
    'municipality_name',
    'latitude',
    'longitude',
    'altitude',
    'population',
    'male',
    'female',
]


def map_cols_es_en(es: list, en: list) -> list:
    """Return the English column names that replace the source ones, by position.

    The i-th name in ``en`` is the replacement for the i-th name in ``es``.
    The result is a new list, suitable for assigning to ``DataFrame.columns``.

    Args:
        es: Current (source-language) column names, in column order.
        en: English column names, in the same order as ``es``.

    Returns:
        A new list holding the names from ``en``, one per entry of ``es``.

    Raises:
        ValueError: If ``es`` and ``en`` do not have the same length.
    """
    # Fail here with a clear message instead of silently truncating to the
    # shorter list and mislabelling (or failing to label) columns later on.
    if len(es) != len(en):
        raise ValueError(
            f"Column count mismatch: {len(es)} source columns but "
            f"{len(en)} English names"
        )
    # Pair by position rather than through a dict keyed on the source names:
    # duplicate source names would collapse into one key and drop columns.
    return list(en)


# Relabel both tables with the column names returned by map_cols_es_en.
df_communities.columns = map_cols_es_en(
    list(df_communities.columns),
    columns_communities_english,
)
df_coordinates.columns = map_cols_es_en(
    list(df_coordinates.columns),
    columns_coordinates_english,
)

In [None]:
# Spot check: population rows whose municipality_name is "Galapagar".
df_communities.loc[df_communities['municipality_name'] == "Galapagar"]

In [None]:
# Spot check: coordinate rows whose municipality_name is "Galapagar".
df_coordinates.loc[df_coordinates['municipality_name'] == "Galapagar"]

In [None]:
# Outer-join the population table with the coordinate columns, keyed on the
# municipality name only.
# NOTE(review): names that repeat or are spelled differently between the two
# tables would produce duplicated or unmatched rows — verify the key.
df = df_communities.merge(
    df_coordinates[["latitude", "longitude", "altitude", "municipality_name"]],
    how='outer',
    on='municipality_name',
)

In [None]:
# Show the rows that hold at least 4 non-NaN values. The result is not
# assigned, so `df` itself is left unchanged.
df.dropna(thresh=4)

In [None]:
# Print a summary of the merged frame (columns, non-null counts, dtypes).
df.info()

In [None]:
# Collect every row of df that has a NaN in at least one column, then show it.
nan_rows = df.loc[df.isna().any(axis="columns")]
nan_rows

In [None]:
# Bubble map of the municipalities, sized and coloured by population.
title = "Communities in Spain by population size"

# The outer merge leaves rows without coordinates or without a population.
# Such rows cannot be positioned or sized, and Plotly rejects NaN marker
# sizes, so only complete rows are plotted. `df` itself is not modified.
df_map = df.dropna(subset=["latitude", "longitude", "population"])

lats = df_map.latitude
lons = df_map.longitude

fig = px.scatter_map(
    df_map,
    lat=lats,
    lon=lons,
    hover_data=["municipality_name", "altitude"],
    size='population',
    color='population',
    color_continuous_scale=px.colors.carto.Aggrnyl,
    zoom=5,
    size_max=50,  # Increase max size of markers
)

# Adjust the size reference to make small points more visible
fig.update_traces(marker=dict(sizeref=1000))  # Decrease this value to make points larger

fig.update_layout(
    title=title,
    height=1000,
    width=1000,
    margin={"r": 50, "t": 50, "l": 50, "b": 50},
    # scatter_map draws on the `map` subplot, so the tile style is set through
    # `map_style`; `mapbox_style` only affects the legacy mapbox subplot.
    map_style="open-street-map",
    coloraxis_colorbar=dict(title='Population'),
)

fig.show()

In [None]:
# Spot check: merged rows whose municipality_name is "València".
df.loc[df["municipality_name"] == "València"]

In [None]:
# Spot check: population rows whose municipality_name is "València".
df_communities.loc[df_communities["municipality_name"] == "València"]

In [None]:
# Spot check: coordinate rows whose municipality_name is "Valencia"
# (spelled without the accent, unlike the lookups on the other tables).
df_coordinates.loc[df_coordinates["municipality_name"] == "Valencia"]