In [None]:
!pip install -U kaleido

Collecting kaleido
  Downloading kaleido-0.2.1-py2.py3-none-manylinux1_x86_64.whl.metadata (15 kB)
Downloading kaleido-0.2.1-py2.py3-none-manylinux1_x86_64.whl (79.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.9/79.9 MB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: kaleido
Successfully installed kaleido-0.2.1


In [None]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import numpy as np
import plotly.io as pio

In [None]:
# urls = []
# for y in ["04", "05", "06", "07", "08", "09", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24"]:
#   url = f"https://dgsiaf-repo.mecon.gob.ar/repository/pa/datasets/20{y}/credito-anual-20{y}.zip"
#   urls.append(url)

In [None]:
# Remote location of the zipped 2024 "credito-anual" dataset.
url_2024 = "https://dgsiaf-repo.mecon.gob.ar/repository/pa/datasets/2024/credito-anual-2024.zip"

In [None]:
# Fetch and parse the dataset straight from the URL, so every run of this cell
# needs network access. pandas infers zip decompression from the ".zip" suffix
# (default compression="infer"); this presumably relies on the archive holding
# a single CSV file -- TODO confirm.
df_2024 = pd.read_csv(url_2024)

In [None]:
# The credit amounts are converted from text with a decimal comma to floats:
# swap the comma for a point, then cast.
for amount_col in ("credito_vigente", "credito_presupuestado"):
    with_decimal_point = df_2024[amount_col].replace(regex={',': '.'})
    df_2024[amount_col] = with_decimal_point.astype(float)

In [None]:
# Total budgeted credit per (funding source, jurisdiction, entity) combination,
# re-expressed as a percentage of the overall total and ordered by the keys.
group_keys = ["fuente_financiamiento_desc", "jurisdiccion_desc", "entidad_desc"]

grouped_credit = df_2024.groupby(group_keys, as_index=False)[["credito_presupuestado"]].sum()
grand_total = grouped_credit["credito_presupuestado"].sum()

df_2024_grouped = (
    grouped_credit
    .assign(credito_presupuestado=grouped_credit["credito_presupuestado"] / grand_total * 100)
    .sort_values(by=group_keys, ascending=True)
)

# Entities whose combined share of the budget (summed over every funding source
# and jurisdiction) reaches the threshold keep their own node in the diagram.
# "credito_presupuestado" is already a percentage of the total at this point.
MIN_ENTIDAD_SHARE_PCT = 1.5

# One grouped sum replaces re-filtering the whole frame once per entity.
# sort=False keeps entities in order of first appearance, as pd.unique did.
share_by_entidad = df_2024_grouped.groupby("entidad_desc", sort=False)["credito_presupuestado"].sum()
relevant_entidad = share_by_entidad[share_by_entidad >= MIN_ENTIDAD_SHARE_PCT].index.tolist()

In [None]:
# Entities outside `relevant_entidad` are relabelled with a single catch-all name.
is_relevant = df_2024_grouped["entidad_desc"].isin(relevant_entidad)
df_2024_grouped["entidad_desc"] = df_2024_grouped["entidad_desc"].where(is_relevant, "Otras entidades")

In [None]:
# Distinct node labels in order of first appearance: funding sources first,
# then jurisdictions, then entities. A label present in more than one column
# appears only once.
node_label_columns = ("fuente_financiamiento_desc", "jurisdiccion_desc", "entidad_desc")
stacked_labels = pd.concat([df_2024_grouped[label_col] for label_col in node_label_columns])
all_nodes = stacked_labels.unique()

In [None]:
# relevant_entidad = []
# for entidad in pd.unique(df_2024_grouped["entidad_desc"]):
#   credito_entidad = df_2024_grouped[df_2024_grouped["entidad_desc"] == entidad]["credito_presupuestado"].sum()
#   if credito_entidad >= 1:
#     relevant_entidad.append(entidad)

# relevants = pd.unique(df_2024_grouped["fuente_financiamiento_desc"]).tolist() + pd.unique(df_2024_grouped["jurisdiccion_desc"]).tolist() + relevant_entidad

# relevant_nodes = [node if node in relevants else "" for node in all_nodes]

In [None]:
# Map each node label to its position in `all_nodes`.
node_indices = dict(zip(all_nodes, range(len(all_nodes))))

In [None]:
# Parallel accumulators: position k of each list describes one Sankey link.
sources, targets, values = [], [], []

# Node levels from left to right.
columns = ["fuente_financiamiento_desc", "jurisdiccion_desc", "entidad_desc"]

In [None]:
# Start from empty lists so that re-running this cell rebuilds the links instead
# of appending a second copy of each one (which would double every flow).
sources, targets, values = [], [], []

# One link per row and per pair of consecutive levels:
# funding source -> jurisdiction, then jurisdiction -> entity.
for source_col, target_col in zip(columns[:-1], columns[1:]):
    # Iterating the three columns in lockstep avoids the per-row Series that
    # DataFrame.iterrows() builds, while visiting rows in the same order.
    for source_label, target_label, amount in zip(
        df_2024_grouped[source_col],
        df_2024_grouped[target_col],
        df_2024_grouped["credito_presupuestado"],
    ):
        source = node_indices[source_label]
        target = node_indices[target_label]
        # Only forward links are kept. Because nodes are keyed by label, a row
        # whose two labels are identical resolves to the same node and is
        # dropped here, as is any link pointing back to an earlier node.
        # NOTE(review): dropped rows do not contribute to the diagram -- confirm
        # this is intended.
        if source < target:
            sources.append(source)
            targets.append(target)
            values.append(amount)

In [None]:
# Qualitative colour sequence named "Plotly" from plotly.express.
# NOTE(review): not referenced by any other visible cell -- confirm it is still needed.
colors_for_nodes = px.colors.qualitative.Plotly

In [None]:
# Node appearance. The labels are also passed as customdata so the hover text
# can show the node name next to its value.
node_spec = dict(
    label=list(all_nodes),
    customdata=list(all_nodes),
    hovertemplate='%{customdata}: %{value}% del total del presupuesto<extra></extra>',
    pad=20,
    thickness=10,
    line=dict(color="black", width=0.5),
)

# Links are given as three parallel lists built in the previous cells.
link_spec = dict(source=sources, target=targets, value=values)

sankey_presup = go.Figure(data=[go.Sankey(node=node_spec, link=link_spec)])

# Title with a subtitle and a source note, anchored near the top-left corner.
title_spec = dict(
    text="<b>¿Quién gasta? ¿En qué gasta? ¿De dónde provienen los recursos?</b><br><sup>Crédito presupuestado según jurisdicción y entidad.<br><sup><b>Fuente:</b> Ejecuciones presupuestarias - presupuesto abierto.</sup></sup>",
    x=0.0325,
    xanchor='left',
    y=0.95,
    yanchor='top',
    font=dict(size=24, color='black'),
)

# Kept as the last expression: update_layout returns the figure, so the
# notebook renders it as the cell output.
sankey_presup.update_layout(
    title=title_spec,
    font=dict(family="Serif", size=14),
    width=1000,
    height=1500,
    margin=dict(l=25, r=25, t=125, b=50),
    plot_bgcolor='rgba(0,0,0,0)',
)

In [None]:
# Save the figure as a standalone HTML page in the working directory.
sankey_presup.write_html('sankey_presup.html', full_html=True)