In [None]:
""" %pip install --quiet hvplot
%pip install --quiet holoviews[all]
%pip install --quiet jupyter_bokeh """

In [None]:
# Imports
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import holoviews as hv
from holoviews import opts, dim


In [None]:
# Load data
# One CSV file per year; the three files are assumed to share the same columns
# so that they can be stacked row-wise below.

path_2022 = "../data/movilidad_provincias_2022.csv"
path_2023 = "../data/movilidad_provincias_2023.csv"
path_2024 = "../data/movilidad_provincias_2024.csv"

# Frames exactly as read from disk.
original_data_2022 = pd.read_csv(path_2022, sep=",")
original_data_2023 = pd.read_csv(path_2023, sep=",")
original_data_2024 = pd.read_csv(path_2024, sep=",")

# Working copies, so the frames read from disk stay untouched.
df_2022 = original_data_2022.copy()
df_2023 = original_data_2023.copy()
df_2024 = original_data_2024.copy()

# ignore_index=True gives the combined frame a fresh 0..n-1 index. Without it each
# file keeps its own row labels, so every label would appear up to three times and
# any label-based lookup on `df` would be ambiguous.
df = pd.concat([df_2022, df_2023, df_2024], ignore_index=True)

In [None]:
# Structural overview of the combined frame: info() prints the columns with their
# non-null counts and dtypes, plus the memory usage.
df.info()
# Last expression of the cell, so the (rows, columns) tuple is shown as the cell output.
df.shape

In [None]:
# With this data we build a flow diagram of the travellers going from the peninsula to the islands.
# The graph is built with networkx; uncomment the next line to install it if it is missing.
#%pip install networkx

In [None]:
# Keep only the trips whose destination is one of the island provinces.
# NOTE(review): only the destination is filtered here, so trips that also start in
# an island province are kept - confirm whether those should be excluded for a
# strict "peninsula to islands" view.
codigos_provincias_insulares = [
    7,   # Baleares
    35,  # Las Palmas
    38,  # Santa Cruz de Tenerife
]

# Boolean mask over the rows of df, True where the destination code is insular.
destino_es_insular = df['provincia_destino'].isin(codigos_provincias_insulares)
viajeros_islas = df.loc[destino_es_insular]

In [None]:
import networkx as nx

# Directed graph of the traveller flows into the island provinces:
# one node per province, one edge per origin -> destination pair.
G = nx.DiGraph()

# Tag origins first and destinations second. Re-adding an existing node updates its
# attributes, so a province that plays both roles ends up with isla=True, and
# every destination (the island provinces) is flagged as an island.
G.add_nodes_from(viajeros_islas['provincia_origen_name'].unique(), isla=False)
G.add_nodes_from(viajeros_islas['provincia_destino_name'].unique(), isla=True)

# The frame can hold several rows per origin/destination pair (three yearly files
# are stacked). Adding one edge per row would overwrite the weight each time and
# keep only the last row, so travellers are summed per pair before adding edges.
flujos = (
    viajeros_islas
    .groupby(['provincia_origen_name', 'provincia_destino_name'], as_index=False)['viajeros']
    .sum()
)
# Each plain tuple is (origin, destination, total travellers); the third item is
# stored under the default 'weight' edge attribute.
G.add_weighted_edges_from(flujos.itertuples(index=False, name=None))

# Draw the graph
plt.figure(figsize=(10, 10))
# Fixed seed so the layout (and therefore the figure) is the same on every run.
pos = nx.spring_layout(G, seed=42)
# Island provinces in blue, every other province in red.
node_colors = ['blue' if es_isla else 'red' for _, es_isla in G.nodes(data='isla')]
nx.draw(G, pos, with_labels=True, node_size=3000, node_color=node_colors)
labels = nx.get_edge_attributes(G, 'weight')
nx.draw_networkx_edge_labels(G, pos, edge_labels=labels)
plt.show()

In [None]:
# Travellers to each island province, summed per origin/destination pair.

def _viajeros_totales_hacia(codigo_destino):
    """Return the summed `viajeros` per (origin name, destination name) pair.

    Only the rows of `df` whose `provincia_destino` equals `codigo_destino`
    are considered. The result is a flat DataFrame with the two name columns
    and the `viajeros` total.
    """
    hacia_destino = df[df['provincia_destino'].isin([codigo_destino])]
    totales = hacia_destino.groupby(
        ['provincia_origen_name', 'provincia_destino_name']
    )['viajeros'].sum()
    return totales.reset_index()


# 7 = Baleares
viajeros_totales_baleares = _viajeros_totales_hacia(7)
print(viajeros_totales_baleares)

# 35 = Las Palmas
viajeros_totales_laspalmas = _viajeros_totales_hacia(35)
print(viajeros_totales_laspalmas)

# 38 = Santa Cruz de Tenerife
viajeros_totales_tenerife = _viajeros_totales_hacia(38)
print(viajeros_totales_tenerife)

#### Diagrama Sankey Baleares

In [None]:
# Ten origin/destination pairs with the most travellers towards Baleares.
top10_viajeros_totales_baleares = (
    viajeros_totales_baleares
    .sort_values(by='viajeros', ascending=False)
    .head(10)
)

# Set up holoviews: bokeh backend, output scaled to 200 % of the declared size.
hv.extension('bokeh')
hv.output(size=200)

# Origins and destinations get separate index ranges (origins first, then
# destinations). A province that appears on both sides therefore becomes two
# distinct nodes instead of a single node with a self-loop, which hv.Sankey
# cannot lay out because it only supports acyclic graphs. When no province is
# on both sides the indices are the same as with a single shared mapping.
origin_names = top10_viajeros_totales_baleares['provincia_origen_name'].unique()
dest_names = top10_viajeros_totales_baleares['provincia_destino_name'].unique()
origin_index = {name: idx for idx, name in enumerate(origin_names)}
dest_index = {name: idx for idx, name in enumerate(dest_names, start=len(origin_names))}
provinces = [*origin_names, *dest_names]

# Links as (source index, target index, travellers), built column-wise.
sources = top10_viajeros_totales_baleares['provincia_origen_name'].map(origin_index).tolist()
targets = top10_viajeros_totales_baleares['provincia_destino_name'].map(dest_index).tolist()
values = top10_viajeros_totales_baleares['viajeros'].tolist()
edges = list(zip(sources, targets, values))

# Node table: integer index -> province name used as the node label.
nodes = hv.Dataset(list(enumerate(provinces)), 'index', 'label')

# Create Sankey diagram
sankey = hv.Sankey(
    data=(edges, nodes),
    kdims=['source', 'target'],
    vdims=['viajeros']
)

# Format visualization: links are coloured by their source node, nodes by their index.
sankey.opts(
    opts.Sankey(
        labels='label',
        label_position='outer',
        edge_color=dim('source').str(),
        node_color=dim('index').str(),
        width=700,
        height=400,
        cmap='Category20',
        title='Flujo total de viajeros hacia Baleares 2022-2024'
    )
)

# Last expression of the cell so the diagram is rendered as its output.
sankey

#### Diagrama Sankey Las Palmas

In [None]:
# Ten origin/destination pairs with the most travellers towards Las Palmas.
top10_viajeros_totales_laspalmas = (
    viajeros_totales_laspalmas
    .sort_values(by='viajeros', ascending=False)
    .head(10)
)

# Set up holoviews: bokeh backend, output scaled to 200 % of the declared size.
hv.extension('bokeh')
hv.output(size=200)

# Origins and destinations get separate index ranges (origins first, then
# destinations). A province that appears on both sides therefore becomes two
# distinct nodes instead of a single node with a self-loop, which hv.Sankey
# cannot lay out because it only supports acyclic graphs. When no province is
# on both sides the indices are the same as with a single shared mapping.
origin_names = top10_viajeros_totales_laspalmas['provincia_origen_name'].unique()
dest_names = top10_viajeros_totales_laspalmas['provincia_destino_name'].unique()
origin_index = {name: idx for idx, name in enumerate(origin_names)}
dest_index = {name: idx for idx, name in enumerate(dest_names, start=len(origin_names))}
provinces = [*origin_names, *dest_names]

# Links as (source index, target index, travellers), built column-wise.
sources = top10_viajeros_totales_laspalmas['provincia_origen_name'].map(origin_index).tolist()
targets = top10_viajeros_totales_laspalmas['provincia_destino_name'].map(dest_index).tolist()
values = top10_viajeros_totales_laspalmas['viajeros'].tolist()
edges = list(zip(sources, targets, values))

# Node table: integer index -> province name used as the node label.
nodes = hv.Dataset(list(enumerate(provinces)), 'index', 'label')

# Create Sankey diagram
sankey = hv.Sankey(
    data=(edges, nodes),
    kdims=['source', 'target'],
    vdims=['viajeros']
)

# Format visualization: links are coloured by their source node, nodes by their index.
sankey.opts(
    opts.Sankey(
        labels='label',
        label_position='outer',
        edge_color=dim('source').str(),
        node_color=dim('index').str(),
        width=700,
        height=400,
        cmap='Category20',
        title='Flujo total de viajeros hacia Las Palmas 2022-2024'
    )
)

# Last expression of the cell so the diagram is rendered as its output.
sankey

#### Diagrama Sankey Tenerife

In [None]:
# Ten origin/destination pairs with the most travellers towards Santa Cruz de Tenerife.
top10_viajeros_totales_tenerife = (
    viajeros_totales_tenerife
    .sort_values(by='viajeros', ascending=False)
    .head(10)
)

# Set up holoviews: bokeh backend, output scaled to 200 % of the declared size.
hv.extension('bokeh')
hv.output(size=200)

# Origins and destinations get separate index ranges (origins first, then
# destinations). A province that appears on both sides therefore becomes two
# distinct nodes instead of a single node with a self-loop, which hv.Sankey
# cannot lay out because it only supports acyclic graphs. When no province is
# on both sides the indices are the same as with a single shared mapping.
origin_names = top10_viajeros_totales_tenerife['provincia_origen_name'].unique()
dest_names = top10_viajeros_totales_tenerife['provincia_destino_name'].unique()
origin_index = {name: idx for idx, name in enumerate(origin_names)}
dest_index = {name: idx for idx, name in enumerate(dest_names, start=len(origin_names))}
provinces = [*origin_names, *dest_names]

# Links as (source index, target index, travellers), built column-wise.
sources = top10_viajeros_totales_tenerife['provincia_origen_name'].map(origin_index).tolist()
targets = top10_viajeros_totales_tenerife['provincia_destino_name'].map(dest_index).tolist()
values = top10_viajeros_totales_tenerife['viajeros'].tolist()
edges = list(zip(sources, targets, values))

# Node table: integer index -> province name used as the node label.
nodes = hv.Dataset(list(enumerate(provinces)), 'index', 'label')

# Create Sankey diagram
sankey = hv.Sankey(
    data=(edges, nodes),
    kdims=['source', 'target'],
    vdims=['viajeros']
)

# Format visualization: links are coloured by their source node, nodes by their index.
sankey.opts(
    opts.Sankey(
        labels='label',
        label_position='outer',
        edge_color=dim('source').str(),
        node_color=dim('index').str(),
        width=700,
        height=400,
        cmap='Category20',
        title='Flujo total de viajeros hacia Santa Cruz de Tenerife 2022-2024'
    )
)

# Last expression of the cell so the diagram is rendered as its output.
sankey