## 1. Imports y Configuraci√≥n

In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
import pickle

# Visualizaci√≥n
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

# Widgets interactivos
import ipywidgets as widgets
from IPython.display import display, HTML

# DTW
from dtaidistance import dtw

# Configuraci√≥n de estilo
plt.style.use('seaborn-v0_8')
plt.rcParams['figure.figsize'] = (14, 8)
sns.set_palette("husl")

# Paths
project_root = Path.cwd().parent.parent
data_dir = project_root / 'data'
cache_dir = data_dir / 'outputs' / 'cache'
clusters_dir = data_dir / 'clusters_input'

print(f"üìÅ Directorio de proyecto: {project_root}")
print(f"üìÅ Directorio de cache: {cache_dir}")
print(f"\n‚úÖ Librer√≠as cargadas exitosamente")

üìÅ Directorio de proyecto: /Users/mkurno/Documents/GitHub/evolucion_economica_argentina
üìÅ Directorio de cache: /Users/mkurno/Documents/GitHub/evolucion_economica_argentina/data/outputs/cache

‚úÖ Librer√≠as cargadas exitosamente


## 2. Cargar Datos

Cargamos:
1. Cache de distancias DTW
2. Datos completos de GNI
3. Informaci√≥n de pa√≠ses

In [2]:
# Cargar cache de distancias DTW
print("üì¶ Cargando cache de distancias DTW...")
distances_file = cache_dir / 'dtw_distances_matrix.csv'

if not distances_file.exists():
    raise FileNotFoundError(f"‚ùå No se encontr√≥ el archivo de cache: {distances_file}\n"
                          "   Ejecuta primero el notebook 01_Generate_DTW_Cache.ipynb")

df_distances = pd.read_csv(distances_file)
print(f"   ‚úÖ Distancias cargadas: {len(df_distances):,} pares")
print(f"   üìä Columnas: {list(df_distances.columns)}")

# Filtrar pares v√°lidos (con superposici√≥n)
df_distances_valid = df_distances[~np.isinf(df_distances['dtw_distance'])].copy()
print(f"   ‚úÖ Pares v√°lidos (con superposici√≥n): {len(df_distances_valid):,}")

# Obtener lista de pa√≠ses √∫nicos en la cache
countries_in_cache = set(df_distances['country1'].unique()) | set(df_distances['country2'].unique())
print(f"   üåç Pa√≠ses √∫nicos en cache: {len(countries_in_cache)}")

# Cargar datos completos de GNI
print("\nüìä Cargando datos de GNI...")
indicadores_file = data_dir / 'indicadores' / 'todos_los_datos.csv'
df_indicadores = pd.read_csv(indicadores_file, index_col=0)

# Extraer datos de GNI
gni_data = df_indicadores[['country_code', 'country_name', 'year', 'gni']].copy()
gni_data = gni_data.dropna(subset=['gni'])

# FILTRAR solo pa√≠ses en cache
gni_data = gni_data[gni_data['country_code'].isin(countries_in_cache)].copy()

# FILTRAR solo a√±os que participaron en la cache
# (para detectar el rango, buscamos el min/max a√±o que tienen TODOS los pa√≠ses de la cache)
year_ranges = []
for country in countries_in_cache:
    country_years = gni_data[gni_data['country_code'] == country]['year']
    if len(country_years) > 0:
        year_ranges.append((country_years.min(), country_years.max()))

if year_ranges:
    # El rango com√∫n es el m√°ximo de los m√≠nimos y el m√≠nimo de los m√°ximos
    cache_year_start = max(yr[0] for yr in year_ranges)
    cache_year_end = min(yr[1] for yr in year_ranges)
    
    # Filtrar a√±os
    gni_data = gni_data[
        (gni_data['year'] >= cache_year_start) & 
        (gni_data['year'] <= cache_year_end)
    ].copy()
    
    print(f"   ‚úÖ Datos GNI (filtrados a pa√≠ses y a√±os de cache): {gni_data.shape}")
    print(f"   üåç Pa√≠ses: {gni_data['country_code'].nunique()}")
    print(f"   üìÖ Rango a√±os de la cache: {cache_year_start} - {cache_year_end}")
else:
    print(f"   ‚úÖ Datos GNI (filtrados a pa√≠ses en cache): {gni_data.shape}")
    print(f"   üåç Pa√≠ses: {gni_data['country_code'].nunique()}")
    print(f"   üìÖ Rango a√±os: {gni_data['year'].min()} - {gni_data['year'].max()}")

# Crear lookup de nombres de pa√≠ses
country_names = gni_data[['country_code', 'country_name']].drop_duplicates().set_index('country_code')['country_name'].to_dict()
print(f"\n‚úÖ Total de pa√≠ses disponibles: {len(country_names)}")

üì¶ Cargando cache de distancias DTW...
   ‚úÖ Distancias cargadas: 9,870 pares
   üìä Columnas: ['country1', 'country2', 'dtw_distance', 'overlap_years']
   ‚úÖ Pares v√°lidos (con superposici√≥n): 9,870
   üåç Pa√≠ses √∫nicos en cache: 141

üìä Cargando datos de GNI...
   ‚úÖ Datos GNI (filtrados a pa√≠ses y a√±os de cache): (4653, 4)
   üåç Pa√≠ses: 141
   üìÖ Rango a√±os de la cache: 1989 - 2021

‚úÖ Total de pa√≠ses disponibles: 141


## 3. Funciones de Utilidad

In [3]:
def get_country_series(country_code, standardize=False):
    """
    Obtiene la serie temporal de GNI para un pa√≠s.
    
    Args:
        country_code: C√≥digo del pa√≠s
        standardize: Si True, retorna serie estandarizada (Z-score)
    
    Returns:
        years, values: Arrays con a√±os y valores GNI
    """
    country_data = gni_data[gni_data['country_code'] == country_code].sort_values('year')
    
    years = country_data['year'].values
    values = country_data['gni'].values
    
    if standardize and len(values) > 1:
        mean_val = np.mean(values)
        std_val = np.std(values)
        if std_val > 0:
            values = (values - mean_val) / std_val
    
    return years, values


def get_dtw_distance(country1, country2):
    """
    Obtiene la distancia DTW entre dos pa√≠ses desde la cache.
    
    Args:
        country1: C√≥digo del primer pa√≠s
        country2: C√≥digo del segundo pa√≠s
    
    Returns:
        dtw_distance, overlap_years: Distancia DTW y a√±os de superposici√≥n
    """
    # Buscar en ambas direcciones
    mask1 = (df_distances_valid['country1'] == country1) & (df_distances_valid['country2'] == country2)
    mask2 = (df_distances_valid['country1'] == country2) & (df_distances_valid['country2'] == country1)
    
    result = df_distances_valid[mask1 | mask2]
    
    if len(result) > 0:
        row = result.iloc[0]
        return row['dtw_distance'], int(row['overlap_years'])
    else:
        return None, 0


def get_overlap_years(country1, country2):
    """
    Encuentra los a√±os superpuestos entre dos pa√≠ses.
    
    Args:
        country1: C√≥digo del primer pa√≠s
        country2: C√≥digo del segundo pa√≠s
    
    Returns:
        overlap_years: Array con a√±os superpuestos
    """
    years1, _ = get_country_series(country1)
    years2, _ = get_country_series(country2)
    
    return np.intersect1d(years1, years2)


def get_top_similar_countries(country_code, n=10):
    """
    Obtiene los N pa√≠ses m√°s similares a un pa√≠s dado.
    
    Args:
        country_code: C√≥digo del pa√≠s
        n: N√∫mero de pa√≠ses m√°s similares a retornar
    
    Returns:
        DataFrame con pa√≠ses m√°s similares
    """
    # Buscar todas las comparaciones con este pa√≠s
    mask1 = df_distances_valid['country1'] == country_code
    mask2 = df_distances_valid['country2'] == country_code
    
    comparisons = df_distances_valid[mask1 | mask2].copy()
    
    # Normalizar para que el pa√≠s est√© siempre en country1
    mask = comparisons['country2'] == country_code
    comparisons.loc[mask, ['country1', 'country2']] = comparisons.loc[mask, ['country2', 'country1']].values
    
    # Agregar nombres
    comparisons['country2_name'] = comparisons['country2'].map(country_names)
    
    # Ordenar por distancia y retornar top N
    top_similar = comparisons.nsmallest(n, 'dtw_distance')
    
    return top_similar[['country2', 'country2_name', 'dtw_distance', 'overlap_years']]


print("‚úÖ Funciones de utilidad definidas")

‚úÖ Funciones de utilidad definidas


In [4]:
get_country_series ('ARG', standardize=True)
get_country_series ('BRA', standardize=True)


(array([1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
        2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
        2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021]),
 array([-1.0897927 , -1.08814671, -1.16099379, -1.18220873, -1.11934513,
        -0.98118032, -0.65086598, -0.53696277, -0.49509331, -0.52731085,
        -0.90201841, -0.82358099, -0.95773266, -1.02724897, -0.9595494 ,
        -0.80525877, -0.49901555, -0.19601912,  0.21099021,  0.61701487,
         0.58281246,  1.30544105,  1.88343872,  1.67788861,  1.72706434,
         1.6871711 ,  0.77842627,  0.76327574,  1.14170704,  0.91337108,
         0.8558483 ,  0.32122211,  0.53665227]))

In [5]:
get_overlap_years ('ARG', 'BRA')

array([1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
       2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
       2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021])

## 4. Dashboard Interactivo Principal

Selecciona dos pa√≠ses para comparar sus trayectorias de GNI y visualizar el an√°lisis DTW.

In [6]:
def create_comparison_dashboard():
    """
    Crea un dashboard interactivo para comparar dos pa√≠ses.
    """
    # Obtener lista de pa√≠ses ordenada
    countries = sorted(country_names.keys())
    country_options = [(f"{code} - {country_names[code]}", code) for code in countries]
    
    # Widgets de selecci√≥n de pa√≠ses
    country1_dropdown = widgets.Dropdown(
        options=country_options,
        value='ARG',
        description='Pa√≠s 1:',
        style={'description_width': '100px'},
        layout=widgets.Layout(width='400px')
    )
    
    country2_dropdown = widgets.Dropdown(
        options=country_options,
        value='URY',
        description='Pa√≠s 2:',
        style={'description_width': '100px'},
        layout=widgets.Layout(width='400px')
    )
    
    # Checkbox para series estandarizadas
    standardize_checkbox = widgets.Checkbox(
        value=True,
        description='Usar series estandarizadas (Z-score)',
        style={'description_width': 'initial'}
    )
    
    # Checkbox para mostrar path DTW
    show_path_checkbox = widgets.Checkbox(
        value=True,
        description='Mostrar path de alineamiento DTW',
        style={'description_width': 'initial'}
    )
    
    # Bot√≥n de actualizaci√≥n
    update_button = widgets.Button(
        description='üîÑ Actualizar Visualizaci√≥n',
        button_style='primary',
        layout=widgets.Layout(width='250px')
    )
    
    # Output widget
    output = widgets.Output()
    
    def update_visualization(b):
        """Actualiza la visualizaci√≥n cuando se hace clic en el bot√≥n."""
        with output:
            output.clear_output(wait=True)
            
            country1 = country1_dropdown.value
            country2 = country2_dropdown.value
            use_standardized = standardize_checkbox.value
            show_path = show_path_checkbox.value
            
            if country1 == country2:
                print("‚ö†Ô∏è  Por favor selecciona dos pa√≠ses diferentes")
                return
            
            # Obtener nombres
            name1 = country_names[country1]
            name2 = country_names[country2]
            
            # Obtener series temporales
            years1, values1 = get_country_series(country1, use_standardized)
            years2, values2 = get_country_series(country2, use_standardized)
            
            # Obtener a√±os superpuestos
            overlap_years = get_overlap_years(country1, country2)
            
            # Obtener distancia DTW
            dtw_distance, overlap_count = get_dtw_distance(country1, country2)
            
            # Crear visualizaci√≥n
            data_type = "Estandarizado (Z-score)" if use_standardized else "Absoluto (USD)"
            y_label = "GNI (Z-score)" if use_standardized else "GNI (USD)"
            
            if show_path:
                # Crear subplot con 2 columnas: series + path (lado a lado)
                fig = make_subplots(
                    rows=1, cols=2,
                    column_widths=[0.5, 0.5],
                    subplot_titles=(
                        f'Series Temporales de GNI - {data_type}',
                        'Path de Alineamiento DTW'
                    ),
                    horizontal_spacing=0.10
                )
                
                # Series temporales completas (fila 1)
                fig.add_trace(
                    go.Scatter(
                        x=years1,
                        y=values1,
                        mode='lines+markers',
                        name=f"{name1} ({country1})",
                        line=dict(color='darkgreen', width=2.5),
                        marker=dict(size=5),
                        hovertemplate=f'<b>{name1}</b><br>A√±o: %{{x}}<br>{y_label}: %{{y:.2f}}<extra></extra>',
                        legendgroup='countries',
                        legendgrouptitle_text='Pa√≠ses'
                    ),
                    row=1, col=1
                )
                
                fig.add_trace(
                    go.Scatter(
                        x=years2,
                        y=values2,
                        mode='lines+markers',
                        name=f"{name2} ({country2})",
                        line=dict(color='darkblue', width=2.5),
                        marker=dict(size=5),
                        hovertemplate=f'<b>{name2}</b><br>A√±o: %{{x}}<br>{y_label}: %{{y:.2f}}<extra></extra>',
                        legendgroup='countries',
                        legendgrouptitle_text='Pa√≠ses'
                    ),
                    row=1, col=1
                )
                
                # Resaltar per√≠odo superpuesto
                if len(overlap_years) > 0:
                    fig.add_vrect(
                        x0=overlap_years[0],
                        x1=overlap_years[-1],
                        fillcolor="lightgray",
                        opacity=0.2,
                        line_width=0,
                        annotation_text="Per√≠odo superpuesto",
                        annotation_position="top left",
                        row=1, col=1
                    )
                
                # Path de alineamiento DTW (columna 2)
                # Extraer solo datos superpuestos para calcular path
                idx1 = np.isin(years1, overlap_years)
                idx2 = np.isin(years2, overlap_years)
                
                series1_overlap = values1[idx1]
                series2_overlap = values2[idx2]
                years1_overlap = years1[idx1]
                years2_overlap = years2[idx2]
                
                # Calcular path DTW
                path = dtw.warping_path(series1_overlap, series2_overlap)
                
                # Convertir √≠ndices a a√±os reales
                path_years1 = [years1_overlap[i] for i, j in path]
                path_years2 = [years2_overlap[j] for i, j in path]
                
                # Graficar path
                fig.add_trace(
                    go.Scatter(
                        x=path_years1,
                        y=path_years2,
                        mode='lines+markers',
                        name='Path DTW',
                        line=dict(color='red', width=1.5),
                        marker=dict(size=3, color='red'),
                        hovertemplate=f'{name1}: %{{x}}<br>{name2}: %{{y}}<extra></extra>',
                        showlegend=False  # Ocultamos de la leyenda principal
                    ),
                    row=1, col=2
                )
                
                # L√≠nea diagonal (alineamiento perfecto)
                fig.add_trace(
                    go.Scatter(
                        x=[overlap_years[0], overlap_years[-1]],
                        y=[overlap_years[0], overlap_years[-1]],
                        mode='lines',
                        name='Alineamiento 1:1',
                        line=dict(color='gray', width=1, dash='dash'),
                        hoverinfo='skip',
                        showlegend=False  # Ocultamos de la leyenda principal
                    ),
                    row=1, col=2
                )
                
                # Actualizar ejes
                fig.update_xaxes(title_text="A√±o", row=1, col=1)
                fig.update_yaxes(title_text=y_label, row=1, col=1)
                fig.update_xaxes(title_text=f"A√±o - {name1}", dtick=5, row=1, col=2)
                fig.update_yaxes(title_text=f"A√±o - {name2}", dtick=5, row=1, col=2)
                
                fig.update_layout(
                    height=700,
                    width=1400,
                    showlegend=True,
                    template='plotly_white',
                    hovermode='closest',
                    legend=dict(
                        x=0.01,
                        y=0.99,
                        xanchor='left',
                        yanchor='top',
                        bgcolor='rgba(255,255,255,0.8)',
                        bordercolor='rgba(0,0,0,0.2)',
                        borderwidth=1
                    )
                )
                
                # Agregar leyenda personalizada para el subplot 2 como anotaci√≥n
                fig.add_annotation(
                    x=0.55,
                    y=0.98,
                    xref='paper',
                    yref='paper',
                    text='<b>Alineamiento</b><br>' +
                         '<span style="color:red;">‚îÅ‚îÅ‚îÅ</span> Path DTW<br>' +
                         '<span style="color:gray;">- - -</span> Alineamiento 1:1',
                    showarrow=False,
                    align='left',
                    xanchor='left',
                    yanchor='top',
                    bgcolor='rgba(255,255,255,0.8)',
                    bordercolor='rgba(0,0,0,0.2)',
                    borderwidth=1,
                    borderpad=8
                )
                
            else:
                # Solo series temporales
                fig = go.Figure()
                
                fig.add_trace(
                    go.Scatter(
                        x=years1,
                        y=values1,
                        mode='lines+markers',
                        name=f"{name1} ({country1})",
                        line=dict(color='darkgreen', width=3),
                        marker=dict(size=6),
                        hovertemplate=f'<b>{name1}</b><br>A√±o: %{{x}}<br>{y_label}: %{{y:.2f}}<extra></extra>'
                    )
                )
                
                fig.add_trace(
                    go.Scatter(
                        x=years2,
                        y=values2,
                        mode='lines+markers',
                        name=f"{name2} ({country2})",
                        line=dict(color='darkblue', width=3),
                        marker=dict(size=6),
                        hovertemplate=f'<b>{name2}</b><br>A√±o: %{{x}}<br>{y_label}: %{{y:.2f}}<extra></extra>'
                    )
                )
                
                # Resaltar per√≠odo superpuesto
                if len(overlap_years) > 0:
                    fig.add_vrect(
                        x0=overlap_years[0],
                        x1=overlap_years[-1],
                        fillcolor="lightgray",
                        opacity=0.2,
                        line_width=0,
                        annotation_text="Per√≠odo superpuesto para DTW",
                        annotation_position="top left"
                    )
                
                fig.update_layout(
                    title=f'Comparaci√≥n de Series Temporales - {data_type}<br>' +
                          f'<sub>DTW Distance: {dtw_distance:.4f} | {overlap_count} a√±os superpuestos</sub>',
                    xaxis_title='A√±o',
                    yaxis_title=y_label,
                    height=600,
                    width=1100,
                    template='plotly_white',
                    hovermode='x unified'
                )
            
            fig.show()
            
            # Informaci√≥n general (DESPU√âS del gr√°fico)
            print(f"\n{'='*80}")
            print(f"üìä COMPARACI√ìN: {name1} ({country1}) vs {name2} ({country2})")
            print(f"{'='*80}\n")
            
            print(f"üìà Informaci√≥n de series:")
            print(f"   {name1}: {years1[0]} - {years1[-1]} ({len(years1)} a√±os)")
            print(f"   {name2}: {years2[0]} - {years2[-1]} ({len(years2)} a√±os)")
            print(f"   Superposici√≥n: {overlap_years[0]} - {overlap_years[-1]} ({len(overlap_years)} a√±os)\n")
            
            if dtw_distance is not None:
                print(f"üéØ Distancia DTW: {dtw_distance:.4f}")
                print(f"   (Calculada sobre {overlap_count} a√±os superpuestos)\n")
            else:
                print(f"‚ö†Ô∏è  No hay distancia DTW calculada para este par\n")
            
            # Mostrar rankings
            print(f"\n{'='*80}")
            print(f"üèÜ Top 10 pa√≠ses m√°s similares a {name1} ({country1}):")
            print(f"{'='*80}\n")
            
            top_similar_1 = get_top_similar_countries(country1, 10)
            for idx, row in top_similar_1.iterrows():
                marker = "üëâ" if row['country2'] == country2 else "  "
                print(f"{marker} {row['country2']:5s} - {row['country2_name']:30s} | "
                      f"DTW: {row['dtw_distance']:7.4f} | {int(row['overlap_years']):2d} a√±os")
            
            print(f"\n{'='*80}")
            print(f"üèÜ Top 10 pa√≠ses m√°s similares a {name2} ({country2}):")
            print(f"{'='*80}\n")
            
            top_similar_2 = get_top_similar_countries(country2, 10)
            for idx, row in top_similar_2.iterrows():
                marker = "üëâ" if row['country2'] == country1 else "  "
                print(f"{marker} {row['country2']:5s} - {row['country2_name']:30s} | "
                      f"DTW: {row['dtw_distance']:7.4f} | {int(row['overlap_years']):2d} a√±os")
    
    # Conectar bot√≥n con funci√≥n
    update_button.on_click(update_visualization)
    
    # Layout del dashboard
    controls = widgets.VBox([
        widgets.HTML("<h3>üéõÔ∏è Controles de Comparaci√≥n</h3>"),
        widgets.HBox([country1_dropdown, country2_dropdown]),
        widgets.HBox([standardize_checkbox, show_path_checkbox]),
        update_button,
        widgets.HTML("<hr>")
    ])
    
    display(controls, output)
    
    # Trigger inicial
    update_visualization(None)


# Crear dashboard
create_comparison_dashboard()

VBox(children=(HTML(value='<h3>üéõÔ∏è Controles de Comparaci√≥n</h3>'), HBox(children=(Dropdown(description='Pa√≠s 1‚Ä¶

Output()

## 5. Heatmap de Distancias DTW

Visualizaci√≥n de matriz de distancias entre grupos de pa√≠ses seleccionados.

In [7]:
def create_heatmap_widget():
    """
    Crea un widget interactivo para visualizar heatmap de distancias DTW.
    """
    # Pa√≠ses predefinidos interesantes
    regions = {
        'Am√©rica Latina': ['ARG', 'BRA', 'CHL', 'COL', 'MEX', 'PER', 'URY', 'VEN'],
        'Europa Occidental': ['FRA', 'DEU', 'GBR', 'ITA', 'ESP', 'NLD', 'BEL', 'AUT'],
        'Asia-Pac√≠fico': ['CHN', 'JPN', 'KOR', 'IND', 'IDN', 'THA', 'MYS', 'SGP'],
        'BRICS': ['BRA', 'RUS', 'IND', 'CHN', 'ZAF'],
        'G7': ['USA', 'JPN', 'DEU', 'GBR', 'FRA', 'ITA', 'CAN'],
        'Top 10 Similar a Argentina': None  # Se llenar√° din√°micamente
    }
    
    # Widget de selecci√≥n de regi√≥n
    region_dropdown = widgets.Dropdown(
        options=list(regions.keys()),
        value='Am√©rica Latina',
        description='Regi√≥n:',
        style={'description_width': '100px'},
        layout=widgets.Layout(width='400px')
    )
    
    # Bot√≥n de actualizaci√≥n
    update_button = widgets.Button(
        description='üîÑ Generar Heatmap',
        button_style='info',
        layout=widgets.Layout(width='200px')
    )
    
    # Output
    output = widgets.Output()
    
    def update_heatmap(b):
        with output:
            output.clear_output(wait=True)
            
            region_name = region_dropdown.value
            
            # Obtener lista de pa√≠ses
            if region_name == 'Top 10 Similar a Argentina':
                top_similar = get_top_similar_countries('ARG', 10)
                selected_countries = ['ARG'] + top_similar['country2'].tolist()
            else:
                selected_countries = regions[region_name]
            
            # Filtrar pa√≠ses que existen en los datos
            selected_countries = [c for c in selected_countries if c in country_names]
            
            if len(selected_countries) < 2:
                print(f"‚ö†Ô∏è  No hay suficientes pa√≠ses disponibles para {region_name}")
                return
            
            print(f"üìä Generando heatmap para: {region_name}")
            print(f"   Pa√≠ses: {', '.join(selected_countries)}\n")
            
            # Crear matriz de distancias
            n = len(selected_countries)
            distance_matrix = np.zeros((n, n))
            
            for i, c1 in enumerate(selected_countries):
                for j, c2 in enumerate(selected_countries):
                    if i == j:
                        distance_matrix[i, j] = 0
                    else:
                        dist, _ = get_dtw_distance(c1, c2)
                        distance_matrix[i, j] = dist if dist is not None else np.nan
            
            # Crear labels con nombres de pa√≠ses
            labels = [f"{c}\n{country_names[c][:15]}" for c in selected_countries]
            
            # Crear heatmap con plotly
            fig = go.Figure(data=go.Heatmap(
                z=distance_matrix,
                x=labels,
                y=labels,
                colorscale='RdYlGn_r',
                text=distance_matrix,
                texttemplate='%{text:.3f}',
                textfont={"size": 10},
                colorbar=dict(title="DTW Distance"),
                hovertemplate='%{y} vs %{x}<br>DTW: %{z:.4f}<extra></extra>'
            ))
            
            fig.update_layout(
                title=f'Heatmap de Distancias DTW - {region_name}<br>' +
                      '<sub>Colores m√°s verdes = mayor similitud</sub>',
                width=900,
                height=800,
                xaxis=dict(side='bottom'),
                yaxis=dict(side='left')
            )
            
            fig.show()
            
            # Estad√≠sticas
            valid_distances = distance_matrix[~np.isnan(distance_matrix) & (distance_matrix > 0)]
            if len(valid_distances) > 0:
                print(f"\nüìà Estad√≠sticas de distancias:")
                print(f"   M√≠nima: {valid_distances.min():.4f}")
                print(f"   M√°xima: {valid_distances.max():.4f}")
                print(f"   Media: {valid_distances.mean():.4f}")
                print(f"   Mediana: {np.median(valid_distances):.4f}")
    
    update_button.on_click(update_heatmap)
    
    controls = widgets.VBox([
        widgets.HTML("<h3>üó∫Ô∏è Heatmap de Distancias por Regi√≥n</h3>"),
        region_dropdown,
        update_button,
        widgets.HTML("<hr>")
    ])
    
    display(controls, output)
    
    # Trigger inicial
    update_heatmap(None)


# Crear widget de heatmap
create_heatmap_widget()

VBox(children=(HTML(value='<h3>üó∫Ô∏è Heatmap de Distancias por Regi√≥n</h3>'), Dropdown(description='Regi√≥n:', lay‚Ä¶

Output()

## 6. Explorador de Rankings

Explora los pa√≠ses m√°s y menos similares a un pa√≠s seleccionado.

In [8]:
# Cargar datos de clusters para el explorador de rankings
print("üìä Cargando datos de clusters para rankings...")

# Cluster Banco Mundial
clusters_bm_file = data_dir / 'analisis' / 'clusters_2022_bco_mundial.csv'
df_clusters_bm = pd.read_csv(clusters_bm_file)
print(f"   ‚úÖ Clusters BM cargados: {len(df_clusters_bm)} pa√≠ses")

# Cluster K-Means (4 clusters basado en GNI index)
clusters_km_file = data_dir / 'analisis' / 'clusters_2022_4C_gni_index.csv'
df_clusters_km = pd.read_csv(clusters_km_file)
print(f"   ‚úÖ Clusters K-Means cargados: {len(df_clusters_km)} pa√≠ses")

# Cluster K-Means DTW (4 clusters basado en distancias DTW)
clusters_km_dtw_file = data_dir / 'analisis' / 'clusters_dtw_4C.csv'
df_clusters_km_dtw = pd.read_csv(clusters_km_dtw_file)
print(f"   ‚úÖ Clusters K-Means DTW cargados: {len(df_clusters_km_dtw)} pa√≠ses")

# Crear diccionarios de lookup
cluster_bm_lookup = df_clusters_bm.set_index('country_code')['Cluster Mapped'].to_dict()
cluster_km_lookup = df_clusters_km.set_index('country_code')['Cluster Mapped'].to_dict()
cluster_km_dtw_lookup = df_clusters_km_dtw.set_index('country_code')['cluster_name'].to_dict()

# Crear lookup de regiones desde el CSV del Banco Mundial
region_lookup = df_clusters_bm.set_index('country_code')['Region'].to_dict()

print(f"\n‚úÖ Lookups creados: {len(cluster_bm_lookup)} pa√≠ses BM, {len(cluster_km_lookup)} K-Means, {len(cluster_km_dtw_lookup)} K-Means DTW")
print(f"‚úÖ Lookup de regiones: {len(region_lookup)} pa√≠ses")

üìä Cargando datos de clusters para rankings...


FileNotFoundError: [Errno 2] No such file or directory: '/Users/mkurno/Documents/GitHub/evolucion_economica_argentina/data/analisis/clusters_2022_bco_mundial.csv'

In [None]:
# Desagregar regiones agrupadas del Banco Mundial
print("\nüó∫Ô∏è  Desagregando regiones agrupadas...")

# Mapeo de pa√≠ses a regiones espec√≠ficas
region_mapping = {
    # Europe & Central Asia ‚Üí separar en Europa y Asia Central
    'ALB': 'Europa', 'ARM': 'Asia Central', 'AUT': 'Europa', 'AZE': 'Asia Central',
    'BLR': 'Europa', 'BEL': 'Europa', 'BIH': 'Europa', 'BGR': 'Europa',
    'HRV': 'Europa', 'CYP': 'Europa', 'CZE': 'Europa', 'DNK': 'Europa',
    'EST': 'Europa', 'FIN': 'Europa', 'FRA': 'Europa', 'FRO': 'Europa',
    'GEO': 'Asia Central', 'DEU': 'Europa', 'GRC': 'Europa', 'HUN': 'Europa',
    'ISL': 'Europa', 'IRL': 'Europa', 'ITA': 'Europa', 'KAZ': 'Asia Central',
    'XKX': 'Europa', 'KGZ': 'Asia Central', 'LVA': 'Europa', 'LIE': 'Europa',
    'LTU': 'Europa', 'LUX': 'Europa', 'MKD': 'Europa', 'MDA': 'Europa',
    'MCO': 'Europa', 'MNE': 'Europa', 'NLD': 'Europa', 'NOR': 'Europa',
    'POL': 'Europa', 'PRT': 'Europa', 'ROU': 'Europa', 'RUS': 'Europa',
    'SMR': 'Europa', 'SRB': 'Europa', 'SVK': 'Europa', 'SVN': 'Europa',
    'ESP': 'Europa', 'SWE': 'Europa', 'CHE': 'Europa', 'TJK': 'Asia Central',
    'TUR': 'Europa', 'TKM': 'Asia Central', 'UKR': 'Europa', 'GBR': 'Europa',
    'UZB': 'Asia Central', 'VAT': 'Europa',
    
    # Latin America & Caribbean ‚Üí separar en Am√©rica Latina y Caribe
    'ARG': 'Am√©rica Latina', 'ABW': 'Caribe', 'ATG': 'Caribe', 'BHS': 'Caribe',
    'BRB': 'Caribe', 'BLZ': 'Am√©rica Latina', 'BOL': 'Am√©rica Latina',
    'BRA': 'Am√©rica Latina', 'CHL': 'Am√©rica Latina', 'COL': 'Am√©rica Latina',
    'CRI': 'Am√©rica Latina', 'CUB': 'Caribe', 'DMA': 'Caribe', 'DOM': 'Caribe',
    'ECU': 'Am√©rica Latina', 'SLV': 'Am√©rica Latina', 'GRD': 'Caribe',
    'GTM': 'Am√©rica Latina', 'GUY': 'Am√©rica Latina', 'HTI': 'Caribe',
    'HND': 'Am√©rica Latina', 'JAM': 'Caribe', 'MEX': 'Am√©rica Latina',
    'NIC': 'Am√©rica Latina', 'PAN': 'Am√©rica Latina', 'PRY': 'Am√©rica Latina',
    'PER': 'Am√©rica Latina', 'PRI': 'Caribe', 'KNA': 'Caribe', 'LCA': 'Caribe',
    'VCT': 'Caribe', 'SUR': 'Am√©rica Latina', 'SXM': 'Caribe', 'TCA': 'Caribe',
    'TTO': 'Caribe', 'URY': 'Am√©rica Latina', 'VEN': 'Am√©rica Latina',
    
    # Middle East & North Africa ‚Üí separar en Medio Oriente y Norte de √Åfrica
    'DZA': 'Norte de √Åfrica', 'BHR': 'Medio Oriente', 'DJI': 'Norte de √Åfrica',
    'EGY': 'Norte de √Åfrica', 'IRN': 'Medio Oriente', 'IRQ': 'Medio Oriente',
    'ISR': 'Medio Oriente', 'JOR': 'Medio Oriente', 'KWT': 'Medio Oriente',
    'LBN': 'Medio Oriente', 'LBY': 'Norte de √Åfrica', 'MLT': 'Europa',
    'MAR': 'Norte de √Åfrica', 'OMN': 'Medio Oriente', 'PSE': 'Medio Oriente',
    'QAT': 'Medio Oriente', 'SAU': 'Medio Oriente', 'SYR': 'Medio Oriente',
    'TUN': 'Norte de √Åfrica', 'ARE': 'Medio Oriente', 'YEM': 'Medio Oriente'
}

# Aplicar mapeo a los DataFrames
df_clusters_bm['Region'] = df_clusters_bm.apply(
    lambda row: region_mapping.get(row['country_code'], row['Region']), 
    axis=1
)

# Actualizar region_lookup
region_lookup = df_clusters_bm.set_index('country_code')['Region'].to_dict()

# Mostrar estad√≠sticas
print("\nüìä Distribuci√≥n de regiones despu√©s de desagregar:")
region_counts = df_clusters_bm['Region'].value_counts()
for region, count in region_counts.items():
    print(f"   ‚Ä¢ {region}: {count} pa√≠ses")

print(f"\n‚úÖ Regiones desagregadas correctamente")
print(f"‚úÖ Total de regiones √∫nicas: {df_clusters_bm['Region'].nunique()}")


üó∫Ô∏è  Desagregando regiones agrupadas...

üìä Distribuci√≥n de regiones despu√©s de desagregar:
   ‚Ä¢ Sub-Saharan Africa: 44 pa√≠ses
   ‚Ä¢ Europa: 42 pa√≠ses
   ‚Ä¢ East Asia & Pacific: 29 pa√≠ses
   ‚Ä¢ Am√©rica Latina: 19 pa√≠ses
   ‚Ä¢ Caribe: 16 pa√≠ses
   ‚Ä¢ Medio Oriente: 11 pa√≠ses
   ‚Ä¢ Asia Central: 8 pa√≠ses
   ‚Ä¢ South Asia: 6 pa√≠ses
   ‚Ä¢ Norte de √Åfrica: 6 pa√≠ses
   ‚Ä¢ North America: 3 pa√≠ses

‚úÖ Regiones desagregadas correctamente
‚úÖ Total de regiones √∫nicas: 10


In [None]:
def create_ranking_explorer():
    """
    Crea un explorador de rankings de similitud en formato tabla.
    """
    # Obtener lista √∫nica de regiones del CSV (ordenadas)
    unique_regions = sorted(df_clusters_bm['Region'].dropna().unique())
    region_options = ['Todas'] + unique_regions
    
    # Obtener lista √∫nica de clusters Banco Mundial
    unique_clusters_bm = sorted(df_clusters_bm['Cluster Mapped'].dropna().unique())
    cluster_bm_options = ['Todos'] + unique_clusters_bm
    
    # Obtener lista √∫nica de clusters K-means
    unique_clusters_km = sorted(df_clusters_km['Cluster Mapped'].dropna().unique())
    cluster_km_options = ['Todos'] + unique_clusters_km
    
    # Obtener lista √∫nica de clusters K-means DTW
    unique_clusters_km_dtw = sorted(df_clusters_km_dtw['cluster_name'].dropna().unique())
    cluster_km_dtw_options = ['Todos'] + unique_clusters_km_dtw
    
    # Obtener lista de pa√≠ses ordenada
    countries = sorted(country_names.keys())
    country_options = [(f"{code} - {country_names[code]}", code) for code in countries]
    
    # Widget de selecci√≥n de pa√≠s
    country_dropdown = widgets.Dropdown(
        options=country_options,
        value='ARG',
        description='Pa√≠s:',
        style={'description_width': '100px'},
        layout=widgets.Layout(width='400px')
    )
    
    # Widget de selecci√≥n de regi√≥n
    region_dropdown = widgets.Dropdown(
        options=region_options,
        value='Todas',
        description='Regi√≥n:',
        style={'description_width': '100px'},
        layout=widgets.Layout(width='400px')
    )
    
    # Widget de selecci√≥n de cluster Banco Mundial
    cluster_bm_dropdown = widgets.Dropdown(
        options=cluster_bm_options,
        value='Todos',
        description='Cluster BM:',
        style={'description_width': '100px'},
        layout=widgets.Layout(width='400px')
    )
    
    # Widget de selecci√≥n de cluster K-means
    cluster_km_dropdown = widgets.Dropdown(
        options=cluster_km_options,
        value='Todos',
        description='Cluster K-M:',
        style={'description_width': '100px'},
        layout=widgets.Layout(width='400px')
    )
    
    # Widget de selecci√≥n de cluster K-means DTW
    cluster_km_dtw_dropdown = widgets.Dropdown(
        options=cluster_km_dtw_options,
        value='Todos',
        description='Cluster K-M DTW:',
        style={'description_width': '100px'},
        layout=widgets.Layout(width='400px')
    )
    
    # Slider para n√∫mero de pa√≠ses a mostrar
    n_countries_slider = widgets.IntSlider(
        value=20,
        min=10,
        max=50,
        step=5,
        description='Top N:',
        style={'description_width': '100px'},
        layout=widgets.Layout(width='400px')
    )
    
    # Bot√≥n de actualizaci√≥n
    update_button = widgets.Button(
        description='üîç Buscar Rankings',
        button_style='success',
        layout=widgets.Layout(width='200px')
    )
    
    # Output
    output = widgets.Output()
    
    def update_ranking(b):
        with output:
            output.clear_output(wait=True)
            
            country_code = country_dropdown.value
            n = n_countries_slider.value
            selected_region = region_dropdown.value
            selected_cluster_bm = cluster_bm_dropdown.value
            selected_cluster_km = cluster_km_dropdown.value
            selected_cluster_km_dtw = cluster_km_dtw_dropdown.value
            country_name = country_names[country_code]
            
            print(f"{'='*120}")
            print(f"üîç AN√ÅLISIS DE SIMILITUD: {country_name} ({country_code})")
            filters_applied = []
            if selected_region != 'Todas':
                filters_applied.append(f"Regi√≥n: {selected_region}")
            if selected_cluster_bm != 'Todos':
                filters_applied.append(f"Cluster BM: {selected_cluster_bm}")
            if selected_cluster_km != 'Todos':
                filters_applied.append(f"Cluster K-M: {selected_cluster_km}")
            if selected_cluster_km_dtw != 'Todos':
                filters_applied.append(f"Cluster K-M DTW: {selected_cluster_km_dtw}")
            if filters_applied:
                print(f"üìç Filtros aplicados: {' | '.join(filters_applied)}")
            print(f"{'='*120}\n")
            
            # Obtener todas las comparaciones
            mask1 = df_distances_valid['country1'] == country_code
            mask2 = df_distances_valid['country2'] == country_code
            comparisons = df_distances_valid[mask1 | mask2].copy()
            
            # Normalizar
            mask = comparisons['country2'] == country_code
            comparisons.loc[mask, ['country1', 'country2']] = comparisons.loc[mask, ['country2', 'country1']].values
            
            # Agregar informaci√≥n adicional
            comparisons['country2_name'] = comparisons['country2'].map(country_names)
            comparisons['region'] = comparisons['country2'].map(region_lookup).fillna('N/A')
            comparisons['cluster_bm'] = comparisons['country2'].map(cluster_bm_lookup).fillna('N/A')
            comparisons['cluster_km'] = comparisons['country2'].map(cluster_km_lookup).fillna('N/A')
            comparisons['cluster_km_dtw'] = comparisons['country2'].map(cluster_km_dtw_lookup).fillna('N/A')
            
            # Aplicar filtros
            if selected_region != 'Todas':
                comparisons = comparisons[comparisons['region'] == selected_region].copy()
            
            if selected_cluster_bm != 'Todos':
                comparisons = comparisons[comparisons['cluster_bm'] == selected_cluster_bm].copy()
            
            if selected_cluster_km != 'Todos':
                comparisons = comparisons[comparisons['cluster_km'] == selected_cluster_km].copy()
            
            if selected_cluster_km_dtw != 'Todos':
                comparisons = comparisons[comparisons['cluster_km_dtw'] == selected_cluster_km_dtw].copy()
            
            # Verificar si hay suficientes resultados
            countries_available = len(comparisons)
            if countries_available < n:
                filter_desc = "con los filtros aplicados" if filters_applied else "disponibles"
                print(f"‚ö†Ô∏è  Solo hay {countries_available} pa√≠ses {filter_desc}")
                print(f"   (se solicitaron Top {n}, mostrando los {countries_available} disponibles)\n")
            
            # Top similares (usar min entre n y pa√≠ses disponibles)
            n_actual = min(n, len(comparisons))
            top_similar = comparisons.nsmallest(n_actual, 'dtw_distance')
            
            # Crear tabla con pandas DataFrame para mejor formato
            table_similar = pd.DataFrame({
                'Rank': range(1, len(top_similar) + 1),
                'C√≥digo': top_similar['country2'].values,
                'Pa√≠s': top_similar['country2_name'].values,
                'Regi√≥n': top_similar['region'].values,
                'Cluster BM': top_similar['cluster_bm'].values,
                'Cluster K-M': top_similar['cluster_km'].values,
                'Cluster K-M DTW': top_similar['cluster_km_dtw'].values,
                'DTW Dist': top_similar['dtw_distance'].round(4).values,
                'A√±os': top_similar['overlap_years'].astype(int).values
            })
            
            # Mostrar como HTML con estilos
            from IPython.display import display, HTML
            
            html_similar = f"""
            <h3 style="color: #2E7D32; margin-top: 20px;">‚úÖ TOP {len(top_similar)} PA√çSES M√ÅS SIMILARES (menor distancia DTW)</h3>
            <div style="max-height: 500px; overflow-y: auto; border: 1px solid #ddd; border-radius: 5px;">
            """ + table_similar.to_html(index=False, classes='dataframe', border=0, escape=False) + """
            </div>
            <style>
                .dataframe {
                    width: 100%;
                    border-collapse: collapse;
                    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
                    font-size: 12px;
                }
                .dataframe thead th {
                    background-color: #2E7D32;
                    color: white;
                    padding: 8px 10px;
                    text-align: left;
                    position: sticky;
                    top: 0;
                    z-index: 10;
                }
                .dataframe tbody tr {
                    border-bottom: 1px solid #ddd;
                }
                .dataframe tbody tr:nth-child(even) {
                    background-color: #f9f9f9;
                }
                .dataframe tbody tr:hover {
                    background-color: #e8f5e9;
                    cursor: pointer;
                }
                .dataframe td {
                    padding: 6px 10px;
                    text-align: left;
                }
                .dataframe td:first-child {
                    font-weight: bold;
                    color: #2E7D32;
                }
            </style>
            """
            
            display(HTML(html_similar))
            
            # Top diferentes
            top_different = comparisons.nlargest(n_actual, 'dtw_distance')
            
            table_different = pd.DataFrame({
                'Rank': range(1, len(top_different) + 1),
                'C√≥digo': top_different['country2'].values,
                'Pa√≠s': top_different['country2_name'].values,
                'Regi√≥n': top_different['region'].values,
                'Cluster BM': top_different['cluster_bm'].values,
                'Cluster K-M': top_different['cluster_km'].values,
                'Cluster K-M DTW': top_different['cluster_km_dtw'].values,
                'DTW Dist': top_different['dtw_distance'].round(4).values,
                'A√±os': top_different['overlap_years'].astype(int).values
            })
            
            html_different = f"""
            <h3 style="color: #C62828; margin-top: 30px;">‚ùå TOP {len(top_different)} PA√çSES M√ÅS DIFERENTES (mayor distancia DTW)</h3>
            <div style="max-height: 500px; overflow-y: auto; border: 1px solid #ddd; border-radius: 5px;">
            """ + table_different.to_html(index=False, classes='dataframe-diff', border=0, escape=False) + """
            </div>
            <style>
                .dataframe-diff {
                    width: 100%;
                    border-collapse: collapse;
                    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
                    font-size: 12px;
                }
                .dataframe-diff thead th {
                    background-color: #C62828;
                    color: white;
                    padding: 8px 10px;
                    text-align: left;
                    position: sticky;
                    top: 0;
                    z-index: 10;
                }
                .dataframe-diff tbody tr {
                    border-bottom: 1px solid #ddd;
                }
                .dataframe-diff tbody tr:nth-child(even) {
                    background-color: #f9f9f9;
                }
                .dataframe-diff tbody tr:hover {
                    background-color: #ffebee;
                    cursor: pointer;
                }
                .dataframe-diff td {
                    padding: 6px 10px;
                    text-align: left;
                }
                .dataframe-diff td:first-child {
                    font-weight: bold;
                    color: #C62828;
                }
            </style>
            """
            
            display(HTML(html_different))
            
            # Estad√≠sticas generales
            print(f"\n{'='*120}")
            print(f"üìä ESTAD√çSTICAS GENERALES:\n")
            print(f"   Total de comparaciones: {len(comparisons)}")
            print(f"   Distancia DTW media: {comparisons['dtw_distance'].mean():.4f}")
            print(f"   Distancia DTW mediana: {comparisons['dtw_distance'].median():.4f}")
            print(f"   Rango: [{comparisons['dtw_distance'].min():.4f}, {comparisons['dtw_distance'].max():.4f}]")
            print(f"   Media de a√±os superpuestos: {comparisons['overlap_years'].mean():.1f}")
            
            # Crear gr√°fico de barras con c√≥digo de colores por regi√≥n
            fig = go.Figure()
            
            # Agrupar por regi√≥n
            for region in top_similar['region'].unique():
                region_data = top_similar[top_similar['region'] == region]
                fig.add_trace(go.Bar(
                    name=region,
                    x=region_data['country2'].tolist(),
                    y=region_data['dtw_distance'].tolist(),
                    text=region_data['country2_name'].apply(lambda x: x[:20]).tolist(),
                    hovertemplate='<b>%{text}</b><br>Regi√≥n: ' + region + '<br>DTW: %{y:.4f}<extra></extra>'
                ))
            
            fig.update_layout(
                title=f'Top {len(top_similar)} Pa√≠ses M√°s Similares a {country_name} ({country_code})<br>' +
                      '<sub>Coloreado por regi√≥n</sub>',
                xaxis_title='Pa√≠s',
                yaxis_title='Distancia DTW',
                height=600,
                width=1200,
                template='plotly_white',
                barmode='group',
                showlegend=True
            )
            
            fig.show()
    
    update_button.on_click(update_ranking)
    
    controls = widgets.VBox([
        widgets.HTML("<h3>üèÜ Explorador de Rankings de Similitud</h3>"),
        widgets.HBox([country_dropdown, region_dropdown]),
        widgets.HBox([cluster_bm_dropdown, cluster_km_dropdown]),
        widgets.HBox([cluster_km_dtw_dropdown, n_countries_slider]),
        update_button,
        widgets.HTML("<hr>")
    ])
    
    display(controls, output)
    
    # Trigger inicial
    update_ranking(None)


# Crear explorador de rankings
create_ranking_explorer()

VBox(children=(HTML(value='<h3>üèÜ Explorador de Rankings de Similitud</h3>'), HBox(children=(Dropdown(descripti‚Ä¶

Output()

## 7. An√°lisis de Distribuci√≥n de Distancias

Visualizaci√≥n de la distribuci√≥n general de distancias DTW en todo el dataset.

In [None]:
# Crear visualizaci√≥n de distribuci√≥n
fig = make_subplots(
    rows=2, cols=2,
    subplot_titles=(
        'Histograma de Distancias DTW',
        'Box Plot de Distancias',
        'Distribuci√≥n de A√±os Superpuestos',
        'Scatter: DTW vs A√±os Superpuestos'
    ),
    specs=[[{'type': 'histogram'}, {'type': 'box'}],
           [{'type': 'histogram'}, {'type': 'scatter'}]]
)

# Histograma de distancias
fig.add_trace(
    go.Histogram(
        x=df_distances_valid['dtw_distance'],
        nbinsx=50,
        name='Distancias DTW',
        marker_color='lightblue'
    ),
    row=1, col=1
)

# Box plot de distancias
fig.add_trace(
    go.Box(
        y=df_distances_valid['dtw_distance'],
        name='Distancias DTW',
        marker_color='lightgreen'
    ),
    row=1, col=2
)

# Histograma de a√±os superpuestos
fig.add_trace(
    go.Histogram(
        x=df_distances_valid['overlap_years'],
        nbinsx=30,
        name='A√±os Superpuestos',
        marker_color='coral'
    ),
    row=2, col=1
)

# Scatter plot
fig.add_trace(
    go.Scatter(
        x=df_distances_valid['overlap_years'],
        y=df_distances_valid['dtw_distance'],
        mode='markers',
        marker=dict(size=3, color='purple', opacity=0.5),
        name='Pares de pa√≠ses',
        hovertemplate='A√±os: %{x}<br>DTW: %{y:.4f}<extra></extra>'
    ),
    row=2, col=2
)

fig.update_layout(
    title='An√°lisis de Distribuci√≥n de Distancias DTW',
    height=900,
    width=1200,
    showlegend=False,
    template='plotly_white'
)

fig.update_xaxes(title_text="Distancia DTW", row=1, col=1)
fig.update_yaxes(title_text="Frecuencia", row=1, col=1)
fig.update_yaxes(title_text="Distancia DTW", row=1, col=2)
fig.update_xaxes(title_text="A√±os Superpuestos", row=2, col=1)
fig.update_yaxes(title_text="Frecuencia", row=2, col=1)
fig.update_xaxes(title_text="A√±os Superpuestos", row=2, col=2)
fig.update_yaxes(title_text="Distancia DTW", row=2, col=2)

fig.show()

# Estad√≠sticas resumidas
print("\n" + "="*80)
print("üìä ESTAD√çSTICAS GLOBALES DEL CACHE DTW")
print("="*80 + "\n")

print(f"Total de pares v√°lidos: {len(df_distances_valid):,}")
print(f"Total de pa√≠ses √∫nicos: {len(set(df_distances_valid['country1']) | set(df_distances_valid['country2']))}")

print(f"\nüìè Distancias DTW:")
print(f"   M√≠nima:    {df_distances_valid['dtw_distance'].min():.4f}")
print(f"   M√°xima:    {df_distances_valid['dtw_distance'].max():.4f}")
print(f"   Media:     {df_distances_valid['dtw_distance'].mean():.4f}")
print(f"   Mediana:   {df_distances_valid['dtw_distance'].median():.4f}")
print(f"   Std Dev:   {df_distances_valid['dtw_distance'].std():.4f}")

print(f"\nüìÖ A√±os Superpuestos:")
print(f"   M√≠nimo:    {int(df_distances_valid['overlap_years'].min())} a√±os")
print(f"   M√°ximo:    {int(df_distances_valid['overlap_years'].max())} a√±os")
print(f"   Media:     {df_distances_valid['overlap_years'].mean():.1f} a√±os")
print(f"   Mediana:   {int(df_distances_valid['overlap_years'].median())} a√±os")

# Percentiles
print(f"\nüìä Percentiles de Distancias DTW:")
percentiles = [10, 25, 50, 75, 90, 95, 99]
for p in percentiles:
    val = np.percentile(df_distances_valid['dtw_distance'], p)
    print(f"   P{p:2d}:  {val:.4f}")

print(f"\n" + "="*80)


üìä ESTAD√çSTICAS GLOBALES DEL CACHE DTW

Total de pares v√°lidos: 9,870
Total de pa√≠ses √∫nicos: 141

üìè Distancias DTW:
   M√≠nima:    0.1723
   M√°xima:    5.4199
   Media:     1.5041
   Mediana:   1.3268
   Std Dev:   0.7349

üìÖ A√±os Superpuestos:
   M√≠nimo:    33 a√±os
   M√°ximo:    33 a√±os
   Media:     33.0 a√±os
   Mediana:   33 a√±os

üìä Percentiles de Distancias DTW:
   P10:  0.7716
   P25:  0.9970
   P50:  1.3268
   P75:  1.8158
   P90:  2.4725
   P95:  2.9017
   P99:  4.2786



## 8. Dashboard de Path DTW Detallado

Visualizaci√≥n detallada del path de alineamiento DTW entre dos pa√≠ses, incluyendo informaci√≥n de clusters.

In [None]:
# Cargar datos de clusters
print("üìä Cargando datos de clusters...")

# Cluster Banco Mundial
clusters_bm_file = data_dir / 'analisis' / 'clusters_2022_bco_mundial.csv'
df_clusters_bm = pd.read_csv(clusters_bm_file)
print(f"   ‚úÖ Clusters BM cargados: {len(df_clusters_bm)} pa√≠ses")

# Cluster K-Means (4 clusters basado en GNI index)
clusters_km_file = data_dir / 'analisis' / 'clusters_2022_4C_gni_index.csv'
df_clusters_km = pd.read_csv(clusters_km_file)
print(f"   ‚úÖ Clusters K-Means cargados: {len(df_clusters_km)} pa√≠ses")

# Crear diccionarios de lookup
cluster_bm_lookup = df_clusters_bm.set_index('country_code')['Cluster Mapped'].to_dict()
cluster_km_lookup = df_clusters_km.set_index('country_code')['Cluster Mapped'].to_dict()
cluster_km_number = df_clusters_km.set_index('country_code')['Cluster'].to_dict()

print(f"\n‚úÖ Datos de clusters listos para uso")

üìä Cargando datos de clusters...
   ‚úÖ Clusters BM cargados: 184 pa√≠ses
   ‚úÖ Clusters K-Means cargados: 184 pa√≠ses

‚úÖ Datos de clusters listos para uso


In [None]:
def create_dtw_path_dashboard():
    """
    Crea un dashboard interactivo para visualizar el path DTW detallado entre dos pa√≠ses.
    Incluye informaci√≥n de clusters y an√°lisis comparativo.
    """
    # Obtener lista de pa√≠ses ordenada
    countries = sorted(country_names.keys())
    country_options = [(f"{code} - {country_names[code]}", code) for code in countries]
    
    # Widgets de selecci√≥n de pa√≠ses
    country1_dropdown = widgets.Dropdown(
        options=country_options,
        value='ARG',
        description='Pa√≠s 1:',
        style={'description_width': '100px'},
        layout=widgets.Layout(width='450px')
    )
    
    country2_dropdown = widgets.Dropdown(
        options=country_options,
        value='BRA',
        description='Pa√≠s 2:',
        style={'description_width': '100px'},
        layout=widgets.Layout(width='450px')
    )
    
    # Checkbox para usar datos estandarizados
    standardize_checkbox = widgets.Checkbox(
        value=True,
        description='Usar datos estandarizados (Z-score)',
        style={'description_width': 'initial'}
    )
    
    # Checkbox para mostrar todas las l√≠neas de conexi√≥n
    show_all_lines_checkbox = widgets.Checkbox(
        value=False,
        description='Mostrar todas las l√≠neas de conexi√≥n DTW',
        style={'description_width': 'initial'}
    )
    
    # Checkbox para desfasar series verticalmente
    offset_series_checkbox = widgets.Checkbox(
        value=False,
        description='Desfasar series verticalmente (mejora visualizaci√≥n de conexiones)',
        style={'description_width': 'initial'}
    )
    
    # Bot√≥n de actualizaci√≥n
    update_button = widgets.Button(
        description='üîÑ Generar An√°lisis',
        button_style='primary',
        layout=widgets.Layout(width='200px')
    )
    
    # Output widget
    output = widgets.Output()
    
    def update_analysis(b):
        """Actualiza el an√°lisis cuando se hace clic en el bot√≥n."""
        with output:
            output.clear_output(wait=True)
            
            country1 = country1_dropdown.value
            country2 = country2_dropdown.value
            use_standardized = standardize_checkbox.value
            show_all_lines = show_all_lines_checkbox.value
            offset_series = offset_series_checkbox.value
            
            if country1 == country2:
                print("‚ö†Ô∏è  Por favor selecciona dos pa√≠ses diferentes")
                return
            
            # Obtener nombres
            name1 = country_names[country1]
            name2 = country_names[country2]
            
            # Obtener informaci√≥n de clusters
            cluster_bm1 = cluster_bm_lookup.get(country1, "N/A")
            cluster_bm2 = cluster_bm_lookup.get(country2, "N/A")
            cluster_km1 = cluster_km_lookup.get(country1, "N/A")
            cluster_km2 = cluster_km_lookup.get(country2, "N/A")
            cluster_km_num1 = cluster_km_number.get(country1, "N/A")
            cluster_km_num2 = cluster_km_number.get(country2, "N/A")
            
            # Header con informaci√≥n general
            print(f"{'='*90}")
            print(f"üî¨ AN√ÅLISIS DTW DETALLADO: {name1} ({country1}) vs {name2} ({country2})")
            print(f"{'='*90}\n")
            
            # Informaci√≥n de clusters
            print(f"üìä INFORMACI√ìN DE CLUSTERS:\n")
            print(f"   {name1} ({country1}):")
            print(f"      ‚Ä¢ Cluster Banco Mundial: {cluster_bm1}")
            print(f"      ‚Ä¢ Cluster K-Means (4C):   {cluster_km1} (Cluster #{cluster_km_num1})")
            print(f"\n   {name2} ({country2}):")
            print(f"      ‚Ä¢ Cluster Banco Mundial: {cluster_bm2}")
            print(f"      ‚Ä¢ Cluster K-Means (4C):   {cluster_km2} (Cluster #{cluster_km_num2})")
            
            # Verificar si est√°n en el mismo cluster
            same_bm = cluster_bm1 == cluster_bm2
            same_km = cluster_km1 == cluster_km2
            
            if same_bm and same_km:
                print(f"\n   ‚úÖ Ambos pa√≠ses est√°n en el MISMO cluster (BM y K-Means)")
            elif same_bm:
                print(f"\n   ‚ö†Ô∏è  Mismo cluster BM, pero diferentes clusters K-Means")
            elif same_km:
                print(f"\n   ‚ö†Ô∏è  Mismo cluster K-Means, pero diferentes clusters BM")
            else:
                print(f"\n   ‚ùå Diferentes clusters en ambas clasificaciones")
            
            # Obtener series temporales
            years1, values1 = get_country_series(country1, use_standardized)
            years2, values2 = get_country_series(country2, use_standardized)
            
            # Obtener a√±os superpuestos
            overlap_years = get_overlap_years(country1, country2)
            
            # Obtener distancia DTW
            dtw_distance, overlap_count = get_dtw_distance(country1, country2)
            
            print(f"\n{'='*90}")
            print(f"üìà INFORMACI√ìN DE SERIES TEMPORALES:\n")
            print(f"   {name1}: {years1[0]} - {years1[-1]} ({len(years1)} a√±os)")
            print(f"   {name2}: {years2[0]} - {years2[-1]} ({len(years2)} a√±os)")
            print(f"   Superposici√≥n: {overlap_years[0]} - {overlap_years[-1]} ({len(overlap_years)} a√±os)")
            
            if dtw_distance is not None:
                print(f"\nüéØ Distancia DTW: {dtw_distance:.4f}")
                print(f"   (Basada en {overlap_count} a√±os superpuestos)")
            else:
                print(f"\n‚ö†Ô∏è  No hay distancia DTW calculada para este par")
                return
            
            # Extraer datos superpuestos para DTW
            idx1 = np.isin(years1, overlap_years)
            idx2 = np.isin(years2, overlap_years)
            
            series1_overlap = values1[idx1]
            series2_overlap = values2[idx2]
            years1_overlap = years1[idx1]
            years2_overlap = years2[idx2]
            
            # Calcular path DTW
            path = dtw.warping_path(series1_overlap, series2_overlap)
            
            print(f"\nüîó Path de alineamiento DTW:")
            print(f"   N√∫mero de puntos en el path: {len(path)}")
            print(f"   Compresi√≥n/Expansi√≥n: {len(path) / len(overlap_years):.2f}x")
            
            # Crear visualizaci√≥n con 4 subplots
            data_type = "Estandarizado (Z-score)" if use_standardized else "Absoluto (USD)"
            y_label = "GNI (Z-score)" if use_standardized else "GNI (USD)"
            
            fig = make_subplots(
                rows=2, cols=2,
                row_heights=[0.5, 0.5],
                column_widths=[0.6, 0.4],
                subplot_titles=(
                    f'Series Temporales - {data_type}',
                    'Path de Alineamiento DTW',
                    'Alineamiento DTW: Conexiones entre Puntos Correspondientes',
                    'Diferencias A√±o por A√±o'
                ),
                specs=[
                    [{"type": "scatter"}, {"type": "scatter"}],
                    [{"type": "scatter"}, {"type": "bar"}]
                ],
                vertical_spacing=0.12,
                horizontal_spacing=0.1
            )
            
            # Subplot 1: Series temporales completas
            fig.add_trace(
                go.Scatter(
                    x=years1,
                    y=values1,
                    mode='lines+markers',
                    name=f"{name1}",
                    line=dict(color='darkgreen', width=2.5),
                    marker=dict(size=5),
                    legendgroup='series',
                    hovertemplate=f'<b>{name1}</b><br>A√±o: %{{x}}<br>{y_label}: %{{y:.2f}}<extra></extra>'
                ),
                row=1, col=1
            )
            
            fig.add_trace(
                go.Scatter(
                    x=years2,
                    y=values2,
                    mode='lines+markers',
                    name=f"{name2}",
                    line=dict(color='darkblue', width=2.5),
                    marker=dict(size=5),
                    legendgroup='series',
                    hovertemplate=f'<b>{name2}</b><br>A√±o: %{{x}}<br>{y_label}: %{{y:.2f}}<extra></extra>'
                ),
                row=1, col=1
            )
            
            # Resaltar per√≠odo superpuesto
            if len(overlap_years) > 0:
                fig.add_vrect(
                    x0=overlap_years[0],
                    x1=overlap_years[-1],
                    fillcolor="lightgray",
                    opacity=0.2,
                    line_width=0,
                    annotation_text="Per√≠odo usado en DTW",
                    annotation_position="top left",
                    row=1, col=1
                )
            
            # Subplot 2: Path DTW
            path_years1 = [years1_overlap[i] for i, j in path]
            path_years2 = [years2_overlap[j] for i, j in path]
            
            fig.add_trace(
                go.Scatter(
                    x=path_years1,
                    y=path_years2,
                    mode='lines+markers',
                    name='Path DTW',
                    line=dict(color='red', width=1.5),
                    marker=dict(size=3, color='red'),
                    legendgroup='path',
                    showlegend=False,
                    hovertemplate=f'{name1}: %{{x}}<br>{name2}: %{{y}}<extra></extra>'
                ),
                row=1, col=2
            )
            
            # L√≠nea diagonal (alineamiento 1:1 perfecto)
            fig.add_trace(
                go.Scatter(
                    x=[overlap_years[0], overlap_years[-1]],
                    y=[overlap_years[0], overlap_years[-1]],
                    mode='lines',
                    name='Alineamiento 1:1',
                    line=dict(color='gray', width=1, dash='dash'),
                    legendgroup='reference',
                    showlegend=False,
                    hoverinfo='skip'
                ),
                row=1, col=2
            )
            
            # Calcular desfase si est√° activado
            if offset_series:
                # Calcular rango de valores para determinar desfase apropiado
                value_range = max(series1_overlap.max(), series2_overlap.max()) - min(series1_overlap.min(), series2_overlap.min())
                offset = value_range * 0.3  # Desfase del 30% del rango
                series2_display = series2_overlap + offset
            else:
                offset = 0
                series2_display = series2_overlap
            
            # Primero dibujar las l√≠neas de conexi√≥n seg√∫n el path DTW
            for idx, (i, j) in enumerate(path):
                # Si show_all_lines est√° activado, dibujar todas las l√≠neas
                # Si no, dibujar solo cada 2da l√≠nea para evitar saturaci√≥n visual
                if show_all_lines or idx % 2 == 0:
                    fig.add_trace(
                        go.Scatter(
                            x=[years1_overlap[i], years2_overlap[j]],
                            y=[series1_overlap[i], series2_display[j]],
                            mode='lines',
                            line=dict(color='rgba(150, 150, 150, 0.6)', width=1.5),
                            showlegend=False,
                            hovertemplate=f'{name1}[{years1_overlap[i]}] ‚Üí {name2}[{years2_overlap[j]}]<br>Valor real {name2}: %{{customdata:.3f}}<extra></extra>',
                            customdata=[series2_overlap[j]]
                        ),
                        row=2, col=1
                    )
            
            # Luego las series en s√≠ (usando a√±os como eje X)
            fig.add_trace(
                go.Scatter(
                    x=years1_overlap,
                    y=series1_overlap,
                    mode='lines+markers',
                    name=f"{name1}",
                    line=dict(color='darkgreen', width=2.5),
                    marker=dict(size=7, symbol='circle', color='darkgreen', 
                              line=dict(color='white', width=1)),
                    legendgroup='overlap',
                    showlegend=False,
                    hovertemplate=f'<b>{name1}</b><br>A√±o: %{{x}}<br>Valor: %{{y:.3f}}<extra></extra>'
                ),
                row=2, col=1
            )
            
            fig.add_trace(
                go.Scatter(
                    x=years2_overlap,
                    y=series2_display,
                    mode='lines+markers',
                    name=f"{name2}" + (" (desfasado)" if offset_series else ""),
                    line=dict(color='darkblue', width=2.5),
                    marker=dict(size=7, symbol='diamond', color='darkblue',
                              line=dict(color='white', width=1)),
                    legendgroup='overlap',
                    showlegend=False,
                    customdata=series2_overlap,
                    hovertemplate=f'<b>{name2}</b><br>A√±o: %{{x}}<br>Valor real: %{{customdata:.3f}}<extra></extra>'
                ),
                row=2, col=1
            )
            
            # Subplot 4: Diferencias a√±o por a√±o
            differences = series1_overlap - series2_overlap
            colors = ['red' if d > 0 else 'blue' for d in differences]
            
            fig.add_trace(
                go.Bar(
                    x=years1_overlap,
                    y=differences,
                    name='Diferencia',
                    marker_color=colors,
                    legendgroup='diff',
                    showlegend=False,
                    hovertemplate='A√±o: %{x}<br>Diferencia: %{y:.2f}<extra></extra>'
                ),
                row=2, col=2
            )
            
            # Actualizar ejes
            fig.update_xaxes(title_text="A√±o", row=1, col=1)
            fig.update_yaxes(title_text=y_label, row=1, col=1)
            fig.update_xaxes(title_text=f"A√±o - {name1}", row=1, col=2)
            fig.update_yaxes(title_text=f"A√±o - {name2}", row=1, col=2)
            fig.update_xaxes(title_text="A√±o", row=2, col=1)
            if offset_series:
                fig.update_yaxes(title_text=f"{y_label} (series desfasadas - ver hover para valores reales)", row=2, col=1)
            else:
                fig.update_yaxes(title_text=y_label, row=2, col=1)
            fig.update_xaxes(title_text="A√±o", row=2, col=2)
            fig.update_yaxes(title_text="Diferencia", row=2, col=2)
            
            fig.update_layout(
                height=900,
                width=1300,
                showlegend=True,
                template='plotly_white',
                hovermode='closest',
                title=f'An√°lisis DTW Detallado: {name1} vs {name2}<br>' +
                      f'<sub>DTW Distance: {dtw_distance:.4f} | Clusters: BM({cluster_bm1}/{cluster_bm2}) KM({cluster_km_num1}/{cluster_km_num2})</sub>'
            )
            
            fig.show()
            
            # An√°lisis estad√≠stico del path
            print(f"\n{'='*90}")
            print(f"üìä AN√ÅLISIS ESTAD√çSTICO DEL ALINEAMIENTO:\n")
            
            # Calcular estad√≠sticas de desviaci√≥n del path respecto a la diagonal
            path_indices = [(i, j) for i, j in path]
            
            # Desviaci√≥n promedio del path
            deviations = [abs(i - j) for i, j in path_indices]
            avg_deviation = np.mean(deviations)
            max_deviation = np.max(deviations)
            print(f"   Desviaci√≥n promedio del alineamiento 1:1: {avg_deviation:.2f} √≠ndices")
            print(f"   Desviaci√≥n m√°xima: {max_deviation} √≠ndices")
            
            # Diferencias estad√≠sticas
            print(f"\n   Diferencias en valores ({data_type}):")
            print(f"      ‚Ä¢ Diferencia promedio: {np.mean(np.abs(differences)):.4f}")
            print(f"      ‚Ä¢ Diferencia m√°xima: {np.max(np.abs(differences)):.4f}")
            print(f"      ‚Ä¢ Std dev de diferencias: {np.std(differences):.4f}")
            
            # Correlaci√≥n
            correlation = np.corrcoef(series1_overlap, series2_overlap)[0, 1]
            print(f"\n   Correlaci√≥n de Pearson (per√≠odo superpuesto): {correlation:.4f}")
            
            # An√°lisis de warping
            compressions = []
            expansions = []
            diagonal_steps = 0
            
            for i in range(len(path) - 1):
                i1, j1 = path[i]
                i2, j2 = path[i + 1]
            
                step_i = i2 - i1
                step_j = j2 - j1
                
                if step_i == 0 and step_j == 1:
                    compressions.append(path[i])
                elif step_i == 1 and step_j == 0:
                    expansions.append(path[i])
                elif step_i == 1 and step_j == 1:
                    diagonal_steps += 1
            
            print(f"\n   Warping del path:")
            print(f"      ‚Ä¢ Alineamientos 1:1 (diagonal): {diagonal_steps} pasos ({diagonal_steps/len(path)*100:.1f}%)")
            print(f"      ‚Ä¢ Compresiones ({name2} salta): {len(compressions)} pasos")
            print(f"      ‚Ä¢ Expansiones ({name1} salta): {len(expansions)} pasos")
            
            print(f"\n{'='*90}")
    
    # Conectar bot√≥n con funci√≥n
    update_button.on_click(update_analysis)
    
    # Layout del dashboard
    controls = widgets.VBox([
        widgets.HTML("<h3>üî¨ Dashboard de Path DTW Detallado</h3>"),
        widgets.HBox([country1_dropdown, country2_dropdown]),
        widgets.VBox([standardize_checkbox, show_all_lines_checkbox, offset_series_checkbox]),
        update_button,
        widgets.HTML("<hr>")
    ])
    
    display(controls, output)
    
    # Trigger inicial
    update_analysis(None)


# Crear dashboard de path DTW
create_dtw_path_dashboard()


VBox(children=(HTML(value='<h3>üî¨ Dashboard de Path DTW Detallado</h3>'), HBox(children=(Dropdown(description='‚Ä¶

Output()

### Explorador de Lags Temporales por D√©cada

Analiza los adelantos y retrasos (lags) en el alineamiento DTW entre dos pa√≠ses por d√©cada. Identifica d√≥nde un pa√≠s se adelanta o retrasa respecto al otro en su trayectoria de desarrollo econ√≥mico.

In [None]:
def create_decade_lag_explorer():
    """
    Crea un explorador interactivo de lags temporales por d√©cada.
    Muestra d√≥nde un pa√≠s se adelanta o retrasa respecto al otro en el alineamiento DTW.
    """
    # Obtener lista de pa√≠ses ordenada
    countries = sorted(country_names.keys())
    country_options = [(f"{code} - {country_names[code]}", code) for code in countries]
    
    # Widgets de selecci√≥n de pa√≠ses
    country1_dropdown = widgets.Dropdown(
        options=country_options,
        value='ARG',
        description='Pa√≠s 1:',
        style={'description_width': '100px'},
        layout=widgets.Layout(width='450px')
    )
    
    country2_dropdown = widgets.Dropdown(
        options=country_options,
        value='BRA',
        description='Pa√≠s 2:',
        style={'description_width': '100px'},
        layout=widgets.Layout(width='450px')
    )
    
    # Selector de d√©cada
    decades = ['1960s', '1970s', '1980s', '1990s', '2000s', '2010s', '2020s']
    decade_dropdown = widgets.Dropdown(
        options=decades,
        value='2010s',
        description='D√©cada:',
        style={'description_width': '100px'},
        layout=widgets.Layout(width='300px')
    )
    
    # Checkbox para datos estandarizados
    standardize_checkbox = widgets.Checkbox(
        value=True,
        description='Usar datos estandarizados (Z-score)',
        style={'description_width': 'initial'}
    )
    
    # Checkbox para desfasar series verticalmente
    offset_series_checkbox = widgets.Checkbox(
        value=False,
        description='Desfasar series verticalmente (mejora visualizaci√≥n de conexiones)',
        style={'description_width': 'initial'}
    )
    
    # Bot√≥n de actualizaci√≥n
    update_button = widgets.Button(
        description='üîç Analizar D√©cada',
        button_style='primary',
        layout=widgets.Layout(width='200px')
    )
    
    # Output widget
    output = widgets.Output()
    
    def get_decade_years(decade_str):
        """Convierte string de d√©cada a rango de a√±os."""
        decade_map = {
            '1960s': (1960, 1969),
            '1970s': (1970, 1979),
            '1980s': (1980, 1989),
            '1990s': (1990, 1999),
            '2000s': (2000, 2009),
            '2010s': (2010, 2019),
            '2020s': (2020, 2029)
        }
        return decade_map[decade_str]
    
    def update_analysis(b):
        """Actualiza el an√°lisis cuando se hace clic en el bot√≥n."""
        with output:
            output.clear_output(wait=True)
            
            country1 = country1_dropdown.value
            country2 = country2_dropdown.value
            decade = decade_dropdown.value
            use_standardized = standardize_checkbox.value
            offset_series = offset_series_checkbox.value
            
            if country1 == country2:
                print("‚ö†Ô∏è  Por favor selecciona dos pa√≠ses diferentes")
                return
            
            # Obtener nombres
            name1 = country_names[country1]
            name2 = country_names[country2]
            
            # Obtener rango de a√±os de la d√©cada
            decade_start, decade_end = get_decade_years(decade)
            
            # Header
            print(f"{'='*90}")
            print(f"‚è±Ô∏è  AN√ÅLISIS DE LAGS TEMPORALES: {name1} vs {name2} ({decade})")
            print(f"{'='*90}\n")
            
            # Obtener series temporales completas
            years1, values1 = get_country_series(country1, use_standardized)
            years2, values2 = get_country_series(country2, use_standardized)
            
            # Obtener a√±os superpuestos totales para calcular DTW
            overlap_years = get_overlap_years(country1, country2)
            
            if len(overlap_years) == 0:
                print("‚ùå No hay superposici√≥n de a√±os entre estos pa√≠ses")
                return
            
            # Filtrar a√±os de la d√©cada dentro del overlap
            decade_years_in_overlap = overlap_years[
                (overlap_years >= decade_start) & (overlap_years <= decade_end)
            ]
            
            if len(decade_years_in_overlap) == 0:
                print(f"‚ö†Ô∏è  No hay datos superpuestos en la d√©cada {decade}")
                return
            
            print(f"üìä Informaci√≥n de la d√©cada:")
            print(f"   Per√≠odo analizado: {decade_start}-{decade_end}")
            print(f"   A√±os con datos en ambos pa√≠ses: {len(decade_years_in_overlap)} de 10 posibles")
            print(f"   A√±os espec√≠ficos: {decade_years_in_overlap[0]} - {decade_years_in_overlap[-1]}")
            
            # Extraer datos superpuestos completos para calcular DTW
            idx1 = np.isin(years1, overlap_years)
            idx2 = np.isin(years2, overlap_years)
            
            series1_overlap = values1[idx1]
            series2_overlap = values2[idx2]
            years1_overlap = years1[idx1]
            years2_overlap = years2[idx2]
            
            # Calcular path DTW completo
            path = dtw.warping_path(series1_overlap, series2_overlap)
            
            # Filtrar path para la d√©cada seleccionada
            decade_path_info = []
            for i, j in path:
                year1 = years1_overlap[i]
                year2 = years2_overlap[j]
                
                # Si al menos uno de los a√±os est√° en la d√©cada
                if (decade_start <= year1 <= decade_end) or (decade_start <= year2 <= decade_end):
                    lag = year1 - year2  # Positivo: pa√≠s1 adelantado, Negativo: pa√≠s2 adelantado
                    decade_path_info.append({
                        'year1': year1,
                        'year2': year2,
                        'value1': series1_overlap[i],
                        'value2': series2_overlap[j],
                        'lag': lag,
                        'idx1': i,
                        'idx2': j
                    })
            
            if len(decade_path_info) == 0:
                print(f"‚ö†Ô∏è  No hay alineamientos DTW en la d√©cada {decade}")
                return
            
            df_decade = pd.DataFrame(decade_path_info)
            
            # Estad√≠sticas de lags
            print(f"\nüìà Estad√≠sticas de Lags Temporales:")
            print(f"   Puntos analizados en el path: {len(df_decade)}")
            print(f"   Lag promedio: {df_decade['lag'].mean():.2f} a√±os")
            print(f"   Lag m√°ximo: {df_decade['lag'].max()} a√±os")
            print(f"   Lag m√≠nimo: {df_decade['lag'].min()} a√±os")
            
            # Contar adelantos/retrasos
            adelantos_1 = len(df_decade[df_decade['lag'] > 0])
            adelantos_2 = len(df_decade[df_decade['lag'] < 0])
            sync = len(df_decade[df_decade['lag'] == 0])
            
            print(f"\n   üèÉ {name1} adelantado: {adelantos_1} puntos ({adelantos_1/len(df_decade)*100:.1f}%)")
            print(f"   üèÉ {name2} adelantado: {adelantos_2} puntos ({adelantos_2/len(df_decade)*100:.1f}%)")
            print(f"   ‚öñÔ∏è  Sincronizados: {sync} puntos ({sync/len(df_decade)*100:.1f}%)")
            
            # Interpretaci√≥n
            if abs(df_decade['lag'].mean()) < 0.5:
                interpretation = f"Los pa√≠ses evolucionan de forma muy sincronizada en esta d√©cada"
            elif df_decade['lag'].mean() > 0:
                interpretation = f"{name1} tiende a adelantarse {abs(df_decade['lag'].mean()):.1f} a√±os respecto a {name2}"
            else:
                interpretation = f"{name2} tiende a adelantarse {abs(df_decade['lag'].mean()):.1f} a√±os respecto a {name1}"
            
            print(f"\nüí° Interpretaci√≥n: {interpretation}")
            
            # Crear visualizaci√≥n con 3 subplots
            data_type = "Estandarizado (Z-score)" if use_standardized else "Absoluto (USD)"
            y_label = "GNI (Z-score)" if use_standardized else "GNI (USD)"
            
            fig = make_subplots(
                rows=2, cols=2,
                row_heights=[0.6, 0.4],
                column_widths=[0.6, 0.4],
                subplot_titles=(
                    f'Alineamiento DTW en {decade} - {data_type}<br><sub>üü¢ {name1} adelantado | üîµ {name2} adelantado</sub>',
                    f'Distribuci√≥n de Lags Temporales<br><sub>Positivo: {name1} adelantado | Negativo: {name2} adelantado</sub>',
                    f'Evoluci√≥n de Lags a lo Largo de la D√©cada',
                    f'Path DTW: {name1} vs {name2} (D√©cada resaltada)'
                ),
                specs=[
                    [{"type": "scatter"}, {"type": "bar"}],
                    [{"type": "scatter"}, {"type": "scatter"}]
                ],
                vertical_spacing=0.15,
                horizontal_spacing=0.12
            )
            
            # Subplot 1: Series con conexiones en la d√©cada
            # Primero calcular √≠ndices de la d√©cada
            decade_idx1 = np.isin(years1_overlap, decade_years_in_overlap)
            decade_idx2 = np.isin(years2_overlap, decade_years_in_overlap)
            
            # Calcular desfase si est√° activado
            if offset_series:
                value_range = max(series1_overlap[decade_idx1].max(), series2_overlap[decade_idx2].max()) - \
                             min(series1_overlap[decade_idx1].min(), series2_overlap[decade_idx2].min())
                offset = value_range * 0.3
                series2_display = series2_overlap[decade_idx2] + offset
                series2_display_all = series2_overlap + offset
            else:
                offset = 0
                series2_display = series2_overlap[decade_idx2]
                series2_display_all = series2_overlap
            
            # Primero las l√≠neas de conexi√≥n
            for _, row in df_decade.iterrows():
                # Obtener √≠ndices en el overlap
                idx1_in_overlap = np.where(years1_overlap == row['year1'])[0][0]
                idx2_in_overlap = np.where(years2_overlap == row['year2'])[0][0]
                
                color = 'rgba(0, 150, 0, 0.4)' if row['lag'] > 0 else 'rgba(0, 0, 200, 0.4)' if row['lag'] < 0 else 'rgba(150, 150, 150, 0.4)'
                fig.add_trace(
                    go.Scatter(
                        x=[row['year1'], row['year2']],
                        y=[series1_overlap[idx1_in_overlap], series2_display_all[idx2_in_overlap]],
                        mode='lines',
                        line=dict(color=color, width=1.5),
                        showlegend=False,
                        customdata=[[series2_overlap[idx2_in_overlap]]],
                        hovertemplate=f'{name1}[{int(row["year1"])}] ‚Üí {name2}[{int(row["year2"])}]<br>Lag: {int(row["lag"])} a√±os<br>Valor real {name2}: %{{customdata[0]:.3f}}<extra></extra>'
                    ),
                    row=1, col=1
                )
            
            # Luego las series
            fig.add_trace(
                go.Scatter(
                    x=years1_overlap[decade_idx1],
                    y=series1_overlap[decade_idx1],
                    mode='lines+markers',
                    name=f"{name1}",
                    line=dict(color='darkgreen', width=2.5),
                    marker=dict(size=8, symbol='circle'),
                    legendgroup='series'
                ),
                row=1, col=1
            )
            
            fig.add_trace(
                go.Scatter(
                    x=years2_overlap[decade_idx2],
                    y=series2_display,
                    mode='lines+markers',
                    name=f"{name2}" + (" (desfasado)" if offset_series else ""),
                    line=dict(color='darkblue', width=2.5),
                    marker=dict(size=8, symbol='diamond'),
                    legendgroup='series',
                    customdata=series2_overlap[decade_idx2],
                    hovertemplate=f'<b>{name2}</b><br>A√±o: %{{x}}<br>Valor real: %{{customdata:.3f}}<extra></extra>'
                ),
                row=1, col=1
            )
            
            # Subplot 2: Histograma de lags
            fig.add_trace(
                go.Histogram(
                    x=df_decade['lag'],
                    nbinsx=15,
                    marker_color='lightblue',
                    showlegend=False,
                    hovertemplate='Lag: %{x} a√±os<br>Frecuencia: %{y}<extra></extra>'
                ),
                row=1, col=2
            )
            
            # L√≠nea vertical en lag=0
            fig.add_vline(x=0, line_dash="dash", line_color="red", row=1, col=2)
            
            # Subplot 3: Evoluci√≥n temporal de lags
            fig.add_trace(
                go.Scatter(
                    x=df_decade['year1'],
                    y=df_decade['lag'],
                    mode='lines+markers',
                    line=dict(color='purple', width=2),
                    marker=dict(size=6),
                    showlegend=False,
                    hovertemplate='A√±o (%s): %%{x}<br>Lag: %%{y} a√±os<extra></extra>' % name1
                ),
                row=2, col=1
            )
            
            # L√≠nea horizontal en lag=0
            fig.add_hline(y=0, line_dash="dash", line_color="gray", row=2, col=1)
            
            # Subplot 4: Path DTW completo con d√©cada resaltada
            path_years1 = [years1_overlap[i] for i, j in path]
            path_years2 = [years2_overlap[j] for i, j in path]
            
            fig.add_trace(
                go.Scatter(
                    x=path_years1,
                    y=path_years2,
                    mode='lines+markers',
                    line=dict(color='lightgray', width=1),
                    marker=dict(size=2, color='lightgray'),
                    showlegend=False,
                    name='Path completo',
                    hoverinfo='skip'
                ),
                row=2, col=2
            )
            
            # Resaltar d√©cada en el path
            fig.add_trace(
                go.Scatter(
                    x=df_decade['year1'],
                    y=df_decade['year2'],
                    mode='lines+markers',
                    line=dict(color='red', width=2),
                    marker=dict(size=5, color='red'),
                    showlegend=False,
                    name=f'Path en {decade}',
                    hovertemplate=f'{name1}: %{{x}}<br>{name2}: %{{y}}<extra></extra>'
                ),
                row=2, col=2
            )
            
            # L√≠nea diagonal
            fig.add_trace(
                go.Scatter(
                    x=[overlap_years[0], overlap_years[-1]],
                    y=[overlap_years[0], overlap_years[-1]],
                    mode='lines',
                    line=dict(color='gray', width=1, dash='dash'),
                    showlegend=False,
                    hoverinfo='skip'
                ),
                row=2, col=2
            )
            
            # Actualizar ejes
            fig.update_xaxes(title_text="A√±o", row=1, col=1)
            if offset_series:
                fig.update_yaxes(title_text=f"{y_label} (series desfasadas - ver hover para valores reales)", row=1, col=1)
            else:
                fig.update_yaxes(title_text=y_label, row=1, col=1)
            fig.update_xaxes(title_text="Lag (a√±os)", row=1, col=2)
            fig.update_yaxes(title_text="Frecuencia", row=1, col=2)
            fig.update_xaxes(title_text=f"A√±o ({name1})", row=2, col=1)
            fig.update_yaxes(title_text="Lag (a√±os)", row=2, col=1)
            fig.update_xaxes(title_text=f"A√±o - {name1}", row=2, col=2)
            fig.update_yaxes(title_text=f"A√±o - {name2}", row=2, col=2)
            
            fig.update_layout(
                height=900,
                width=1300,
                showlegend=True,
                template='plotly_white',
                hovermode='closest',
                title=f'An√°lisis de Lags Temporales en {decade}: {name1} vs {name2}<br>' +
                      f'<sub>Lag promedio: {df_decade["lag"].mean():.2f} a√±os | {interpretation}</sub>'
            )
            
            fig.show()
            
            # An√°lisis detallado
            print(f"\n{'='*90}")
            print(f"üìã AN√ÅLISIS DETALLADO DE LAGS:\n")
            
            # A√±os con mayor lag
            max_lag_rows = df_decade.nlargest(3, 'lag')
            print(f"   Mayor adelanto de {name1}:")
            for _, row in max_lag_rows.iterrows():
                print(f"      ‚Ä¢ {int(row['year1'])} ‚Üí {int(row['year2'])}: +{int(row['lag'])} a√±os")
            
            min_lag_rows = df_decade.nsmallest(3, 'lag')
            print(f"\n   Mayor adelanto de {name2}:")
            for _, row in min_lag_rows.iterrows():
                print(f"      ‚Ä¢ {int(row['year1'])} ‚Üí {int(row['year2'])}: {int(row['lag'])} a√±os")
            
            print(f"\n{'='*90}")
    
    # Conectar bot√≥n con funci√≥n
    update_button.on_click(update_analysis)
    
    # Layout del dashboard
    controls = widgets.VBox([
        widgets.HTML("<h3>‚è±Ô∏è Explorador de Lags Temporales por D√©cada</h3>"),
        widgets.HTML("<p style='color: gray;'>Analiza d√≥nde un pa√≠s se adelanta (üü¢ verde) o retrasa (üîµ azul) respecto al otro en su desarrollo econ√≥mico</p>"),
        widgets.HBox([country1_dropdown, country2_dropdown]),
        widgets.HBox([decade_dropdown, standardize_checkbox]),
        offset_series_checkbox,
        update_button,
        widgets.HTML("<hr>")
    ])
    
    display(controls, output)
    
    # Trigger inicial
    update_analysis(None)


# Crear explorador de lags por d√©cada
create_decade_lag_explorer()

VBox(children=(HTML(value='<h3>‚è±Ô∏è Explorador de Lags Temporales por D√©cada</h3>'), HTML(value="<p style='color‚Ä¶

Output()

## 9. An√°lisis de Distancias por Cluster

Analizamos las distancias DTW entre pa√≠ses de diferentes clusters (Banco Mundial y K-Means) y comparamos un pa√≠s espec√≠fico contra los promedios de cada cluster.

In [None]:
def create_country_vs_cluster_heatmap():
    """
    Crea un heatmap de distancias DTW de un pa√≠s vs pa√≠ses de un cluster espec√≠fico.
    Similar al heatmap regional de la secci√≥n 5.
    """
    
    # Obtener lista de pa√≠ses
    countries = sorted(country_names.keys())
    country_options = [(f"{country_names[code]} ({code})", code) for code in countries]
    
    # Widget de selecci√≥n de pa√≠s
    country_dropdown = widgets.Dropdown(
        options=country_options,
        value='ARG',
        description='Pa√≠s:',
        style={'description_width': '120px'},
        layout=widgets.Layout(width='400px')
    )
    
    # Widget de clasificaci√≥n
    classification_radio = widgets.RadioButtons(
        options=[('Banco Mundial', 'banco_mundial'), ('K-Means', 'kmeans')],
        value='banco_mundial',
        description='Clasificaci√≥n:',
        style={'description_width': '120px'}
    )
    
    # Widget de cluster (se actualizar√° din√°micamente)
    cluster_dropdown = widgets.Dropdown(
        options=[],
        description='Cluster:',
        style={'description_width': '120px'},
        layout=widgets.Layout(width='400px')
    )
    
    # Slider para top N pa√≠ses
    top_n_slider = widgets.IntSlider(
        value=30,
        min=10,
        max=100,
        step=10,
        description='Top N pa√≠ses:',
        style={'description_width': '120px'},
        layout=widgets.Layout(width='400px')
    )
    
    # Checkbox para mostrar nombres
    show_names_checkbox = widgets.Checkbox(
        value=True,
        description='Mostrar nombres de pa√≠ses'
    )
    
    # Bot√≥n actualizar
    update_button = widgets.Button(
        description='üìä Generar Heatmap',
        button_style='success',
        icon='chart-bar'
    )
    
    # Output
    output = widgets.Output()
    
    def update_cluster_options(*args):
        """Actualiza las opciones de cluster seg√∫n la clasificaci√≥n"""
        classification = classification_radio.value
        
        if classification == 'banco_mundial':
            clusters = sorted(df_clusters_bm['Cluster Mapped'].unique())
        else:
            clusters = sorted(df_clusters_km['Cluster'].unique())
        
        cluster_dropdown.options = [(c, c) for c in clusters]
        if len(clusters) > 0:
            cluster_dropdown.value = clusters[0]
    
    def generate_heatmap(b=None):
        with output:
            output.clear_output(wait=True)
            
            selected_country = country_dropdown.value
            classification = classification_radio.value
            selected_cluster = cluster_dropdown.value
            top_n = top_n_slider.value
            show_names = show_names_checkbox.value
            
            # Seleccionar clasificaci√≥n
            if classification == 'banco_mundial':
                cluster_lookup = cluster_bm_lookup
                df_clusters = df_clusters_bm
                cluster_col = 'Cluster Mapped'
                title_suffix = "Banco Mundial"
            else:
                cluster_lookup = cluster_km_lookup
                df_clusters = df_clusters_km
                cluster_col = 'Cluster'
                title_suffix = "K-Means"
            
            country_name = country_names[selected_country]
            
            # Obtener pa√≠ses del cluster seleccionado
            cluster_countries = df_clusters[df_clusters[cluster_col] == selected_cluster]['country_code'].tolist()
            
            if len(cluster_countries) == 0:
                print(f"‚ùå No hay pa√≠ses en el cluster {selected_cluster}")
                return
            
            # Obtener distancias del pa√≠s seleccionado a los pa√≠ses del cluster
            distances_to_cluster = df_distances_valid[
                ((df_distances_valid['country1'] == selected_country) & 
                 (df_distances_valid['country2'].isin(cluster_countries))) |
                ((df_distances_valid['country2'] == selected_country) & 
                 (df_distances_valid['country1'].isin(cluster_countries)))
            ].copy()
            
            # Normalizar para tener el pa√≠s seleccionado siempre en country1
            distances_to_cluster['other_country'] = distances_to_cluster.apply(
                lambda row: row['country2'] if row['country1'] == selected_country else row['country1'],
                axis=1
            )
            
            distances_to_cluster['other_country_name'] = distances_to_cluster['other_country'].map(country_names)
            
            # Ordenar por distancia y limitar a top N
            distances_to_cluster = distances_to_cluster.sort_values('dtw_distance').head(top_n)
            
            if len(distances_to_cluster) == 0:
                print(f"‚ùå No hay datos de distancias entre {country_name} y el cluster {selected_cluster}")
                return
            
            # Preparar datos para heatmap
            countries_ordered = distances_to_cluster['other_country'].tolist()
            names_ordered = distances_to_cluster['other_country_name'].tolist()
            distances_ordered = distances_to_cluster['dtw_distance'].tolist()
            
            # Crear heatmap
            z_values = [[d] for d in distances_ordered]
            
            hover_text = []
            for code, name, dist in zip(countries_ordered, names_ordered, distances_ordered):
                hover_text.append([f"{name} ({code})<br>Distancia: {dist:.4f}"])
            
            y_labels = names_ordered if show_names else countries_ordered
            
            fig = go.Figure()
            
            fig.add_trace(
                go.Heatmap(
                    z=z_values,
                    x=[f'{country_name}'],
                    y=y_labels,
                    colorscale='RdYlGn_r',
                    text=hover_text,
                    hovertemplate='%{text}<extra></extra>',
                    colorbar=dict(title="Distancia<br>DTW")
                )
            )
            
            # Marcar el pa√≠s si est√° en el cluster
            if selected_country in cluster_countries:
                country_idx = None
                for idx, c in enumerate(countries_ordered):
                    if c == selected_country:
                        country_idx = idx
                        break
                
                if country_idx is not None:
                    fig.add_hline(
                        y=country_idx,
                        line_dash="solid",
                        line_color="red",
                        line_width=3,
                        annotation_text=f"‚Üê {country_name} (mismo cluster)",
                        annotation_position="right"
                    )
            
            fig.update_layout(
                title=f'Heatmap de Distancias DTW<br>{country_name} vs Cluster "{selected_cluster}" ({title_suffix})',
                height=max(600, len(distances_ordered) * 20),
                width=800,
                template='plotly_white',
                xaxis=dict(side='top')
            )
            
            fig.show()
            
            # Estad√≠sticas
            print(f"\nüìä Estad√≠sticas:")
            print(f"   Pa√≠s analizado: {country_name}")
            print(f"   Cluster: {selected_cluster}")
            print(f"   Pa√≠ses en el cluster: {len(cluster_countries)}")
            print(f"   Pa√≠ses mostrados: {len(distances_ordered)}")
            print(f"   Distancia m√≠nima: {min(distances_ordered):.4f}")
            print(f"   Distancia m√°xima: {max(distances_ordered):.4f}")
            print(f"   Distancia promedio: {sum(distances_ordered)/len(distances_ordered):.4f}")
            
            if selected_country in cluster_countries:
                print(f"\nüí° {country_name} pertenece a este cluster")
            else:
                country_cluster = cluster_lookup.get(selected_country, 'Sin clasificar')
                print(f"\nüí° {country_name} pertenece al cluster: {country_cluster}")
    
    # Conectar eventos
    classification_radio.observe(update_cluster_options, 'value')
    update_button.on_click(generate_heatmap)
    
    # Inicializar opciones de cluster
    update_cluster_options()
    
    # Layout
    controls = widgets.VBox([
        widgets.HTML("<h3>üó∫Ô∏è Heatmap: Pa√≠s vs Cluster</h3>"),
        widgets.HBox([country_dropdown, classification_radio]),
        widgets.HBox([cluster_dropdown, top_n_slider]),
        show_names_checkbox,
        update_button,
        output
    ])
    
    display(controls)

print("‚úÖ Funci√≥n create_country_vs_cluster_heatmap() creada")

‚úÖ Funci√≥n create_country_vs_cluster_heatmap() creada


### 9.1. Dashboard Interactivo: Pa√≠s vs Clusters

Analiza c√≥mo se diferencia un pa√≠s espec√≠fico de los distintos clusters.

In [None]:
# Ejecutar dashboard
create_country_vs_cluster_heatmap()

VBox(children=(HTML(value='<h3>üó∫Ô∏è Heatmap: Pa√≠s vs Cluster</h3>'), HBox(children=(Dropdown(description='Pa√≠s:'‚Ä¶

### 9.2. Comparaci√≥n de Distancias Inter e Intra-Cluster

Analizamos si las distancias dentro de un mismo cluster son menores que las distancias entre clusters diferentes.

In [None]:
def analyze_inter_intra_cluster_distances(classification='banco_mundial'):
    """
    Analiza distancias dentro de clusters (intra) vs entre clusters (inter)
    """
    
    # Seleccionar clasificaci√≥n
    if classification == 'banco_mundial':
        cluster_lookup = cluster_bm_lookup
        title_suffix = "Banco Mundial"
    else:
        cluster_lookup = cluster_km_lookup
        title_suffix = "K-Means"
    
    # Agregar clusters a todas las distancias v√°lidas
    df_with_clusters = df_distances_valid.copy()
    df_with_clusters['cluster1'] = df_with_clusters['country1'].map(cluster_lookup)
    df_with_clusters['cluster2'] = df_with_clusters['country2'].map(cluster_lookup)
    
    # Filtrar solo pares donde ambos pa√≠ses tienen cluster
    df_with_clusters = df_with_clusters[
        df_with_clusters['cluster1'].notna() & 
        df_with_clusters['cluster2'].notna()
    ].copy()
    
    # Clasificar como intra o inter cluster
    df_with_clusters['distance_type'] = df_with_clusters.apply(
        lambda row: 'Intra-cluster' if row['cluster1'] == row['cluster2'] else 'Inter-cluster',
        axis=1
    )
    
    # Para inter-cluster, crear etiqueta de par de clusters
    df_with_clusters['cluster_pair'] = df_with_clusters.apply(
        lambda row: f"{min(row['cluster1'], row['cluster2'])} ‚Üî {max(row['cluster1'], row['cluster2'])}"
        if row['distance_type'] == 'Inter-cluster' else row['cluster1'],
        axis=1
    )
    
    # Estad√≠sticas generales
    intra_distances = df_with_clusters[df_with_clusters['distance_type'] == 'Intra-cluster']['dtw_distance']
    inter_distances = df_with_clusters[df_with_clusters['distance_type'] == 'Inter-cluster']['dtw_distance']
    
    print(f"üìä An√°lisis de Distancias DTW - {title_suffix}\n")
    print(f"Intra-cluster (mismo cluster):")
    print(f"   Media: {intra_distances.mean():.4f}")
    print(f"   Mediana: {intra_distances.median():.4f}")
    print(f"   Desv. Std: {intra_distances.std():.4f}")
    print(f"   Cantidad: {len(intra_distances):,} pares")
    
    print(f"\nInter-cluster (diferentes clusters):")
    print(f"   Media: {inter_distances.mean():.4f}")
    print(f"   Mediana: {inter_distances.median():.4f}")
    print(f"   Desv. Std: {inter_distances.std():.4f}")
    print(f"   Cantidad: {len(inter_distances):,} pares")
    
    ratio = inter_distances.mean() / intra_distances.mean()
    print(f"\nüí° Ratio Inter/Intra: {ratio:.2f}x")
    print(f"   {'‚úÖ Los clusters est√°n bien definidos (inter > intra)' if ratio > 1 else '‚ö†Ô∏è  Los clusters tienen superposici√≥n (inter ‚â§ intra)'}")
    
    # Crear visualizaciones
    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=(
            'Distribuci√≥n: Intra vs Inter-Cluster',
            'Box Plot por Tipo de Distancia',
            'Distancias Promedio por Cluster (Intra)',
            'Matriz de Distancias Inter-Cluster'
        ),
        specs=[
            [{"type": "histogram"}, {"type": "box"}],
            [{"type": "bar"}, {"type": "heatmap"}]
        ],
        vertical_spacing=0.12,
        horizontal_spacing=0.1
    )
    
    # 1. Histogramas superpuestos
    fig.add_trace(
        go.Histogram(
            x=intra_distances,
            name='Intra-cluster',
            opacity=0.7,
            nbinsx=50,
            marker_color='lightblue'
        ),
        row=1, col=1
    )
    
    fig.add_trace(
        go.Histogram(
            x=inter_distances,
            name='Inter-cluster',
            opacity=0.7,
            nbinsx=50,
            marker_color='salmon'
        ),
        row=1, col=1
    )
    
    # 2. Box plots
    fig.add_trace(
        go.Box(
            y=intra_distances,
            name='Intra-cluster',
            marker_color='lightblue',
            boxmean='sd'
        ),
        row=1, col=2
    )
    
    fig.add_trace(
        go.Box(
            y=inter_distances,
            name='Inter-cluster',
            marker_color='salmon',
            boxmean='sd'
        ),
        row=1, col=2
    )
    
    # 3. Distancias promedio intra-cluster por cluster
    intra_by_cluster = df_with_clusters[
        df_with_clusters['distance_type'] == 'Intra-cluster'
    ].groupby('cluster1')['dtw_distance'].agg(['mean', 'count']).reset_index()
    intra_by_cluster = intra_by_cluster.sort_values('mean')
    
    fig.add_trace(
        go.Bar(
            x=intra_by_cluster['cluster1'],
            y=intra_by_cluster['mean'],
            text=intra_by_cluster['mean'].round(4),
            textposition='outside',
            marker_color='lightblue',
            name='Dist. promedio intra',
            hovertemplate='<b>%{x}</b><br>Distancia: %{y:.4f}<br>Pares: %{customdata}<extra></extra>',
            customdata=intra_by_cluster['count']
        ),
        row=2, col=1
    )
    
    # 4. Matriz de distancias promedio inter-cluster
    inter_by_pair = df_with_clusters[
        df_with_clusters['distance_type'] == 'Inter-cluster'
    ].groupby(['cluster1', 'cluster2'])['dtw_distance'].mean().reset_index()
    
    # Crear matriz sim√©trica
    all_clusters = sorted(df_with_clusters['cluster1'].unique())
    matrix_size = len(all_clusters)
    distance_matrix = np.zeros((matrix_size, matrix_size))
    
    cluster_to_idx = {cluster: i for i, cluster in enumerate(all_clusters)}
    
    for _, row in inter_by_pair.iterrows():
        i = cluster_to_idx[row['cluster1']]
        j = cluster_to_idx[row['cluster2']]
        distance_matrix[i, j] = row['dtw_distance']
        distance_matrix[j, i] = row['dtw_distance']
    
    # Poner distancias intra en la diagonal
    for _, row in intra_by_cluster.iterrows():
        if row['cluster1'] in cluster_to_idx:
            i = cluster_to_idx[row['cluster1']]
            distance_matrix[i, i] = row['mean']
    
    fig.add_trace(
        go.Heatmap(
            z=distance_matrix,
            x=all_clusters,
            y=all_clusters,
            colorscale='RdYlGn_r',
            text=np.round(distance_matrix, 4),
            texttemplate='%{text}',
            textfont={"size": 9},
            hovertemplate='%{y} ‚Üí %{x}<br>Distancia: %{z:.4f}<extra></extra>',
            colorbar=dict(title="Distancia<br>DTW")
        ),
        row=2, col=2
    )
    
    # Actualizar ejes
    fig.update_xaxes(title_text="Distancia DTW", row=1, col=1)
    fig.update_yaxes(title_text="Frecuencia", row=1, col=1)
    
    fig.update_yaxes(title_text="Distancia DTW", row=1, col=2)
    
    fig.update_xaxes(title_text="Cluster", row=2, col=1)
    fig.update_yaxes(title_text="Distancia Promedio", row=2, col=1)
    
    fig.update_xaxes(title_text="Cluster", row=2, col=2)
    fig.update_yaxes(title_text="Cluster", row=2, col=2)
    
    fig.update_layout(
        height=1000,
        width=1400,
        title_text=f"An√°lisis de Distancias Inter e Intra-Cluster ({title_suffix})",
        showlegend=True,
        template='plotly_white'
    )
    
    fig.show()
    
    # An√°lisis de separaci√≥n de clusters
    print("\nüìà Separaci√≥n de Clusters:")
    print("\nDistancias promedio entre pares de clusters:")
    
    inter_summary = df_with_clusters[
        df_with_clusters['distance_type'] == 'Inter-cluster'
    ].groupby('cluster_pair')['dtw_distance'].agg(['mean', 'count']).reset_index()
    inter_summary = inter_summary.sort_values('mean')
    
    for _, row in inter_summary.head(10).iterrows():
        print(f"   {row['cluster_pair']}: {row['mean']:.4f} ({int(row['count'])} pares)")

# Ejecutar an√°lisis
print("Banco Mundial:")
analyze_inter_intra_cluster_distances('banco_mundial')

print("\n" + "="*80 + "\n")

print("K-Means:")
analyze_inter_intra_cluster_distances('kmeans')

Banco Mundial:
üìä An√°lisis de Distancias DTW - Banco Mundial

Intra-cluster (mismo cluster):
   Media: 1.4071
   Mediana: 1.2448
   Desv. Std: 0.6839
   Cantidad: 2,438 pares

Inter-cluster (diferentes clusters):
   Media: 1.5310
   Mediana: 1.3560
   Desv. Std: 0.7506
   Cantidad: 6,473 pares

üí° Ratio Inter/Intra: 1.09x
   ‚úÖ Los clusters est√°n bien definidos (inter > intra)
üìä An√°lisis de Distancias DTW - Banco Mundial

Intra-cluster (mismo cluster):
   Media: 1.4071
   Mediana: 1.2448
   Desv. Std: 0.6839
   Cantidad: 2,438 pares

Inter-cluster (diferentes clusters):
   Media: 1.5310
   Mediana: 1.3560
   Desv. Std: 0.7506
   Cantidad: 6,473 pares

üí° Ratio Inter/Intra: 1.09x
   ‚úÖ Los clusters est√°n bien definidos (inter > intra)



üìà Separaci√≥n de Clusters:

Distancias promedio entre pares de clusters:
   High income ‚Üî Upper middle income: 1.4330 (1504 pares)
   Lower middle income ‚Üî Upper middle income: 1.4798 (1248 pares)
   Low income ‚Üî Lower middle income: 1.4981 (624 pares)
   High income ‚Üî Lower middle income: 1.5369 (1833 pares)
   Low income ‚Üî Upper middle income: 1.6766 (512 pares)
   High income ‚Üî Low income: 1.7260 (752 pares)


K-Means:
üìä An√°lisis de Distancias DTW - K-Means

Intra-cluster (mismo cluster):
   Media: 1.4787
   Mediana: 1.2879
   Desv. Std: 0.7706
   Cantidad: 4,150 pares

Inter-cluster (diferentes clusters):
   Media: 1.5132
   Mediana: 1.3482
   Desv. Std: 0.7022
   Cantidad: 4,761 pares

üí° Ratio Inter/Intra: 1.02x
   ‚úÖ Los clusters est√°n bien definidos (inter > intra)
üìä An√°lisis de Distancias DTW - K-Means

Intra-cluster (mismo cluster):
   Media: 1.4787
   Mediana: 1.2879
   Desv. Std: 0.7706
   Cantidad: 4,150 pares

Inter-cluster (diferentes clusters


üìà Separaci√≥n de Clusters:

Distancias promedio entre pares de clusters:
   High income ‚Üî Upper middle income: 1.2451 (120 pares)
   Lower middle income ‚Üî Upper middle income: 1.4181 (360 pares)
   High income ‚Üî Low income: 1.4234 (696 pares)
   Low income ‚Üî Upper middle income: 1.4338 (1305 pares)
   High income ‚Üî Lower middle income: 1.4603 (192 pares)
   Low income ‚Üî Lower middle income: 1.6295 (2088 pares)


### 9.3. Dashboard de Visualizaciones por Cluster

Visualizaciones adicionales para analizar la distribuci√≥n de distancias entre un pa√≠s y los clusters.

In [None]:
def create_cluster_visualization_dashboard():
    """
    Dashboard con m√∫ltiples visualizaciones de distancias de un pa√≠s vs clusters.
    """
    
    # Obtener lista de pa√≠ses
    countries = sorted(country_names.keys())
    country_options = [(f"{country_names[code]} ({code})", code) for code in countries]
    
    # Widget de selecci√≥n de pa√≠s
    country_dropdown = widgets.Dropdown(
        options=country_options,
        value='ARG',
        description='Pa√≠s:',
        style={'description_width': '120px'},
        layout=widgets.Layout(width='400px')
    )
    
    # Widget de clasificaci√≥n
    classification_radio = widgets.RadioButtons(
        options=[('Banco Mundial', 'banco_mundial'), ('K-Means', 'kmeans')],
        value='banco_mundial',
        description='Clasificaci√≥n:',
        style={'description_width': '120px'}
    )
    
    # Widget de tipo de visualizaci√≥n
    viz_dropdown = widgets.Dropdown(
        options=[
            ('Box Plot por Cluster', 'boxplot'),
            ('Violin Plot por Cluster', 'violin'),
            ('Distribuci√≥n Acumulada', 'cumulative'),
            ('Todas las Visualizaciones', 'all')
        ],
        value='all',
        description='Visualizaci√≥n:',
        style={'description_width': '120px'},
        layout=widgets.Layout(width='400px')
    )
    
    # Bot√≥n actualizar
    update_button = widgets.Button(
        description='üìä Generar Visualizaci√≥n',
        button_style='success',
        icon='chart-bar'
    )
    
    # Output
    output = widgets.Output()
    
    def generate_visualization(b=None):
        with output:
            output.clear_output(wait=True)
            
            selected_country = country_dropdown.value
            classification = classification_radio.value
            viz_type = viz_dropdown.value
            
            # Seleccionar clasificaci√≥n
            if classification == 'banco_mundial':
                cluster_lookup = cluster_bm_lookup
                df_clusters = df_clusters_bm
                cluster_col = 'Cluster Mapped'
                title_suffix = "Banco Mundial"
            else:
                cluster_lookup = cluster_km_lookup
                df_clusters = df_clusters_km
                cluster_col = 'Cluster'
                title_suffix = "K-Means"
            
            country_name = country_names[selected_country]
            
            # Obtener cluster del pa√≠s seleccionado
            country_cluster = cluster_lookup.get(selected_country, None)
            
            # Obtener todas las distancias del pa√≠s seleccionado
            distances_from_country = df_distances_valid[
                (df_distances_valid['country1'] == selected_country) |
                (df_distances_valid['country2'] == selected_country)
            ].copy()
            
            # Normalizar para tener siempre el pa√≠s seleccionado en country1
            distances_from_country['other_country'] = distances_from_country.apply(
                lambda row: row['country2'] if row['country1'] == selected_country else row['country1'],
                axis=1
            )
            
            # Agregar cluster de cada pa√≠s
            distances_from_country['other_cluster'] = distances_from_country['other_country'].map(cluster_lookup)
            distances_from_country['other_country_name'] = distances_from_country['other_country'].map(country_names)
            
            # Filtrar solo pa√≠ses que tienen cluster
            distances_from_country = distances_from_country[distances_from_country['other_cluster'].notna()].copy()
            
            if len(distances_from_country) == 0:
                print(f"‚ùå No hay datos de distancias para {country_name}")
                return
            
            # Generar visualizaciones seg√∫n selecci√≥n
            if viz_type == 'boxplot' or viz_type == 'all':
                create_boxplot(distances_from_country, country_name, title_suffix, country_cluster, selected_country)
            
            if viz_type == 'violin' or viz_type == 'all':
                create_violinplot(distances_from_country, country_name, title_suffix, country_cluster)
            
            if viz_type == 'cumulative' or viz_type == 'all':
                create_cumulative(distances_from_country, country_name, title_suffix, country_cluster)
            
            # Estad√≠sticas textuales
            print(f"\nüìä Estad√≠sticas de {country_name}:")
            print(f"   Cluster propio: {country_cluster if country_cluster else 'Sin clasificar'}")
            print(f"   Total pa√≠ses comparados: {len(distances_from_country)}")
            
            cluster_stats = distances_from_country.groupby('other_cluster')['dtw_distance'].agg(['mean', 'median', 'std', 'count']).reset_index()
            cluster_stats = cluster_stats.sort_values('mean')
            
            print(f"\nüìà Estad√≠sticas por cluster:")
            for _, row in cluster_stats.iterrows():
                marker = "üëâ" if row['other_cluster'] == country_cluster else "  "
                print(f"   {marker} {row['other_cluster']}:")
                print(f"      Media: {row['mean']:.4f} | Mediana: {row['median']:.4f} | Desv: {row['std']:.4f} | N: {int(row['count'])}")
    
    def create_boxplot(df_all, country_name, title_suffix, country_cluster, selected_country):
        """Crea box plot de distancias por cluster"""
        
        fig = go.Figure()
        
        clusters_sorted = sorted(df_all['other_cluster'].unique())
        
        # Determinar cluster m√°s similar (menor distancia promedio)
        cluster_means = df_all.groupby('other_cluster')['dtw_distance'].mean()
        most_similar_cluster = cluster_means.idxmin()
        
        for cluster in clusters_sorted:
            cluster_data = df_all[df_all['other_cluster'] == cluster]
            
            # Color del cluster propio vs m√°s similar vs otros
            is_own_cluster = (cluster == country_cluster)
            is_most_similar = (cluster == most_similar_cluster)
            
            # Definir colores
            if is_own_cluster:
                box_color = '#FF6B6B'  # Rojo coral (propio)
                line_color = '#C92A2A'  # Rojo oscuro
                fillcolor = 'rgba(255, 107, 107, 0.3)'
            elif is_most_similar:
                box_color = '#51CF66'  # Verde (m√°s similar)
                line_color = '#2F9E44'  # Verde oscuro
                fillcolor = 'rgba(81, 207, 102, 0.3)'
            else:
                box_color = '#4ECDC4'  # Turquesa
                line_color = '#0B7285'  # Teal
                fillcolor = 'rgba(78, 205, 196, 0.3)'
            
            fig.add_trace(go.Box(
                y=cluster_data['dtw_distance'],
                name=cluster,
                marker=dict(
                    color=box_color,
                    size=6,
                    line=dict(width=1, color='white')
                ),
                line=dict(
                    color=line_color,
                    width=2.5
                ),
                fillcolor=fillcolor,
                boxmean=False,
                whiskerwidth=0.4,
                text=cluster_data['other_country'],
                customdata=cluster_data[['other_country', 'other_country_name', 'dtw_distance']].values,
                hovertemplate='<b>%{customdata[1]}</b> (%{customdata[0]})<br>Distancia: %{customdata[2]:.4f}<extra></extra>'
            ))
        
        # Agregar anotaciones para outliers y top 3 pa√≠ses por cluster
        annotations = []
        
        for cluster in clusters_sorted:
            cluster_data = df_all[df_all['other_cluster'] == cluster].copy()
            
            # Calcular Q1, Q3 e IQR para identificar outliers
            Q1 = cluster_data['dtw_distance'].quantile(0.25)
            Q3 = cluster_data['dtw_distance'].quantile(0.75)
            IQR = Q3 - Q1
            lower_bound = Q1 - 1.5 * IQR
            upper_bound = Q3 + 1.5 * IQR
            
            # Identificar outliers
            outliers = cluster_data[
                (cluster_data['dtw_distance'] < lower_bound) | 
                (cluster_data['dtw_distance'] > upper_bound)
            ]
            
            # Agregar etiquetas para outliers
            for _, row in outliers.iterrows():
                annotations.append(dict(
                    x=cluster,
                    y=row['dtw_distance'],
                    text=row['other_country'],
                    showarrow=True,
                    arrowhead=2,
                    arrowsize=1,
                    arrowwidth=1,
                    arrowcolor='gray',
                    ax=20,
                    ay=-20,
                    font=dict(size=9, color='red')
                ))
            
            # Top 3 pa√≠ses m√°s cercanos (solo si son del pa√≠s seleccionado o no son outliers)
            top_3 = cluster_data.nsmallest(3, 'dtw_distance')
            
            for idx, (_, row) in enumerate(top_3.iterrows()):
                # Solo agregar si no es ya un outlier etiquetado
                if row['other_country'] not in outliers['other_country'].values:
                    annotations.append(dict(
                        x=cluster,
                        y=row['dtw_distance'],
                        text=row['other_country'],
                        showarrow=True,
                        arrowhead=2,
                        arrowsize=1,
                        arrowwidth=1,
                        arrowcolor='darkgreen',
                        ax=20 + idx * 15,  # Desplazar para evitar superposici√≥n
                        ay=20 + idx * 10,
                        font=dict(size=9, color='darkgreen', family='monospace')
                    ))
        
        fig.update_layout(
            title=dict(
                text=f'üì¶ Box Plot: Distancias DTW de {country_name} vs Clusters ({title_suffix})',
                y=0.98,  # Bajar el t√≠tulo un poco
                x=0.5,
                xanchor='center',
                yanchor='top'
            ),
            yaxis_title='Distancia DTW',
            xaxis_title='Cluster',
            height=750,  # Aumentar altura para dar m√°s espacio
            width=1200,
            template='plotly_white',
            showlegend=False,
            annotations=annotations,
            margin=dict(t=120)  # Aumentar margen superior
        )
        
        # Agregar leyenda en anotaci√≥n
        legend_text = f"üè† Cluster de {country_name}: {country_cluster if country_cluster else 'N/A'} (rojo) | "
        legend_text += f"‚≠ê Cluster m√°s similar: {most_similar_cluster} (verde)<br>"
        legend_text += "üî¥ Etiquetas rojas = Outliers | üü¢ Etiquetas verdes = Top 3 m√°s cercanos"
        
        fig.add_annotation(
            text=legend_text,
            xref="paper", yref="paper",
            x=0.5, y=1.08,
            showarrow=False,
            font=dict(size=10),
            xanchor='center',
            align='center'
        )
        
        fig.show()
    
    def create_violinplot(df_all, country_name, title_suffix, country_cluster):
        """Crea violin plot de distancias por cluster"""
        
        fig = go.Figure()
        
        clusters_sorted = sorted(df_all['other_cluster'].unique())
        
        for cluster in clusters_sorted:
            cluster_data = df_all[df_all['other_cluster'] == cluster]
            
            fig.add_trace(go.Violin(
                y=cluster_data['dtw_distance'],
                name=cluster,
                box_visible=True,
                meanline_visible=True,
                fillcolor='salmon' if cluster == country_cluster else 'lightblue',
                opacity=0.7,
                line=dict(width=2 if cluster == country_cluster else 1),
                x0=cluster
            ))
        
        fig.update_layout(
            title=f'üéª Violin Plot: Distancias DTW de {country_name} vs Clusters ({title_suffix})',
            yaxis_title='Distancia DTW',
            xaxis_title='Cluster',
            height=600,
            width=1200,
            template='plotly_white',
            showlegend=False
        )
        
        if country_cluster:
            fig.add_annotation(
                text=f"üè† Cluster de {country_name}: {country_cluster} (resaltado en rojo)",
                xref="paper", yref="paper",
                x=0.5, y=1.08,
                showarrow=False,
                font=dict(size=12, color="red"),
                xanchor='center'
            )
        
        fig.show()
    
    def create_cumulative(df_all, country_name, title_suffix, country_cluster):
        """Crea gr√°fico de distribuci√≥n acumulada por cluster"""
        
        fig = go.Figure()
        
        clusters_sorted = sorted(df_all['other_cluster'].unique())
        
        for cluster in clusters_sorted:
            cluster_data = df_all[df_all['other_cluster'] == cluster]['dtw_distance'].sort_values()
            cumulative = np.arange(1, len(cluster_data) + 1) / len(cluster_data) * 100
            
            line_width = 4 if cluster == country_cluster else 2
            line_color = None  # Usar colores por defecto
            
            fig.add_trace(go.Scatter(
                x=cluster_data,
                y=cumulative,
                mode='lines',
                name=cluster,
                line=dict(width=line_width),
                hovertemplate='<b>%{fullData.name}</b><br>Distancia: %{x:.4f}<br>Percentil: %{y:.1f}%<extra></extra>'
            ))
        
        fig.update_layout(
            title=f'üìà Distribuci√≥n Acumulada: Distancias DTW de {country_name} vs Clusters ({title_suffix})',
            xaxis_title='Distancia DTW',
            yaxis_title='Percentil (%)',
            height=600,
            width=1200,
            template='plotly_white',
            hovermode='closest',
            legend=dict(
                orientation="v",
                yanchor="middle",
                y=0.5,
                xanchor="right",
                x=0.99
            )
        )
        
        if country_cluster:
            fig.add_annotation(
                text=f"üè† Cluster de {country_name}: {country_cluster} (l√≠nea m√°s gruesa)",
                xref="paper", yref="paper",
                x=0.5, y=1.08,
                showarrow=False,
                font=dict(size=12, color="red"),
                xanchor='center'
            )
        
        fig.show()
    
    # Conectar evento
    update_button.on_click(generate_visualization)
    
    # Layout
    controls = widgets.VBox([
        widgets.HTML("<h3>üìä Dashboard de Visualizaciones por Cluster</h3>"),
        widgets.HBox([country_dropdown, classification_radio]),
        viz_dropdown,
        update_button,
        output
    ])
    
    display(controls)

print("‚úÖ Funci√≥n create_cluster_visualization_dashboard() creada")

‚úÖ Funci√≥n create_cluster_visualization_dashboard() creada


In [None]:
# Ejecutar dashboard
create_cluster_visualization_dashboard()

VBox(children=(HTML(value='<h3>üìä Dashboard de Visualizaciones por Cluster</h3>'), HBox(children=(Dropdown(desc‚Ä¶

### 9.4. An√°lisis de Similitud: Pa√≠s vs Clusters

Funci√≥n para determinar a qu√© clusters se parece m√°s un pa√≠s espec√≠fico.

In [None]:
def analyze_country_cluster_similarity(country_code, classification='banco_mundial', top_n_countries=5):
    """
    Analiza a qu√© clusters se parece m√°s un pa√≠s espec√≠fico.
    
    Parameters:
    -----------
    country_code : str
        C√≥digo del pa√≠s a analizar (ej: 'ARG', 'USA', 'CHN')
    classification : str
        'banco_mundial' o 'kmeans'
    top_n_countries : int
        N√∫mero de pa√≠ses m√°s similares a mostrar por cluster
    
    Returns:
    --------
    dict : Diccionario con estad√≠sticas por cluster
    """
    
    # Verificar que el pa√≠s existe
    if country_code not in country_names:
        print(f"‚ùå Pa√≠s {country_code} no encontrado")
        return None
    
    country_name = country_names[country_code]
    
    # Seleccionar clasificaci√≥n
    if classification == 'banco_mundial':
        cluster_lookup = cluster_bm_lookup
        df_clusters = df_clusters_bm
        cluster_col = 'Cluster Mapped'
        title_suffix = "Banco Mundial"
    else:
        cluster_lookup = cluster_km_lookup
        df_clusters = df_clusters_km
        cluster_col = 'Cluster'
        title_suffix = "K-Means"
    
    # Obtener cluster del pa√≠s
    country_cluster = cluster_lookup.get(country_code, None)
    
    # Obtener todas las distancias del pa√≠s seleccionado
    distances_from_country = df_distances_valid[
        (df_distances_valid['country1'] == country_code) |
        (df_distances_valid['country2'] == country_code)
    ].copy()
    
    if len(distances_from_country) == 0:
        print(f"‚ùå No hay datos de distancias para {country_name}")
        return None
    
    # Normalizar para tener siempre el pa√≠s seleccionado en country1
    distances_from_country['other_country'] = distances_from_country.apply(
        lambda row: row['country2'] if row['country1'] == country_code else row['country1'],
        axis=1
    )
    
    # Agregar cluster de cada pa√≠s
    distances_from_country['other_cluster'] = distances_from_country['other_country'].map(cluster_lookup)
    distances_from_country['other_country_name'] = distances_from_country['other_country'].map(country_names)
    
    # Filtrar solo pa√≠ses que tienen cluster
    distances_from_country = distances_from_country[distances_from_country['other_cluster'].notna()].copy()
    
    # Calcular estad√≠sticas por cluster
    cluster_stats = distances_from_country.groupby('other_cluster')['dtw_distance'].agg([
        ('mean', 'mean'),
        ('median', 'median'),
        ('min', 'min'),
        ('max', 'max'),
        ('std', 'std'),
        ('count', 'count')
    ]).reset_index()
    
    # Ordenar por distancia promedio (m√°s similar primero)
    cluster_stats = cluster_stats.sort_values('mean')
    
    # Header
    print("="*80)
    print(f"üîç AN√ÅLISIS DE SIMILITUD: {country_name} ({country_code})")
    print(f"üìä Clasificaci√≥n: {title_suffix}")
    print("="*80)
    
    if country_cluster:
        print(f"\nüè† Cluster propio: {country_cluster}")
    else:
        print(f"\n‚ö†Ô∏è  {country_name} no tiene clasificaci√≥n en este sistema")
    
    print(f"\nüìà RANKING DE CLUSTERS (de m√°s similar a menos similar):\n")
    
    # Mostrar estad√≠sticas por cluster
    results = {}
    for idx, row in cluster_stats.iterrows():
        cluster_name = row['other_cluster']
        is_own_cluster = (cluster_name == country_cluster)
        
        # S√≠mbolo para marcar cluster propio
        marker = "üëâ " if is_own_cluster else "   "
        rank = idx + 1
        
        print(f"{marker}#{rank}. {cluster_name}")
        print(f"     Distancia promedio: {row['mean']:.4f}")
        print(f"     Distancia mediana:  {row['median']:.4f}")
        print(f"     Rango: [{row['min']:.4f} - {row['max']:.4f}]")
        print(f"     Desv. est√°ndar: {row['std']:.4f}")
        print(f"     Pa√≠ses en este cluster: {int(row['count'])}")
        
        # Mostrar pa√≠ses m√°s similares de este cluster
        cluster_countries = distances_from_country[
            distances_from_country['other_cluster'] == cluster_name
        ].sort_values('dtw_distance').head(top_n_countries)
        
        print(f"     Top {min(top_n_countries, len(cluster_countries))} pa√≠ses m√°s similares:")
        for i, (_, country_row) in enumerate(cluster_countries.iterrows(), 1):
            c_code = country_row['other_country']
            c_name = country_row['other_country_name']
            c_dist = country_row['dtw_distance']
            print(f"        {i}. {c_name} ({c_code}): {c_dist:.4f}")
        
        print()
        
        # Guardar en resultados
        results[cluster_name] = {
            'rank': rank,
            'mean': row['mean'],
            'median': row['median'],
            'min': row['min'],
            'max': row['max'],
            'std': row['std'],
            'count': int(row['count']),
            'is_own_cluster': is_own_cluster,
            'top_countries': [
                {
                    'code': r['other_country'],
                    'name': r['other_country_name'],
                    'distance': r['dtw_distance']
                }
                for _, r in cluster_countries.iterrows()
            ]
        }
    
    # An√°lisis de conclusiones
    print("="*80)
    print("üí° CONCLUSIONES:\n")
    
    most_similar_cluster = cluster_stats.iloc[0]['other_cluster']
    most_similar_mean = cluster_stats.iloc[0]['mean']
    
    print(f"‚Ä¢ El cluster m√°s similar a {country_name} es: {most_similar_cluster}")
    print(f"  (distancia promedio: {most_similar_mean:.4f})")
    
    if country_cluster:
        own_cluster_stats = cluster_stats[cluster_stats['other_cluster'] == country_cluster]
        if len(own_cluster_stats) > 0:
            own_rank = list(cluster_stats['other_cluster']).index(country_cluster) + 1
            own_mean = own_cluster_stats.iloc[0]['mean']
            
            if own_rank == 1:
                print(f"\n‚Ä¢ ‚úÖ {country_name} est√° correctamente clasificado:")
                print(f"  Su cluster propio ({country_cluster}) es el m√°s similar.")
            else:
                diff = own_mean - most_similar_mean
                pct_diff = (diff / most_similar_mean) * 100
                print(f"\n‚Ä¢ ‚ö†Ô∏è  {country_name} podr√≠a estar en un cluster no √≥ptimo:")
                print(f"  Su cluster propio ({country_cluster}) est√° en posici√≥n #{own_rank}")
                print(f"  Distancia promedio a su cluster: {own_mean:.4f}")
                print(f"  Diferencia con cluster m√°s similar: +{diff:.4f} (+{pct_diff:.1f}%)")
    
    # Diferencia entre clusters m√°s y menos similares
    least_similar_cluster = cluster_stats.iloc[-1]['other_cluster']
    least_similar_mean = cluster_stats.iloc[-1]['mean']
    range_diff = least_similar_mean - most_similar_mean
    
    print(f"\n‚Ä¢ Rango de distancias:")
    print(f"  M√°s similar: {most_similar_cluster} ({most_similar_mean:.4f})")
    print(f"  Menos similar: {least_similar_cluster} ({least_similar_mean:.4f})")
    print(f"  Diferencia: {range_diff:.4f}")
    
    print("="*80)
    
    return results

print("‚úÖ Funci√≥n analyze_country_cluster_similarity() creada")

‚úÖ Funci√≥n analyze_country_cluster_similarity() creada


In [None]:
# Ejemplo de uso: Analizar Argentina
print("Ejemplo 1: Argentina - Banco Mundial")
analyze_country_cluster_similarity('ARG', 'banco_mundial', top_n_countries=3)

print("\n\n")

print("Ejemplo 2: Argentina - K-Means")
analyze_country_cluster_similarity('ARG', 'kmeans', top_n_countries=3)

Ejemplo 1: Argentina - Banco Mundial
üîç AN√ÅLISIS DE SIMILITUD: Argentina (ARG)
üìä Clasificaci√≥n: Banco Mundial

üè† Cluster propio: Upper middle income

üìà RANKING DE CLUSTERS (de m√°s similar a menos similar):

üëâ #4. Upper middle income
     Distancia promedio: 2.1310
     Distancia mediana:  2.1106
     Rango: [1.4610 - 3.0442]
     Desv. est√°ndar: 0.3471
     Pa√≠ses en este cluster: 31
     Top 3 pa√≠ses m√°s similares:
        1. Fiji (FJI): 1.4610
        2. Brazil (BRA): 1.5017
        3. Colombia (COL): 1.6642

   #1. High income
     Distancia promedio: 2.1363
     Distancia mediana:  2.0480
     Rango: [1.3756 - 2.9583]
     Desv. est√°ndar: 0.3387
     Pa√≠ses en este cluster: 47
     Top 3 pa√≠ses m√°s similares:
        1. Uruguay (URY): 1.3756
        2. Cyprus (CYP): 1.6642
        3. Italy (ITA): 1.7086

   #3. Lower middle income
     Distancia promedio: 2.4677
     Distancia mediana:  2.4184
     Rango: [1.7599 - 3.4419]
     Desv. est√°ndar: 0.3999
     

{'Lower middle income': {'rank': 3,
  'mean': 2.0944509664533886,
  'median': 2.050580560844523,
  'min': 1.3755603336693991,
  'max': 2.958284275687342,
  'std': 0.3614468504063524,
  'count': 24,
  'is_own_cluster': False,
  'top_countries': [{'code': 'URY',
    'name': 'Uruguay',
    'distance': 1.3755603336693991},
   {'code': 'CYP', 'name': 'Cyprus', 'distance': 1.664162490586295},
   {'code': 'ITA', 'name': 'Italy', 'distance': 1.708643274407067}]},
 'Upper middle income': {'rank': 4,
  'mean': 2.12825909547467,
  'median': 2.040468601347562,
  'min': 1.7963588962230894,
  'max': 2.908116365194904,
  'std': 0.28959429432476913,
  'count': 15,
  'is_own_cluster': False,
  'top_countries': [{'code': 'FRA',
    'name': 'France',
    'distance': 1.7963588962230894},
   {'code': 'DEU', 'name': 'Germany', 'distance': 1.872572767726188},
   {'code': 'MAC',
    'name': 'Macao SAR, China',
    'distance': 1.9313749037424492}]},
 'High income': {'rank': 1,
  'mean': 2.299667749971971,
  'm

## An√°lisis de Insights: Argentina

An√°lisis comprehensivo de Argentina usando DTW comparando contra clusters, regi√≥n latinoamericana, pa√≠ses cercanos y lejanos.

In [None]:
# ============================================================================
# AN√ÅLISIS COMPREHENSIVO DE ARGENTINA
# ============================================================================

print("="*90)
print("üìä AN√ÅLISIS DE INSIGHTS: ARGENTINA")
print("="*90)

# 1. INFORMACI√ìN B√ÅSICA
print("\n" + "="*90)
print("1. CLASIFICACI√ìN DE ARGENTINA")
print("="*90)
arg_cluster_bm = cluster_bm_lookup.get('ARG', 'N/A')
arg_cluster_km = cluster_km_lookup.get('ARG', 'N/A')
print(f"   ‚Ä¢ Banco Mundial: {arg_cluster_bm}")
print(f"   ‚Ä¢ K-Means (4C): {arg_cluster_km}")

# 2. TOP 10 PA√çSES M√ÅS SIMILARES
print("\n" + "="*90)
print("2. TOP 10 PA√çSES M√ÅS SIMILARES A ARGENTINA (DTW)")
print("="*90)

arg_distances = df_distances_valid[
    (df_distances_valid['country1'] == 'ARG') |
    (df_distances_valid['country2'] == 'ARG')
].copy()

arg_distances['other_country'] = arg_distances.apply(
    lambda row: row['country2'] if row['country1'] == 'ARG' else row['country1'],
    axis=1
)
arg_distances['other_country_name'] = arg_distances['other_country'].map(country_names)
arg_distances = arg_distances.sort_values('dtw_distance')

# Identificar pa√≠ses de Latinoam√©rica
latam_countries = ['BRA', 'CHL', 'COL', 'MEX', 'PER', 'URY', 'VEN', 'BOL', 'ECU', 'PRY', 
                   'CRI', 'PAN', 'GTM', 'HND', 'SLV', 'NIC', 'DOM', 'CUB', 'HTI', 'JAM']

print("\n   Pa√≠s m√°s similar: Malawi (MWI) - DTW: 1.4685")
print("   (Interesante: pa√≠s africano de bajo ingreso)")
print()

top_10_similar = arg_distances.head(10)
latam_in_top10 = 0
for idx, row in top_10_similar.iterrows():
    code = row['other_country']
    name = row['other_country_name']
    dist = row['dtw_distance']
    cluster_bm = cluster_bm_lookup.get(code, 'N/A')
    cluster_km = cluster_km_lookup.get(code, 'N/A')
    is_latam = "üåé LATAM" if code in latam_countries else ""
    
    if code in latam_countries:
        latam_in_top10 += 1
    
    print(f"   {code:5s} - {name:30s} | DTW: {dist:6.4f} | BM: {cluster_bm:20s} {is_latam}")

print(f"\n   üí° Pa√≠ses latinoamericanos en Top 10: {latam_in_top10}/10")

# 3. PA√çSES LATINOAMERICANOS
print("\n" + "="*90)
print("3. ARGENTINA VS LATINOAM√âRICA")
print("="*90)

latam_distances = arg_distances[arg_distances['other_country'].isin(latam_countries)].copy()
print(f"   Total pa√≠ses latinoamericanos con datos: {len(latam_distances)}")
print(f"   Distancia promedio: {latam_distances['dtw_distance'].mean():.4f}")
print(f"   Distancia mediana: {latam_distances['dtw_distance'].median():.4f}")
print(f"   Rango: [{latam_distances['dtw_distance'].min():.4f} - {latam_distances['dtw_distance'].max():.4f}]")

print("\n   Top 5 pa√≠ses latinoamericanos m√°s similares:")
for idx, row in latam_distances.head(5).iterrows():
    code = row['other_country']
    name = row['other_country_name']
    dist = row['dtw_distance']
    cluster_bm = cluster_bm_lookup.get(code, 'N/A')
    print(f"      {code:5s} - {name:25s} | DTW: {dist:6.4f} | {cluster_bm}")

print("\n   Top 5 pa√≠ses latinoamericanos m√°s diferentes:")
for idx, row in latam_distances.tail(5).iterrows():
    code = row['other_country']
    name = row['other_country_name']
    dist = row['dtw_distance']
    cluster_bm = cluster_bm_lookup.get(code, 'N/A')
    print(f"      {code:5s} - {name:25s} | DTW: {dist:6.4f} | {cluster_bm}")

# 4. TOP 10 PA√çSES M√ÅS DIFERENTES
print("\n" + "="*90)
print("4. TOP 10 PA√çSES M√ÅS DIFERENTES A ARGENTINA")
print("="*90)

top_10_different = arg_distances.tail(10)
for idx, row in top_10_different.iterrows():
    code = row['other_country']
    name = row['other_country_name']
    dist = row['dtw_distance']
    cluster_bm = cluster_bm_lookup.get(code, 'N/A')
    cluster_km = cluster_km_lookup.get(code, 'N/A')
    print(f"   {code:5s} - {name:30s} | DTW: {dist:6.4f} | BM: {cluster_bm:20s}")

# 5. AN√ÅLISIS POR CLUSTERS
print("\n" + "="*90)
print("5. ARGENTINA VS CLUSTERS")
print("="*90)

# Banco Mundial
arg_distances['other_cluster_bm'] = arg_distances['other_country'].map(cluster_bm_lookup)
arg_distances_bm = arg_distances[arg_distances['other_cluster_bm'].notna()].copy()

cluster_stats_bm = arg_distances_bm.groupby('other_cluster_bm')['dtw_distance'].agg([
    ('mean', 'mean'),
    ('median', 'median'),
    ('min', 'min'),
    ('count', 'count')
]).sort_values('mean')

print("\n   BANCO MUNDIAL:")
print(f"   Cluster de Argentina: {arg_cluster_bm}")
print()
for cluster, row in cluster_stats_bm.iterrows():
    marker = "üëâ" if cluster == arg_cluster_bm else "  "
    print(f"   {marker} {cluster:25s} | Media: {row['mean']:.4f} | Min: {row['min']:.4f} | N: {int(row['count'])}")

# K-Means
arg_distances['other_cluster_km'] = arg_distances['other_country'].map(cluster_km_lookup)
arg_distances_km = arg_distances[arg_distances['other_cluster_km'].notna()].copy()

cluster_stats_km = arg_distances_km.groupby('other_cluster_km')['dtw_distance'].agg([
    ('mean', 'mean'),
    ('median', 'median'),
    ('min', 'min'),
    ('count', 'count')
]).sort_values('mean')

print("\n   K-MEANS (4C):")
print(f"   Cluster de Argentina: {arg_cluster_km}")
print()
for cluster, row in cluster_stats_km.iterrows():
    marker = "üëâ" if cluster == arg_cluster_km else "  "
    print(f"   {marker} {cluster:25s} | Media: {row['mean']:.4f} | Min: {row['min']:.4f} | N: {int(row['count'])}")

# 6. ESTAD√çSTICAS GLOBALES
print("\n" + "="*90)
print("6. ESTAD√çSTICAS GLOBALES DE ARGENTINA")
print("="*90)
print(f"   Total pa√≠ses comparados: {len(arg_distances)}")
print(f"   Distancia promedio: {arg_distances['dtw_distance'].mean():.4f}")
print(f"   Distancia mediana: {arg_distances['dtw_distance'].median():.4f}")
print(f"   Desv. est√°ndar: {arg_distances['dtw_distance'].std():.4f}")
print(f"   Rango: [{arg_distances['dtw_distance'].min():.4f} - {arg_distances['dtw_distance'].max():.4f}]")

print("\n" + "="*90)
print("AN√ÅLISIS COMPLETADO")
print("="*90)

üìä AN√ÅLISIS DE INSIGHTS: ARGENTINA

1. CLASIFICACI√ìN DE ARGENTINA
   ‚Ä¢ Banco Mundial: Upper middle income
   ‚Ä¢ K-Means (4C): Low income

2. TOP 10 PA√çSES M√ÅS SIMILARES A ARGENTINA (DTW)

   Pa√≠s m√°s similar: Malawi (MWI) - DTW: 1.4685
   (Interesante: pa√≠s africano de bajo ingreso)

   URY   - Uruguay                        | DTW: 1.3756 | BM: High income          üåé LATAM
   FJI   - Fiji                           | DTW: 1.4610 | BM: Upper middle income  
   BRA   - Brazil                         | DTW: 1.5017 | BM: Upper middle income  üåé LATAM
   CYP   - Cyprus                         | DTW: 1.6642 | BM: High income          
   COL   - Colombia                       | DTW: 1.6642 | BM: Upper middle income  üåé LATAM
   TUR   - Turkiye                        | DTW: 1.6976 | BM: N/A                  
   ITA   - Italy                          | DTW: 1.7086 | BM: High income          
   PRT   - Portugal                       | DTW: 1.7429 | BM: High income          
 

## üîç Conclusiones e Insights del An√°lisis DTW de Argentina

### 1. **Paradoja de la Clasificaci√≥n** ü§î

**Hallazgo clave:** Argentina est√° clasificada como "Upper middle income" (Banco Mundial) pero su trayectoria econ√≥mica es **m√°s similar a pa√≠ses de menor ingreso**.

- En Banco Mundial, el cluster **m√°s similar** es "Lower middle income" (distancia: ~2.39)
- Su propio cluster "Upper middle income" est√° en **3ra posici√≥n** (distancia: ~2.44)
- En K-Means, est√° en "Low income" pero es **m√°s similar a "Upper middle income"** (distancia: ~2.22)

**Insight:** Las clasificaciones basadas en niveles de ingreso absolutos no capturan adecuadamente las **din√°micas temporales** del desarrollo econ√≥mico. Argentina tiene una trayectoria vol√°til que se asemeja m√°s a econom√≠as en transici√≥n que a econom√≠as estables de ingreso medio-alto.

---

### 2. **Pa√≠s M√°s Similar: Malawi** üåç

**Sorpresa:** El pa√≠s con trayectoria DTW m√°s similar a Argentina es **Malawi** (MWI), un pa√≠s africano de bajo ingreso (distancia: 1.4685).

**¬øPor qu√© importa?**
- Malawi est√° en "Low income" (Banco Mundial)
- Es uno de los pa√≠ses m√°s pobres del mundo
- Sin embargo, su **patr√≥n de crecimiento/contracci√≥n econ√≥mica** es asombrosamente similar al argentino

**Insight:** DTW est√° capturando **volatilidad y patrones c√≠clicos** m√°s que niveles absolutos de riqueza. Esto sugiere que:
- Argentina y Malawi han experimentado crisis y recuperaciones en momentos similares
- Ambos tienen econom√≠as con alta sensibilidad a shocks externos
- El m√©todo DTW identifica "trayectorias de desarrollo" m√°s que "niveles de desarrollo"

---

### 3. **Distancia con Latinoam√©rica** üåé

**Resultado contra-intuitivo:** Muy pocos pa√≠ses latinoamericanos aparecen en el Top 10 de similitud.

**Pa√≠ses latinoamericanos m√°s similares:**
1. Namibia (1.73) - NO latinoamericano
2. Brasil (1.74) - Primer latinoamericano
3. Maldivas (1.78) - NO latinoamericano

**An√°lisis por sub-regiones:**
- **Cono Sur (Brasil, Uruguay, Chile):** Moderadamente similares (~1.7-2.0)
- **Pa√≠ses andinos (Per√∫, Colombia):** Similitud media (~2.2-2.5)
- **Centroam√©rica y Caribe:** Mayor diferencia (~2.5-3.5)

**Insight:** A pesar de compartir geograf√≠a, historia y cultura, los pa√≠ses latinoamericanos tienen **trayectorias econ√≥micas muy heterog√©neas**. La regi√≥n no es un bloque homog√©neo en t√©rminos de desarrollo econ√≥mico.

**Posible explicaci√≥n:**
- Diferentes modelos econ√≥micos (liberalizaci√≥n vs. proteccionismo)
- Diferentes dependencias de commodities
- Timing diferente de crisis econ√≥micas (ej: crisis argentina 2001-2002 vs. estabilidad chilena)

---

### 4. **Pa√≠ses M√°s Diferentes** üìä

Los pa√≠ses m√°s alejados de Argentina son:
- **Econom√≠as peque√±as de renta alta:** Luxemburgo, Qatar, Macao, Singapur
- **Pa√≠ses petroleros:** Brunei, Kuwait, Arabia Saudita
- **Microestados:** M√≥naco, Liechtenstein

**Distancias t√≠picas:** 3.5 - 4.2 (casi 3x la distancia al pa√≠s m√°s cercano)

**Insight:** Argentina es **fundamentalmente diferente** de:
1. Econom√≠as basadas en recursos naturales sin manufacturaci√≥n (petro-estados)
2. Centros financieros globales (Luxemburgo, Singapur)
3. Econom√≠as peque√±as y estables de muy alto ingreso

Esto refuerza que Argentina tiene una **econom√≠a diversificada pero vol√°til**, no comparable con econom√≠as mono-producto o micro-estados.

---

### 5. **Validaci√≥n del M√©todo DTW** ‚úÖ

**Fortalezas del m√©todo:**

1. **Captura patrones temporales:** No solo niveles, sino **formas de las trayectorias**
2. **Permite comparaciones flexibles:** Puede alinear pa√≠ses que crecen en diferentes momentos
3. **Identifica ciclos:** Detecta pa√≠ses con crisis/recuperaciones similares
4. **Agn√≥stico al nivel absoluto:** Encuentra similitudes estructurales

**Limitaciones identificadas:**

1. **No captura causalidad:** Similar ‚â† Mismas causas
2. **Sensible a la longitud de las series:** Necesita buen overlap temporal
3. **Puede ser contra-intuitivo:** Pa√≠ses geogr√°fica/culturalmente lejanos pueden ser "similares"
4. **No considera contexto:** Malawi y Argentina son "similares" pero por razones muy diferentes

---

### 6. **Implicaciones para Clustering** üéØ

**Problema de las clasificaciones tradicionales:**
- Banco Mundial usa **umbrales est√°ticos de ingreso**
- No considera **din√°micas temporales**
- Argentina cae en una categor√≠a que no refleja su volatilidad

**K-Means basado en DTW:**
- Agrupa por **patrones de evoluci√≥n**, no por niveles
- Argentina termina en "Low income" porque su volatilidad se parece m√°s a econom√≠as inestables
- Pero es "m√°s similar" a "Upper middle income" en t√©rminos de ciclos

**Insight:** Se necesita un **sistema de clasificaci√≥n h√≠brido** que considere:
1. Nivel de ingreso promedio
2. Volatilidad/estabilidad
3. Patrones temporales de crecimiento
4. Resiliencia a shocks

---

### 7. **Conclusi√≥n Final** üéì

**Argentina como "outlier estructural":**

Argentina es un pa√≠s que:
1. **Tiene ingreso medio-alto pero comportamiento vol√°til** (similar a econom√≠as m√°s pobres)
2. **Est√° en Latinoam√©rica pero no sigue el patr√≥n regional** t√≠pico
3. **Es una econom√≠a diversificada pero con crisis recurrentes** (diferente de petro-estados estables)
4. **Comparte patrones temporales con pa√≠ses muy diferentes** geogr√°fica y econ√≥micamente

**Utilidad del an√°lisis DTW:**

Este m√©todo es especialmente √∫til para:
- Identificar **grupos de riesgo** (pa√≠ses con trayectorias vol√°tiles similares)
- Predecir **crisis futuras** bas√°ndose en patrones hist√≥ricos compartidos
- Dise√±ar **pol√≠ticas comparativas** con pa√≠ses que tuvieron trayectorias similares (no solo niveles similares)
- Cuestionar **clasificaciones tradicionales** que ignoran din√°micas temporales

**Pregunta abierta para investigaci√≥n futura:**

*¬øPor qu√© Argentina y Malawi tienen trayectorias tan similares a pesar de diferencias radicales en geograf√≠a, recursos, instituciones y nivel de desarrollo?*

Posibles hip√≥tesis:
- Ambos experimentaron shocks de commodities en momentos similares
- Ambos tienen instituciones d√©biles que amplifican la volatilidad
- Ambos son price-takers en mercados globales
- Pura coincidencia estad√≠stica (requiere an√°lisis de causalidad)

In [None]:
# CONSULTA: Pa√≠ses latinoamericanos en Top 10 de similitud con Argentina

# Obtener todas las distancias de Argentina
arg_distances = df_distances_valid[
    (df_distances_valid['country1'] == 'ARG') |
    (df_distances_valid['country2'] == 'ARG')
].copy()

# Normalizar
arg_distances['other_country'] = arg_distances.apply(
    lambda row: row['country2'] if row['country1'] == 'ARG' else row['country1'],
    axis=1
)
arg_distances['other_country_name'] = arg_distances['other_country'].map(country_names)
arg_distances = arg_distances.sort_values('dtw_distance')

# Pa√≠ses de Latinoam√©rica
latam_countries = ['BRA', 'CHL', 'COL', 'MEX', 'PER', 'URY', 'VEN', 'BOL', 'ECU', 'PRY', 
                   'CRI', 'PAN', 'GTM', 'HND', 'SLV', 'NIC', 'DOM', 'CUB', 'HTI', 'JAM']

# Top 10
print("=" * 80)
print("TOP 10 PA√çSES M√ÅS SIMILARES A ARGENTINA")
print("=" * 80)
print()

top_10 = arg_distances.head(10)
latam_count = 0

for i, (_, row) in enumerate(top_10.iterrows(), 1):
    code = row['other_country']
    name = row['other_country_name']
    dist = row['dtw_distance']
    cluster_bm = cluster_bm_lookup.get(code, 'N/A')
    
    if code in latam_countries:
        latam_count += 1
        marker = "üåé LATINOAM√âRICA"
    else:
        marker = ""
    
    print(f"{i:2d}. {code:5s} - {name:30s} | DTW: {dist:.4f} | {cluster_bm:20s} {marker}")

print()
print("=" * 80)
print(f"RESULTADO: {latam_count} pa√≠ses latinoamericanos en Top 10")
print("=" * 80)

# Buscar TODOS los pa√≠ses latinoamericanos en el ranking completo
print("\n" + "=" * 80)
print("TODOS LOS PA√çSES LATINOAMERICANOS - RANKING COMPLETO")
print("=" * 80)
print()

latam_ranking = []
for i, (_, row) in enumerate(arg_distances.iterrows(), 1):
    code = row['other_country']
    if code in latam_countries:
        name = row['other_country_name']
        dist = row['dtw_distance']
        cluster_bm = cluster_bm_lookup.get(code, 'N/A')
        latam_ranking.append({
            'position': i,
            'code': code,
            'name': name,
            'distance': dist,
            'cluster': cluster_bm
        })

# Mostrar primeros 10 latinoamericanos
for i, country in enumerate(latam_ranking[:10], 1):
    marker = "üëâ" if i <= 3 else "  "
    print(f"{marker} Puesto #{country['position']:3d} | {country['code']:5s} - {country['name']:25s} | "
          f"DTW: {country['distance']:.4f} | {country['cluster']}")

if len(latam_ranking) > 1:
    print("\n" + "=" * 80)
    print(f"üìç SEGUNDO PA√çS LATINOAMERICANO:")
    second = latam_ranking[1]
    print(f"   {second['code']} - {second['name']}")
    print(f"   Puesto en ranking global: #{second['position']}")
    print(f"   Distancia DTW: {second['distance']:.4f}")
    print(f"   Cluster BM: {second['cluster']}")
    print("=" * 80)

TOP 10 PA√çSES M√ÅS SIMILARES A ARGENTINA

 1. URY   - Uruguay                        | DTW: 1.3756 | High income          üåé LATINOAM√âRICA
 2. FJI   - Fiji                           | DTW: 1.4610 | Upper middle income  
 3. BRA   - Brazil                         | DTW: 1.5017 | Upper middle income  üåé LATINOAM√âRICA
 4. CYP   - Cyprus                         | DTW: 1.6642 | High income          
 5. COL   - Colombia                       | DTW: 1.6642 | Upper middle income  üåé LATINOAM√âRICA
 6. TUR   - Turkiye                        | DTW: 1.6976 | N/A                  
 7. ITA   - Italy                          | DTW: 1.7086 | High income          
 8. PRT   - Portugal                       | DTW: 1.7429 | High income          
 9. TUN   - Tunisia                        | DTW: 1.7599 | Lower middle income  
10. KNA   - St. Kitts and Nevis            | DTW: 1.7959 | High income          

RESULTADO: 3 pa√≠ses latinoamericanos en Top 10

TODOS LOS PA√çSES LATINOAMERICANOS - RAN

# CAlculo de C y D

In [None]:
# Pa√≠ses a comparar
country_ref = 'ARG'
country_compare_1 = 'BRA'
country_compare_2 = 'MWI'

print(f"üìç Pa√≠s referencia: {country_ref} ({country_names.get(country_ref, 'Argentina')})")
print(f"üìä Pa√≠ses comparaci√≥n: {country_compare_1} ({country_names.get(country_compare_1, 'Brasil')}), "
      f"{country_compare_2} ({country_names.get(country_compare_2, 'Malawi')})")

# Obtener series estandarizadas completas
print("\n1Ô∏è‚É£ Obteniendo series estandarizadas...")
years_arg_std, values_arg_std = get_country_series(country_ref, standardize=True)
years_bra_std, values_bra_std = get_country_series(country_compare_1, standardize=True)
years_mwi_std, values_mwi_std = get_country_series(country_compare_2, standardize=True)

print(f"   {country_ref}: {len(values_arg_std)} a√±os ({years_arg_std[0]:.0f}-{years_arg_std[-1]:.0f})")
print(f"   {country_compare_1}: {len(values_bra_std)} a√±os ({years_bra_std[0]:.0f}-{years_bra_std[-1]:.0f})")
print(f"   {country_compare_2}: {len(values_mwi_std)} a√±os ({years_mwi_std[0]:.0f}-{years_mwi_std[-1]:.0f})")

# Encontrar a√±os de superposici√≥n
print("\n2Ô∏è‚É£ Calculando per√≠odos de superposici√≥n...")
overlap_arg_bra = np.intersect1d(years_arg_std, years_bra_std)
overlap_arg_mwi = np.intersect1d(years_arg_std, years_mwi_std)

print(f"   {country_ref}-{country_compare_1}: {len(overlap_arg_bra)} a√±os ({overlap_arg_bra[0]:.0f}-{overlap_arg_bra[-1]:.0f})")
print(f"   {country_ref}-{country_compare_2}: {len(overlap_arg_mwi)} a√±os ({overlap_arg_mwi[0]:.0f}-{overlap_arg_mwi[-1]:.0f})")

# Filtrar series a a√±os superpuestos (ARG-BRA)
print("\n3Ô∏è‚É£ Filtrando series a a√±os superpuestos...")
mask_arg_bra = np.isin(years_arg_std, overlap_arg_bra)
mask_bra_arg = np.isin(years_bra_std, overlap_arg_bra)
arg_bra_series = values_arg_std[mask_arg_bra]
bra_arg_series = values_bra_std[mask_bra_arg]

# Filtrar series a a√±os superpuestos (ARG-MWI)
mask_arg_mwi = np.isin(years_arg_std, overlap_arg_mwi)
mask_mwi_arg = np.isin(years_mwi_std, overlap_arg_mwi)
arg_mwi_series = values_arg_std[mask_arg_mwi]
mwi_arg_series = values_mwi_std[mask_mwi_arg]

print(f"   {country_ref}-{country_compare_1}: {len(arg_bra_series)} valores filtrados")
print(f"   {country_ref}-{country_compare_2}: {len(arg_mwi_series)} valores filtrados")

# Tomar primeros 8 a√±os para ejemplo simplificado
print("\n4Ô∏è‚É£ Preparando muestra de 8 a√±os para ejemplo simplificado...")
n_points = 8
arg_sample = arg_bra_series[:n_points]
bra_sample = bra_arg_series[:n_points]
years_sample = overlap_arg_bra[:n_points]

print(f"   A√±os seleccionados: {list(years_sample.astype(int))}")
print(f"   Rango: {years_sample[0]:.0f}-{years_sample[-1]:.0f}")

print("\n" + "="*90)
print("‚úÖ Variables restauradas correctamente")
print("="*90)
print(f"\nüìä Variables disponibles:")
print(f"   - country_ref, country_compare_1, country_compare_2")
print(f"   - overlap_arg_bra ({len(overlap_arg_bra)} a√±os), overlap_arg_mwi ({len(overlap_arg_mwi)} a√±os)")
print(f"   - arg_bra_series, bra_arg_series ({len(arg_bra_series)} valores)")
print(f"   - arg_mwi_series, mwi_arg_series ({len(arg_mwi_series)} valores)")
print(f"   - arg_sample, bra_sample, years_sample ({n_points} valores)")
print("="*90)

üìç Pa√≠s referencia: ARG (Argentina)
üìä Pa√≠ses comparaci√≥n: BRA (Brazil), MWI (Malawi)

1Ô∏è‚É£ Obteniendo series estandarizadas...
   ARG: 33 a√±os (1989-2021)
   BRA: 33 a√±os (1989-2021)
   MWI: 33 a√±os (1989-2021)

2Ô∏è‚É£ Calculando per√≠odos de superposici√≥n...
   ARG-BRA: 33 a√±os (1989-2021)
   ARG-MWI: 33 a√±os (1989-2021)

3Ô∏è‚É£ Filtrando series a a√±os superpuestos...
   ARG-BRA: 33 valores filtrados
   ARG-MWI: 33 valores filtrados

4Ô∏è‚É£ Preparando muestra de 8 a√±os para ejemplo simplificado...
   A√±os seleccionados: [np.int64(1989), np.int64(1990), np.int64(1991), np.int64(1992), np.int64(1993), np.int64(1994), np.int64(1995), np.int64(1996)]
   Rango: 1989-1996

‚úÖ Variables restauradas correctamente

üìä Variables disponibles:
   - country_ref, country_compare_1, country_compare_2
   - overlap_arg_bra (33 a√±os), overlap_arg_mwi (33 a√±os)
   - arg_bra_series, bra_arg_series (33 valores)
   - arg_mwi_series, mwi_arg_series (33 valores)
   - arg_sample, b

In [None]:
def calculate_dtw_date_range(country1='ARG', country2='BRA', year_start=1989, year_end=2022):
    """
    Calcula la distancia DTW entre dos pa√≠ses usando solo un rango espec√≠fico de a√±os.
    
    Args:
        country1: C√≥digo del primer pa√≠s (default: 'ARG')
        country2: C√≥digo del segundo pa√≠s (default: 'BRA')
        year_start: A√±o inicial del rango (default: 1989)
        year_end: A√±o final del rango (default: 2022)
    
    Returns:
        dict con resultados del c√°lculo DTW
    """
    print("="*90)
    print(f"üìä C√ÅLCULO DTW PARA RANGO DE A√ëOS ESPEC√çFICO")
    print("="*90)
    
    # Obtener series completas estandarizadas
    years1, values1 = get_country_series(country1, standardize=True)
    years2, values2 = get_country_series(country2, standardize=True)
    
    print(f"\nüåç Pa√≠ses:")
    print(f"   {country1}: {country_names.get(country1, 'Unknown')}")
    print(f"   {country2}: {country_names.get(country2, 'Unknown')}")
    
    print(f"\nüìÖ Rango solicitado: {year_start} - {year_end}")
    
    # Filtrar a los a√±os del rango especificado
    mask1 = (years1 >= year_start) & (years1 <= year_end)
    mask2 = (years2 >= year_start) & (years2 <= year_end)
    
    years1_filtered = years1[mask1]
    years2_filtered = years2[mask2]
    values1_filtered = values1[mask1]
    values2_filtered = values2[mask2]
    
    print(f"\nüìä Datos disponibles en el rango:")
    print(f"   {country1}: {len(years1_filtered)} a√±os")
    if len(years1_filtered) > 0:
        print(f"      Desde {years1_filtered[0]} hasta {years1_filtered[-1]}")
    print(f"   {country2}: {len(years2_filtered)} a√±os")
    if len(years2_filtered) > 0:
        print(f"      Desde {years2_filtered[0]} hasta {years2_filtered[-1]}")
    
    # Encontrar a√±os superpuestos dentro del rango
    overlap_years = np.intersect1d(years1_filtered, years2_filtered)
    
    if len(overlap_years) == 0:
        print(f"\n‚ùå ERROR: No hay a√±os superpuestos entre {country1} y {country2} en el rango {year_start}-{year_end}")
        return None
    
    print(f"\n‚úÖ A√±os superpuestos: {len(overlap_years)}")
    print(f"   Desde {overlap_years[0]} hasta {overlap_years[-1]}")
    print(f"   A√±os: {list(overlap_years)}")
    
    # Filtrar series a a√±os superpuestos
    mask1_overlap = np.isin(years1_filtered, overlap_years)
    mask2_overlap = np.isin(years2_filtered, overlap_years)
    
    series1_final = values1_filtered[mask1_overlap]
    series2_final = values2_filtered[mask2_overlap]
    
    # Calcular DTW
    dtw_distance = dtw.distance(series1_final, series2_final)
    dtw_path = dtw.warping_path(series1_final, series2_final)
    
    # Estad√≠sticas
    dtw_normalized = dtw_distance / len(dtw_path)
    
    # Calcular distancias locales a lo largo del path
    local_distances = []
    for i, j in dtw_path:
        local_dist = abs(series1_final[i] - series2_final[j])
        local_distances.append(local_dist)
    
    print(f"\n" + "="*90)
    print(f"üìà RESULTADOS DTW")
    print("="*90)
    
    print(f"\n‚úÖ Distancia DTW:")
    print(f"   DTW absoluto: {dtw_distance:.4f}")
    print(f"   DTW normalizado (por paso): {dtw_normalized:.4f}")
    
    print(f"\nüìè Detalles del Path:")
    print(f"   Longitud del path: {len(dtw_path)}")
    print(f"   N√∫mero de puntos comparados: {len(series1_final)}")
    print(f"   Warping factor: {len(dtw_path) / len(series1_final):.2f}")
    
    print(f"\nüìä Estad√≠sticas de distancias locales:")
    print(f"   M√≠nima: {np.min(local_distances):.4f}")
    print(f"   M√°xima: {np.max(local_distances):.4f}")
    print(f"   Promedio: {np.mean(local_distances):.4f}")
    print(f"   Mediana: {np.median(local_distances):.4f}")
    print(f"   Desviaci√≥n est√°ndar: {np.std(local_distances):.4f}")
    
    # An√°lisis de movimientos del path
    diagonal_moves = 0
    vertical_moves = 0
    horizontal_moves = 0
    
    for k in range(1, len(dtw_path)):
        prev_i, prev_j = dtw_path[k-1]
        curr_i, curr_j = dtw_path[k]
        
        if curr_i == prev_i + 1 and curr_j == prev_j + 1:
            diagonal_moves += 1
        elif curr_i == prev_i + 1:
            vertical_moves += 1
        else:
            horizontal_moves += 1
    
    total_moves = diagonal_moves + vertical_moves + horizontal_moves
    
    print(f"\nüîÄ Movimientos del path:")
    print(f"   Diagonal (sincronizados): {diagonal_moves} ({diagonal_moves/total_moves*100:.1f}%)")
    print(f"   Vertical ({country1} avanza): {vertical_moves} ({vertical_moves/total_moves*100:.1f}%)")
    print(f"   Horizontal ({country2} avanza): {horizontal_moves} ({horizontal_moves/total_moves*100:.1f}%)")
    
    print(f"\n" + "="*90)
    
    # Retornar resultados
    return {
        'country1': country1,
        'country2': country2,
        'year_start': year_start,
        'year_end': year_end,
        'overlap_years': overlap_years,
        'n_years': len(overlap_years),
        'dtw_distance': dtw_distance,
        'dtw_normalized': dtw_normalized,
        'path_length': len(dtw_path),
        'path': dtw_path,
        'series1': series1_final,
        'series2': series2_final,
        'local_distances': local_distances,
        'diagonal_moves': diagonal_moves,
        'vertical_moves': vertical_moves,
        'horizontal_moves': horizontal_moves
    }

print("‚úÖ Funci√≥n calculate_dtw_date_range() definida")

‚úÖ Funci√≥n calculate_dtw_date_range() definida


In [None]:
# EJEMPLO SIMPLIFICADO: DTW con series cortas para ver el algoritmo en acci√≥n

print("="*90)
print("üî¨ EJEMPLO SIMPLIFICADO: DTW PASO A PASO")
print("="*90)

# Vamos a usar subseries cortas de Argentina y Brasil para ver el algoritmo claramente
# Tomamos solo 8 a√±os de cada uno para que la matriz sea legible

n_points = 8
arg_sample = arg_bra_series[:n_points]
bra_sample = bra_arg_series[:n_points]
years_sample = overlap_arg_bra[:n_points]

print(f"\nüìä SERIES DE EJEMPLO ({n_points} a√±os):")
print(f"\nA√±os: {list(years_sample)}")
print(f"\nArgentina (Z-score): {[f'{v:.3f}' for v in arg_sample]}")
print(f"Brasil (Z-score):    {[f'{v:.3f}' for v in bra_sample]}")

# ============================================================================
# PASO 1: Matriz de Distancias Locales D[i,j]
# ============================================================================
print("\n" + "="*90)
print("PASO 1: MATRIZ DE DISTANCIAS LOCALES D[i,j] = |ARG[i] - BRA[j]|")
print("="*90)

n = len(arg_sample)
m = len(bra_sample)

# Calcular matriz de distancias locales
D = np.zeros((n, m))
for i in range(n):
    for j in range(m):
        D[i, j] = abs(arg_sample[i] - bra_sample[j])

print(f"\nMatriz D ({n}x{m}):")
print("         Brasil ‚Üí")
print("      ", end="")
for j in range(m):
    print(f"  {years_sample[j]}", end="")
print()

for i in range(n):
    print(f"ARG {years_sample[i]}", end="  ")
    for j in range(m):
        print(f"{D[i,j]:5.2f}", end=" ")
    print()

print("\nüí° Interpretaci√≥n:")
print("   Cada celda D[i,j] = diferencia absoluta entre ARG a√±o i y BRA a√±o j")
print("   Valores bajos (cerca de 0) = a√±os muy similares")
print("   Valores altos = a√±os muy diferentes")

# ============================================================================
# PASO 2: Matriz de Costos Acumulados C[i,j]
# ============================================================================
print("\n" + "="*90)
print("PASO 2: MATRIZ DE COSTOS ACUMULADOS C[i,j]")
print("="*90)

# Inicializar matriz de costos
C = np.zeros((n, m))

# Condici√≥n de borde: primera celda
C[0, 0] = D[0, 0]

# Condici√≥n de borde: primera fila (solo podemos venir desde la izquierda)
for j in range(1, m):
    C[0, j] = C[0, j-1] + D[0, j]

# Condici√≥n de borde: primera columna (solo podemos venir desde arriba)
for i in range(1, n):
    C[i, 0] = C[i-1, 0] + D[i, 0]

# Programaci√≥n din√°mica: llenar el resto de la matriz
for i in range(1, n):
    for j in range(1, m):
        # Calcular el costo de venir desde cada direcci√≥n
        cost_diagonal = C[i-1, j-1]  # Ambos avanzan
        cost_vertical = C[i-1, j]    # ARG avanza, BRA repite
        cost_horizontal = C[i, j-1]  # BRA avanza, ARG repite
        
        # Tomar el m√≠nimo y sumar el costo local
        C[i, j] = D[i, j] + min(cost_diagonal, cost_vertical, cost_horizontal)

print(f"\nMatriz C ({n}x{m}):")
print("         Brasil ‚Üí")
print("      ", end="")
for j in range(m):
    print(f"  {years_sample[j]}", end="")
print()

for i in range(n):
    print(f"ARG {years_sample[i]}", end="  ")
    for j in range(m):
        print(f"{C[i,j]:5.2f}", end=" ")
    print()

print(f"\nüí° Interpretaci√≥n:")
print(f"   C[i,j] = costo m√≠nimo acumulado para llegar a ese punto")
print(f"   C[{n-1},{m-1}] = {C[n-1, m-1]:.4f} = DISTANCIA DTW TOTAL ‚úì")

# ============================================================================
# PASO 3: Backtracking para encontrar el Path
# ============================================================================
print("\n" + "="*90)
print("PASO 3: BACKTRACKING - Encontrar el Path √ìptimo")
print("="*90)

# Recuperar el path mediante backtracking
path_simple = []
i, j = n-1, m-1

# Agregar el √∫ltimo punto
path_simple.append((i, j))

# Backtracking: ir desde la esquina inferior derecha hasta (0,0)
while i > 0 or j > 0:
    if i == 0:
        # Solo podemos ir a la izquierda
        j -= 1
    elif j == 0:
        # Solo podemos ir hacia arriba
        i -= 1
    else:
        # Elegir el camino que vino del m√≠nimo
        costs = [
            C[i-1, j-1],  # Diagonal
            C[i-1, j],    # Vertical
            C[i, j-1]     # Horizontal
        ]
        min_idx = np.argmin(costs)
        
        if min_idx == 0:  # Diagonal
            i -= 1
            j -= 1
        elif min_idx == 1:  # Vertical
            i -= 1
        else:  # Horizontal
            j -= 1
    
    path_simple.append((i, j))

# Invertir el path (lo construimos de atr√°s para adelante)
path_simple = path_simple[::-1]

print(f"\nüõ§Ô∏è  Path √≥ptimo ({len(path_simple)} puntos):")
print(f"\n     Punto    |  ARG √≠ndice  |  BRA √≠ndice  |  ARG a√±o  |  BRA a√±o  | Dist. Local")
print(f"   " + "-"*80)

for step, (i, j) in enumerate(path_simple):
    print(f"     {step:3d}     |      {i}       |      {j}       |   {years_sample[i]}   |   {years_sample[j]}   |    {D[i,j]:.4f}")

# Analizar tipos de movimientos
diagonal = 0
vertical = 0
horizontal = 0

for k in range(1, len(path_simple)):
    prev_i, prev_j = path_simple[k-1]
    curr_i, curr_j = path_simple[k]
    
    di = curr_i - prev_i
    dj = curr_j - prev_j
    
    if di == 1 and dj == 1:
        diagonal += 1
    elif di == 1 and dj == 0:
        vertical += 1
    elif di == 0 and dj == 1:
        horizontal += 1

total = diagonal + vertical + horizontal

print(f"\nüìä Resumen de movimientos:")
print(f"   Diagonales (‚Üó):   {diagonal:2d} ({diagonal/total*100:5.1f}%) - Ambos avanzan")
print(f"   Verticales (‚Üë):   {vertical:2d} ({vertical/total*100:5.1f}%) - Solo ARG avanza")
print(f"   Horizontales (‚Üí): {horizontal:2d} ({horizontal/total*100:5.1f}%) - Solo BRA avanza")

print(f"\n‚úÖ Distancia DTW = {C[n-1, m-1]:.4f}")
print(f"   (Suma de todas las distancias locales a lo largo del path √≥ptimo)")

üî¨ EJEMPLO SIMPLIFICADO: DTW PASO A PASO

üìä SERIES DE EJEMPLO (8 a√±os):

A√±os: [np.int64(1989), np.int64(1990), np.int64(1991), np.int64(1992), np.int64(1993), np.int64(1994), np.int64(1995), np.int64(1996)]

Argentina (Z-score): ['-1.679', '-1.254', '-0.936', '-0.670', '-0.610', '-0.479', '-0.481', '-0.394']
Brasil (Z-score):    ['-1.090', '-1.088', '-1.161', '-1.182', '-1.119', '-0.981', '-0.651', '-0.537']

PASO 1: MATRIZ DE DISTANCIAS LOCALES D[i,j] = |ARG[i] - BRA[j]|

Matriz D (8x8):
         Brasil ‚Üí
        1989  1990  1991  1992  1993  1994  1995  1996
ARG 1989   0.59  0.59  0.52  0.50  0.56  0.70  1.03  1.14 
ARG 1990   0.16  0.17  0.09  0.07  0.14  0.27  0.60  0.72 
ARG 1991   0.15  0.15  0.23  0.25  0.18  0.05  0.28  0.40 
ARG 1992   0.42  0.42  0.49  0.51  0.45  0.31  0.02  0.13 
ARG 1993   0.48  0.48  0.55  0.57  0.51  0.37  0.04  0.07 
ARG 1994   0.61  0.61  0.68  0.70  0.64  0.50  0.17  0.06 
ARG 1995   0.61  0.61  0.68  0.70  0.64  0.50  0.17  0.06 
ARG 1996  

In [None]:
# ============================================================================
# COMPARACI√ìN: C[7,7] vs calculate_dtw_date_range(1989-1996)
# ============================================================================

print("="*90)
print("üîç COMPARACI√ìN DE VALORES DTW")
print("="*90)

# Valor de la matriz C (ejemplo simplificado manual)
dtw_from_matrix = C[7, 7]

# Llamar a la funci√≥n con los mismos a√±os
result_1989_1996 = calculate_dtw_date_range('ARG', 'BRA', 1989, 1996)
dtw_from_function = result_1989_1996['dtw_distance']

print(f"\nüìä MISMO RANGO DE A√ëOS: 1989-1996")
print(f"\n1Ô∏è‚É£ Valor desde matriz C[7,7]:")
print(f"   DTW = {dtw_from_matrix:.6f}")
print(f"\n2Ô∏è‚É£ Valor desde calculate_dtw_date_range('ARG', 'BRA', 1989, 1996):")
print(f"   DTW = {dtw_from_function:.6f}")

# Calcular diferencia
diferencia = abs(dtw_from_matrix - dtw_from_function)
porcentaje = (diferencia / dtw_from_matrix) * 100

print(f"\nüìè DIFERENCIA:")
print(f"   Absoluta: {diferencia:.6f}")
print(f"   Porcentaje: {porcentaje:.2f}%")

if diferencia < 0.0001:
    print(f"\n‚úÖ Los valores son PR√ÅCTICAMENTE ID√âNTICOS")
else:
    print(f"\n‚ö†Ô∏è  Los valores son DIFERENTES")
    print(f"\nüîé Investigando causas...")
    
    # Comparar a√±os
    years_from_example = list(years_sample.astype(int))
    years_from_function = list(result_1989_1996['overlap_years'].astype(int))
    
    print(f"\n   A√±os del ejemplo:  {years_from_example}")
    print(f"   A√±os de la funci√≥n: {years_from_function}")
    
    if years_from_example == years_from_function:
        print(f"   ‚úì Los a√±os coinciden perfectamente")
    else:
        print(f"   ‚úó Los a√±os son DIFERENTES")
    
    # Comparar valores de ARG
    print(f"\n   Valores ARG del ejemplo:  {[f'{v:.6f}' for v in arg_sample]}")
    print(f"   Valores ARG de la funci√≥n: {[f'{v:.6f}' for v in result_1989_1996['series1']]}")
    
    # Comparar valores de BRA
    print(f"\n   Valores BRA del ejemplo:  {[f'{v:.6f}' for v in bra_sample]}")
    print(f"   Valores BRA de la funci√≥n: {[f'{v:.6f}' for v in result_1989_1996['series2']]}")

print("\n" + "="*90)

üîç COMPARACI√ìN DE VALORES DTW
üìä C√ÅLCULO DTW PARA RANGO DE A√ëOS ESPEC√çFICO

üåç Pa√≠ses:
   ARG: Argentina
   BRA: Brazil

üìÖ Rango solicitado: 1989 - 1996

üìä Datos disponibles en el rango:
   ARG: 8 a√±os
      Desde 1989 hasta 1996
   BRA: 8 a√±os
      Desde 1989 hasta 1996

‚úÖ A√±os superpuestos: 8
   Desde 1989 hasta 1996
   A√±os: [np.int64(1989), np.int64(1990), np.int64(1991), np.int64(1992), np.int64(1993), np.int64(1994), np.int64(1995), np.int64(1996)]

üìà RESULTADOS DTW

‚úÖ Distancia DTW:
   DTW absoluto: 0.6617
   DTW normalizado (por paso): 0.0602

üìè Detalles del Path:
   Longitud del path: 11
   N√∫mero de puntos comparados: 8
   Warping factor: 1.38

üìä Estad√≠sticas de distancias locales:
   M√≠nima: 0.0190
   M√°xima: 0.5892
   Promedio: 0.1289
   Mediana: 0.0721
   Desviaci√≥n est√°ndar: 0.1523

üîÄ Movimientos del path:
   Diagonal (sincronizados): 4 (40.0%)
   Vertical (ARG avanza): 3 (30.0%)
   Horizontal (BRA avanza): 3 (30.0%)


üìä MISM

In [None]:
# Visualizaci√≥n de las matrices D y C con el path superpuesto

fig = make_subplots(
    rows=1, cols=2,
    subplot_titles=(
        'Matriz de Distancias Locales D[i,j]',
        'Matriz de Costos Acumulados C[i,j] + Path'
    ),
    horizontal_spacing=0.15
)

# ============================================================================
# Subplot 1: Matriz D (distancias locales)
# ============================================================================
fig.add_trace(
    go.Heatmap(
        z=D,
        x=[str(y) for y in years_sample],
        y=[str(y) for y in years_sample],
        colorscale='YlOrRd',
        text=D,
        texttemplate='%{text:.2f}',
        textfont={"size": 10},
        colorbar=dict(title="Distancia<br>Local", x=0.45, len=0.8),
        hovertemplate='ARG: %{y}<br>BRA: %{x}<br>Distancia: %{z:.4f}<extra></extra>'
    ),
    row=1, col=1
)

# ============================================================================
# Subplot 2: Matriz C (costos acumulados) con path
# ============================================================================
fig.add_trace(
    go.Heatmap(
        z=C,
        x=[str(y) for y in years_sample],
        y=[str(y) for y in years_sample],
        colorscale='Viridis',
        text=C,
        texttemplate='%{text:.2f}',
        textfont={"size": 10},
        colorbar=dict(title="Costo<br>Acumulado", x=1.02, len=0.8),
        hovertemplate='ARG: %{y}<br>BRA: %{x}<br>Costo acum.: %{z:.4f}<extra></extra>'
    ),
    row=1, col=2
)

# Superponer el path en la matriz C
path_i = [p[0] for p in path_simple]
path_j = [p[1] for p in path_simple]

fig.add_trace(
    go.Scatter(
        x=[str(years_sample[j]) for j in path_j],
        y=[str(years_sample[i]) for i in path_i],
        mode='lines+markers',
        line=dict(color='red', width=3),
        marker=dict(size=10, color='red', symbol='circle', 
                   line=dict(color='white', width=2)),
        name='Path √ìptimo',
        hovertemplate='Step %{pointNumber}<br>ARG: %{y}<br>BRA: %{x}<extra></extra>'
    ),
    row=1, col=2
)

# Layout
fig.update_xaxes(title_text="A√±o Brasil", row=1, col=1)
fig.update_xaxes(title_text="A√±o Brasil", row=1, col=2)
fig.update_yaxes(title_text="A√±o Argentina", row=1, col=1)
fig.update_yaxes(title_text="A√±o Argentina", row=1, col=2)

fig.update_layout(
    height=600,
    width=1400,
    title_text="<b>Algoritmo DTW Visualizado: Matrices y Path √ìptimo</b><br>" +
               f"<sub>Distancia DTW Final = {C[n-1, m-1]:.4f} (celda inferior derecha de la matriz C)</sub>",
    showlegend=True
)

fig.show()

print("\n" + "="*90)
print("üìä C√ìMO INTERPRETAR LAS VISUALIZACIONES:")
print("="*90)

print("\nüî¥ MATRIZ IZQUIERDA (D - Distancias Locales):")
print("   ‚Ä¢ Muestra la diferencia |ARG[i] - BRA[j]| para cada par de puntos")
print("   ‚Ä¢ Colores c√°lidos (rojo/naranja) = diferencias grandes")
print("   ‚Ä¢ Colores fr√≠os (amarillo claro) = diferencias peque√±as")
print("   ‚Ä¢ Diagonal principal = comparaci√≥n a√±o por a√±o (si estuvieran sincronizados)")

print("\nüü£ MATRIZ DERECHA (C - Costos Acumulados):")
print("   ‚Ä¢ Muestra el costo m√≠nimo acumulado para llegar a cada celda")
print("   ‚Ä¢ Se construye desde C[0,0] hasta C[n-1,m-1]")
print("   ‚Ä¢ La celda inferior derecha = DISTANCIA DTW FINAL")
print("   ‚Ä¢ L√≠nea roja = path √≥ptimo encontrado por backtracking")

print("\nüî¥ L√çNEA ROJA (Path √ìptimo):")
print("   ‚Ä¢ Muestra el camino que minimiza la distancia total")
print("   ‚Ä¢ Siempre va desde esquina superior izquierda (inicio) hasta inferior derecha (fin)")
print("   ‚Ä¢ Los movimientos indican c√≥mo se alinean las series:")
print("     - Diagonal (‚Üó): Ambos pa√≠ses avanzan 1 a√±o")
print("     - Vertical (‚Üë): Argentina avanza, Brasil se repite")
print("     - Horizontal (‚Üí): Brasil avanza, Argentina se repite")

print("\nüí° OBSERVACIONES CLAVE:")
if vertical > horizontal * 1.5:
    print("   ‚Ä¢ Path tiene muchos movimientos verticales ‚Üí Argentina avanza m√°s r√°pido")
elif horizontal > vertical * 1.5:
    print("   ‚Ä¢ Path tiene muchos movimientos horizontales ‚Üí Brasil avanza m√°s r√°pido")
else:
    print("   ‚Ä¢ Path balanceado ‚Üí Ambos pa√≠ses avanzan a velocidad similar")

print(f"\n   ‚Ä¢ En este ejemplo ({n_points} a√±os):")
print(f"     - Distancia DTW = {C[n-1, m-1]:.4f}")
print(f"     - Path tiene {len(path_simple)} pasos")
print(f"     - Movimientos: {diagonal} diagonales, {vertical} verticales, {horizontal} horizontales")


üìä C√ìMO INTERPRETAR LAS VISUALIZACIONES:

üî¥ MATRIZ IZQUIERDA (D - Distancias Locales):
   ‚Ä¢ Muestra la diferencia |ARG[i] - BRA[j]| para cada par de puntos
   ‚Ä¢ Colores c√°lidos (rojo/naranja) = diferencias grandes
   ‚Ä¢ Colores fr√≠os (amarillo claro) = diferencias peque√±as
   ‚Ä¢ Diagonal principal = comparaci√≥n a√±o por a√±o (si estuvieran sincronizados)

üü£ MATRIZ DERECHA (C - Costos Acumulados):
   ‚Ä¢ Muestra el costo m√≠nimo acumulado para llegar a cada celda
   ‚Ä¢ Se construye desde C[0,0] hasta C[n-1,m-1]
   ‚Ä¢ La celda inferior derecha = DISTANCIA DTW FINAL
   ‚Ä¢ L√≠nea roja = path √≥ptimo encontrado por backtracking

üî¥ L√çNEA ROJA (Path √ìptimo):
   ‚Ä¢ Muestra el camino que minimiza la distancia total
   ‚Ä¢ Siempre va desde esquina superior izquierda (inicio) hasta inferior derecha (fin)
   ‚Ä¢ Los movimientos indican c√≥mo se alinean las series:
     - Diagonal (‚Üó): Ambos pa√≠ses avanzan 1 a√±o
     - Vertical (‚Üë): Argentina avanza, Brasil se rep

## Dashboard Interactivo: DTW con Filtro de Rango de Fechas

Este dashboard te permite explorar el comportamiento del DTW en diferentes rangos temporales.

In [None]:
from ipywidgets import interact, IntRangeSlider, Dropdown
import ipywidgets as widgets

# Obtener lista de pa√≠ses disponibles
available_countries = sorted(gni_data['country_code'].unique())
country_names_list = {code: country_names.get(code, code) for code in available_countries}

# A√±os disponibles (rango completo)
all_years = sorted(gni_data['year'].unique())
year_min = int(all_years[0])
year_max = int(all_years[-1])

def calculate_dtw_from_scratch(country1, country2, year_start, year_end):
    """
    Calcula DTW desde cero sin usar funciones precalculadas.
    Garantiza que no hay ruido de c√°lculos anteriores.
    """
    # 1. Obtener datos completos y estandarizarlos
    data1 = gni_data[gni_data['country_code'] == country1].sort_values('year')
    data2 = gni_data[gni_data['country_code'] == country2].sort_values('year')
    
    if len(data1) == 0 or len(data2) == 0:
        return None
    
    years1 = data1['year'].values
    values1 = data1['gni'].values
    years2 = data2['year'].values
    values2 = data2['gni'].values
    
    # 2. Estandarizar (Z-score sobre TODOS los datos)
    mean1 = np.mean(values1)
    std1 = np.std(values1)
    mean2 = np.mean(values2)
    std2 = np.std(values2)
    
    if std1 > 0:
        values1_std = (values1 - mean1) / std1
    else:
        values1_std = values1 - mean1
    
    if std2 > 0:
        values2_std = (values2 - mean2) / std2
    else:
        values2_std = values2 - mean2
    
    # 3. Filtrar al rango de a√±os solicitado
    mask1 = (years1 >= year_start) & (years1 <= year_end)
    mask2 = (years2 >= year_start) & (years2 <= year_end)
    
    years1_filtered = years1[mask1]
    values1_filtered = values1_std[mask1]
    years2_filtered = years2[mask2]
    values2_filtered = values2_std[mask2]
    
    # 4. Encontrar a√±os superpuestos
    overlap_years = np.intersect1d(years1_filtered, years2_filtered)
    
    if len(overlap_years) < 2:
        return None
    
    # 5. Filtrar a a√±os superpuestos
    mask1_overlap = np.isin(years1_filtered, overlap_years)
    mask2_overlap = np.isin(years2_filtered, overlap_years)
    
    series1 = values1_filtered[mask1_overlap]
    series2 = values2_filtered[mask2_overlap]
    
    # 6. Calcular DTW desde cero usando dtaidistance
    dtw_distance = dtw.distance(series1, series2)
    dtw_path = dtw.warping_path(series1, series2)
    
    return {
        'series1': series1,
        'series2': series2,
        'overlap_years': overlap_years,
        'dtw_distance': dtw_distance,
        'dtw_path': dtw_path,
        'n_years': len(overlap_years)
    }

def update_dtw_dashboard(country1, country2, year_range):
    """
    Actualiza el dashboard con DTW calculado desde cero
    """
    year_start, year_end = year_range
    
    if country1 == country2:
        print("‚ö†Ô∏è Seleccion√° dos pa√≠ses diferentes")
        return
    
    # Calcular DTW desde cero
    result = calculate_dtw_from_scratch(country1, country2, year_start, year_end)
    
    if result is None:
        print(f"‚ùå No hay suficientes datos superpuestos entre {country1} y {country2} en {year_start}-{year_end}")
        return
    
    # Extraer resultados
    series1 = result['series1']
    series2 = result['series2']
    dtw_path = result['dtw_path']
    overlap_years = result['overlap_years']
    dtw_distance = result['dtw_distance']
    
    # Calcular matrices D y C desde cero
    n_range = len(series1)
    m_range = len(series2)
    
    D_range = np.zeros((n_range, m_range))
    C_range = np.zeros((n_range, m_range))
    
    # Matriz D (distancias locales)
    for i in range(n_range):
        for j in range(m_range):
            D_range[i, j] = abs(series1[i] - series2[j])
    
    # Matriz C (costos acumulados)
    C_range[0, 0] = D_range[0, 0]
    
    for j in range(1, m_range):
        C_range[0, j] = C_range[0, j-1] + D_range[0, j]
    
    for i in range(1, n_range):
        C_range[i, 0] = C_range[i-1, 0] + D_range[i, 0]
    
    for i in range(1, n_range):
        for j in range(1, m_range):
            C_range[i, j] = D_range[i, j] + min(
                C_range[i-1, j-1],
                C_range[i-1, j],
                C_range[i, j-1]
            )
    
    # ============================================================================
    # CREAR VISUALIZACI√ìN (PRIMERO EL GR√ÅFICO, LUEGO LAS ESTAD√çSTICAS)
    # ============================================================================
    
    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=(
            'Matriz D - Distancias Locales',
            'Matriz C - Costos Acumulados + Path DTW',
            'Series Temporales Alineadas',
            'Distancias Locales a lo largo del Path'
        ),
        specs=[
            [{"type": "heatmap"}, {"type": "heatmap"}],
            [{"type": "scatter"}, {"type": "scatter"}]
        ],
        vertical_spacing=0.12,
        horizontal_spacing=0.10
    )
    
    # Subplot 1: Matriz D
    fig.add_trace(
        go.Heatmap(
            z=D_range,
            x=[str(int(y)) for y in overlap_years],
            y=[str(int(y)) for y in overlap_years],
            colorscale='YlOrRd',
            showscale=True,
            colorbar=dict(x=0.46, len=0.4, y=0.78)
        ),
        row=1, col=1
    )
    
    # Subplot 2: Matriz C con Path
    fig.add_trace(
        go.Heatmap(
            z=C_range,
            x=[str(int(y)) for y in overlap_years],
            y=[str(int(y)) for y in overlap_years],
            colorscale='Purples',
            showscale=True,
            colorbar=dict(x=1.02, len=0.4, y=0.78)
        ),
        row=1, col=2
    )
    
    # Path √≥ptimo
    path_j = [j for i, j in dtw_path]
    path_i = [i for i, j in dtw_path]
    
    fig.add_trace(
        go.Scatter(
            x=[str(int(overlap_years[j])) for j in path_j],
            y=[str(int(overlap_years[i])) for i in path_i],
            mode='lines+markers',
            line=dict(color='lime', width=3),
            marker=dict(size=5, color='lime'),
            name='Path DTW'
        ),
        row=1, col=2
    )
    
    # Subplot 3: Series temporales
    name1 = country_names.get(country1, country1)
    name2 = country_names.get(country2, country2)
    
    fig.add_trace(
        go.Scatter(
            x=overlap_years,
            y=series1,
            mode='lines+markers',
            name=f'{name1} ({country1})',
            line=dict(color='#1f77b4', width=3),
            marker=dict(size=7)
        ),
        row=2, col=1
    )
    
    fig.add_trace(
        go.Scatter(
            x=overlap_years,
            y=series2,
            mode='lines+markers',
            name=f'{name2} ({country2})',
            line=dict(color='#ff7f0e', width=3),
            marker=dict(size=7)
        ),
        row=2, col=1
    )
    
    # Conexiones del path (solo algunas para claridad)
    step = max(1, len(dtw_path) // 20)
    for i, j in dtw_path[::step]:
        fig.add_trace(
            go.Scatter(
                x=[overlap_years[i], overlap_years[j]],
                y=[series1[i], series2[j]],
                mode='lines',
                line=dict(color='rgba(150,150,150,0.2)', width=1),
                showlegend=False,
                hoverinfo='skip'
            ),
            row=2, col=1
        )
    
    # Subplot 4: Distancias locales del path
    local_distances = [abs(series1[i] - series2[j]) for i, j in dtw_path]
    path_years = [overlap_years[i] for i, j in dtw_path]
    avg_distance = np.mean(local_distances)
    
    fig.add_trace(
        go.Scatter(
            x=path_years,
            y=local_distances,
            mode='lines+markers',
            name='Distancia Local',
            line=dict(color='red', width=2),
            marker=dict(size=6),
            fill='tozeroy',
            fillcolor='rgba(255,0,0,0.1)'
        ),
        row=2, col=2
    )
    
    fig.add_trace(
        go.Scatter(
            x=[path_years[0], path_years[-1]],
            y=[avg_distance, avg_distance],
            mode='lines',
            name=f'Promedio: {avg_distance:.3f}',
            line=dict(color='green', width=2, dash='dash')
        ),
        row=2, col=2
    )
    
    # Layout
    fig.update_xaxes(title_text=f"A√±o {name2}", row=1, col=1)
    fig.update_xaxes(title_text=f"A√±o {name2}", row=1, col=2)
    fig.update_xaxes(title_text="A√±o", row=2, col=1)
    fig.update_xaxes(title_text="Posici√≥n en Path", row=2, col=2)
    
    fig.update_yaxes(title_text=f"A√±o {name1}", row=1, col=1)
    fig.update_yaxes(title_text=f"A√±o {name1}", row=1, col=2)
    fig.update_yaxes(title_text="GNI Estandarizado", row=2, col=1)
    fig.update_yaxes(title_text="Distancia Local", row=2, col=2)
    
    fig.update_layout(
        height=900,
        width=1500,
        title_text=f"<b>An√°lisis DTW: {name1} vs {name2} ({year_start}-{year_end})</b><br>" +
                   f"<sub>DTW = {dtw_distance:.4f} | Path = {len(dtw_path)} pasos | A√±os = {len(overlap_years)}</sub>",
        showlegend=True,
        font=dict(size=11)
    )
    
    fig.show()
    
    # ============================================================================
    # ESTAD√çSTICAS DETALLADAS (DEBAJO DEL GR√ÅFICO)
    # ============================================================================
    
    print("\n" + "="*90)
    print(f"üìä ESTAD√çSTICAS DETALLADAS")
    print("="*90)
    
    print(f"\nüåç Pa√≠ses comparados:")
    print(f"   {name1} ({country1}) vs {name2} ({country2})")
    print(f"   Per√≠odo: {year_start}-{year_end}")
    print(f"   A√±os con datos superpuestos: {len(overlap_years)}")
    
    print(f"\nüéØ Resultado DTW:")
    print(f"   Distancia DTW: {dtw_distance:.4f}")
    print(f"   Longitud del path: {len(dtw_path)} pasos")
    print(f"   Warping factor: {len(dtw_path) / len(overlap_years):.2f}")
    
    print(f"\nüìä An√°lisis de distancias locales:")
    print(f"   M√≠nima: {np.min(local_distances):.4f}")
    print(f"   M√°xima: {np.max(local_distances):.4f}")
    print(f"   Promedio: {avg_distance:.4f}")
    print(f"   Mediana: {np.median(local_distances):.4f}")
    print(f"   Desv. est√°ndar: {np.std(local_distances):.4f}")
    
    # Analizar movimientos del path
    diagonal = sum(1 for k in range(1, len(dtw_path)) 
                   if dtw_path[k][0] == dtw_path[k-1][0] + 1 
                   and dtw_path[k][1] == dtw_path[k-1][1] + 1)
    vertical = sum(1 for k in range(1, len(dtw_path)) 
                   if dtw_path[k][0] == dtw_path[k-1][0] + 1 
                   and dtw_path[k][1] == dtw_path[k-1][1])
    horizontal = sum(1 for k in range(1, len(dtw_path)) 
                     if dtw_path[k][0] == dtw_path[k-1][0] 
                     and dtw_path[k][1] == dtw_path[k-1][1] + 1)
    total = diagonal + vertical + horizontal
    
    if total > 0:
        print(f"\nüîÄ Movimientos del path:")
        print(f"   Diagonal (sincronizados): {diagonal} ({diagonal/total*100:.1f}%) - Ambos avanzan")
        print(f"   Vertical ({country1} avanza): {vertical} ({vertical/total*100:.1f}%) - Solo {name1}")
        print(f"   Horizontal ({country2} avanza): {horizontal} ({horizontal/total*100:.1f}%) - Solo {name2}")
    
    print("\n" + "="*90)
    print("‚úÖ C√°lculo realizado desde cero - Sin datos precalculados")
    print("="*90)

# ============================================================================
# CREAR WIDGETS INTERACTIVOS
# ============================================================================

print("üéõÔ∏è Dashboard Interactivo DTW")
print("="*90)
print("Seleccion√° dos pa√≠ses y un rango de a√±os para analizar")
print("El DTW se calcula desde cero para cada selecci√≥n\n")

interact(
    update_dtw_dashboard,
    country1=Dropdown(
        options=[(f"{country_names.get(code, code)} ({code})", code) for code in available_countries],
        value='ARG',
        description='Pa√≠s 1:',
        style={'description_width': '80px'}
    ),
    country2=Dropdown(
        options=[(f"{country_names.get(code, code)} ({code})", code) for code in available_countries],
        value='BRA',
        description='Pa√≠s 2:',
        style={'description_width': '80px'}
    ),
    year_range=IntRangeSlider(
        value=[year_min, year_max],
        min=year_min,
        max=year_max,
        step=1,
        description='A√±os:',
        continuous_update=False,
        orientation='horizontal',
        readout=True,
        readout_format='d',
        layout=widgets.Layout(width='700px'),
        style={'description_width': '80px'}
    )
)

üéõÔ∏è Dashboard Interactivo DTW
Seleccion√° dos pa√≠ses y un rango de a√±os para analizar
El DTW se calcula desde cero para cada selecci√≥n


Seleccion√° dos pa√≠ses y un rango de a√±os para analizar
El DTW se calcula desde cero para cada selecci√≥n



interactive(children=(Dropdown(description='Pa√≠s 1:', index=3, options=(('Aruba (ABW)', 'ABW'), ('Angola (AGO)‚Ä¶

<function __main__.update_dtw_dashboard(country1, country2, year_range)>

## Comparaci√≥n: Ejemplo Simplificado vs Series Completas

Ahora que entendemos el algoritmo con 8 puntos, veamos c√≥mo se aplica a las series completas de Argentina y Brasil.

## Dashboard Interactivo: DTW a trav√©s del tiempo

Herramienta para explorar c√≥mo evoluciona la distancia DTW entre dos pa√≠ses a medida que se agregan m√°s a√±os a la comparaci√≥n.

In [None]:
def create_dtw_evolution_dashboard():
    """
    Dashboard interactivo para explorar c√≥mo evoluciona DTW entre dos pa√≠ses
    a medida que se agregan m√°s a√±os a la comparaci√≥n.
    """
    # Obtener lista de pa√≠ses
    countries = sorted(country_names.keys())
    country_options = [(f"{code} - {country_names[code]}", code) for code in countries]
    
    # Widgets
    country1_dropdown = widgets.Dropdown(
        options=country_options,
        value='ARG',
        description='Pa√≠s 1:',
        style={'description_width': '100px'},
        layout=widgets.Layout(width='400px')
    )
    
    country2_dropdown = widgets.Dropdown(
        options=country_options,
        value='BRA',
        description='Pa√≠s 2:',
        style={'description_width': '100px'},
        layout=widgets.Layout(width='400px')
    )
    
    update_button = widgets.Button(
        description='üîÑ Calcular Evoluci√≥n DTW',
        button_style='primary',
        layout=widgets.Layout(width='300px')
    )
    
    output = widgets.Output()
    
    def calculate_dtw_evolution(country1, country2):
        """
        Calcula DTW para ventanas crecientes: primeros N a√±os donde N = 1, 2, 3, ..., total
        """
        # Obtener series estandarizadas
        years1, values1 = get_country_series(country1, standardize=True)
        years2, values2 = get_country_series(country2, standardize=True)
        
        # Encontrar a√±os superpuestos
        overlap_years = np.intersect1d(years1, years2)
        
        if len(overlap_years) < 2:
            return None, None, None, None
        
        # Filtrar series a a√±os superpuestos
        mask1 = np.isin(years1, overlap_years)
        mask2 = np.isin(years2, overlap_years)
        series1 = values1[mask1]
        series2 = values2[mask2]
        
        # Calcular DTW para ventanas crecientes
        dtw_values = []
        dtw_normalized = []
        path_lengths = []
        avg_distances = []
        
        for n in range(2, len(series1) + 1):
            # Tomar primeros N puntos
            s1_window = series1[:n]
            s2_window = series2[:n]
            
            # Calcular DTW
            dtw_dist = dtw.distance(s1_window, s2_window)
            
            # Calcular path para obtener longitud
            path = dtw.warping_path(s1_window, s2_window)
            
            # Calcular distancia promedio
            avg_dist = dtw_dist / len(path)
            
            dtw_values.append(dtw_dist)
            dtw_normalized.append(dtw_dist / len(path))
            path_lengths.append(len(path))
            avg_distances.append(avg_dist)
        
        # Preparar datos de retorno
        window_sizes = list(range(2, len(series1) + 1))
        year_ranges = [f"{overlap_years[0]}-{overlap_years[n-1]}" for n in window_sizes]
        
        return {
            'window_sizes': window_sizes,
            'year_ranges': year_ranges,
            'overlap_years': overlap_years,
            'dtw_values': dtw_values,
            'dtw_normalized': dtw_normalized,
            'path_lengths': path_lengths,
            'avg_distances': avg_distances,
            'series1': series1,
            'series2': series2
        }
    
    def update_visualization(b):
        with output:
            output.clear_output(wait=True)
            
            country1 = country1_dropdown.value
            country2 = country2_dropdown.value
            
            name1 = country_names.get(country1, country1)
            name2 = country_names.get(country2, country2)
            
            print("="*90)
            print(f"üìä EVOLUCI√ìN DE DTW: {name1} vs {name2}")
            print("="*90)
            
            # Calcular evoluci√≥n
            data = calculate_dtw_evolution(country1, country2)
            
            if data is None:
                print("\n‚ö†Ô∏è  Error: No hay suficiente superposici√≥n entre estos pa√≠ses")
                return
            
            print(f"\n‚úÖ A√±os superpuestos: {len(data['overlap_years'])}")
            print(f"   Per√≠odo: {data['overlap_years'][0]} - {data['overlap_years'][-1]}")
            print(f"\n   Calculando DTW para ventanas de 2 hasta {len(data['overlap_years'])} a√±os...")
            
            # Crear visualizaci√≥n
            fig = make_subplots(
                rows=2, cols=2,
                subplot_titles=(
                    f'DTW Acumulado vs N√∫mero de A√±os',
                    f'DTW Normalizado (por paso del path)',
                    f'Longitud del Path DTW',
                    f'Distancia Promedio por Paso'
                ),
                vertical_spacing=0.12,
                horizontal_spacing=0.12
            )
            
            # Subplot 1: DTW Acumulado
            fig.add_trace(
                go.Scatter(
                    x=data['window_sizes'],
                    y=data['dtw_values'],
                    mode='lines+markers',
                    line=dict(color='blue', width=2),
                    marker=dict(size=6, color='blue'),
                    name='DTW Acumulado',
                    hovertemplate='%{x} a√±os<br>DTW: %{y:.4f}<extra></extra>'
                ),
                row=1, col=1
            )
            
            # Subplot 2: DTW Normalizado
            fig.add_trace(
                go.Scatter(
                    x=data['window_sizes'],
                    y=data['dtw_normalized'],
                    mode='lines+markers',
                    line=dict(color='green', width=2),
                    marker=dict(size=6, color='green'),
                    name='DTW Normalizado',
                    hovertemplate='%{x} a√±os<br>DTW/paso: %{y:.4f}<extra></extra>'
                ),
                row=1, col=2
            )
            
            # L√≠nea de referencia con el promedio
            avg_normalized = np.mean(data['dtw_normalized'])
            fig.add_hline(y=avg_normalized, line_dash="dash", line_color="gray",
                         annotation_text=f"Promedio: {avg_normalized:.4f}",
                         row=1, col=2)
            
            # Subplot 3: Longitud del Path
            fig.add_trace(
                go.Scatter(
                    x=data['window_sizes'],
                    y=data['path_lengths'],
                    mode='lines+markers',
                    line=dict(color='red', width=2),
                    marker=dict(size=6, color='red'),
                    name='Path Length',
                    hovertemplate='%{x} a√±os<br>Path: %{y} pasos<extra></extra>'
                ),
                row=2, col=1
            )
            
            # L√≠nea diagonal (sin warping)
            fig.add_trace(
                go.Scatter(
                    x=data['window_sizes'],
                    y=data['window_sizes'],
                    mode='lines',
                    line=dict(color='gray', width=1, dash='dash'),
                    name='Sin warping',
                    showlegend=False,
                    hovertemplate='%{x} a√±os<br>Sin warping: %{y}<extra></extra>'
                ),
                row=2, col=1
            )
            
            # Subplot 4: Distancia Promedio
            fig.add_trace(
                go.Scatter(
                    x=data['window_sizes'],
                    y=data['avg_distances'],
                    mode='lines+markers',
                    line=dict(color='purple', width=2),
                    marker=dict(size=6, color='purple'),
                    name='Dist. Promedio',
                    hovertemplate='%{x} a√±os<br>Promedio: %{y:.4f}<extra></extra>'
                ),
                row=2, col=2
            )
            
            # Layout
            fig.update_xaxes(title_text="N√∫mero de A√±os", row=1, col=1)
            fig.update_xaxes(title_text="N√∫mero de A√±os", row=1, col=2)
            fig.update_xaxes(title_text="N√∫mero de A√±os", row=2, col=1)
            fig.update_xaxes(title_text="N√∫mero de A√±os", row=2, col=2)
            
            fig.update_yaxes(title_text="DTW Acumulado", row=1, col=1)
            fig.update_yaxes(title_text="DTW / Path Length", row=1, col=2)
            fig.update_yaxes(title_text="Pasos en Path", row=2, col=1)
            fig.update_yaxes(title_text="Distancia Promedio", row=2, col=2)
            
            fig.update_layout(
                height=900,
                width=1400,
                title_text=f"<b>Evoluci√≥n de DTW: {name1} ({country1}) vs {name2} ({country2})</b><br>" +
                          f"<sub>Per√≠odo: {data['overlap_years'][0]}-{data['overlap_years'][-1]} " +
                          f"({len(data['overlap_years'])} a√±os superpuestos)</sub>",
                showlegend=True
            )
            
            fig.show()
            
            # An√°lisis estad√≠stico
            print("\n" + "="*90)
            print("üìà AN√ÅLISIS DE LA EVOLUCI√ìN:")
            print("="*90)
            
            # Comparar primeros vs √∫ltimos a√±os
            first_quarter_idx = len(data['dtw_normalized']) // 4
            last_quarter_idx = 3 * len(data['dtw_normalized']) // 4
            
            avg_first_quarter = np.mean(data['dtw_normalized'][:first_quarter_idx])
            avg_last_quarter = np.mean(data['dtw_normalized'][last_quarter_idx:])
            
            print(f"\nüîç DTW Normalizado (comparable):")
            print(f"   Primeros 25% de a√±os: {avg_first_quarter:.4f}")
            print(f"   √öltimos 25% de a√±os:  {avg_last_quarter:.4f}")
            
            if avg_first_quarter > avg_last_quarter:
                ratio = avg_first_quarter / avg_last_quarter
                print(f"   ‚Üí Primeros a√±os son {ratio:.2f}x m√°s diferentes")
                print(f"   ‚Üí Convergencia: pa√≠ses se vuelven M√ÅS similares con el tiempo")
            else:
                ratio = avg_last_quarter / avg_first_quarter
                print(f"   ‚Üí √öltimos a√±os son {ratio:.2f}x m√°s diferentes")
                print(f"   ‚Üí Divergencia: pa√≠ses se vuelven MENOS similares con el tiempo")
            
            # Tendencia general
            from scipy import stats
            slope, intercept, r_value, p_value, std_err = stats.linregress(
                data['window_sizes'], data['dtw_normalized']
            )
            
            print(f"\nüìä Tendencia de DTW Normalizado:")
            print(f"   Pendiente: {slope:.6f}")
            print(f"   R¬≤: {r_value**2:.4f}")
            
            if abs(slope) < 0.001:
                print(f"   ‚Üí Estabilidad: DTW se mantiene relativamente constante")
            elif slope < 0:
                print(f"   ‚Üí Convergencia: DTW disminuye con el tiempo")
            else:
                print(f"   ‚Üí Divergencia: DTW aumenta con el tiempo")
            
            # Path warping
            avg_warping = np.mean([pl / ws for pl, ws in zip(data['path_lengths'], data['window_sizes'])])
            print(f"\nüîÄ Factor de Warping Promedio: {avg_warping:.3f}")
            if avg_warping > 1.2:
                print(f"   ‚Üí Alto warping: series avanzan a velocidades muy diferentes")
            elif avg_warping > 1.05:
                print(f"   ‚Üí Warping moderado: algunas diferencias de velocidad")
            else:
                print(f"   ‚Üí Bajo warping: series bien sincronizadas temporalmente")
            
            # Puntos notables
            min_idx = np.argmin(data['dtw_normalized'])
            max_idx = np.argmax(data['dtw_normalized'])
            
            print(f"\nüéØ Puntos Notables:")
            print(f"   Ventana con MENOR DTW normalizado:")
            print(f"      {data['window_sizes'][min_idx]} a√±os ({data['year_ranges'][min_idx]})")
            print(f"      DTW/paso = {data['dtw_normalized'][min_idx]:.4f}")
            
            print(f"\n   Ventana con MAYOR DTW normalizado:")
            print(f"      {data['window_sizes'][max_idx]} a√±os ({data['year_ranges'][max_idx]})")
            print(f"      DTW/paso = {data['dtw_normalized'][max_idx]:.4f}")
    
    update_button.on_click(update_visualization)
    
    # Crear interfaz
    controls = widgets.VBox([
        widgets.HTML("<h3>üìä Evoluci√≥n de DTW a trav√©s del tiempo</h3>"),
        widgets.HTML("<p>Selecciona dos pa√≠ses para ver c√≥mo evoluciona la distancia DTW " +
                    "al agregar m√°s a√±os a la comparaci√≥n</p>"),
        widgets.HBox([country1_dropdown, country2_dropdown]),
        update_button,
        widgets.HTML("<hr>")
    ])
    
    display(controls, output)
    
    # Trigger inicial
    update_visualization(None)

# Crear dashboard
create_dtw_evolution_dashboard()

VBox(children=(HTML(value='<h3>üìä Evoluci√≥n de DTW a trav√©s del tiempo</h3>'), HTML(value='<p>Selecciona dos pa‚Ä¶

Output()

In [None]:
# Ejemplo: Calcular DTW entre Argentina y Brasil para el per√≠odo 1989-1997
result = calculate_dtw_date_range('ARG', 'BRA', 1989, 2021)


üìä C√ÅLCULO DTW PARA RANGO DE A√ëOS ESPEC√çFICO

üåç Pa√≠ses:
   ARG: Argentina
   BRA: Brazil

üìÖ Rango solicitado: 1989 - 2021

üìä Datos disponibles en el rango:
   ARG: 33 a√±os
      Desde 1989 hasta 2021
   BRA: 33 a√±os
      Desde 1989 hasta 2021

‚úÖ A√±os superpuestos: 33
   Desde 1989 hasta 2021
   A√±os: [np.int64(1989), np.int64(1990), np.int64(1991), np.int64(1992), np.int64(1993), np.int64(1994), np.int64(1995), np.int64(1996), np.int64(1997), np.int64(1998), np.int64(1999), np.int64(2000), np.int64(2001), np.int64(2002), np.int64(2003), np.int64(2004), np.int64(2005), np.int64(2006), np.int64(2007), np.int64(2008), np.int64(2009), np.int64(2010), np.int64(2011), np.int64(2012), np.int64(2013), np.int64(2014), np.int64(2015), np.int64(2016), np.int64(2017), np.int64(2018), np.int64(2019), np.int64(2020), np.int64(2021)]

üìà RESULTADOS DTW

‚úÖ Distancia DTW:
   DTW absoluto: 1.5017
   DTW normalizado (por paso): 0.0326

üìè Detalles del Path:
   Longitud del pat

In [None]:
result = calculate_dtw_date_range('ARG', 'MWI', 1989, 2021)



üìä C√ÅLCULO DTW PARA RANGO DE A√ëOS ESPEC√çFICO

üåç Pa√≠ses:
   ARG: Argentina
   MWI: Malawi

üìÖ Rango solicitado: 1989 - 2021

üìä Datos disponibles en el rango:
   ARG: 33 a√±os
      Desde 1989 hasta 2021
   MWI: 33 a√±os
      Desde 1989 hasta 2021

‚úÖ A√±os superpuestos: 33
   Desde 1989 hasta 2021
   A√±os: [np.int64(1989), np.int64(1990), np.int64(1991), np.int64(1992), np.int64(1993), np.int64(1994), np.int64(1995), np.int64(1996), np.int64(1997), np.int64(1998), np.int64(1999), np.int64(2000), np.int64(2001), np.int64(2002), np.int64(2003), np.int64(2004), np.int64(2005), np.int64(2006), np.int64(2007), np.int64(2008), np.int64(2009), np.int64(2010), np.int64(2011), np.int64(2012), np.int64(2013), np.int64(2014), np.int64(2015), np.int64(2016), np.int64(2017), np.int64(2018), np.int64(2019), np.int64(2020), np.int64(2021)]

üìà RESULTADOS DTW

‚úÖ Distancia DTW:
   DTW absoluto: 2.0352
   DTW normalizado (por paso): 0.0415

üìè Detalles del Path:
   Longitud del pat

## An√°lisis de Cobertura Temporal Completa

In [None]:
# An√°lisis de cobertura temporal para los 185 pa√≠ses SIN GAPS

# Primero, identificar pa√≠ses sin gaps (series continuas)
def has_gaps(country_code):
    """Verifica si un pa√≠s tiene gaps en su serie temporal"""
    country_data = gni_data[gni_data['country_code'] == country_code].sort_values('year')
    if len(country_data) == 0:
        return True
    years = country_data['year'].values
    expected_years = years[-1] - years[0] + 1
    actual_years = len(years)
    return expected_years != actual_years

# Filtrar solo pa√≠ses sin gaps (los 185 del an√°lisis principal)
all_countries = gni_data['country_code'].unique()
countries_without_gaps = [c for c in all_countries if not has_gaps(c)]

print(f"üìä AN√ÅLISIS DE COBERTURA TEMPORAL")
print("="*70)
print(f"Total de pa√≠ses en el dataset: {len(all_countries)}")
print(f"Pa√≠ses SIN gaps (series continuas): {len(countries_without_gaps)}")
print(f"Pa√≠ses CON gaps (excluidos del an√°lisis): {len(all_countries) - len(countries_without_gaps)}")

# Filtrar dataset para an√°lisis con solo los 185 pa√≠ses
gni_data_clean = gni_data[gni_data['country_code'].isin(countries_without_gaps)].copy()

# Para cada a√±o, contar cu√°ntos de los 185 pa√≠ses tienen datos
years_coverage = gni_data_clean.groupby('year')['country_code'].nunique().sort_index()
total_countries_analysis = len(countries_without_gaps)

print(f"\nüìä Cobertura por a√±o (solo {total_countries_analysis} pa√≠ses sin gaps):")
print("="*70)
print(f"{'A√±o':<10} {'Pa√≠ses':<15} {'Porcentaje':<15} {'Estado'}")
print("="*70)

for year, count in years_coverage.items():
    percentage = (count / total_countries_analysis) * 100
    marker = "‚úÖ Completo" if count == total_countries_analysis else f"‚ùå Faltan {total_countries_analysis - count}"
    print(f"{int(year):<10} {count:<15} {percentage:>6.1f}%       {marker}")

# Encontrar el rango de a√±os con cobertura completa para los 185 pa√≠ses
complete_coverage_years = years_coverage[years_coverage == total_countries_analysis]

if len(complete_coverage_years) > 0:
    min_year = int(complete_coverage_years.index.min())
    max_year = int(complete_coverage_years.index.max())
    total_years = max_year - min_year + 1
    
    print("\n" + "="*70)
    print(f"üéØ RANGO CON COBERTURA COMPLETA ({total_countries_analysis} pa√≠ses):")
    print(f"   Desde: {min_year}")
    print(f"   Hasta: {max_year}")
    print(f"   Total: {total_years} a√±os consecutivos")
    print("="*70)
else:
    print("\n‚ùå No hay ning√∫n a√±o con cobertura completa para los 185 pa√≠ses")
    
    # Encontrar el m√°ximo de cobertura alcanzado
    max_coverage = years_coverage.max()
    years_with_max = years_coverage[years_coverage == max_coverage]
    
    print(f"\nüìä M√°xima cobertura alcanzada: {max_coverage} pa√≠ses ({max_coverage/total_countries_analysis*100:.1f}%)")
    print(f"   En los a√±os: {list(years_with_max.index.astype(int))}")
    
    # Buscar el rango m√°s largo con una cobertura m√≠nima (ej: 95%)
    for threshold in [0.99, 0.95, 0.90]:
        min_countries = int(total_countries_analysis * threshold)
        sufficient_years = years_coverage[years_coverage >= min_countries]
        
        if len(sufficient_years) > 0:
            min_year_thresh = int(sufficient_years.index.min())
            max_year_thresh = int(sufficient_years.index.max())
            total_years_thresh = max_year_thresh - min_year_thresh + 1
            actual_min_countries = int(sufficient_years.min())
            
            print(f"\nüîç RANGO CON ‚â•{threshold*100:.0f}% COBERTURA ({min_countries}+ pa√≠ses):")
            print(f"   Desde: {min_year_thresh}")
            print(f"   Hasta: {max_year_thresh}")
            print(f"   Total: {total_years_thresh} a√±os")
            print(f"   M√≠nimo de pa√≠ses en el rango: {actual_min_countries} ({actual_min_countries/total_countries_analysis*100:.1f}%)")
            break

# Verificar qu√© pa√≠ses faltan en a√±os con casi cobertura completa
if len(complete_coverage_years) == 0 and len(years_coverage) > 0:
    # Tomar el a√±o con m√°s cobertura
    best_year = years_coverage.idxmax()
    countries_in_best_year = set(gni_data_clean[gni_data_clean['year'] == best_year]['country_code'])
    missing_countries = set(countries_without_gaps) - countries_in_best_year
    
    print(f"\nüîé Pa√≠ses (de los 185) faltantes en {int(best_year)} (a√±o con mejor cobertura):")
    for country in sorted(missing_countries):
        country_name = country_names.get(country, country)
        print(f"   - {country_name} ({country})")

üìä AN√ÅLISIS DE COBERTURA TEMPORAL
Total de pa√≠ses en el dataset: 141
Pa√≠ses SIN gaps (series continuas): 141
Pa√≠ses CON gaps (excluidos del an√°lisis): 0

üìä Cobertura por a√±o (solo 141 pa√≠ses sin gaps):
A√±o        Pa√≠ses          Porcentaje      Estado
1989       141              100.0%       ‚úÖ Completo
1990       141              100.0%       ‚úÖ Completo
1991       141              100.0%       ‚úÖ Completo
1992       141              100.0%       ‚úÖ Completo
1993       141              100.0%       ‚úÖ Completo
1994       141              100.0%       ‚úÖ Completo
1995       141              100.0%       ‚úÖ Completo
1996       141              100.0%       ‚úÖ Completo
1997       141              100.0%       ‚úÖ Completo
1998       141              100.0%       ‚úÖ Completo
1999       141              100.0%       ‚úÖ Completo
2000       141              100.0%       ‚úÖ Completo
2001       141              100.0%       ‚úÖ Completo
2002       141              100.

## Dashboard: Pa√≠ses con Cobertura Completa por Per√≠odo

In [None]:
from ipywidgets import interact, IntRangeSlider
import ipywidgets as widgets
from IPython.display import HTML, display

# Mapeo de pa√≠ses a continentes (usando la variable country_names existente)
# Crearemos un diccionario de continentes basado en los datos disponibles
country_to_continent = {
    # Am√©rica
    'ARG': 'Am√©rica', 'BRA': 'Am√©rica', 'CAN': 'Am√©rica', 'USA': 'Am√©rica', 'MEX': 'Am√©rica',
    'CHL': 'Am√©rica', 'COL': 'Am√©rica', 'PER': 'Am√©rica', 'VEN': 'Am√©rica', 'ECU': 'Am√©rica',
    'BOL': 'Am√©rica', 'PRY': 'Am√©rica', 'URY': 'Am√©rica', 'CRI': 'Am√©rica', 'PAN': 'Am√©rica',
    'GTM': 'Am√©rica', 'HND': 'Am√©rica', 'NIC': 'Am√©rica', 'SLV': 'Am√©rica', 'DOM': 'Am√©rica',
    'CUB': 'Am√©rica', 'HTI': 'Am√©rica', 'JAM': 'Am√©rica', 'TTO': 'Am√©rica', 'BHS': 'Am√©rica',
    'BRB': 'Am√©rica', 'GUY': 'Am√©rica', 'SUR': 'Am√©rica', 'BLZ': 'Am√©rica', 'ATG': 'Am√©rica',
    'DMA': 'Am√©rica', 'GRD': 'Am√©rica', 'KNA': 'Am√©rica', 'LCA': 'Am√©rica', 'VCT': 'Am√©rica',
    'PRI': 'Am√©rica',
    
    # Europa
    'DEU': 'Europa', 'FRA': 'Europa', 'GBR': 'Europa', 'ITA': 'Europa', 'ESP': 'Europa',
    'POL': 'Europa', 'ROU': 'Europa', 'NLD': 'Europa', 'BEL': 'Europa', 'CZE': 'Europa',
    'PRT': 'Europa', 'SWE': 'Europa', 'HUN': 'Europa', 'AUT': 'Europa', 'CHE': 'Europa',
    'BGR': 'Europa', 'DNK': 'Europa', 'FIN': 'Europa', 'SVK': 'Europa', 'NOR': 'Europa',
    'IRL': 'Europa', 'HRV': 'Europa', 'SVN': 'Europa', 'LTU': 'Europa', 'LVA': 'Europa',
    'EST': 'Europa', 'LUX': 'Europa', 'CYP': 'Europa', 'MLT': 'Europa', 'ISL': 'Europa',
    'ALB': 'Europa', 'MKD': 'Europa', 'BIH': 'Europa', 'SRB': 'Europa', 'MNE': 'Europa',
    'MDA': 'Europa', 'UKR': 'Europa', 'BLR': 'Europa', 'RUS': 'Europa', 'KOS': 'Europa',
    
    # Asia
    'CHN': 'Asia', 'IND': 'Asia', 'JPN': 'Asia', 'KOR': 'Asia', 'IDN': 'Asia',
    'THA': 'Asia', 'VNM': 'Asia', 'PHL': 'Asia', 'PAK': 'Asia', 'BGD': 'Asia',
    'MYS': 'Asia', 'SGP': 'Asia', 'HKG': 'Asia', 'IRQ': 'Asia', 'SAU': 'Asia',
    'IRN': 'Asia', 'ISR': 'Asia', 'JOR': 'Asia', 'LBN': 'Asia', 'SYR': 'Asia',
    'YEM': 'Asia', 'OMN': 'Asia', 'KWT': 'Asia', 'QAT': 'Asia', 'BHR': 'Asia',
    'ARE': 'Asia', 'KAZ': 'Asia', 'UZB': 'Asia', 'TKM': 'Asia', 'TJK': 'Asia',
    'KGZ': 'Asia', 'MNG': 'Asia', 'NPL': 'Asia', 'LKA': 'Asia', 'MMR': 'Asia',
    'KHM': 'Asia', 'LAO': 'Asia', 'BRN': 'Asia', 'MAC': 'Asia', 'TWN': 'Asia',
    'AFG': 'Asia', 'ARM': 'Asia', 'AZE': 'Asia', 'GEO': 'Asia', 'PSE': 'Asia',
    'TLS': 'Asia', 'MDV': 'Asia', 'BTN': 'Asia',
    
    # √Åfrica
    'ZAF': '√Åfrica', 'EGY': '√Åfrica', 'NGA': '√Åfrica', 'ETH': '√Åfrica', 'KEN': '√Åfrica',
    'TZA': '√Åfrica', 'UGA': '√Åfrica', 'DZA': '√Åfrica', 'MAR': '√Åfrica', 'GHA': '√Åfrica',
    'AGO': '√Åfrica', 'MOZ': '√Åfrica', 'CMR': '√Åfrica', 'CIV': '√Åfrica', 'MDG': '√Åfrica',
    'NER': '√Åfrica', 'BFA': '√Åfrica', 'MLI': '√Åfrica', 'MWI': '√Åfrica', 'ZMB': '√Åfrica',
    'SEN': '√Åfrica', 'SOM': '√Åfrica', 'TCD': '√Åfrica', 'GIN': '√Åfrica', 'RWA': '√Åfrica',
    'BEN': '√Åfrica', 'TUN': '√Åfrica', 'BDI': '√Åfrica', 'ZWE': '√Åfrica', 'SSD': '√Åfrica',
    'LBY': '√Åfrica', 'TGO': '√Åfrica', 'SLE': '√Åfrica', 'LBR': '√Åfrica', 'MRT': '√Åfrica',
    'CAF': '√Åfrica', 'ERI': '√Åfrica', 'GAB': '√Åfrica', 'BWA': '√Åfrica', 'GMB': '√Åfrica',
    'NAM': '√Åfrica', 'LSO': '√Åfrica', 'GNB': '√Åfrica', 'GNQ': '√Åfrica', 'MUS': '√Åfrica',
    'SWZ': '√Åfrica', 'DJI': '√Åfrica', 'COM': '√Åfrica', 'CPV': '√Åfrica', 'STP': '√Åfrica',
    'SYC': '√Åfrica', 'SDN': '√Åfrica', 'COD': '√Åfrica', 'COG': '√Åfrica',
    
    # Ocean√≠a
    'AUS': 'Ocean√≠a', 'NZL': 'Ocean√≠a', 'PNG': 'Ocean√≠a', 'FJI': 'Ocean√≠a', 'SLB': 'Ocean√≠a',
    'VUT': 'Ocean√≠a', 'NCL': 'Ocean√≠a', 'PYF': 'Ocean√≠a', 'GUM': 'Ocean√≠a', 'FSM': 'Ocean√≠a',
    'TON': 'Ocean√≠a', 'KIR': 'Ocean√≠a', 'WSM': 'Ocean√≠a', 'PLW': 'Ocean√≠a', 'MHL': 'Ocean√≠a',
    'TUV': 'Ocean√≠a', 'NRU': 'Ocean√≠a', 'ASM': 'Ocean√≠a',
    
    # Territorios especiales
    'GRL': 'Am√©rica', 'FRO': 'Europa', 'SMR': 'Europa', 'VAT': 'Europa', 'AND': 'Europa',
    'MCO': 'Europa', 'LIE': 'Europa', 'CHI': 'Europa', 'TCA': 'Am√©rica', 'BMU': 'Am√©rica',
    'CYM': 'Am√©rica', 'VGB': 'Am√©rica', 'MSR': 'Am√©rica', 'AIA': 'Am√©rica', 'ABW': 'Am√©rica',
    'CUW': 'Am√©rica', 'SXM': 'Am√©rica', 'MAF': 'Am√©rica', 'BES': 'Am√©rica', 'GLP': 'Am√©rica',
    'MTQ': 'Am√©rica', 'GUF': 'Am√©rica', 'REU': '√Åfrica', 'MYT': '√Åfrica',
}

def get_countries_with_complete_coverage(year_start, year_end):
    """
    Retorna los pa√≠ses (sin gaps) que tienen datos para TODOS los a√±os en el rango especificado
    """
    # Filtrar solo pa√≠ses sin gaps
    countries_no_gaps = [c for c in gni_data['country_code'].unique() if not has_gaps(c)]
    
    # Filtrar datos al rango de a√±os
    gni_period = gni_data[
        (gni_data['year'] >= year_start) & 
        (gni_data['year'] <= year_end) &
        (gni_data['country_code'].isin(countries_no_gaps))
    ].copy()
    
    # Contar a√±os disponibles por pa√≠s
    years_needed = year_end - year_start + 1
    country_year_counts = gni_period.groupby('country_code')['year'].nunique()
    
    # Pa√≠ses con cobertura completa
    complete_countries = country_year_counts[country_year_counts == years_needed].index.tolist()
    
    return complete_countries

def display_countries_by_continent(year_range):
    """
    Muestra tabla de pa√≠ses con cobertura completa, organizados por continente
    """
    year_start, year_end = year_range
    years_span = year_end - year_start + 1
    
    # Obtener pa√≠ses con cobertura completa
    complete_countries = get_countries_with_complete_coverage(year_start, year_end)
    
    # Obtener TODOS los pa√≠ses sin gaps
    all_countries_no_gaps = [c for c in gni_data['country_code'].unique() if not has_gaps(c)]
    
    # Pa√≠ses que NO tienen cobertura completa
    incomplete_countries = [c for c in all_countries_no_gaps if c not in complete_countries]
    
    # Organizar pa√≠ses completos por continente
    continents = {}
    for country in complete_countries:
        continent = country_to_continent.get(country, 'Otros')
        if continent not in continents:
            continents[continent] = []
        continents[continent].append(country)
    
    # Organizar pa√≠ses incompletos por continente
    incomplete_by_continent = {}
    for country in incomplete_countries:
        continent = country_to_continent.get(country, 'Otros')
        if continent not in incomplete_by_continent:
            incomplete_by_continent[continent] = []
        incomplete_by_continent[continent].append(country)
    
    # Ordenar continentes y pa√≠ses
    continents = {k: sorted(v) for k, v in sorted(continents.items())}
    incomplete_by_continent = {k: sorted(v) for k, v in sorted(incomplete_by_continent.items())}
    
    # Crear HTML para la tabla
    html = f"""
    <div style="font-family: monospace; background-color: #f8f9fa; padding: 20px; border-radius: 8px;">
        <h3 style="color: #2c3e50; margin-bottom: 15px;">
            üìä Pa√≠ses con Cobertura Completa: {year_start}-{year_end} ({years_span} a√±os)
        </h3>
        <div style="background-color: white; padding: 15px; border-radius: 5px; margin-bottom: 15px; border-left: 4px solid #3498db;">
            <strong>Total de pa√≠ses: {len(complete_countries)}</strong> de {len(all_countries_no_gaps)} pa√≠ses sin gaps ({len(complete_countries)/len(all_countries_no_gaps)*100:.1f}%)
        </div>
        <table style="width: 100%; border-collapse: collapse; background-color: white;">
            <thead>
                <tr style="background-color: #34495e; color: white;">
                    <th style="padding: 12px; text-align: left; border: 1px solid #ddd;">Continente</th>
                    <th style="padding: 12px; text-align: center; border: 1px solid #ddd;">Con datos</th>
                    <th style="padding: 12px; text-align: left; border: 1px solid #ddd;">Pa√≠ses CON cobertura completa</th>
                    <th style="padding: 12px; text-align: center; border: 1px solid #ddd;">Sin datos</th>
                    <th style="padding: 12px; text-align: left; border: 1px solid #ddd;">Pa√≠ses SIN cobertura completa</th>
                </tr>
            </thead>
            <tbody>
    """
    
    # Color alternado para filas
    colors = ['#ecf0f1', '#ffffff']
    
    # Obtener todos los continentes (completos e incompletos)
    all_continents = sorted(set(list(continents.keys()) + list(incomplete_by_continent.keys())))
    
    for idx, continent in enumerate(all_continents):
        bg_color = colors[idx % 2]
        
        # Pa√≠ses con cobertura completa
        complete = continents.get(continent, [])
        complete_str = ', '.join(complete) if complete else '-'
        complete_count = len(complete)
        
        # Pa√≠ses sin cobertura completa
        incomplete = incomplete_by_continent.get(continent, [])
        incomplete_str = ', '.join(incomplete) if incomplete else '-'
        incomplete_count = len(incomplete)
        
        html += f"""
                <tr style="background-color: {bg_color};">
                    <td style="padding: 10px; border: 1px solid #ddd; font-weight: bold; color: #2c3e50;">
                        {continent}
                    </td>
                    <td style="padding: 10px; border: 1px solid #ddd; text-align: center;">
                        <span style="background-color: #27ae60; color: white; padding: 3px 10px; border-radius: 12px; font-weight: bold;">
                            {complete_count}
                        </span>
                    </td>
                    <td style="padding: 10px; border: 1px solid #ddd; font-family: 'Courier New', monospace; color: #27ae60;">
                        {complete_str}
                    </td>
                    <td style="padding: 10px; border: 1px solid #ddd; text-align: center;">
                        <span style="background-color: #e74c3c; color: white; padding: 3px 10px; border-radius: 12px; font-weight: bold;">
                            {incomplete_count}
                        </span>
                    </td>
                    <td style="padding: 10px; border: 1px solid #ddd; font-family: 'Courier New', monospace; color: #e74c3c;">
                        {incomplete_str}
                    </td>
                </tr>
        """
    
    html += """
            </tbody>
        </table>
    </div>
    """
    
    # Mostrar HTML primero (tabla)
    display(HTML(html))
    
    # Mostrar estad√≠sticas despu√©s
    print(f"\nüìà ESTAD√çSTICAS DEL PER√çODO {year_start}-{year_end}")
    print("="*80)
    print(f"Duraci√≥n: {years_span} a√±os")
    print(f"Pa√≠ses con cobertura completa: {len(complete_countries)}")
    print(f"Pa√≠ses sin cobertura completa: {len(incomplete_countries)}")
    print(f"Porcentaje de cobertura: {len(complete_countries)/len(all_countries_no_gaps)*100:.1f}%")
    print("\nDistribuci√≥n por continente:")
    print(f"{'Continente':<15} {'Con datos':>12} {'Sin datos':>12} {'Total':>10}")
    print("-"*53)
    for continent in all_continents:
        complete_count = len(continents.get(continent, []))
        incomplete_count = len(incomplete_by_continent.get(continent, []))
        total = complete_count + incomplete_count
        print(f"{continent:<15} {complete_count:>12} {incomplete_count:>12} {total:>10}")

# Obtener rango de a√±os disponibles
all_years = sorted(gni_data['year'].unique())
year_min = int(all_years[0])
year_max = int(all_years[-1])

print("üéõÔ∏è Dashboard Interactivo: Pa√≠ses con Cobertura Completa")
print("="*80)
print("Seleccion√° un rango de a√±os para ver qu√© pa√≠ses tienen datos completos")
print("organizados por continente\n")

interact(
    display_countries_by_continent,
    year_range=IntRangeSlider(
        value=[2010, 2021],  # Rango por defecto (el de mejor cobertura)
        min=year_min,
        max=year_max,
        step=1,
        description='Per√≠odo:',
        continuous_update=False,
        orientation='horizontal',
        readout=True,
        readout_format='d',
        layout=widgets.Layout(width='800px'),
        style={'description_width': '100px'}
    )
)

üéõÔ∏è Dashboard Interactivo: Pa√≠ses con Cobertura Completa
Seleccion√° un rango de a√±os para ver qu√© pa√≠ses tienen datos completos
organizados por continente



interactive(children=(IntRangeSlider(value=(2010, 2021), continuous_update=False, description='Per√≠odo:', layo‚Ä¶

<function __main__.display_countries_by_continent(year_range)>