# üåûüî¨ An√°lisis Exploratorio HelioBio-Economic
## Investigaci√≥n de Correlaciones Solares-Econ√≥micas

**Autor:** Benjamin Cabeza Durán (mechmind-dwv)  
**Asistente:** DeepSeek AI  
**Fecha:** Generado automáticamente

---

Este notebook realiza un análisis exploratorio de datos para investigar las correlaciones entre la actividad solar y los ciclos económicos, extendiendo el trabajo pionero de Alexander Chizhevsky.

In [None]:
# Initial setup.
# The warning filter is installed before the remaining imports, so warnings
# raised while those libraries load are silenced too. Note that 'ignore'
# hides every warning for the rest of the session.
import warnings
warnings.filterwarnings('ignore')

import os
import sys
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Put the parent directory on the import path so the project's own
# packages can be imported from this notebook.
sys.path.append('../')

# Plot appearance: base style sheet first, then the colour palette, then the
# default figure size (set last so the style sheet cannot override it).
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette("husl")
plt.rcParams.update({'figure.figsize': (12, 6)})

print("‚úÖ M√≥dulos importados correctamente")

## 1. Carga y Preparación de Datos

In [None]:
from app.services.nasa_solar_service import NASASolarService
from app.services.economic_data_service import EconomicDataService
from app.services.correlation_service import CorrelationService

# Inicializar servicios
nasa_service = NASASolarService()
economic_service = EconomicDataService()
correlation_service = CorrelationService()

# Cargar datos hist√≥ricos
print("üì• Cargando datos hist√≥ricos...")

# Datos solares (50 a√±os)
solar_data = await nasa_service.get_historical_solar_data(50)
print(f"üåû Datos solares: {len(solar_data)} registros")

# Datos econ√≥micos
economic_data = await economic_service.get_long_term_economic_data()
print(f"üíπ Datos econ√≥micos: {len(economic_data)} registros")

# Datos de mercado
market_data = await economic_service.get_market_data("^GSPC", "50y")
sp500_prices = [item['price'] for item in market_data['market_data']]
sp500_dates = [item['timestamp'] for item in market_data['market_data']]
sp500_series = pd.Series(sp500_prices, index=pd.to_datetime(sp500_dates))

print("‚úÖ Datos cargados correctamente")

## 2. Análisis Exploratorio de Series Temporales

In [None]:
# Overview figure: a 2x2 grid with one time series per panel.
def _draw_series_panel(ax, x, y, *, color, title, ylabel):
    """Plot one series on *ax* and apply the formatting shared by all panels."""
    ax.plot(x, y, color=color, linewidth=1)
    ax.set_title(title, fontsize=14, fontweight='bold')
    ax.set_ylabel(ylabel)
    ax.grid(True, alpha=0.3)


fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# Top-left: sunspots (panel stays empty when the column is absent).
if 'sunspots' in solar_data.columns:
    _draw_series_panel(
        axes[0, 0], solar_data.index, solar_data['sunspots'],
        color='orange',
        title='Manchas Solares - Serie Temporal',
        ylabel='N√∫mero de Manchas',
    )

# Top-right: S&P 500 price series (always drawn).
_draw_series_panel(
    axes[0, 1], sp500_series.index, sp500_series.values,
    color='green',
    title='S&P 500 - Serie Temporal',
    ylabel='Precio (USD)',
)

# Bottom-left: solar flux (panel stays empty when the column is absent).
if 'solar_flux' in solar_data.columns:
    _draw_series_panel(
        axes[1, 0], solar_data.index, solar_data['solar_flux'],
        color='red',
        title='Flujo Solar - Serie Temporal',
        ylabel='Flux (sfu)',
    )

# Bottom-right: GDP, only when the economic data provides it.
if 'GDP' in economic_data.columns:
    _draw_series_panel(
        axes[1, 1], economic_data.index, economic_data['GDP'],
        color='blue',
        title='GDP - Serie Temporal',
        ylabel='GDP (Billones USD)',
    )

plt.tight_layout()
plt.show()

## 3. Análisis de Ciclos y Estacionalidad

In [None]:
from scipy import signal
from scipy.fft import fft, fftfreq

# Spectral analysis of the sunspot series.
# The block is skipped when the column is missing or holds no valid samples;
# an empty series would otherwise fail inside the FFT helpers.
if 'sunspots' in solar_data.columns and solar_data['sunspots'].notna().any():
    sunspots_clean = solar_data['sunspots'].dropna()

    # FFT for cycle detection. A sample spacing of 1/12 makes the frequency
    # axis "cycles per year", which assumes one sample per month.
    # NOTE(review): confirm the solar service really returns monthly data.
    n = len(sunspots_clean)
    yf = fft(sunspots_clean.values)
    xf = fftfreq(n, 1/12)

    # Keep strictly positive frequencies only: this drops the DC term and
    # the mirrored negative half of the spectrum.
    positive = np.where(xf > 0)[0]
    frequencies = xf[positive]
    # Magnitude of the FFT coefficients; this is what the plot and the
    # printed report call "potencia" (it is not squared).
    power = np.abs(yf[positive])

    # Convert frequencies (cycles/year) to periods in years.
    periods = 1 / frequencies

    # Longest cycle considered, both for the plot range and the ranking.
    max_period_years = 20

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

    # Left panel: raw series plus a 132-sample rolling mean (11 years of
    # monthly samples) to smooth over one solar cycle.
    ax1.plot(sunspots_clean.index, sunspots_clean.values, alpha=0.7, label='Original')
    ax1.plot(sunspots_clean.index, sunspots_clean.rolling(132).mean(),
             color='red', linewidth=2, label='Media m√≥vil 11 a√±os')
    ax1.set_title('Ciclos Solares - Manchas Solares', fontsize=14, fontweight='bold')
    ax1.set_ylabel('N√∫mero de Manchas')
    ax1.legend()
    ax1.grid(True, alpha=0.3)

    # Right panel: spectrum against period, with the 11-year reference line.
    ax2.plot(periods, power, color='purple')
    ax2.set_xlim(0, max_period_years)
    ax2.set_title('Espectro de Potencia - Ciclos Solares', fontsize=14, fontweight='bold')
    ax2.set_xlabel('Per√≠odo (a√±os)')
    ax2.set_ylabel('Potencia')
    ax2.axvline(11, color='red', linestyle='--', alpha=0.7, label='Ciclo Schwabe (11 a√±os)')
    ax2.legend()
    ax2.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    # Dominant cycles. The period filter is applied BEFORE ranking, so that
    # very long-period bins cannot take up slots in the top five and leave
    # the report with fewer entries than intended. Strongest peak first.
    in_range = np.where(periods <= max_period_years)[0]
    dominant_idx = in_range[np.argsort(power[in_range])[::-1][:5]]
    print("üîç Ciclos dominantes detectados:")
    for peak in dominant_idx:
        print(f"  - {periods[peak]:.2f} a√±os (potencia: {power[peak]:.2f})")

## 4. Correlación entre Variables Solares y Económicas

In [None]:
# Lagged cross-correlation between sunspots and the S&P 500.
def _lagged_correlation(x, y, lag):
    """Return the Pearson correlation between x[t] and y[t - lag].

    Args:
        x: 1-D float array (first series).
        y: 1-D float array of the same length, positionally aligned with x.
        lag: Shift in samples. A negative lag pairs x with LATER values of
            y; a positive lag pairs x with EARLIER values of y.

    Returns:
        The correlation coefficient, or NaN when fewer than three valid
        (non-NaN) pairs remain after shifting.
    """
    if lag < 0:
        a, b = x[:lag], y[-lag:]
    elif lag > 0:
        a, b = x[lag:], y[:-lag]
    else:
        a, b = x, y
    # Drop pairs where either side is NaN; a single NaN would otherwise
    # turn the whole coefficient into NaN.
    valid = ~(np.isnan(a) | np.isnan(b))
    # Two points always correlate at +/-1, so require at least three.
    if valid.sum() < 3:
        return np.nan
    return np.corrcoef(a[valid], b[valid])[0, 1]


if 'sunspots' in solar_data.columns and not sp500_series.empty:
    # Align both series on the timestamps they share.
    common_dates = solar_data.index.intersection(sp500_series.index)
    sunspots_aligned = solar_data.loc[common_dates, 'sunspots']
    sp500_aligned = sp500_series.loc[common_dates]

    # Remove the slow-moving component by subtracting a 12-sample rolling
    # mean. The first 11 values of each result are NaN by construction.
    sunspots_detrended = sunspots_aligned - sunspots_aligned.rolling(12).mean()
    sp500_detrended = sp500_aligned - sp500_aligned.rolling(12).mean()

    # Correlation over a range of lags. The DETRENDED series are used here:
    # correlating the raw, trending price level would mostly measure the
    # shared trend rather than any cyclical co-movement.
    # Lags are counted in samples and reported as months, which assumes
    # monthly data - NOTE(review): confirm the sampling of both sources.
    max_lag = 60  # 5 years of monthly samples
    lags = range(-max_lag, max_lag + 1)
    sunspots_values = np.asarray(sunspots_detrended.values, dtype=float)
    sp500_values = np.asarray(sp500_detrended.values, dtype=float)
    correlations = [
        _lagged_correlation(sunspots_values, sp500_values, lag) for lag in lags
    ]

    # Two-panel figure: detrended series and the cross-correlation curve.
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

    # Left panel: both detrended series on a shared axis.
    ax1.plot(sunspots_detrended.index, sunspots_detrended.values,
             color='orange', label='Manchas Solares (detrended)', alpha=0.7)
    ax1.plot(sp500_detrended.index, sp500_detrended.values,
             color='green', label='S&P 500 (detrended)', alpha=0.7)
    ax1.set_title('Series Temporales Alineadas (Detrended)', fontsize=14, fontweight='bold')
    ax1.legend()
    ax1.grid(True, alpha=0.3)

    # Right panel: correlation as a function of lag.
    ax2.plot(lags, correlations, color='blue', linewidth=2)
    ax2.axvline(0, color='red', linestyle='--', alpha=0.5, label='Lag 0')
    ax2.set_title('Correlaci√≥n Cruzada: Manchas Solares vs S&P 500', fontsize=14, fontweight='bold')
    ax2.set_xlabel('Lag (meses)')
    ax2.set_ylabel('Correlaci√≥n')
    ax2.legend()
    ax2.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    # Lag with the largest absolute correlation. NaN entries are ignored;
    # if every lag is NaN there is nothing meaningful to report.
    abs_correlations = np.abs(correlations)
    if np.isnan(abs_correlations).all():
        print("No hay suficientes datos comunes para estimar los lags.")
    else:
        best_position = int(np.nanargmax(abs_correlations))
        optimal_lag = lags[best_position]
        max_correlation = correlations[best_position]

        print(f"üîó Correlaci√≥n m√°xima: {max_correlation:.3f} en lag {optimal_lag} meses")
        print(f"üìä Correlaci√≥n contempor√°nea (lag 0): {correlations[max_lag]:.3f}")

## 5. Análisis de Crisis Históricas vs Ciclos Solares

In [None]:
from app.core.economic_cycles import economic_cycle_analyzer

# Historical crisis analysis provided by the project's cycle analyzer.
crisis_analysis = economic_cycle_analyzer.analyze_historical_crises()

# Show the crises on top of the sunspot series.
if 'sunspots' in solar_data.columns:
    fig, ax = plt.subplots(figsize=(15, 8))

    # Sunspot series as background context.
    ax.plot(solar_data.index, solar_data['sunspots'],
            color='orange', linewidth=1, alpha=0.7, label='Manchas Solares')

    # Crisis labels and the calendar day associated with each one.
    crisis_dates = {
        'Gran Depresi√≥n': '1929-10-29',
        'Crisis Petr√≥leo': '1973-10-16',
        'Lunes Negro': '1987-10-19',
        'Burbuja Dot-com': '2000-03-10',
        'Crisis 2008': '2008-09-15',
        'COVID-19': '2020-03-23'
    }

    # Each crisis is attached to the NEAREST available observation rather
    # than requiring its exact day to be an index label; an exact-match test
    # would silently drop every crisis when the data is sampled more
    # coarsely than daily. Crises outside the loaded range are skipped.
    # Assumes a datetime-like index - NOTE(review): confirm with the service.
    sunspot_obs = solar_data['sunspots'].dropna()
    obs_index = sunspot_obs.index
    index_tz = getattr(obs_index, 'tz', None)
    has_observations = len(obs_index) > 0
    first_date = obs_index.min() if has_observations else None
    last_date = obs_index.max() if has_observations else None

    for crisis, date_str in crisis_dates.items():
        date = pd.to_datetime(date_str)
        if index_tz is not None:
            # Match the index's timezone so the comparisons below are valid.
            date = date.tz_localize(index_tz)
        if not has_observations or not (first_date <= date <= last_date):
            continue

        nearest_pos = int(abs(obs_index - date).argmin())
        marker_date = obs_index[nearest_pos]
        crisis_value = sunspot_obs.iloc[nearest_pos]

        ax.scatter(marker_date, crisis_value, color='red', s=100, zorder=5)
        ax.annotate(crisis, (marker_date, crisis_value),
                    xytext=(10, 10), textcoords='offset points',
                    bbox=dict(boxstyle='round,pad=0.3', facecolor='yellow', alpha=0.7))

    ax.set_title('Crisis Hist√≥ricas en Contexto de Ciclos Solares', fontsize=16, fontweight='bold')
    ax.set_ylabel('N√∫mero de Manchas Solares')
    ax.legend()
    ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    # Summary metrics taken from the analyzer result, printed as percentages.
    print("üìà An√°lisis de Crisis vs Fases Solares:")
    print(f"  - Crisis en m√°ximos solares: {crisis_analysis['correlation_metrics']['crisis_at_solar_maximum']:.1%}")
    print(f"  - Crisis en m√≠nimos solares: {crisis_analysis['correlation_metrics']['crisis_at_solar_minimum']:.1%}")
    print(f"  - Correlaci√≥n general: {crisis_analysis['correlation_metrics']['overall_correlation']:.1%}")

## 6. Heatmap de Correlaciones Multivariadas

In [None]:
# Gather the series that feed the correlation matrix.
# Each pair is (source column, label used in the matrix); a series is only
# included when its source column exists. Insertion order fixes the
# row/column order of the heatmap.
solar_columns = (('sunspots', 'Sunspots'), ('solar_flux', 'Solar_Flux'))
economic_columns = (('GDP', 'GDP'), ('UNEMPLOYMENT', 'Unemployment'))

correlation_data = {}
for source_name, label in solar_columns:
    if source_name in solar_data.columns:
        correlation_data[label] = solar_data[source_name]

correlation_data['SP500'] = sp500_series

for source_name, label in economic_columns:
    if source_name in economic_data.columns:
        correlation_data[label] = economic_data[source_name]

# Combine into one frame and fill gaps forward, then backward.
corr_df = pd.DataFrame(correlation_data).ffill().bfill()

# Pairwise correlation matrix of the combined frame.
correlation_matrix = corr_df.corr()

# Heatmap of the lower triangle: the mask hides the diagonal and everything
# above it.
plt.figure(figsize=(10, 8))
mask = np.triu(np.ones(correlation_matrix.shape, dtype=bool))

heatmap_options = {
    'mask': mask,
    'annot': True,
    'cmap': 'RdBu_r',
    'center': 0,
    'square': True,
    'fmt': '.3f',
    'cbar_kws': {"shrink": .8},
}
sns.heatmap(correlation_matrix, **heatmap_options)

plt.title('Matriz de Correlaci√≥n: Variables Solares vs Econ√≥micas',
          fontsize=16, fontweight='bold', pad=20)
plt.tight_layout()
plt.show()

# Textual summary for a few pairs of interest; pairs with a missing variable
# are skipped.
print("üîç An√°lisis de Correlaciones Clave:")
key_pairs = [('Sunspots', 'SP500'), ('Solar_Flux', 'GDP'), ('Sunspots', 'Unemployment')]
available = correlation_matrix.columns
for var1, var2 in key_pairs:
    if var1 not in available or var2 not in available:
        continue

    corr_value = correlation_matrix.loc[var1, var2]

    # Strength buckets on the absolute value: > 0.5, > 0.3, otherwise weak.
    magnitude = abs(corr_value)
    if magnitude > 0.5:
        strength = "Fuerte"
    elif magnitude > 0.3:
        strength = "Moderada"
    else:
        strength = "D√©bil"

    if corr_value > 0:
        direction = "positiva"
    else:
        direction = "negativa"

    print(f"  - {var1} vs {var2}: {corr_value:.3f} ({strength} {direction})")

## 7. Análisis de Ondas Largas (Kondratiev)

In [None]:
from app.core.kondratiev_analysis import kondratiev_analyzer

# Long-wave analysis. The report object is generated as in the original
# flow even though only the analysis result is printed below.
kondratiev_analysis = kondratiev_analyzer.analyze_long_waves()
kondratiev_report = kondratiev_analyzer.generate_kondratiev_report()

print("üåä AN√ÅLISIS DE ONDAS LARGAS KONDRATIEV")
print("=" * 50)

# Headline facts about the current wave and phase.
current_wave = kondratiev_analysis.current_wave
print(f"Onda Actual: #{current_wave.wave_number}")
print(f"Paradigma Tecnol√≥gico: {current_wave.technological_paradigm}")
print(f"Fase Actual: {kondratiev_analysis.current_phase.value}")
print(f"Progreso de Fase: {kondratiev_analysis.phase_progress:.1%}")
next_transition = kondratiev_analysis.next_phase_transition
print(f"Pr√≥xima Transici√≥n: {next_transition.strftime('%Y-%m')}")
print()

# Solar synchronisation metrics, each formatted to three decimals.
print("üîó Sincronizaci√≥n Solar:")
solar_metrics = kondratiev_analysis.solar_correlation
for metric, value in solar_metrics.items():
    print(f"  - {metric}: {value:.3f}")
print()

# Economic implications: scalar values go on one line; non-empty lists show
# at most their first three entries; empty lists are omitted.
print("üìà Implicaciones Econ√≥micas:")
implications = kondratiev_analysis.economic_implications
for key, value in implications.items():
    if not isinstance(value, list):
        print(f"  - {key}: {value}")
    elif value:
        print(f"  - {key}:")
        for item in value[:3]:
            print(f"    ‚Ä¢ {item}")
print()

# Risk assessment: only non-list entries are printed.
print("‚ö†Ô∏è Evaluaci√≥n de Riesgos:")
risks = kondratiev_analysis.risk_assessment
for key, value in risks.items():
    if isinstance(value, list):
        continue
    print(f"  - {key}: {value}")

## 8. Conclusiones y Hallazgos Preliminares

In [None]:
# Final summary: collects headline findings from the earlier cells and
# prints them with a fixed list of follow-up recommendations.
# Depends on `crisis_analysis` and `kondratiev_analysis` from earlier cells.
print("üéØ CONCLUSIONES DEL AN√ÅLISIS EXPLORATORIO")
print("=" * 50)

hallazgos = []

# Finding 1: solar cycle. The reported figure is the standard deviation of
# the 132-sample rolling mean of the sunspot series.
if 'sunspots' in solar_data.columns:
    ciclo_promedio = solar_data['sunspots'].rolling(132).mean().std()
    hallazgos.append(f"‚Ä¢ Ciclo solar de ~11 a√±os confirmado (variabilidad: {ciclo_promedio:.1f} manchas)")

# Finding 2: contemporaneous sunspot / S&P 500 correlation.
# Series.corr aligns both series on their index and ignores missing pairs.
# Passing two independently NaN-stripped arrays to np.corrcoef would instead
# pair values from unrelated dates, or raise when the lengths differ.
if 'sunspots' in solar_data.columns and not sp500_series.empty:
    corr_contemporanea = solar_data['sunspots'].corr(sp500_series)
    # NaN means there were not enough overlapping observations; skip the
    # finding rather than report "nan".
    if pd.notna(corr_contemporanea):
        hallazgos.append(f"‚Ä¢ Correlaci√≥n contempor√°nea manchas solares-S&P500: {corr_contemporanea:.3f}")

# Finding 3: crises vs solar phases, from the crisis analysis result.
crisis_corr = crisis_analysis['correlation_metrics']['overall_correlation']
hallazgos.append(f"‚Ä¢ Correlaci√≥n crisis-fases solares: {crisis_corr:.1%}")

# Finding 4: current Kondratiev wave and phase.
hallazgos.append(f"‚Ä¢ Onda Kondratiev actual: #{kondratiev_analysis.current_wave.wave_number} ({kondratiev_analysis.current_phase.value})")

# Finding 5: predicted solar/Kondratiev synchronisation (0 when the metric
# is absent from the analysis result).
sync_strength = kondratiev_analysis.solar_correlation.get('predicted_sync_strength', 0)
hallazgos.append(f"‚Ä¢ Sincronizaci√≥n solar-Kondratiev prevista: {sync_strength:.1%}")

print("\n".join(hallazgos))

print("\nüî¨ RECOMENDACIONES PARA INVESTIGACI√ìN FUTURA:")
recomendaciones = [
    "‚Ä¢ Profundizar an√°lisis de causalidad con tests de Granger",
    "‚Ä¢ Investigar mecanismos de transmisi√≥n solar-econ√≥mica",
    "‚Ä¢ Extender an√°lisis a otros indicadores econ√≥micos",
    "‚Ä¢ Validar modelos predictivos con backtesting",
    "‚Ä¢ Estudiar efectos regionales diferenciados"
]
print("\n".join(recomendaciones))

# Timestamp of this run (local time) and version banner.
print(f"\nüìÖ An√°lisis generado: {datetime.now().strftime('%Y-%m-%d %H:%M')}")
print("üåû HelioBio-Economic v1.0 - Investigaci√≥n en curso")