In [7]:
import pandas as pd
import numpy as np
from statsmodels.tsa.stattools import grangercausalitytests
import matplotlib.pyplot as plt

print(">>> TEST DE GRANGER : APPROCHE MIXTE (ROBUSTE)")

# Significance threshold applied to the p-value of every tested lag.
ALPHA = 0.05

# 1. Load the daily sentiment scores and the volatility series.
df_sent = pd.read_csv('../data/daily_sentiment_lm_final.csv')
df_vix = pd.read_csv('../data/financial_data_VIX.csv')

# Parse the date columns. The volatility file may name its date column
# 'DATE' or 'Date'; any timezone on it is dropped (tz_localize(None)).
df_sent['date_only'] = pd.to_datetime(df_sent['date_only'])
col_date_vix = 'DATE' if 'DATE' in df_vix.columns else 'Date'
df_vix[col_date_vix] = pd.to_datetime(df_vix[col_date_vix]).dt.tz_localize(None)

# The volatility column is either 'Volatility' or 'VIXCLS'.
col_vol = 'Volatility' if 'Volatility' in df_vix.columns else 'VIXCLS'

# Force both series to numeric. If the CSV holds a non-numeric placeholder for
# missing observations, read_csv yields an object column and .diff() below
# would raise; coerced values become NaN and are dropped before the test.
# NOTE(review): presumably relevant for FRED-style exports (the 'VIXCLS'
# fallback suggests one) - confirm against the actual file.
df_vix[col_vol] = pd.to_numeric(df_vix[col_vol], errors='coerce')
df_sent['mean'] = pd.to_numeric(df_sent['mean'], errors='coerce')

# Join on the date (merge defaults to an inner join, so only days present in
# both files survive), order chronologically and index by date.
df = (
    pd.merge(
        df_vix[[col_date_vix, col_vol]],
        df_sent[['date_only', 'mean']],
        left_on=col_date_vix,
        right_on='date_only',
    )
    .sort_values('date_only')
    .set_index('date_only')
)

# Keep only the two data columns (the duplicated date column from the
# volatility file is discarded) and give them stable names.
df = df[[col_vol, 'mean']].rename(columns={col_vol: 'VIX', 'mean': 'Sentiment'})

# --- 2. TRANSFORMATION ---
# The VIX is first-differenced and the sentiment is used in levels (the
# original author treats the VIX as non-stationary and the sentiment as
# stationary; no unit-root test is run in this cell).
df_test = df.assign(VIX_Diff=df['VIX'].diff())[['VIX_Diff', 'Sentiment']]

# Remove infinities and missing rows: the regression cannot handle them.
df_test = df_test.replace([np.inf, -np.inf], np.nan).dropna()

print(f"Données prêtes pour le test : {len(df_test)} jours.")

# --- 3. GRANGER TEST ---
print("\n[Résultats Granger Causality]")
max_lag = 10
# Column order matters: statsmodels tests whether the SECOND column
# (Sentiment) Granger-causes the FIRST column (VIX_Diff).
test_result = grangercausalitytests(df_test[['VIX_Diff', 'Sentiment']], maxlag=max_lag, verbose=False)

print(f"{'LAG':<5} | {'P-VALUE':<10} | {'RÉSULTAT'}")
print("-" * 35)

# p-value of the SSR-based chi-square test for each lag order.
p_values = {
    lag: test_result[lag][0]['ssr_chi2test'][1]
    for lag in range(1, max_lag + 1)
}

for lag, p_val in p_values.items():
    sig = "OUI" if p_val < ALPHA else "-"
    print(f"{lag:<5} | {p_val:.4f}     | {sig}")

# NOTE(review): a single lag below ALPHA is enough to report success, with no
# multiple-comparison correction across the max_lag tests - interpret a lone
# significant lag with caution.
found_causality = any(p_val < ALPHA for p_val in p_values.values())

if found_causality:
    print("SUCCÈS : Le Sentiment prédit les mouvements du VIX !")
else:
    print("RÉSULTAT : Pas de prédictibilité détectée.")

>>> TEST DE GRANGER : APPROCHE MIXTE (ROBUSTE)
Données prêtes pour le test : 522 jours.

[Résultats Granger Causality]
LAG   | P-VALUE    | RÉSULTAT
-----------------------------------
1     | 0.6231     | -
2     | 0.7554     | -
3     | 0.9132     | -
4     | 0.8406     | -
5     | 0.9193     | -
6     | 0.8647     | -
7     | 0.8708     | -
8     | 0.7629     | -
9     | 0.8415     | -
10    | 0.7021     | -
RÉSULTAT : Pas de prédictibilité détectée.




In [None]:
import pandas_datareader.data as web
import pandas as pd
import numpy as np
from statsmodels.tsa.stattools import grangercausalitytests

print(">>> TENTATIVE ULTIME : SYNC HEBDOMADAIRE (7 JOURS / 7 JOURS) <<<")

# Factor used to annualise the daily rolling standard deviation.
TRADING_DAYS_PER_YEAR = 252
# Significance thresholds used when labelling each lag.
ALPHA = 0.05
ALPHA_STRONG = 0.01

# 1. DOWNLOAD (KBE via Stooq, XLF as fallback)
start = '2006-01-01'
end = '2009-01-01'
ticker = 'KBE'

try:
    df_price = web.DataReader(ticker, 'stooq', start=start, end=end).sort_index()
    # An empty or malformed download must trigger the fallback explicitly
    # rather than surfacing later as an unrelated error.
    if df_price.empty or 'Close' not in df_price.columns:
        raise ValueError(f"no usable price data returned for {ticker}")
    # NOTE(review): the banner announces a 7-day/7-day synchronisation but the
    # primary ticker uses a 6-row window while the fallback uses 7 - confirm
    # which value is intended. Both are kept as in the original.
    COMMON_WINDOW = 6
except Exception as exc:
    # Only ordinary errors are caught (not KeyboardInterrupt/SystemExit), and
    # the reason for falling back is shown instead of being swallowed.
    print("Fallback XLF")
    print(f"  ({ticker}: {type(exc).__name__}: {exc})")
    df_price = web.DataReader('XLF', 'stooq', start=start, end=end).sort_index()
    COMMON_WINDOW = 7

# Daily log returns, then annualised rolling volatility over COMMON_WINDOW
# rows. Computed once here for whichever ticker was downloaded.
df_price['Log_Ret'] = np.log(df_price['Close'] / df_price['Close'].shift(1))
df_price['Vol_Target'] = (
    df_price['Log_Ret'].rolling(window=COMMON_WINDOW).std()
    * np.sqrt(TRADING_DAYS_PER_YEAR)
)
# Drops every row with a missing value in ANY column, which includes the
# warm-up rows of the rolling window.
df_price = df_price.dropna()

# 2. LOAD SENTIMENT
df_sent = pd.read_csv('../data/daily_sentiment_lm_final.csv')
df_sent['date_only'] = pd.to_datetime(df_sent['date_only'])

# 3. MERGE
# Inner join of the price index with the sentiment dates; the explicit stable
# sort guarantees the rolling mean below runs in chronological order.
df_final = (
    pd.merge(
        df_price[['Vol_Target']],
        df_sent[['date_only', 'mean']],
        left_index=True,
        right_on='date_only',
    )
    .set_index('date_only')
    .sort_index(kind='mergesort')
)

# Smooth the sentiment over the same number of rows as the volatility window.
# The window counts merged rows (days present in both sources), not calendar days.
df_final['Sentiment_Smooth'] = df_final['mean'].rolling(window=COMMON_WINDOW).mean()

# The test is run level vs level.
# NOTE(review): both series are overlapping rolling statistics, hence strongly
# autocorrelated, and no stationarity check is performed here - this can
# inflate Granger significance; verify before drawing conclusions.
df_test = (
    df_final[['Vol_Target', 'Sentiment_Smooth']]
    .rename(columns={'Vol_Target': 'Volatility', 'Sentiment_Smooth': 'Sentiment'})
    .dropna()
)

print(f"Données prêtes : {len(df_test)} jours.")
print(f"Paramètres : Volatilité {COMMON_WINDOW}j | Sentiment {COMMON_WINDOW}j")

# 4. GRANGER
max_lag = 15
print("\n[Résultats Granger]")
# Column order matters: statsmodels tests whether the SECOND column
# (Sentiment) Granger-causes the FIRST column (Volatility).
res = grangercausalitytests(df_test[['Volatility', 'Sentiment']], maxlag=max_lag, verbose=False)

print(f"{'LAG':<5} | {'P-VALUE':<10} | {'RÉSULTAT'}")
print("-" * 35)
for i in range(1, max_lag + 1):
    # p-value of the SSR-based chi-square test at lag order i.
    p = res[i][0]['ssr_chi2test'][1]
    sig = "OUI" if p < ALPHA else "-"
    if p < ALPHA_STRONG:
        sig += " (FORT)"
    print(f"{i:<5} | {p:.4f}     | {sig}")



📊 LANCEMENT DES TESTS STATISTIQUES (GRANGER CAUSALITY)
📡 Téléchargement du VIX (FRED)...

🏁 TEST DE GRANGER : SENTIMENT vs KBE VOLATILITY (Banques)
Données prêtes : 499 jours communs.

LAG   | P-VALUE    | RÉSULTAT
-----------------------------------
1     | 0.0350     | 🔥 OUI
2     | 0.0122     | 🔥 OUI
3     | 0.0069     | 🔥 OUI (FORT)
4     | 0.0068     | 🔥 OUI (FORT)
5     | 0.0055     | 🔥 OUI (FORT)
6     | 0.0002     | 🔥 OUI (FORT)
7     | 0.0032     | 🔥 OUI (FORT)
8     | 0.0012     | 🔥 OUI (FORT)
9     | 0.0001     | 🔥 OUI (FORT)
10    | 0.0002     | 🔥 OUI (FORT)
11    | 0.0002     | 🔥 OUI (FORT)
12    | 0.0001     | 🔥 OUI (FORT)
13    | 0.0001     | 🔥 OUI (FORT)
14    | 0.0001     | 🔥 OUI (FORT)
15    | 0.0001     | 🔥 OUI (FORT)

✅ CONCLUSION : Causalité forte détectée ! La presse prédit le marché.

🏁 TEST DE GRANGER : SENTIMENT vs VIX (Peur Globale)
Données prêtes : 499 jours communs.

LAG   | P-VALUE    | 

  return _core_matmul(x1, x2)
  return _core_matmul(x1, x2)
  return _core_matmul(x1, x2)
  return _core_matmul(x1, x2)
  return _core_matmul(x1, x2)
  return _core_matmul(x1, x2)
