# Corrélation cumul et lag des régions par rapport au CF

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from ipywidgets import interact, IntSlider
from matplotlib.colors import LinearSegmentedColormap
from sklearn.preprocessing import StandardScaler


sns.set_style("whitegrid")

#correction 

In [12]:
#chargement des données
cf = pd.read_csv("data/CF_1d.csv", index_col="Date", parse_dates=True)
tp = pd.read_csv("data/TP_1d.csv", index_col="Date", parse_dates=True)

cf_fr = cf.FR
tp_fr = tp[tp.columns[tp.columns.str.startswith("FR")]]

cf_mean = cf_fr.rename("CF")

#normalisation
scaler = StandardScaler()
tp_norm = pd.DataFrame(scaler.fit_transform(tp_fr), 
                       index=tp_fr.index, 
                       columns=tp_fr.columns)


tp.info

<bound method DataFrame.info of                     AT11          AT12          AT13          AT21  \
Date                                                                 
2015-01-01  1.597372e-05  4.130241e-05  2.705057e-05  1.192716e-06   
2015-01-02  4.782886e-05  6.080879e-05  5.485623e-05  1.442142e-05   
2015-01-03  1.626123e-04  2.802677e-04  2.174347e-04  7.486135e-05   
2015-01-04  2.265630e-04  4.013857e-04  2.626304e-04  1.496492e-04   
2015-01-05  1.843412e-04  2.907254e-04  1.915380e-04  8.510209e-05   
...                  ...           ...           ...           ...   
2023-12-27  7.610791e-08  1.371262e-06  0.000000e+00  1.049735e-07   
2023-12-28  2.825046e-08  3.075541e-07  4.441612e-07  9.367197e-07   
2023-12-29  1.534531e-07  3.835745e-06  3.443207e-06  1.010297e-06   
2023-12-30  0.000000e+00  4.833523e-08  0.000000e+00  1.123263e-07   
2023-12-31  4.777863e-07  3.287564e-06  1.337083e-06  1.332691e-04   

                    AT22          AT31          AT32     

In [5]:
def make_continuous_threshold_cmap(threshold):
    """
    Création d'une colormap continue :
    - Bleu foncé -> bleu clair -> blanc autour du threshold -> orange -> rouge
    - Beaucoup plus de nuances.
    """
    
    colors = [
        (0.0, (0, 0.15, 0.8)),    # bleu foncé
        (0.40, (0.3, 0.5, 1.0)),  # bleu clair
        (0.50, (1, 1, 1)),        # neutre autour du seuil
        (0.65, (1.0, 0.7, 0.2)),  # orange
        (1.0, (1.0, 0, 0))        # rouge
    ]
    
    cmap = LinearSegmentedColormap.from_list("smooth_blue_red", colors)
    return cmap


In [6]:
#Fonction interactive (analysis corrélation × lag × cumul)

def cross_corr_heatmap(max_lag, window, threshold=0.15):
    lags = range(0, max_lag + 1)
    corr_matrix = pd.DataFrame(index=tp_norm.columns, columns=lags)


    for region in tp_norm.columns:
        tp_wind = tp_norm[region].rolling(window=window).sum()

        for lag in lags:
            tp_lagged = tp_wind.shift(lag)
            df_tmp = pd.concat([tp_lagged, cf_mean], axis=1).dropna()
            corr_matrix.loc[region, lag] = df_tmp.iloc[:, 0].corr(df_tmp["CF"])

    corr_matrix = corr_matrix.astype(float)

    cmap = make_continuous_threshold_cmap(threshold)

    plt.figure(figsize=(14, 10))
    sns.heatmap(
        corr_matrix,
        cmap=cmap,
        vmin=-1, vmax=1,
        center=threshold,
        linewidths=.4
    )

    plt.title(
        f"Corrélation TP cumulées (fenêtre={window} jours) × CF\n"
        f"threshold={threshold:.2f})"
    )
    plt.xlabel("Lag (jours)")
    plt.ylabel("Régions")
    plt.show()

    # régions parasites = aucune corrélation au-dessus du threshold
    parasites = corr_matrix.abs().max(axis=1)
    parasites = parasites[parasites < threshold].index.tolist()

    print(f"\nRégions parasites :{parasites}")
    print()


In [7]:
#interface intéractive
interact(
    cross_corr_heatmap,
    max_lag=IntSlider(value=10, min=1, max=90, step=1,
                      description="Lag max", continuous_update=False),
    window=IntSlider(value=4, min=1, max=40, step=1,
                     description="Cumul (jours)", continuous_update=False)
)

interactive(children=(IntSlider(value=10, continuous_update=False, description='Lag max', max=90, min=1), IntS…

<function __main__.cross_corr_heatmap(max_lag, window, threshold=0.15)>

In [None]:
#chercher le couple (cumul_max, lag_max) qui maximise la corrélation par région au capacity factor

In [11]:
#initialisation des range de fenêtres et de lags
range_window = range(1, 41)  
range_lag = range(0, 31)     

results = []

for region in tp_norm.columns:
    best_corr = -1
    best_params = (None, None)
    
    for w in range_window:
        tp_cumul = tp_norm[region].rolling(window=w).sum()
        
        for l in range_lag:
            tp_shift = tp_cumul.shift(l)
            valid_idx = tp_shift.dropna().index.intersection(cf_mean.index)
            
            if len(valid_idx) > 0:
                current_corr = tp_shift.loc[valid_idx].corr(cf_mean.loc[valid_idx])
                if current_corr > best_corr: #recherche de max 
                    best_corr = current_corr
                    best_params = (w, l)
    
    #stockage des résultats
    results.append({
        'Région': region,
        'Max_Corrélation': best_corr,
        'Meilleur_Cumul_Jours': best_params[0],
        'Meilleur_Lag_Jours': best_params[1]
    })

#dataframe
df_results = pd.DataFrame(results).set_index('Région')
df_results = df_results.sort_values(by='Max_Corrélation', ascending=False)

# Affichage avec une heatmap de style pour visualiser les intensités
display(df_results.style.background_gradient(subset=['Max_Corrélation']))

Unnamed: 0_level_0,Max_Corrélation,Meilleur_Cumul_Jours,Meilleur_Lag_Jours
Région,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
FRC2,0.578009,40,0
FRI2,0.571042,40,0
FRK1,0.561395,40,0
FRI1,0.550905,40,0
FRJ2,0.547936,40,0
FRI3,0.536178,40,0
FRK2,0.521202,40,0
FRB0,0.518533,40,0
FRC1,0.508167,40,0
FRF3,0.462477,40,0
