In [2]:
import pandas as pd

In [3]:
# Load the input dataset; the path is relative to the current working directory.
csv_path = "base_VAR_trimestrielle.csv"
df = pd.read_csv(csv_path)

## Test de stationnarité (Dickey-Fuller augmenté)

In [8]:
from statsmodels.tsa.stattools import adfuller

# Augmented Dickey-Fuller test on each series in levels.
# Only the first two entries of the returned tuple are reported:
# the test statistic and its p-value.
level_columns = ['gdp', 'cpi', 'unemp', 'rate_3m', 'rate_10y']
for name in level_columns:
    adf_stat, p_value = adfuller(df[name].dropna())[:2]
    print(f"\nVariable : {name}")
    print(f"ADF Statistic = {adf_stat:.4f}")
    print(f"p-value = {p_value:.4f}")



Variable : gdp
ADF Statistic = 3.9976
p-value = 1.0000

Variable : cpi
ADF Statistic = 2.4433
p-value = 0.9990

Variable : unemp
ADF Statistic = -2.4510
p-value = 0.1279

Variable : rate_3m
ADF Statistic = -2.5466
p-value = 0.1045

Variable : rate_10y
ADF Statistic = -2.3236
p-value = 0.1645


In [9]:
# Difference the series (first difference) and re-run the ADF test on the result.
from statsmodels.tsa.stattools import adfuller

# Index by observation date. set_index returns a new frame, so `df` is left
# untouched and this cell can be re-run without side effects.
df_diff = df.set_index("observation_date")

# read_csv was called without parse_dates, so the date labels may still be
# plain strings. Convert them to a DatetimeIndex so downstream time-series
# models receive real dates (likely the cause of the `_init_dates` warning
# emitted when the VAR is built — confirm after re-running).
df_diff.index = pd.to_datetime(df_diff.index)

# Apply the first difference to every variable; the first row has no
# predecessor, becomes NaN and is dropped.
df_diff = df_diff.diff().dropna()

# ADF test on each differenced series (test statistic and p-value).
for col in df_diff.columns:
    result = adfuller(df_diff[col].dropna())
    print(f"\nVariable différenciée : {col}")
    print(f"ADF Statistic = {result[0]:.4f}")
    print(f"p-value = {result[1]:.4f}")


Variable différenciée : gdp
ADF Statistic = -4.9856
p-value = 0.0000

Variable différenciée : cpi
ADF Statistic = -4.4091
p-value = 0.0003

Variable différenciée : unemp
ADF Statistic = -12.3937
p-value = 0.0000

Variable différenciée : rate_3m
ADF Statistic = -4.8232
p-value = 0.0000

Variable différenciée : rate_10y
ADF Statistic = -6.1954
p-value = 0.0000


## Le modèle VAR

In [10]:
from statsmodels.tsa.api import VAR

# Build the VAR on the differenced series and estimate it with a single lag.
model = VAR(df_diff)
results = model.fit(maxlags=1)

# Print the full estimation report (one coefficient table per equation).
summary = results.summary()
print(summary)

  Summary of Regression Results   
Model:                         VAR
Method:                        OLS
Date:           Sun, 13, Jul, 2025
Time:                     00:37:00
--------------------------------------------------------------------
No. of Equations:         5.00000    BIC:                    5.96818
Nobs:                     139.000    HQIC:                   5.59221
Log likelihood:          -1326.93    FPE:                    207.495
AIC:                      5.33484    Det(Omega_mle):         167.974
--------------------------------------------------------------------
Results for equation gdp
                 coefficient       std. error           t-stat            prob
------------------------------------------------------------------------------
const              67.921250        32.690167            2.078           0.038
L1.gdp              0.477701         0.167949            2.844           0.004
L1.cpi             21.559370        16.480168            1.308        

  self._init_dates(dates, freq)


In [11]:
# Check that the residuals are not serially correlated.
from statsmodels.stats.stattools import durbin_watson

# One Durbin-Watson statistic per residual column (i.e. per equation).
dw = durbin_watson(results.resid)

# Label each statistic with its variable name before printing.
dw_by_equation = pd.Series(dw, index=df_diff.columns)
for col, val in dw_by_equation.items():
    print(f"{col} : Durbin-Watson = {val:.3f}")

gdp : Durbin-Watson = 2.018
cpi : Durbin-Watson = 2.109
unemp : Durbin-Watson = 1.923
rate_3m : Durbin-Watson = 2.164
rate_10y : Durbin-Watson = 2.027


Les statistiques de Durbin-Watson sont toutes proches de 2 : le modèle ne présente donc pas d'autocorrélation sérieuse (d'ordre 1) dans les résidus.
C’est un bon signe : le modèle VAR semble bien spécifié.

In [12]:
# Lag-1 coefficient matrix, labelled with the variable names on both axes:
# each row is an equation, each column the lagged variable it loads on.
variable_names = df_diff.columns
lag1_coefs = results.coefs[0]
A1 = pd.DataFrame(data=lag1_coefs, index=variable_names, columns=variable_names)


In [13]:
# Display the lag-1 coefficient matrix.
A1

Unnamed: 0,gdp,cpi,unemp,rate_3m,rate_10y
gdp,0.477701,21.55937,255.608998,184.063765,-9.947408
cpi,0.00273,0.185658,0.83708,0.273666,-0.264321
unemp,-0.000357,-0.015737,-0.245442,-0.486483,-0.093409
rate_3m,9.6e-05,-0.017795,-0.001879,0.515793,0.066922
rate_10y,0.000419,-0.008007,0.105296,0.014202,-0.014856


In [14]:
# Intercept of each equation, as a Series named 'const' and indexed by variable name.
intercepts = results.intercept
const = pd.Series(data=intercepts, name='const', index=df_diff.columns)

In [15]:
# Display the intercepts.
const

gdp         67.921250
cpi          0.649757
unemp        0.057962
rate_3m     -0.002213
rate_10y    -0.090516
Name: const, dtype: float64

In [16]:
# Standard deviation of each equation's residuals, as a Series named 'residual'.
# NOTE(review): Series/DataFrame.std defaults to ddof=1, so these values are
# slightly smaller than the square roots of the diagonal of results.sigma_u
# (e.g. gdp: 223.21 here vs 227.37 on the Cholesky diagonal) — confirm which
# convention the consumer of var.csv expects.
residual = results.resid.std(axis=0)
residual.name = 'residual'


In [17]:
# Display the residual standard deviations.
residual

gdp         223.210880
cpi           1.409091
unemp         0.659909
rate_3m       0.377266
rate_10y      0.457565
Name: residual, dtype: float64

In [18]:

# Assemble one parameter table: lag-1 coefficients, intercepts and residual
# standard deviations side by side, aligned on the variable names.
parameter_blocks = [A1, const, residual]
var = pd.concat(parameter_blocks, axis="columns")

print(var)

               gdp        cpi       unemp     rate_3m  rate_10y      const  \
gdp       0.477701  21.559370  255.608998  184.063765 -9.947408  67.921250   
cpi       0.002730   0.185658    0.837080    0.273666 -0.264321   0.649757   
unemp    -0.000357  -0.015737   -0.245442   -0.486483 -0.093409   0.057962   
rate_3m   0.000096  -0.017795   -0.001879    0.515793  0.066922  -0.002213   
rate_10y  0.000419  -0.008007    0.105296    0.014202 -0.014856  -0.090516   

            residual  
gdp       223.210880  
cpi         1.409091  
unemp       0.659909  
rate_3m     0.377266  
rate_10y    0.457565  


In [19]:
# Number of rows in the parameter table (one per equation).
len(var)

5

In [20]:
# Sanity check: (rows, columns) of the parameter table.
print(var.shape)


(5, 7)


In [None]:
# Export the parameter table to var.csv, keeping the variable names as the first column.
var.to_csv("var.csv", index=True)

In [21]:
import numpy as np
import pandas as pd

# Residual covariance matrix of the fitted VAR.
sigma_u = results.sigma_u

# Cholesky factor of the covariance matrix (lower-triangular, as returned by NumPy).
var1chol = np.linalg.cholesky(sigma_u)

# Write the factor to CSV with generic column labels V1..Vk and no row index.
factor_labels = [f'V{k}' for k in range(1, var1chol.shape[1] + 1)]
chol_df = pd.DataFrame(var1chol, columns=factor_labels)
chol_df.to_csv("var1chol.csv", index=False)

print(chol_df)

           V1        V2        V3        V4        V5
0  227.367864  0.000000  0.000000  0.000000  0.000000
1    0.643940  1.282779  0.000000  0.000000  0.000000
2   -0.578446  0.124524  0.318975  0.000000  0.000000
3    0.043549  0.088965  0.021186  0.370702  0.000000
4    0.092636  0.151110  0.029621  0.209089  0.375801


In [1]:
# Show the current working directory, i.e. where the relative CSV paths above resolve.
# NOTE(review): this cell carries execution count 1 although it sits last, so it was
# run out of order after a restart; its output also exposes a local absolute path —
# consider clearing it before sharing.
import os
os.getcwd()


'C:\\Users\\hp\\stage 2eme annee\\ESG\\VAR'