In [1]:
import pandas as pd 
import numpy as np

In [2]:
bills = pd.read_csv('DTB3.csv')
bills.head()

Unnamed: 0,observation_date,DTB3
0,2016-06-30,0.26
1,2016-07-01,0.27
2,2016-07-04,
3,2016-07-05,0.28
4,2016-07-06,0.27


In [3]:
# Assuming bills is your DataFrame
bills['observation_date'] = pd.to_datetime(bills['observation_date'])
bills.set_index('observation_date', inplace=True)

bills_monthly = bills.resample('M').last()

# Rename index format to add 'ME'
bills_monthly.index = bills_monthly.index.strftime('%Y-%m') + 'ME'

# If you want dates back as a column
bills_monthly = bills_monthly.reset_index()

print(bills_monthly.tail())


    observation_date  DTB3
104        2025-02ME  4.20
105        2025-03ME  4.21
106        2025-04ME  4.20
107        2025-05ME  4.25
108        2025-06ME  4.24


In [4]:


# Drop rows with non-positive values before taking logs
bills = bills[bills['DTB3'] > 0]
# Show rows with zero or negative values
print(bills[bills['DTB3'] <= 0])


Empty DataFrame
Columns: [DTB3]
Index: []


In [5]:
# Apply natural log to the series
bills['log_DTB3'] = np.log(bills['DTB3'])

# First difference of the log
bills['log_diff_DTB3'] = bills['log_DTB3'].diff()

# Drop rows with non-positive values before taking logs
bills = bills[bills['log_diff_DTB3'] > 0]

In [6]:
# First difference
bills_diff = bills_monthly['DTB3'].diff()

bills_log = np.log(bills_monthly['DTB3'])

bills_log_diff = np.log(bills_monthly['DTB3']).diff()

# Optionally, drop NaN values caused by differencing
bills_diff = bills_diff.dropna()
bills_log_diff = bills_log_diff.dropna()

In [7]:
bills.head()

Unnamed: 0_level_0,DTB3,log_DTB3,log_diff_DTB3
observation_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2016-07-01,0.27,-1.309333,0.03774
2016-07-05,0.28,-1.272966,0.036368
2016-07-07,0.29,-1.237874,0.071459
2016-07-11,0.31,-1.171183,0.13815
2016-07-13,0.31,-1.171183,0.066691


In [11]:
from statsmodels.tsa.stattools import adfuller

# Run ADF test on second difference
adf_result = adfuller(bills_log_diff)

print('ADF Statistic:', adf_result[0])
print('p-value:', adf_result[1])
print('Critical Values:')
for key, value in adf_result[4].items():
    print(f'   {key}: {value}')

ADF Statistic: -4.941317195526755
p-value: 2.8960197787351513e-05
Critical Values:
   1%: -3.4942202045135513
   5%: -2.889485291005291
   10%: -2.5816762131519275


In [133]:
#first difference ok 
#second difference ok 
#log no 
#logdiff ok 

In [134]:
##################################################################################################
#################################################################################################
###################################################################################################

In [27]:
maturity = pd.read_csv('wam_index.csv')
maturity.head()

Unnamed: 0,Month,Weighted Avg Maturity
0,2016-07-01,39.927583
1,2016-08-01,34.917873
2,2016-09-01,41.687256
3,2016-10-01,41.190374
4,2016-11-01,47.418204


In [28]:
maturity.tail()

Unnamed: 0,Month,Weighted Avg Maturity
103,2025-02-01,37.900955
104,2025-03-01,33.568145
105,2025-04-01,27.175088
106,2025-05-01,33.131922
107,2025-06-01,33.032234


In [29]:
# Step 1: Keep only positive (non-zero, non-negative) values
maturity = maturity[maturity['Weighted Avg Maturity'] > 0].copy()

# Step 2: Now apply the log transformation
maturity['log'] = np.log(maturity['Weighted Avg Maturity'])

# Step 3: Get the first difference of the log
maturity['log_diff'] = maturity['log'].diff()

# Drop rows with non-positive values before taking logs
maturity = maturity[maturity['log_diff'] > 0]


In [30]:
# First difference
mat_diff = maturity['Weighted Avg Maturity'].diff()

# Second difference (difference of the first difference)
mat_2diff = mat_diff.diff()

mat_log = np.log(maturity['Weighted Avg Maturity'])

mat_log_diff = mat_log.diff()

# Optionally, drop NaN values caused by differencing
mat_2diff = mat_2diff.dropna()
mat_diff = mat_diff.dropna()
mat_log_diff = mat_log_diff.dropna()

In [31]:
from statsmodels.tsa.stattools import adfuller

# Run ADF test on second difference
adf_result = adfuller(maturity['Weighted Avg Maturity'])

print('ADF Statistic:', adf_result[0])
print('p-value:', adf_result[1])
print('Critical Values:')
for key, value in adf_result[4].items():
    print(f'   {key}: {value}')


ADF Statistic: -3.0125880634655617
p-value: 0.03373543926676156
Critical Values:
   1%: -3.596635636000432
   5%: -2.933297331821618
   10%: -2.6049909750566895


In [32]:
from statsmodels.tsa.stattools import adfuller

max_lag = 6
results = []

for lag in range(1, max_lag+1):
    result = adfuller(maturity['log_diff'], maxlag=lag, autolag=None)
    results.append({
        'lag': lag,
        'ADF Statistic': result[0],
        'p-value': result[1],
        'used_lag': result[2]
    })

import pandas as pd
df_adf = pd.DataFrame(results)
print(df_adf)


   lag  ADF Statistic   p-value  used_lag
0    1      -3.618955  0.005409         1
1    2      -3.659887  0.004714         2
2    3      -3.336459  0.013325         3
3    4      -2.787238  0.060120         4
4    5      -2.807972  0.057144         5
5    6      -1.984235  0.293528         6


In [33]:
adf_result_aic = adfuller(maturity['log_diff'], autolag='AIC')
adf_result_bic = adfuller(maturity['log_diff'], autolag='BIC')

print("ADF AIC-based lag:", adf_result_aic[2])
print("ADF BIC-based lag:", adf_result_bic[2])


ADF AIC-based lag: 1
ADF BIC-based lag: 1


In [34]:
#first difference ok
#log ok 
#log dif ok

In [35]:
from statsmodels.tsa.api import VAR

# Your stationary data (e.g., differenced maturity and T-bill)
data = pd.concat([bills_diff, mat_diff], axis=1).dropna()

model = VAR(data)
results = model.select_order(maxlags=10)

print(results.summary())


 VAR Order Selection (* highlights the minimums)  
       AIC         BIC         FPE         HQIC   
--------------------------------------------------
0       0.9847       1.067       2.677       1.015
1      0.7394*     0.9852*      2.096*     0.8301*
2       0.9026       1.312       2.471       1.054
3       0.9885       1.562       2.703       1.200
4       0.9400       1.677       2.592       1.212
5        1.087       1.988       3.035       1.419
6       0.9489       2.014       2.686       1.342
7        1.042       2.270       3.012       1.495
8        1.027       2.419       3.058       1.540
9        1.088       2.644       3.383       1.662
10       1.163       2.884       3.841       1.798
--------------------------------------------------


  self._init_dates(dates, freq)


In [36]:
combined = pd.concat([bills_diff, mat_diff], axis=1)
combined.columns = ['DTB3', 'Maturity']  # set column names manually
combined.dropna(inplace=True)  # drop rows with NaNs

In [37]:
from statsmodels.tsa.stattools import grangercausalitytests

# Test if 'Maturity_diff2' Granger-causes 'DTB3_diff2'
print("Testing if Maturity_diff Granger-causes DTB3_diff:")
grangercausalitytests(combined[['DTB3', 'Maturity']], maxlag=9)

# Test if 'DTB3_diff2' Granger-causes 'Maturity_diff2'
print("\nTesting if DTB3_diff Granger-causes Maturity_dif:")
grangercausalitytests(combined[['Maturity', 'DTB3']], maxlag=9)


Testing if Maturity_diff Granger-causes DTB3_diff:

Granger Causality
number of lags (no zero) 1
ssr based F test:         F=0.0570  , p=0.8123  , df_denom=49, df_num=1
ssr based chi2 test:   chi2=0.0605  , p=0.8057  , df=1
likelihood ratio test: chi2=0.0605  , p=0.8057  , df=1
parameter F test:         F=0.0570  , p=0.8123  , df_denom=49, df_num=1

Granger Causality
number of lags (no zero) 2
ssr based F test:         F=0.1165  , p=0.8903  , df_denom=46, df_num=2
ssr based chi2 test:   chi2=0.2582  , p=0.8789  , df=2
likelihood ratio test: chi2=0.2576  , p=0.8792  , df=2
parameter F test:         F=0.1165  , p=0.8903  , df_denom=46, df_num=2

Granger Causality
number of lags (no zero) 3
ssr based F test:         F=0.1621  , p=0.9213  , df_denom=43, df_num=3
ssr based chi2 test:   chi2=0.5654  , p=0.9043  , df=3
likelihood ratio test: chi2=0.5622  , p=0.9050  , df=3
parameter F test:         F=0.1621  , p=0.9213  , df_denom=43, df_num=3

Granger Causality
number of lags (no zero) 4
ssr

{1: ({'ssr_ftest': (0.06121564536736052, 0.8056185827357225, 49.0, 1),
   'ssr_chi2test': (0.06496354202250505, 0.7988161966151919, 1),
   'lrtest': (0.06492299634771825, 0.7988776414355475, 1),
   'params_ftest': (0.06121564536736262, 0.8056185827357225, 49.0, 1.0)},
  [<statsmodels.regression.linear_model.RegressionResultsWrapper at 0x7c8801ea3910>,
   <statsmodels.regression.linear_model.RegressionResultsWrapper at 0x7c8801ea3ca0>,
   array([[0., 1., 0.]])]),
 2: ({'ssr_ftest': (0.17931336597010875, 0.8364253583784143, 46.0, 2),
   'ssr_chi2test': (0.3976078984554585, 0.8197105824735283, 2),
   'lrtest': (0.39606598537670834, 0.8203427873774198, 2),
   'params_ftest': (0.17931336597010586, 0.8364253583784165, 46.0, 2.0)},
  [<statsmodels.regression.linear_model.RegressionResultsWrapper at 0x7c8801ee2e90>,
   <statsmodels.regression.linear_model.RegressionResultsWrapper at 0x7c8801ee3010>,
   array([[0., 0., 1., 0., 0.],
          [0., 0., 0., 1., 0.]])]),
 3: ({'ssr_ftest': (0.31399

In [21]:
# first diff results 
#Yes, for lags 1 and 2, DTB3 Granger-causes maturity.
#log results 
# none 
# log diff results 
# none 