In [9]:
### Library Imports
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import numpy as np
import itertools
import warnings
warnings.filterwarnings("ignore")
from statsmodels.tsa.vector_ar.vecm import coint_johansen


In [7]:
### Function to Import Stock Data
def import_stock_data(tickers, start_date, end_date):
    data = pd.DataFrame()
    if len([tickers]) == 1:
        data[tickers] = yf.download(tickers, start_date, end_date)['Adj Close']
        data = pd.DataFrame(data)
    else:
        for t in tickers:
            data[t] = yf.download(tickers, start_date, end_date)['Adj Close']
    
    # Reset index to include the Date as a column
    data = data.reset_index()

    return data

# Import Stock Data
tickers = ['AAPL', 'MSFT', 'GOOG', 'JNJ', 'XOM', 'TSLA', 'JPM', 'UNH', 'NVDA', 'PG']
start_date = '2019-01-01'
end_date = '2024-01-01'
stock_data = import_stock_data(tickers, start_date, end_date)
print(stock_data.tail())

[*********************100%%**********************]  10 of 10 completed


           Date        AAPL        MSFT        GOOG         JNJ         XOM  \
1253 2023-12-22  192.656174  142.209030  150.689331  162.655762  371.799683   
1254 2023-12-26  192.108871  142.308670  151.348450  163.617722  371.879150   
1255 2023-12-27  192.208359  140.933624  151.552017  164.599060  371.293518   
1256 2023-12-28  192.636292  140.774185  151.774963  165.473572  372.494537   
1257 2023-12-29  191.591385  140.425430  151.930054  165.279251  373.248932   

           TSLA         JPM         UNH        NVDA         PG  
1253  48.816162  140.891907  252.539993  512.289185  98.548386  
1254  49.265030  141.531998  256.609985  512.013550  98.770782  
1255  49.402992  141.648376  261.440002  514.730957  98.306618  
1256  49.507965  141.328323  253.179993  516.808472  96.885117  
1257  49.507965  142.113861  248.479996  518.354187  96.682037  


In [8]:
### Compute Direct Hedge Daily Returns
def direct_daily_rets(data):
    # Ensure 'Date' is set as the index for proper computation
    data.set_index('Date', inplace=True)
    
    # Compute the daily returns of the forex data
    rets = data.pct_change().dropna()
    
    # Reset index to make date a column again
    rets.reset_index(inplace=True)

    return rets

### Call Function to Calculate Daily Returns
daily_returns = direct_daily_rets(stock_data)
print(daily_returns.head())


        Date      AAPL      MSFT      GOOG       JNJ       XOM      TSLA  \
0 2019-01-03 -0.099608 -0.028484 -0.015891 -0.014212 -0.036788 -0.060417   
1 2019-01-04  0.042689  0.053786  0.016783  0.036866  0.046509  0.064067   
2 2019-01-07 -0.002226 -0.002167 -0.006415  0.000695  0.001275  0.052941   
3 2019-01-08  0.019063  0.007385  0.023227 -0.001886  0.007251 -0.024896   
4 2019-01-09  0.016982 -0.001505 -0.007926 -0.001691  0.014300  0.019667   

        JPM       UNH      NVDA        PG  
0 -0.007012 -0.031472 -0.027270 -0.015354  
1  0.020411  0.057697  0.011695  0.036870  
2 -0.004000  0.054361  0.001920  0.005200  
3  0.003691  0.001164  0.013371  0.007271  
4 -0.016332  0.009483  0.001439  0.005275  


In [16]:
### Test Cointegration - Johansen Test
# https://medium.com/@cemalozturk/unveiling-cointegration-johansen-test-explained-with-python-examples-db8385219f1f
from statsmodels.tsa.vector_ar.vecm import coint_johansen
import numpy as np

def johansen_test(data, det_order=0, k_ar_diff=1):
    """
    Performs the Johansen cointegration test on multiple assets.

    Parameters:
    data (pandas.DataFrame): DataFrame containing daily returns (excluding date column).
    det_order (int): The order of deterministic terms.
                     -1: No constant or trend.
                      0: Constant term only.
                      1: Constant and trend terms.
    k_ar_diff (int): The number of lags to include in the VAR model.

    Returns:
    result: The JohansenTestResult object containing test statistics and critical values.
    """
    try:
        # Ensure Date column is removed if it exists
        if 'Date' in data.columns:
            data = data.drop(columns=['Date'])

        # Convert to NumPy array
        data_np = data.values

        # Run Johansen cointegration test
        result = coint_johansen(data_np, det_order, k_ar_diff)
        print(f'Johansen Test Results (det_order={det_order})\n')

        # Print test statistics
        print('Trace Statistics:', result.lr1)
        print('Max Eigenvalue Statistics:', result.lr2)
        
        # Print critical values at 1%, 5%, and 10%
        print('\nCritical Values (Trace Test):')
        print(f"1%: {result.cvt[:, 0]}, 5%: {result.cvt[:, 1]}, 10%: {result.cvt[:, 2]}")

        print('\nCritical Values (Max Eigenvalue Test):')
        print(f"1%: {result.cvm[:, 0]}, 5%: {result.cvm[:, 1]}, 10%: {result.cvm[:, 2]}\n")

        # Determine the number of cointegrating relationships
        r = sum(result.lr1 > result.cvt[:, 1])  # Compare trace test stats to 5% critical values
        print(f'Estimated number of cointegrating relationships: {r}')

        return result
    
    except Exception as e:
        print(f'An error occurred during the Johansen test: {e}')
        return None

# Johansen Test Function Return 
result = johansen_test(daily_returns, det_order=1, k_ar_diff=1)
print(result)


Johansen Test Results (det_order=1)

Trace Statistics: [5348.45381306 4671.28495513 4054.04646308 3459.60223303 2902.37188466
 2353.69549231 1827.66653296 1323.50533048  847.45688464  411.538322  ]
Max Eigenvalue Statistics: [677.16885793 617.23849205 594.44423005 557.23034837 548.67639235
 526.02895935 504.16120248 476.04844584 435.91856264 411.538322  ]

Critical Values (Trace Test):
1%: [251.6293 208.3582 169.0618 133.7852 102.4674  75.1027  51.6492  32.0645
  16.1619   2.7055], 5%: [259.0267 215.1268 175.1584 139.278  107.3429  79.3422  55.2459  35.0116
  18.3985   3.8415], 10%: [273.3838 228.2226 187.1891 150.0778 116.9829  87.7748  62.5202  41.0815
  23.1485   6.6349]

Critical Values (Max Eigenvalue Test):
1%: [64.5292 58.5316 52.5858 46.5583 40.5244 34.4202 28.2398 21.8731 15.0006
  2.7055], 5%: [67.904  61.8051 55.7302 49.5875 43.4183 37.1646 30.8151 24.2522 17.1481
  3.8415], 10%: [74.7434 68.503  62.1741 55.8171 49.4095 42.8612 36.193  29.2631 21.7465
  6.6349]

Estimated nu