In [11]:
### Import Functions and Libraries
import yfinance as yf
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
import itertools


In [12]:
### Function to Get Commodities Data
def import_commod_data(tickers, start_date, end_date):
    """Download adjusted closing prices for each ticker and align them by date.

    Parameters
    ----------
    tickers : iterable of str, or a single str
        Yahoo Finance symbols to download. A lone string is treated as one
        ticker instead of being iterated character by character.
    start_date, end_date : str
        Date bounds forwarded unchanged to ``yf.download`` as ``start`` / ``end``.

    Returns
    -------
    pandas.DataFrame
        One column per ticker (in first-seen order), keeping only the rows in
        which every ticker has a finite, non-missing price. An empty frame is
        returned when ``tickers`` is empty.

    Raises
    ------
    ValueError
        If any ticker comes back with no rows. Without this check the final
        ``dropna()`` would silently discard every row and hand back an empty
        frame with no hint about which symbol failed.
    """
    if isinstance(tickers, str):
        tickers = [tickers]

    price_columns = {}
    failed = []
    for t in tickers:
        # auto_adjust=False is requested explicitly: recent yfinance releases
        # default to auto-adjusted prices and then omit the 'Adj Close' column.
        # progress=False keeps one progress bar per ticker out of the cell output.
        frame = yf.download(
            t, start=start_date, end=end_date, auto_adjust=False, progress=False
        )
        if frame is None or frame.empty:
            failed.append(t)
            continue
        prices = frame['Adj Close']
        # Newer yfinance releases may return (field, ticker) MultiIndex columns
        # even for a single ticker; in that case the selection is a one-column
        # DataFrame, so reduce it to a Series.
        if isinstance(prices, pd.DataFrame):
            prices = prices.iloc[:, 0]
        price_columns[t] = prices

    if failed:
        raise ValueError('No price data returned for: ' + ', '.join(failed))
    if not price_columns:
        return pd.DataFrame()

    # Build the frame in one step instead of growing it column by column.
    data = pd.concat(price_columns, axis=1)
    # Treat +/-inf as missing, then keep only dates on which every ticker has a value.
    return data.replace([np.inf, -np.inf], np.nan).dropna()
# Sector groupings of Yahoo Finance futures tickers
metals = ['GC=F', 'MGC=F', 'SI=F', 'SIL=F', 'PL=F', 'HG=F', 'PA=F']
energy = ['CL=F', 'HO=F', 'NG=F', 'RB=F', 'BZ=F']
agriculture = [
    'ZC=F', 'ZO=F', 'KE=F', 'ZR=F', 'ZM=F', 'ZL=F', 'ZS=F', 'GF=F',
    'HE=F', 'LE=F', 'CC=F', 'KC=F', 'CT=F', 'OJ=F', 'SB=F',
]

# Full ticker universe: eight leading symbols that belong to no sector list,
# followed by the metals, energy and agriculture groups in that order.
# NOTE(review): the leading eight look like equity-index and Treasury futures
# rather than commodities -- confirm they belong under this name.
commodities_list = [
    'ES=F', 'YM=F', 'NQ=F', 'RTY=F', 'ZB=F', 'ZN=F', 'ZF=F', 'ZT=F',
    *metals,
    *energy,
    *agriculture,
]

### Load agriculture prices for the chosen date range
start_date = '2021-07-01'
end_date = '2024-07-01'
# Only the agriculture group is downloaded here; the other lists are defined but unused in this cell
data = import_commod_data(tickers=agriculture, start_date=start_date, end_date=end_date)


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

In [13]:
### Create list of combinations of all unique pairs
def get_all_pairs(pairs):
    """Return every unordered two-element combination of ``pairs``.

    Combinations are produced by ``itertools.combinations(pairs, 2)`` and each
    one is converted from a tuple to a two-item list, so the result is a list
    of lists in the order ``itertools`` emits them.
    """
    return list(map(list, itertools.combinations(pairs, 2)))

# Build the pair universe and compare its size against the closed form n*(n-1)/2
all_pairs = get_all_pairs(agriculture)
n = len(agriculture)
total = n * (n - 1) / 2  # number of unordered pairs among n items (kept as a float)
print('The total number of pairs is', len(all_pairs), 'and should be:', f'{int(total)}')


The total number of pairs is 105 and should be: 105


In [14]:

### Compute and Filter based on P-Values (< 0.05)
# The cointegration test checks whether there is a long-term equilibrium relationship between the two time series
def check_cointegration(series1, series2):
    """Run ``sm.tsa.stattools.coint`` on the two series and return its p-value.

    Only the element at index 1 of the test result is used; the remaining
    elements of the result are discarded.
    """
    return sm.tsa.stattools.coint(series1, series2)[1]

# Collect every pairing whose cointegration p-value falls below 0.05.
# NOTE(review): every pair is tested at the same 0.05 threshold with no
# multiple-comparison adjustment, so some hits are expected by chance alone --
# consider a correction before relying on the list.
cointegrated_pairs = []
for pairing in all_pairs:
    # Each pairing is a two-item list, e.g. ['ES=F', 'YM=F'] -> pair1 = 'ES=F', pair2 = 'YM=F'
    pair1, pair2 = pairing
    # p-value of the cointegration test on the two price columns
    p_value = check_cointegration(data[pair1], data[pair2])
    # Keep the pairing only when the test is significant at the 5% level
    if p_value < 0.05:
        cointegrated_pairs.append(pairing)

# Share of tested pairs that passed the filter, expressed in percent
coint_percentage = len(cointegrated_pairs) / len(all_pairs) * 100
print(f'The total number of cointegrated pairs is {len(cointegrated_pairs)}, giving a total percentage of {round(coint_percentage, 2)}%')
print(cointegrated_pairs)



The total number of cointegrated pairs is 11, giving a total percentage of 10.48%
[['ZC=F', 'KE=F'], ['ZO=F', 'ZR=F'], ['ZO=F', 'CT=F'], ['KE=F', 'ZL=F'], ['KE=F', 'ZS=F'], ['ZR=F', 'ZL=F'], ['ZR=F', 'GF=F'], ['ZR=F', 'LE=F'], ['ZR=F', 'CC=F'], ['ZR=F', 'KC=F'], ['ZR=F', 'CT=F']]
