In [15]:
### Library Imports
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import numpy as np
import warnings
warnings.filterwarnings("ignore")

### Resources
# https://www.cmcmarkets.com/en/trading-guides/pairs-trading -> Overview of Pairs Trading
# https://hudsonthames.org/an-introduction-to-cointegration/ -> Cointegration for Pairs Trading


In [16]:
### Function to Import Commodities Data
# Sector baskets of yfinance futures tickers.
metals = ['GC=F', 'MGC=F', 'SI=F', 'SIL=F', 'PL=F', 'HG=F', 'PA=F']
energy = ['CL=F', 'HO=F', 'NG=F', 'RB=F', 'BZ=F', 'B0=F']
agriculture = [
    'ZC=F', 'ZO=F', 'KE=F', 'ZR=F', 'ZM=F', 'ZL=F', 'ZS=F', 'GF=F',
    'HE=F', 'LE=F', 'CC=F', 'KC=F', 'CT=F', 'LBS=F', 'OJ=F', 'SB=F',
]

# Two-ticker subset of `metals` used as the working pair in this notebook.
metals_test = ['SI=F', 'HG=F']

# Full ticker universe: eight leading tickers followed by the sector baskets
# in metals -> energy -> agriculture order.
# NOTE(review): the first eight tickers do not belong to any sector basket
# above -- confirm they are meant to be part of the "commodities" universe.
commodities_list = [
    'ES=F', 'YM=F', 'NQ=F', 'RTY=F',
    'ZB=F', 'ZN=F', 'ZF=F', 'ZT=F',
    *metals,
    *energy,
    *agriculture,
]

def import_commod_data(tickers, start_date, end_date=None):
    """Download 'Adj Close' prices for each ticker into one DataFrame.

    Parameters
    ----------
    tickers : list of str or str
        Ticker symbols to download. A single string is treated as a
        one-element list.
    start_date : str
        Forwarded to ``yf.download`` as ``start``.
    end_date : str, optional
        Forwarded to ``yf.download`` as ``end``. The default ``None`` leaves
        the window open-ended, which matches the previous behaviour.

    Returns
    -------
    pandas.DataFrame
        One column per ticker, indexed by the first ticker's index, with every
        row that contains a NaN or +/-Inf value removed.
    """
    # Iterating a bare string would loop over its characters, so wrap it.
    if isinstance(tickers, str):
        tickers = [tickers]

    data = pd.DataFrame()
    # One loop covers the single-ticker case as well; no special branch needed.
    for t in tickers:
        data[t] = yf.download(t, start=start_date, end=end_date)['Adj Close']

    # Drop rows with NaN or Inf values so all columns share the same dates
    data = data.replace([np.inf, -np.inf], np.nan).dropna()

    return data

# Download window.
# NOTE(review): end_date is defined here but is not passed to
# import_commod_data, so the frame is not capped at this date.
start_date = '2023-07-01'
end_date = '2024-07-01'
# Fetch the test pair and move the date index into a regular column in one step.
commod_data = import_commod_data(metals_test, start_date).reset_index()
commod_data.tail()


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Unnamed: 0,Date,SI=F,HG=F
252,2024-07-03,30.548,4.541
253,2024-07-05,31.388,4.6585
254,2024-07-08,30.618,4.622
255,2024-07-09,30.771999,4.578
256,2024-07-10,31.1,4.588


In [17]:
### Function to Compute Spread and Z-Score

def spread_and_zscore(series1, series2):
    """Return the raw spread between two series and its standardized z-score.

    The spread is ``series1 - series2``; the z-score is that spread centred
    on its own mean and divided by its own standard deviation.
    """
    diff = series1 - series2
    centre = diff.mean()
    scale = diff.std()
    standardized = (diff - centre) / scale
    return diff, standardized

### Compute the spread and z-score for the test pair
# Pull the two price columns named by the first two entries of metals_test
metal1, metal2 = commod_data[metals_test[0]], commod_data[metals_test[1]]
# Spread is metal1 - metal2; z_score standardizes it over the whole sample
spread, z_score = spread_and_zscore(metal1, metal2)
# Report the latest observation of each, rounded to 4 decimals
print('Most recent computed spread is: ', round(spread.iloc[-1], 4))
print('Most recent computed z-score is: ', round(z_score.iloc[-1], 4))


Most recent computed spread is:  26.512
Most recent computed z-score is:  2.3294


In [18]:
### Function to check for cointegration
# The cointegration test checks whether there is a long-term equilibrium relationship between the two time series
def check_cointegration(series1, series2):
    """Run statsmodels' cointegration test on two series and return its p-value."""
    # coint() returns a tuple; index 1 holds the p-value of the test.
    return sm.tsa.stattools.coint(series1, series2)[1]

### Function Call to Compute P Value
# If the p-value is less than a significance level (commonly 0.05), the null hypothesis of no cointegration is rejected,
# indicating that the series are cointegrated
p_value = check_cointegration(metal1, metal2)

# Plain if/else so that a p-value of exactly 0.05 (or NaN) still produces a
# message: anything that is not strictly below 0.05 fails to reject the null.
if p_value < 0.05:
    print(
        'The computed p-value is:', round(p_value, 4), 
        'and thus the null hypothesis of no cointegration is rejected, \n'
        'so the assumption of mean reversion holds and these pairs should be considered'
    )
else:
    print(
        'The computed p-value is:', round(p_value, 4), 
        'and thus the null hypothesis of no cointegration cannot be rejected, \n'
        'so the assumption of mean reversion may not hold and these pairs should NOT be considered'
    )


The computed p-value is: 0.0251 and thus the null hypothesis of no cointegration is rejected, 
so the assumption of mean reversion holds and these pairs should be considered


In [19]:
### Calculate Trading Signals Based on Z-Score
''' 
The z-score is used to generate trading signals based on predefined thresholds:
- When the z-score is below -1, the spread (series1 - series2) is well below its mean, i.e. the first asset
  is cheap relative to the second. This triggers a long position in the first (undervalued) asset and a short
  position in the second (overvalued) asset.
- When the z-score is above 1, the spread is well above its mean, i.e. the first asset is expensive relative
  to the second. This triggers a short position in the first (overvalued) asset and a long position in the
  second (undervalued) asset.
- Positions are exited when the z-score reverts to a value between -0.5 and 0.5, indicating that the spread
  has reverted toward its mean.
'''
# Function to generate trading signals based on z-score
def generate_trading_signals(z_score, entry_threshold=1.0, exit_threshold=0.5):
    """Turn a z-score series into boolean long / short / exit masks.

    Parameters
    ----------
    z_score : pandas.Series
        Standardized spread.
    entry_threshold : float, default 1.0
        A long signal fires where ``z_score < -entry_threshold`` and a short
        signal where ``z_score > entry_threshold``.
    exit_threshold : float, default 0.5
        An exit signal fires where ``abs(z_score) < exit_threshold``.

    Returns
    -------
    tuple
        ``(longs, shorts, exits)``, each the element-wise boolean result of
        the corresponding comparison on ``z_score``.
    """
    longs = z_score < -entry_threshold
    shorts = z_score > entry_threshold
    exits = abs(z_score) < exit_threshold
    return longs, shorts, exits

### Function to calculate positions
def calculate_positions(commod_data):
    """Create an empty positions DataFrame that shares ``commod_data``'s index.

    Returns
    -------
    pandas.DataFrame
        A frame with no columns and the same index as ``commod_data``.
    """
    positions = pd.DataFrame(index = commod_data.index)
    # Hand the frame back; without this the caller receives None.
    return positions


In [20]:
def _series_label(series, fallback):
    """Return ``series.name`` as a string, or ``fallback`` when it has no name."""
    name = getattr(series, 'name', None)
    return fallback if name is None else str(name)


def calculate_position(commod_data, metal1, metal2):
    """Build long/short position columns for a pair if it is cointegrated.

    Parameters
    ----------
    commod_data : pandas.DataFrame
        Frame whose index the returned positions frame shares.
    metal1, metal2 : pandas.Series
        Price series of the pair; they must share ``commod_data``'s index
        because the boolean signal masks are applied with ``.loc``.

    Returns
    -------
    pandas.DataFrame
        Indexed like ``commod_data``. When the cointegration p-value is below
        0.05 it contains ``'<name1>_<name2>_long'`` (1000 while long, else 0)
        and ``'<name1>_<name2>_short'`` (-1000 while short, else 0); a position
        is held from its entry signal until the next exit signal. Otherwise the
        frame has no position columns.
    """
    # Call Positions Function
    positions = calculate_positions(commod_data)
    # Guard against the helper handing back nothing.
    if positions is None:
        positions = pd.DataFrame(index=commod_data.index)

    # Label columns and messages with the series names rather than
    # interpolating the full Series repr into an f-string.
    label1 = _series_label(metal1, 'series1')
    label2 = _series_label(metal2, 'series2')
    long_col = f'{label1}_{label2}_long'
    short_col = f'{label1}_{label2}_short'

    # Compute p Value
    p_value = check_cointegration(metal1, metal2)
    # Check p value and compute position
    if p_value < 0.05:
        print(
        'The computed p-value is:', round(p_value, 4), 
        'and thus the null hypothesis of no cointegration is rejected, \n'
        'so the assumption of mean reversion holds and these pairs should be considered'
        )
        # Compute spread, z-score, and generate the trading signals
        spread, z_score = spread_and_zscore(metal1, metal2)
        print('Most recent computed spread is: ', round(spread.iloc[-1], 4))
        print('Most recent computed z-score is: ', round(z_score.iloc[-1], 4))

        # Call function to generate trading signals
        longs, shorts, exits = generate_trading_signals(z_score)

        # Start from NaN ("no decision on this row") so the forward fill below
        # actually carries an open position through rows with no signal.
        positions[long_col] = np.nan
        positions[short_col] = np.nan
        positions.loc[longs, long_col] = 1000
        positions.loc[shorts, short_col] = -1000
        positions.loc[exits, [long_col, short_col]] = 0

        # Carry forward positions; rows before the first signal are flat (0).
        positions[[long_col, short_col]] = (
            positions[[long_col, short_col]].ffill().fillna(0).astype(int)
        )
    else:
        print(f"{label1} and {label2} are NOT cointegrated with p-value: {p_value}")
    
    return positions