# Pairs Trading Strategy

In [8]:
# Import libraries
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import yfinance as yf

## 🧺 Asset Universe Selection / Data Preprocessing & Cleaning


In [9]:
# Data preprocessing

def normalize_data(ticker1, ticker2, startdate, enddate):
    """
    This function downloads, cleans and normalizes the closing prices of the two tickers
    we wish to pair trade in a given timeframe.

    Arguments:
    ticker1, ticker2 -- the two tickers we want to trade
    startdate, enddate -- starting and ending dates of the timeframe, forwarded to
                          yf.download as start= and end=

    Returns:
    log_zscore_data1, log_zscore_data2 -- one Series per ticker (in argument order) holding
                          the closing prices after a log1p transformation and a z-score
                          normalization computed over the whole downloaded window

    Raises:
    ValueError -- if nothing was downloaded, or if there is no date on which both
                  tickers have a closing price

    """
    tickers = [ticker1, ticker2]
    data = yf.download(tickers, start=startdate, end=enddate, auto_adjust=True)
    if data.empty:
        raise ValueError(
            f"No price data downloaded for {tickers} between {startdate} and {enddate}"
        )

    # Only the closing prices are used, so drop incomplete rows on that slice alone:
    # a gap in another field (e.g. Volume) must not discard a valid close, while a
    # missing close for either ticker still removes that date for both, which keeps
    # the two returned series aligned on the same dates.
    close = data["Close"].dropna()
    if close.empty:
        raise ValueError(
            f"No dates with a closing price for both {ticker1} and {ticker2} "
            f"between {startdate} and {enddate}"
        )

    # Log transformation on the data (log1p, i.e. log(1 + price))
    log_close = np.log1p(close)

    # Calculating the z-scores, column by column (pandas' default sample std).
    # NOTE(review): mean and std are taken over the full window, so every value
    # depends on later prices -- confirm this is acceptable before backtesting on it.
    log_zscore = (log_close - log_close.mean()) / log_close.std()

    return log_zscore[ticker1], log_zscore[ticker2]


# Example usage: normalize AAPL and MSFT and preview the first rows for AAPL
d1, d2 = normalize_data(
    ticker1='AAPL',
    ticker2='MSFT',
    startdate='2020-01-01',
    enddate='2024-12-31',
)
print(d1.head())

[*********************100%***********************]  2 of 2 completed

Date
2020-01-02   -2.249312
2020-01-03   -2.280818
2020-01-06   -2.255225
2020-01-07   -2.270426
2020-01-08   -2.218962
Name: AAPL, dtype: float64





## 🔍 Cointegration Testing (Engle-Granger)


## 📐 Spread & Z-score Calculation