In [None]:
# Cell 1: Plot Control Settings
# One boolean switch per figure; each plotting cell checks its own key before drawing.
SHOW_PLOTS = dict(
    initial_prices=True,   # Initial price comparison plot
    returns=True,          # Returns plot
    spread_vs_theo=True,   # Market vs Theoretical Price plot
    spread=True,           # Raw spread plot
    trading_bands=True,    # Spread with trading bands
    signals=True,          # Trading signals plot
    performance=True,      # Performance and PnL plots
)

# Cell 2: Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller
import statsmodels.api as sm

# Cell 3: Load and prepare data
# Read the daily file, keep only the two tickers from 2017 onwards, and
# reshape to one row per date with one close-price column per ticker.
symbols = ['MSFT', 'META']
df = pd.read_parquet('nasdaq_daily.parquet')
df['date'] = pd.to_datetime(df['date'])

wanted_symbol = df['symbol'].isin(symbols)
wanted_date = df['date'] >= '2017-01-01'
mask = wanted_symbol & wanted_date

df_filtered = df[mask].copy()
pairs_data = df_filtered.pivot(index='date', columns='symbol', values='close')

# Validation: quick look at the shape, the head and the covered date range
first_date = pairs_data.index.min()
last_date = pairs_data.index.max()
print("Data shape:", pairs_data.shape)
print("\nFirst 5 rows:", pairs_data.head(), "\nDate range:", sep="\n")
print(f"Start: {first_date}")
print(f"End: {last_date}")

# Cell 4: Initial visualization
if SHOW_PLOTS['initial_prices']:
    # DataFrame.plot() opens its own figure when no axes are passed, so a
    # preceding plt.figure(...) would only leave an empty extra figure behind.
    # Hand the size to pandas directly so the figure that is drawn is 12x6.
    pairs_data.plot(figsize=(12, 6))
    plt.title('MSFT vs META Close Prices')
    plt.ylabel('Price')
    plt.show()

# Cell 5: Calculate returns and add to dataframe
# Day-over-day percentage change of each close series, stored as '<ticker>_%Return'.
for ticker in ('MSFT', 'META'):
    pairs_data[f'{ticker}_%Return'] = 100 * pairs_data[ticker].pct_change()

# Cell 6: ADF Test function
def adf_test(data, column):
    """Run an Augmented Dickey-Fuller test on one column and print a report.

    Missing values in ``data[column]`` are dropped before the test. The test
    statistic, the p-value and the critical values are printed.

    Returns the p-value (second element of the ``adfuller`` result).
    """
    series = data[column].dropna()
    adf_stat, p_value, _, _, critical_values = adfuller(series)[:5]

    print(f'ADF Test for {column}')
    print('ADF Statistic:', adf_stat)
    print('p-value:', p_value)
    print('Critical values:')
    for level, threshold in critical_values.items():
        print(f'\t{level}: {threshold}')
    print('\n')

    return p_value

# Cell 7: Run analysis and show results
# Statistics: summary table plus an ADF stationarity test for each return series
for symbol in symbols:
    col = f'{symbol}_%Return'
    print(f"\n{symbol} Returns:", pairs_data[col].describe(), "\nStationarity Test:", sep="\n")
    p_value = adf_test(pairs_data, col)

# Correlations between the two return series (linear and rank based)
print("\nCorrelation Analysis:")
msft_returns = pairs_data['MSFT_%Return']
meta_returns = pairs_data['META_%Return']
corr_pearson, corr_spearman = (
    msft_returns.corr(meta_returns, method=method)
    for method in ('pearson', 'spearman')
)
print(f'Pearson correlation: {corr_pearson:.4f}')
print(f'Spearman correlation: {corr_spearman:.4f}')

# Cell 8: Plot returns
if SHOW_PLOTS['returns']:
    plt.figure(figsize=(12,6))
    # One line per ticker, drawn in the same order as the legend entries
    for column, label in (('MSFT_%Return', 'MSFT Returns'),
                          ('META_%Return', 'META Returns')):
        plt.plot(pairs_data.index, pairs_data[column], label=label)
    plt.title('Returns Comparison')
    plt.legend()
    plt.show()

# Cell 9: Build Cointegration Model
# Regress MSFT on META. No constant column is added to the regressor, so the
# single fitted coefficient is used directly as the hedge ratio.
model = sm.OLS(pairs_data['MSFT'], pairs_data['META'])
model = model.fit()
# params is a Series labelled by regressor name ('META'); take the first
# coefficient by position with .iloc, because integer keys passed to [] on a
# label-indexed Series are deprecated as positional lookups in pandas.
hedgeRatio = round(model.params.iloc[0], 2)

# Calculate theoretical price and spread
# Theoretical MSFT price = hedge ratio * META; the spread is the gap between
# the market price and that theoretical price.
pairs_data['MSFT_tPrice'] = pairs_data['META'] * hedgeRatio
pairs_data['Spread'] = pairs_data['MSFT'] - pairs_data['MSFT_tPrice']

# Cell 10: Plot Spread and Prices
# Each entry: (SHOW_PLOTS key, figure title, ((column, legend label), ...)).
# Figures are produced in this order, each only if its switch is on.
spread_figures = (
    ('spread_vs_theo', 'Market vs Theoretical Price',
     (('MSFT', 'Market Price MSFT'), ('MSFT_tPrice', 'Theoretical Price MSFT'))),
    ('spread', 'Price Spread',
     (('Spread', 'Spread'),)),
)

for plot_key, figure_title, lines in spread_figures:
    if not SHOW_PLOTS[plot_key]:
        continue
    plt.figure(figsize=(12,6))
    for column, label in lines:
        plt.plot(pairs_data.index, pairs_data[column], label=label)
    plt.title(figure_title)
    plt.legend()
    plt.show()

# Cell 11: Test Spread Stationarity
print("Testing spread stationarity:")
pValueResidual = adf_test(pairs_data, "Spread")

# Compare the unrounded p-value against the 5% level. Rounding to two decimals
# first would turn every p-value in [0.045, 0.05) into 0.05 and wrongly reject
# a pair whose spread is significant at the 5% level.
is_tradeable = pValueResidual < 0.05

if is_tradeable:
    print("MSFT and META are suitable for Pairs Trading System")
else:
    print("Check another pair, or change time interval")
    # Stop the script here: the remaining cells assume a stationary spread.
    raise SystemExit("Pair is not suitable for trading - adjust parameters or try different pair")

# Cell 12: Calculate Trading Bands
def getTradeBands(prices, rate=50, num_std=1.5):
    """Return the upper and lower rolling bands around a series.

    Parameters
    ----------
    prices : series supporting ``.rolling(...)`` (e.g. a pandas Series)
    rate : rolling window length used for both the mean and the standard
        deviation.
    num_std : how many rolling standard deviations the bands sit above and
        below the rolling mean. Defaults to 1.5, the previously fixed width.

    Returns
    -------
    (bandUp, bandDown) : rolling mean plus / minus ``num_std`` rolling
        standard deviations. Positions without a full window are NaN.
    """
    window = prices.rolling(rate)
    sma = window.mean()
    offset = window.std() * num_std
    bandUp = sma + offset
    bandDown = sma - offset
    return bandUp, bandDown

# Work on a copy of the spread and derive its upper / lower trading bands
spreadPrices = pairs_data['Spread'].copy()
bandUp, bandDown = getTradeBands(spreadPrices)

# Cell 13: Plot Trading Bands
if SHOW_PLOTS['trading_bands']:
    plt.figure(figsize=(12,6))
    # (series, legend label, matplotlib colour code)
    for series, label, colour in ((spreadPrices, 'Spread', 'b'),
                                  (bandUp, 'Upper Band', 'g'),
                                  (bandDown, 'Lower Band', 'r')):
        plt.plot(pairs_data.index, series, label=label, c=colour)
    plt.title('Spread with Trading Bands')
    plt.legend()
    plt.show()

# Cell 14: Generate Trading Signals
def pairsTradeStrategy(data, bandDown, bandUp):
    """Turn band crossings of the spread into buy / sell signals.

    A buy (+1) is emitted when the spread moves from above the lower band on
    the previous bar to below it on the current bar; a sell (-1) when it moves
    from below the upper band to above it. A crossing in the same direction as
    the last emitted signal is ignored. The first bar never produces a signal,
    and comparisons against NaN band values are false, so no signal is
    produced there either.

    Returns three lists as long as ``data``:
    ``buyPrice`` (spread value at buy bars, NaN elsewhere), ``sellPrice``
    (spread value at sell bars, NaN elsewhere) and ``spreadSignal``
    (1, -1 or 0 per bar).
    """
    n = len(data)
    # Start from "nothing happened" everywhere and fill in only the signal bars.
    buyPrice = [np.nan] * n
    sellPrice = [np.nan] * n
    spreadSignal = [0] * n
    signal = 0  # direction of the most recent emitted signal

    for i in range(1, n):
        prev, curr = data.iloc[i - 1], data.iloc[i]

        if prev > bandDown.iloc[i - 1] and curr < bandDown.iloc[i]:
            direction, prices = 1, buyPrice
        elif prev < bandUp.iloc[i - 1] and curr > bandUp.iloc[i]:
            direction, prices = -1, sellPrice
        else:
            continue

        # Only act when the crossing flips the current direction.
        if signal != direction:
            prices[i] = curr
            signal = direction
            spreadSignal[i] = direction

    return buyPrice, sellPrice, spreadSignal

# Generate signals
buyPrice, sellPrice, spreadSignal = pairsTradeStrategy(spreadPrices, bandDown, bandUp)

# Cell 15: Plot Signals
if SHOW_PLOTS['signals']:
    plt.figure(figsize=(15,7))
    # Spread and bands as lines: (series, legend label, colour)
    for series, label, colour in ((spreadPrices, 'Spread', 'b'),
                                  (bandUp, 'Upper Band', 'g'),
                                  (bandDown, 'Lower Band', 'r')):
        plt.plot(pairs_data.index, series, label=label, c=colour)
    # Entry points as markers: (prices, marker, colour, legend label)
    for prices, marker, colour, label in ((buyPrice, '^', 'g', 'Buy Signal'),
                                          (sellPrice, 'v', 'r', 'Sell Signal')):
        plt.scatter(pairs_data.index, prices, marker=marker, color=colour, label=label, s=100)
    plt.title('Trading Signals')
    plt.legend()
    plt.show()

# Cell 16: Calculate PnL
# Bookkeeping state for the position loop: flags start flat, result lists start empty.
position = long = short = 0
pnl, marginReq = [], []

# Prices of the two legs plus the signal column; the signal is also stored on pairs_data.
tradeFrame = pairs_data[symbols].copy()
for frame in (pairs_data, tradeFrame):
    frame['Signal'] = spreadSignal

def openLong(data, index, position, long):
    """Read the entry prices for a long-MSFT / short-META position.

    ``position`` and ``long`` are accepted but not read. Returns
    ``(MSFT price, META price, 1, 1)`` for the row at ``index``; the two
    trailing ones are the new position and long flags.
    """
    msft_price, meta_price = (data[ticker].iloc[index] for ticker in ('MSFT', 'META'))
    return msft_price, meta_price, 1, 1

def closeLong(data, index, position, long):
    """Read the exit prices for a long-MSFT / short-META position.

    ``position`` and ``long`` are accepted but not read. Returns
    ``(MSFT price, META price, 0, 0)`` for the row at ``index``; the two
    trailing zeros are the cleared position and long flags.
    """
    msft_price, meta_price = (data[ticker].iloc[index] for ticker in ('MSFT', 'META'))
    return msft_price, meta_price, 0, 0

def openShort(data, index, position, short):
    """Read the entry prices for a short-MSFT / long-META position.

    ``position`` and ``short`` are accepted but not read. Returns
    ``(MSFT price, META price, 1, 1)`` for the row at ``index``; the two
    trailing ones are the new position and short flags.
    """
    msft_price, meta_price = (data[ticker].iloc[index] for ticker in ('MSFT', 'META'))
    return msft_price, meta_price, 1, 1

def closeShort(data, index, position, short):
    """Read the exit prices for a short-MSFT / long-META position.

    ``position`` and ``short`` are accepted but not read. Returns
    ``(MSFT price, META price, 0, 0)`` for the row at ``index``; the two
    trailing zeros are the cleared position and short flags.
    """
    msft_price, meta_price = (data[ticker].iloc[index] for ticker in ('MSFT', 'META'))
    return msft_price, meta_price, 0, 0

# Walk the signals in order. While flat, the first +1 / -1 opens a position.
# Once in a position, an opposite signal closes it (recording the profit of
# both legs) and immediately opens the reverse position on the same bar.
# Each opening appends the margin for that position to marginReq, with the
# short leg weighted by 1.5.
for i, tradeSignal in enumerate(tradeFrame['Signal']):
    wantsLong = tradeSignal == 1
    wantsShort = tradeSignal == -1

    if position == 0:
        if wantsLong:
            entryLongA, entryShortB, position, long = openLong(tradeFrame, i, position, long)
            marginReq.append(entryLongA + entryShortB*1.5)
        elif wantsShort:
            entryShortA, entryLongB, position, short = openShort(tradeFrame, i, position, short)
            marginReq.append(entryShortA*1.5 + entryLongB)
    elif position == 1:
        if wantsShort and long:
            # Flip long -> short
            exitLongA, exitShortB, position, long = closeLong(tradeFrame, i, position, long)
            profit = (exitLongA - entryLongA) + (entryShortB - exitShortB)
            pnl.append(round(profit,5))
            entryShortA, entryLongB, position, short = openShort(tradeFrame, i, position, short)
            marginReq.append(entryShortA*1.5 + entryLongB)
        elif wantsLong and short:
            # Flip short -> long
            exitShortA, exitLongB, position, short = closeShort(tradeFrame, i, position, short)
            profit = (entryShortA - exitShortA) + (exitLongB - entryLongB)
            pnl.append(round(profit,5))
            entryLongA, entryShortB, position, long = openLong(tradeFrame, i, position, long)
            marginReq.append(entryLongA + entryShortB*1.5)

# Cell 17: Calculate Performance Metrics
# marginReq gets one entry each time a position is opened and pnl one entry
# each time a position is closed, so marginReq[k] is the margin posted for the
# trade whose result is pnl[k]; a trailing extra entry belongs to the position
# that is still open. Keep the first len(pnl) margins so every PnL is divided
# by the margin of its own trade. Removing element 1 instead would shift all
# later margins by one trade, and would remove another element on every re-run
# of this cell.
closedTradeMargins = marginReq[:len(pnl)]

# Running total of the per-trade PnL
totalsum = 0
totalcumsum = []

for tradePnl in pnl:
    totalsum = totalsum + tradePnl
    totalcumsum.append(totalsum)

tradeResults = pd.DataFrame(list(zip(pnl, totalcumsum, closedTradeMargins)),
                          columns=['PnL', 'Cumulative', 'Margin'])
# Per-trade return in percent of the margin posted for that trade
tradeResults['Returns'] = tradeResults['PnL']/tradeResults['Margin']*100
# Compounded equity curve, scaled so that the capital before the first trade is 100
tradeResults['Equity'] = (1 + tradeResults['Returns']/100).cumprod() * 100

# Cell 18: Plot Results
if SHOW_PLOTS['performance']:
    tradeNumbers = tradeResults.index
    equityCurve = tradeResults['Equity']
    pnlPerTrade = tradeResults['PnL']

    # Figure 1: equity curve on top, per-trade PnL bars underneath
    plt.figure(figsize=(15,7))

    plt.subplot(2,1,1)
    plt.plot(tradeNumbers, equityCurve, label='Equity Curve')
    plt.title('Equity Curve')
    plt.legend()

    plt.subplot(2,1,2)
    plt.bar(tradeNumbers, pnlPerTrade, color='maroon', width=0.4)
    plt.title('PnL per Trade')

    plt.tight_layout()
    plt.show()

    # Figure 2: Return distribution plot (histogram of per-trade returns)
    plt.figure(figsize=(15, 7))
    tradeResults['Returns'].hist(bins=20)
    plt.title('Return Distribution')
    plt.xlabel('Return')
    plt.ylabel('Frequency')
    plt.show()

# Cell 19: Calculate and Display Performance Statistics
equity = tradeResults['Equity']
# The equity curve is built as cumprod(1 + return) * 100, so the capital
# before the first trade is 100.
STARTING_EQUITY = 100

# Distance of the equity curve below its running peak (empty if there are no trades)
drawdown = equity - equity.cummax()

if equity.empty:
    # No completed trade: nothing was gained or lost, and the ratios are undefined.
    total_return = 0.0
    max_drawdown = 0.0
    calmar_ratio = np.nan
    win_rate = np.nan
else:
    # Measure against the starting capital. Dividing by the first equity value
    # would leave the first trade's result out of the total (a single-trade
    # run would always report 0%).
    total_return = ((equity.iloc[-1] / STARTING_EQUITY) - 1) * 100
    # NOTE(review): the running peak starts at the equity after the first
    # trade, so a loss on the first trade is not counted as drawdown - confirm
    # whether the peak should start at STARTING_EQUITY.
    max_drawdown = (drawdown / equity.cummax()).min() * 100
    calmar_ratio = total_return / abs(max_drawdown) if max_drawdown != 0 else np.inf
    win_rate = (np.array(pnl) > 0).mean() * 100

print("Performance Metrics:")
print(f"Total Return: {total_return:.2f}%")
print(f"Maximum Drawdown: {abs(max_drawdown):.2f}%")
print(f"Calmar Ratio: {calmar_ratio:.2f}")
print(f"Number of Trades: {len(pnl)}")
print(f"Win Rate: {win_rate:.2f}%")