# Pair Trading of BABA US Equity and 9988 HK Equity

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import statsmodels.api as sm
from xbbg import blp
from datetime import datetime
from statsmodels.api import OLS
from statsmodels.tsa.stattools import coint


def run_pairs_strategy(ticker1, ticker2, start_date, end_date, *,
                       train_size=350, entry_z=3.0, exit_z=2.5,
                       cost_per_unit=0.002):
    """Backtest a z-score mean-reversion pairs trade on two Bloomberg tickers.

    Daily ``PX_LAST`` prices are downloaded for both tickers, inner-joined on
    date, and split into a training window (the first ``train_size`` rows) and
    a test window (the remainder). A no-intercept OLS regression of ticker1 on
    ticker2 over the training window gives the hedge ratio; the spread is
    z-scored with the training-window mean and standard deviation.

    Side effects: prints the cointegration test result, shows a matplotlib
    figure, and pickles the positions to
    ``positions_<ticker1>_<ticker2>.pkl`` in the working directory.

    Args:
        ticker1: Bloomberg ticker of the dependent leg.
        ticker2: Bloomberg ticker of the hedge leg.
        start_date: First date passed to ``blp.bdh``.
        end_date: Last date passed to ``blp.bdh``.
        train_size: Number of leading rows used to fit the hedge ratio and
            the spread statistics.
        entry_z: Absolute z-score at or beyond which a spread position is
            opened.
        exit_z: Absolute z-score at or inside which a spread position is
            closed.
        cost_per_unit: Cost charged per unit of absolute position change.

    Returns:
        dict with keys ``sharpeTrainset``, ``sharpeTestset``, ``positions``
        (DataFrame, one column per leg), ``pnl`` (daily Series) and ``df``
        (merged prices, z-score and raw position columns).

    Raises:
        ValueError: If ``train_size`` is below 2 or the merged history has no
            rows left for the test window.
    """
    # Clean up ticker names for column names
    def sanitize_ticker(ticker):
        return ticker.replace(' ', '_').replace('.', '').replace('&', 'and').replace('/', '_')

    def annualized_sharpe(daily_pnl):
        # np.std keeps the population (ddof=0) convention.
        return np.sqrt(252) * np.mean(daily_pnl) / np.std(daily_pnl)

    ticker1_clean = sanitize_ticker(ticker1)
    ticker2_clean = sanitize_ticker(ticker2)
    price1 = 'Adj Close_' + ticker1_clean
    price2 = 'Adj Close_' + ticker2_clean

    # Retrieve data
    # NOTE(review): FX='USD' presumably asks Bloomberg for USD-converted
    # prices -- confirm against the xbbg documentation.
    df1 = blp.bdh(ticker1, 'PX_LAST', start_date, end_date, FX='USD')
    df2 = blp.bdh(ticker2, 'PX_LAST', start_date, end_date, FX='USD')

    # Data processing
    df1.reset_index(inplace=True)
    df2.reset_index(inplace=True)
    df1.columns = ['Date', 'Adj Close']
    df2.columns = ['Date', 'Adj Close']

    # Inner join: only dates on which both legs have a price are kept.
    df = pd.merge(df1, df2, on='Date', suffixes=('_' + ticker1_clean, '_' + ticker2_clean))
    df.set_index('Date', inplace=True)
    df.sort_index(inplace=True)

    if train_size < 2 or len(df) <= train_size:
        raise ValueError(
            f"Need more than {max(train_size, 2)} common observations for "
            f"{ticker1} / {ticker2} (train_size={train_size}), got {len(df)}"
        )

    coint_t, pvalue, crit_value = coint(df[price1], df[price2])
    print( coint_t, pvalue, crit_value)

    # Build OLS model (no intercept: no constant column is added)
    model = sm.OLS(df[price1].iloc[:train_size], df[price2].iloc[:train_size])
    results = model.fit()
    # params is label-indexed, so take the single coefficient by position.
    hedge_ratio = results.params.iloc[0]

    # Compute spread, standardised with training-window statistics only
    spread = df[price1] - hedge_ratio * df[price2]
    spreadMean = np.mean(spread.iloc[:train_size])
    spreadStd = np.std(spread.iloc[:train_size])

    df['zscore'] = (spread - spreadMean) / spreadStd

    # Positions
    long_cols = ['positions_' + ticker1_clean + '_Long',
                 'positions_' + ticker2_clean + '_Long']
    short_cols = ['positions_' + ticker1_clean + '_Short',
                  'positions_' + ticker2_clean + '_Short']
    position_cols = long_cols + short_cols
    # Start from NaN ("no signal") rather than 0 so that the forward fill
    # below can actually carry an open position through the band between
    # the entry and exit thresholds.
    for col in position_cols:
        df[col] = np.nan

    short_entry = df['zscore'] >= entry_z
    long_entry = df['zscore'] <= -entry_z
    df.loc[short_entry, short_cols[0]] = -1  # Short spread
    df.loc[short_entry, short_cols[1]] = 1
    df.loc[long_entry, long_cols[0]] = 1  # Buy spread
    df.loc[long_entry, long_cols[1]] = -1
    df.loc[df['zscore'] <= exit_z, short_cols] = 0  # Exit short spread
    df.loc[df['zscore'] >= -exit_z, long_cols] = 0  # Exit long spread

    # Carry forward existing positions unless there is an exit signal
    df.ffill(inplace=True)
    # Rows before the first signal have nothing to carry forward: flat.
    df[position_cols] = df[position_cols].fillna(0).astype(int)

    # Calculate positions and PnL
    positions = pd.DataFrame(
        df[long_cols].values + df[short_cols].values,
        columns=[ticker1_clean, ticker2_clean],
        index=df.index,
    )
    dailyret = df[[price1, price2]].pct_change()

    # Yesterday's position earns today's return.
    pnl = (positions.shift() * dailyret.values).sum(axis=1)
    # Subtract transaction costs: cost_per_unit per unit of position change.
    pnl = pnl - (positions - positions.shift()).abs().sum(axis=1) * cost_per_unit

    # Row 0 has no prior-day position, so it is left out of the train Sharpe.
    sharpeTrainset = annualized_sharpe(pnl.iloc[1:train_size])
    sharpeTestset = annualized_sharpe(pnl.iloc[train_size:])

    # Plot spread and cumulative PnL
    plt.figure(figsize=(12, 6))
    plt.subplot(211)
    plt.title(f"Spread for {ticker1} and {ticker2}")
    plt.plot(spread.iloc[:train_size], label='Train Spread')
    plt.plot(spread.iloc[train_size:], label='Test Spread')
    plt.legend()

    plt.subplot(212)
    plt.title(f"Cumulative PnL for {ticker1} and {ticker2}")
    plt.plot(pnl.iloc[train_size:].cumsum(), label='Test PnL')
    plt.legend()
    plt.tight_layout()
    plt.show()

    # Save positions
    positions.to_pickle(f'positions_{ticker1_clean}_{ticker2_clean}.pkl')

    # Return outputs
    return {
        'sharpeTrainset': sharpeTrainset,
        'sharpeTestset': sharpeTestset,
        'positions': positions,
        'pnl': pnl,
        'df': df
    }

# Define your list of pairs.
# Every entry ends with a comma so that any commented-out pair can be
# re-enabled without turning the previous tuple into a call expression.
pairs_list = [
    # NOTE(review): this active pair was previously labelled
    # "Alibaba Group Holding Ltd.", which does not match these tickers.
    ('SILV Comdty', 'SIL US Equity'),
    # Hong Kong/China
    #('TCEHY US Equity', '700 HK Equity'),  # Tencent Holdings Ltd.
    #('MNSO US Equity', '9896 HK Equity'),  # MINISO Group Holding Ltd.
    #('JD US Equity', '9618 HK Equity'),    # JD.com Inc.
    #('PNGAY US Equity', '2318 HK Equity'), # Ping An Insurance (Group) Co. of China Ltd.
    #('BIDU US Equity', '9888 HK Equity'),  # Baidu Inc.
    #('NTES US Equity', '9999 HK Equity'),  # NetEase Inc.
    #('XIACY US Equity', '1810 HK Equity'), # Xiaomi Corp.
    #('MPNGY US Equity', '3690 HK Equity'), # Meituan Dianping
    #('LI US Equity', '2015 HK Equity'),    # Li Auto Inc.
    #('XPEV US Equity', '9868 HK Equity'),  # XPeng Inc.
    #('TCOM US Equity', '9961 HK Equity'),  # Trip.com Group Ltd.
    #('YUMC US Equity', '9987 HK Equity'),  # Yum China Holdings Inc.
]

start_date = '2020-01-01'  # Adjust the start date as needed
end_date = datetime.today().strftime('%Y-%m-%d')  # Today's date in 'YYYY-MM-DD' format

# Daily PnL series of every pair, named after the pair, collected for the
# combined-portfolio analysis that reads `pnl_list`.
pnl_list = []

# Loop over each pair and run the strategy
for ticker1, ticker2 in pairs_list:
    print(f"Processing pair: {ticker1}, {ticker2}")
    results = run_pairs_strategy(ticker1, ticker2, start_date, end_date)
    print(f"Sharpe Ratio (Train): {results['sharpeTrainset']}")
    print(f"Sharpe Ratio (Test): {results['sharpeTestset']}")
    pnl_list.append(results['pnl'].rename(f"{ticker1} / {ticker2}"))

In [None]:
# Put the per-pair daily PnL series side by side (one column per pair);
# a date on which a pair has no value contributes zero PnL.
pnl_df = pd.concat(pnl_list, axis=1)
pnl_df = pnl_df.fillna(0)

# Equal-weight portfolio: average the pairs' PnL on each date.
total_pnl = pnl_df.mean(axis=1)
N = total_pnl.shape[0]  # number of daily observations

# Additive (summed) and compounded views of the same daily series.
cumulative_pnl = total_pnl.cumsum()
cumulative_returns = total_pnl.add(1).cumprod()

# Annualise on a 252-trading-day year.
final_growth = cumulative_returns.iloc[-1]
annualized_return = final_growth ** (252 / N) - 1
annualized_volatility = total_pnl.std() * np.sqrt(252)

# Drawdown: distance of the compounded curve below its running peak.
running_max = cumulative_returns.cummax()
drawdown = cumulative_returns.div(running_max).sub(1)
max_drawdown = drawdown.min()

# Sharpe ratio of the combined portfolio (no risk-free rate subtracted).
sharpe_total = np.sqrt(252) * total_pnl.mean() / total_pnl.std()

# Report the headline numbers.
print(f"Combined Portfolio Sharpe Ratio: {sharpe_total}")
print(f"Annualized Return: {annualized_return:.2%}")
print(f"Annualized Standard Deviation (Volatility): {annualized_volatility:.2%}")
print(f"Max Drawdown: {max_drawdown:.2%}")

# Chart the summed PnL of the combined portfolio.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(cumulative_pnl)
ax.set_title("Cumulative PnL of Combined Portfolio")
ax.set_xlabel("Date")
ax.set_ylabel("Cumulative PnL")
plt.show()