In [50]:
!pip install joblib tqdm clickhouse-driver pandas numpy statsmodels seaborn matplotlib pyarrow fastparquet yfinance



In [51]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller
import statsmodels.api as sm
from itertools import combinations

SHOW_PLOTS = False

dax_tickers = [
    'ADS.DE',  # Adidas AG
    'AIR.DE',  # Airbus SE
    'ALV.DE',  # Allianz SE
    'BAS.DE',  # BASF SE
    'BAYN.DE', # Bayer AG
    'BEI.DE',  # Beiersdorf AG
    'BMW.DE',  # Bayerische Motoren Werke AG (BMW)
    'BNR.DE',  # Brenntag SE
    'CON.DE',  # Continental AG
    '1COV.DE', # Covestro AG
    'DHER.DE', # Delivery Hero SE
    'DBK.DE',  # Deutsche Bank AG
    'DB1.DE',  # Deutsche Börse AG
    'DPSTF',  # Deutsche Post AG
    'DTE.DE',  # Deutsche Telekom AG
    'EOAN.DE', # E.ON SE
    'FRE.DE',  # Fresenius SE & Co. KGaA
    'FME.DE',  # Fresenius Medical Care AG & Co. KGaA
    'HNR1.DE', # Hannover Rück SE
    'HEI.DE',  # HeidelbergCement AG
    'HFG.DE',  # HelloFresh SE
    'HEN3.DE', # Henkel AG & Co. KGaA
    'IFX.DE',  # Infineon Technologies AG
    'LIN.DE',  # Linde plc
    'MBG.DE',  # Mercedes-Benz Group AG
    'MRK.DE',  # Merck KGaA
    'MTX.DE',  # MTU Aero Engines AG
    'MUV2.DE', # Münchener Rückversicherungs-Gesellschaft AG (Munich Re)
    'PUM.DE',  # Puma SE
    'QIA.DE',  # Qiagen N.V.
    'RWE.DE',  # RWE AG
    'SAP.DE',  # SAP SE
    'SRT3.DE', # Sartorius AG
    'SIE.DE',  # Siemens AG
    'SHL.DE',  # Siemens Healthineers AG
    'SY1.DE',  # Symrise AG
    'VOW3.DE', # Volkswagen AG
    'VNA.DE',  # Vonovia SE
    'ZAL.DE'   # Zalando SE
]


all_data = {}

In [52]:
def dataDownloader(ticker):
    df = yf.download(ticker, start="2021-11-15", interval="1d", progress=False)["Close"]
    df.index = pd.to_datetime(df.index, format='%Y/%m/%d').strftime('%Y-%m-%d')
    return df

# Download all data once
for ticker in dax_tickers:
    print(f"Downloading {ticker}")
    all_data[ticker] = dataDownloader(ticker)

Downloading ADS.DE
Downloading AIR.DE
Downloading ALV.DE
Downloading BAS.DE
Downloading BAYN.DE
Downloading BEI.DE
Downloading BMW.DE
Downloading BNR.DE
Downloading CON.DE
Downloading 1COV.DE
Downloading DHER.DE
Downloading DBK.DE
Downloading DB1.DE
Downloading DPSTF
Downloading DTE.DE
Downloading EOAN.DE
Downloading FRE.DE
Downloading FME.DE
Downloading HNR1.DE
Downloading HEI.DE
Downloading HFG.DE
Downloading HEN3.DE
Downloading IFX.DE
Downloading LIN.DE
Downloading MBG.DE
Downloading MRK.DE
Downloading MTX.DE
Downloading MUV2.DE
Downloading PUM.DE
Downloading QIA.DE
Downloading RWE.DE
Downloading SAP.DE
Downloading SRT3.DE
Downloading SIE.DE
Downloading SHL.DE
Downloading SY1.DE
Downloading VOW3.DE
Downloading VNA.DE
Downloading ZAL.DE


In [53]:
def sortData(dataframe):
    if not dataframe.index.is_monotonic_decreasing:
        dataframe.sort_index(inplace=True, ascending=False)
        dataframe.drop(index=dataframe.index[-1], axis=0, inplace=True)
    return dataframe

def detectNull(dataframe, colname):
    if dataframe[colname].isnull().values.any():
        dataframe.loc[:, colname] = dataframe[colname].interpolate(method='linear')
    return dataframe

def detectOutliers(dataframe, colname):
    thres = 3
    mean = np.mean(dataframe[colname])
    std = np.std(dataframe[colname])
    mask = abs((dataframe[colname] - mean) / std) > thres
    if mask.any():
        temp_series = dataframe[colname].copy()
        temp_series[mask] = np.nan
        dataframe.loc[:, colname] = temp_series.interpolate(method='linear')
    return dataframe

def pricePlot(dataframe, colname):
    if SHOW_PLOTS:
        fig, ax = plt.subplots(figsize=(10,8))
        dataframe.plot.line(y=colname, color='crimson', ax=ax)
        plt.ylabel(colname)
        plt.show()

def ADFTest(data, colname):
    if len(data[colname].dropna()) < 30:
        return 1.0
    try:
        adfStats = adfuller(data[colname].dropna(), maxlag=0)
        return round(adfStats[1], 2)
    except:
        return 1.0

In [54]:
def getTradeBands(prices, rate=50):
    sma = prices.rolling(rate).mean()
    std = prices.rolling(rate).std()
    bandUp = sma + std * 1.5
    bandDown = sma - std * 1.5
    return bandUp, bandDown

def pairsTradeStrategy(data, bandDown, bandUp):
    buyPrice = []
    sellPrice = []
    spreadSignal = []
    signal = 0
    
    for i in range(len(data)):
        if i == 0:
            buyPrice.append(np.nan)
            sellPrice.append(np.nan)
            spreadSignal.append(0)
            continue
            
        if data[i-1] > bandDown[i-1] and data[i] < bandDown[i]:
            if signal != 1:
                buyPrice.append(data[i])
                sellPrice.append(np.nan)
                signal = 1
                spreadSignal.append(signal)
            else:
                buyPrice.append(np.nan)
                sellPrice.append(np.nan)
                spreadSignal.append(0)
        elif data[i-1] < bandUp[i-1] and data[i] > bandUp[i]:
            if signal != -1:
                buyPrice.append(np.nan)
                sellPrice.append(data[i])
                signal = -1
                spreadSignal.append(signal)
            else:
                buyPrice.append(np.nan)
                sellPrice.append(np.nan)
                spreadSignal.append(0)
        else:
            buyPrice.append(np.nan)
            sellPrice.append(np.nan)
            spreadSignal.append(0)
            
    return buyPrice, sellPrice, spreadSignal

In [55]:
def analyze_pair(ticker1, ticker2):
    try:
        # Prepare data from cached downloads
        pairsData = pd.DataFrame()
        pairsData[ticker1] = all_data[ticker1]
        pairsData[ticker2] = all_data[ticker2]

        # Data validation
        detectNull(pairsData, ticker1)
        detectNull(pairsData, ticker2)

        first_index1 = pairsData[ticker1].first_valid_index()
        first_index2 = pairsData[ticker2].first_valid_index()
        if first_index1 != first_index2:
            pairsData.dropna(inplace=True)
            
        if len(pairsData) < 30:
            return False, None

        detectOutliers(pairsData, ticker1)
        detectOutliers(pairsData, ticker2)

        pairsData[f"{ticker1}_%Return"] = round(pairsData[ticker1].pct_change(), 4)*100
        pairsData[f"{ticker2}_%Return"] = round(pairsData[ticker2].pct_change(), 4)*100

        sortData(pairsData)

        pValue1 = ADFTest(pairsData, f"{ticker1}_%Return")
        pValue2 = ADFTest(pairsData, f"{ticker2}_%Return")

        if pValue1 == 0.00 and pValue2 == 0.00:
            model = sm.OLS(pairsData[f"{ticker1}_%Return"], pairsData[f"{ticker2}_%Return"]).fit()
            hedgeRatio = round(model.params.iloc[0], 2)
            
            pairsData['Spread'] = pairsData[ticker2] - model.params.iloc[0] * pairsData[ticker1]
            pValueSpread = ADFTest(pairsData, "Spread")

            if pValueSpread == 0.00:
                pearson = pairsData[f"{ticker1}_%Return"].corr(pairsData[f"{ticker2}_%Return"], method='pearson')
                spearman = pairsData[f"{ticker1}_%Return"].corr(pairsData[f"{ticker2}_%Return"], method='spearman')
                kendall = pairsData[f"{ticker1}_%Return"].corr(pairsData[f"{ticker2}_%Return"], method='kendall')

                # Calculate trading metrics for cointegrated pairs
                spreadPrices = pairsData['Spread'].sort_index(ascending=True)
                bandUp, bandDown = getTradeBands(spreadPrices)
                buyPrice, sellPrice, spreadSignal = pairsTradeStrategy(spreadPrices, bandDown, bandUp)
                
                tradeFrame = pairsData[[ticker1, ticker2]].copy().sort_index(ascending=True)
                tradeFrame['Signal'] = spreadSignal

                position = 0
                long = 0
                short = 0
                pnl = []
                marginReq = []
                
                entryLongA = entryShortB = entryShortA = entryLongB = 0

                for i in range(len(tradeFrame['Signal'])):
                    if position == 0:
                        if tradeFrame['Signal'].iloc[i] == 1:
                            entryLongA = tradeFrame[ticker1].iloc[i]
                            entryShortB = tradeFrame[ticker2].iloc[i]
                            position = 1
                            long = 1
                            marginReq.append(entryLongA + entryShortB*1.5)
                            continue
                        elif tradeFrame['Signal'].iloc[i] == -1:
                            entryShortA = tradeFrame[ticker1].iloc[i]
                            entryLongB = tradeFrame[ticker2].iloc[i]
                            position = 1
                            short = 1
                            marginReq.append(entryShortA*1.5 + entryLongB)
                            continue
                    elif position == 1:
                        if tradeFrame['Signal'].iloc[i] == -1 and long:
                            exitLongA = tradeFrame[ticker1].iloc[i]
                            exitShortB = tradeFrame[ticker2].iloc[i]
                            profit = (exitLongA - entryLongA) + (entryShortB - exitShortB)
                            pnl.append(round(profit,5))
                            
                            entryShortA = tradeFrame[ticker1].iloc[i]
                            entryLongB = tradeFrame[ticker2].iloc[i]
                            position = 1
                            long = 0
                            short = 1
                            marginReq.append(entryShortA*1.5 + entryLongB)
                            continue
                            
                        elif tradeFrame['Signal'].iloc[i] == 1 and short:
                            exitShortA = tradeFrame[ticker1].iloc[i]
                            exitLongB = tradeFrame[ticker2].iloc[i]
                            profit = (entryShortA - exitShortA) + (exitLongB - entryLongB)
                            pnl.append(round(profit,5))
                            
                            entryLongA = tradeFrame[ticker1].iloc[i]
                            entryShortB = tradeFrame[ticker2].iloc[i]
                            position = 1
                            short = 0
                            long = 1
                            marginReq.append(entryLongA + entryShortB*1.5)
                            continue

                if len(marginReq) > 1:
                    marginReq.pop(1)
                
                if len(pnl) > 0:
                    totalsum = 0
                    totalcumsum = []
                    
                    for i in pnl:
                        totalsum = totalsum + i
                        totalcumsum.append(totalsum)
                        
                    tradeResults = pd.DataFrame(list(zip(pnl,totalcumsum,marginReq)),
                                              columns=['PnL','Cumulative','Margin'])
                    tradeResults['Returns'] = tradeResults['PnL']/tradeResults['Margin']*100
                    tradeResults['Equity'] = (1 + tradeResults['Returns']/100).cumprod() * 100

                    if SHOW_PLOTS:
                        spreadPrices.plot(figsize=(30,15),label='Spread',c='b')
                        plt.plot(bandUp,label='Up Band', c='g')
                        plt.plot(bandDown,label='Down Band', c='r')
                        plt.scatter(pairsData.sort_index(ascending=True).iloc[0:].index, 
                                   buyPrice, marker = '^', color = 'green', label = 'BUY', s = 200)
                        plt.scatter(pairsData.sort_index(ascending=True).iloc[0:].index, 
                                   sellPrice, marker = 'v', color = 'red', label = 'SELL', s = 200)
                        plt.title(f'Spread Trading Signals for {ticker1}-{ticker2}')
                        plt.legend(loc="upper left")
                        plt.show()

                        plt.figure(figsize=(30,15))
                        plt.plot(tradeResults.index, tradeResults.Equity, label='Equity Curve', c='b')
                        plt.bar(tradeResults.index, tradeResults.PnL, color='maroon', width=0.4)
                        plt.ylabel('Value')
                        plt.title(f'Trading Performance for {ticker1}-{ticker2}')
                        y_min = tradeResults['Equity'].min()
                        y_max = tradeResults['Equity'].max()
                        y_margin = (y_max - y_min) * 0.1
                        plt.ylim(y_min - y_margin, y_max + y_margin)
                        plt.legend()
                        plt.show()

                        plt.figure(figsize=(15, 7))
                        tradeResults['Returns'].hist(bins=20)
                        plt.xlabel('Return')
                        plt.ylabel('Frequency')
                        plt.title(f'Return Distribution for {ticker1}-{ticker2}')
                        plt.show()

                    equity = tradeResults['Equity']
                    total_return = ((equity.iloc[-1] / equity.iloc[0]) - 1) * 100
                    drawdown = equity - equity.cummax()
                    max_drawdown = (drawdown / equity.cummax()).min() * 100
                    calmar_ratio = total_return / abs(max_drawdown)
                    
                    print(f"\nTrading Performance Metrics for {ticker1}-{ticker2}:")
                    print(f"Total Return: {round(total_return,2)}%")
                    print(f"Maximum Drawdown: {abs(round(max_drawdown,2))}%")
                    print(f"Calmar Ratio: {round(calmar_ratio,2)}")
                    print(f"Number of Trades: {len(pnl)}")
                else:
                    print(f"\nNo trades executed for pair {ticker1}-{ticker2}")

                if SHOW_PLOTS:
                    pairsData.Spread.plot(figsize=(8,4))
                    plt.ylabel('Spread')
                    plt.show()

                    pairsData[ticker1].plot(figsize=(30,15), label=f'Market Price {ticker1}')
                    (pairsData[ticker1] * hedgeRatio).plot(figsize=(30,15), label=f'Theoretical Price {ticker1}')
                    plt.legend(loc='upper right', prop={'size':30})
                    plt.ylabel('Price')
                    plt.show()

                return True, (pearson, spearman, kendall, hedgeRatio)
        
        return False, None

    except Exception as e:
        print(f"Error analyzing {ticker1} - {ticker2}: {str(e)}")
        return False, None

In [56]:
successful_pairs = 0
failed_pairs = 0
total_pairs = 0

for ticker1, ticker2 in combinations(dax_tickers, 2):
    total_pairs += 1  
    success, stats = analyze_pair(ticker1, ticker2)
    
    if success:
        successful_pairs += 1
        pearson, spearman, kendall, hedgeRatio = stats
        print(f"Successfully found pair trading opportunity:")
        print(f"Pair: {ticker1} - {ticker2}")
        print(f"Hedge Ratio: {hedgeRatio}")
        print(f"Correlations:")
        print(f"Pearson's: {pearson:.4f}")
        print(f"Spearman's: {spearman:.4f}")
        print(f"Kendall's: {kendall:.4f}")
    else:
        failed_pairs += 1

print("\nAnalysis Complete")
print(f"Total Pairs Tested: {total_pairs}")
print(f"Successful Pairs: {successful_pairs}")
print(f"Failed Pairs: {failed_pairs}")

Error analyzing ADS.DE - CON.DE: exog contains inf or nans
Error analyzing ADS.DE - DHER.DE: exog contains inf or nans
Error analyzing ADS.DE - HFG.DE: exog contains inf or nans
Error analyzing ADS.DE - PUM.DE: exog contains inf or nans
Error analyzing ADS.DE - ZAL.DE: exog contains inf or nans
Error analyzing AIR.DE - CON.DE: exog contains inf or nans
Error analyzing AIR.DE - DHER.DE: exog contains inf or nans
Error analyzing AIR.DE - HFG.DE: exog contains inf or nans
Error analyzing AIR.DE - PUM.DE: exog contains inf or nans
Error analyzing AIR.DE - ZAL.DE: exog contains inf or nans
Error analyzing ALV.DE - CON.DE: exog contains inf or nans
Error analyzing ALV.DE - DHER.DE: exog contains inf or nans
Error analyzing ALV.DE - HFG.DE: exog contains inf or nans
Error analyzing ALV.DE - PUM.DE: exog contains inf or nans
Error analyzing ALV.DE - ZAL.DE: exog contains inf or nans
Error analyzing BAS.DE - CON.DE: exog contains inf or nans
Error analyzing BAS.DE - DHER.DE: exog contains inf o

  if data[i-1] > bandDown[i-1] and data[i] < bandDown[i]:
  elif data[i-1] < bandUp[i-1] and data[i] > bandUp[i]:
  buyPrice.append(data[i])
  sellPrice.append(data[i])
  if data[i-1] > bandDown[i-1] and data[i] < bandDown[i]:
  elif data[i-1] < bandUp[i-1] and data[i] > bandUp[i]:
  buyPrice.append(data[i])
  sellPrice.append(data[i])


Error analyzing CON.DE - HFG.DE: exog contains inf or nans
Error analyzing CON.DE - PUM.DE: exog contains inf or nans
Error analyzing CON.DE - ZAL.DE: exog contains inf or nans
Error analyzing 1COV.DE - DHER.DE: exog contains inf or nans
Error analyzing 1COV.DE - HFG.DE: exog contains inf or nans
Error analyzing 1COV.DE - PUM.DE: exog contains inf or nans
Error analyzing 1COV.DE - ZAL.DE: exog contains inf or nans
Error analyzing DHER.DE - HFG.DE: exog contains inf or nans
Error analyzing DHER.DE - PUM.DE: exog contains inf or nans
Error analyzing DHER.DE - ZAL.DE: exog contains inf or nans
Error analyzing DBK.DE - HFG.DE: exog contains inf or nans
Error analyzing DBK.DE - PUM.DE: exog contains inf or nans
Error analyzing DBK.DE - ZAL.DE: exog contains inf or nans
Error analyzing DB1.DE - HFG.DE: exog contains inf or nans
Error analyzing DB1.DE - PUM.DE: exog contains inf or nans
Error analyzing DB1.DE - ZAL.DE: exog contains inf or nans
Error analyzing DPSTF - HFG.DE: exog contains in

  if data[i-1] > bandDown[i-1] and data[i] < bandDown[i]:
  elif data[i-1] < bandUp[i-1] and data[i] > bandUp[i]:
  buyPrice.append(data[i])
  sellPrice.append(data[i])


Error analyzing EOAN.DE - ZAL.DE: exog contains inf or nans
Error analyzing FRE.DE - HFG.DE: exog contains inf or nans
Error analyzing FRE.DE - PUM.DE: exog contains inf or nans
Error analyzing FRE.DE - ZAL.DE: exog contains inf or nans
Error analyzing FME.DE - HFG.DE: exog contains inf or nans
Error analyzing FME.DE - PUM.DE: exog contains inf or nans
Error analyzing FME.DE - ZAL.DE: exog contains inf or nans
Error analyzing HNR1.DE - HFG.DE: exog contains inf or nans
Error analyzing HNR1.DE - PUM.DE: exog contains inf or nans

Trading Performance Metrics for HNR1.DE-SAP.DE:
Total Return: -24.63%
Maximum Drawdown: 24.63%
Calmar Ratio: -1.0
Number of Trades: 7
Successfully found pair trading opportunity:
Pair: HNR1.DE - SAP.DE
Hedge Ratio: 0.3
Correlations:
Pearson's: 0.2996
Spearman's: 0.2527
Kendall's: 0.1732
Error analyzing HNR1.DE - ZAL.DE: exog contains inf or nans
Error analyzing HEI.DE - HFG.DE: exog contains inf or nans
Error analyzing HEI.DE - PUM.DE: exog contains inf or nans

  if data[i-1] > bandDown[i-1] and data[i] < bandDown[i]:
  elif data[i-1] < bandUp[i-1] and data[i] > bandUp[i]:
  buyPrice.append(data[i])
  sellPrice.append(data[i])


Error analyzing HFG.DE - ZAL.DE: exog contains inf or nans
Error analyzing HEN3.DE - PUM.DE: exog contains inf or nans
Error analyzing HEN3.DE - ZAL.DE: exog contains inf or nans
Error analyzing IFX.DE - PUM.DE: exog contains inf or nans
Error analyzing IFX.DE - ZAL.DE: exog contains inf or nans
Error analyzing LIN.DE - PUM.DE: exog contains inf or nans

Trading Performance Metrics for LIN.DE-SAP.DE:
Total Return: -28.7%
Maximum Drawdown: 28.7%
Calmar Ratio: -1.0
Number of Trades: 11
Successfully found pair trading opportunity:
Pair: LIN.DE - SAP.DE
Hedge Ratio: 0.34
Correlations:
Pearson's: 0.4019
Spearman's: 0.4040
Kendall's: 0.2789
Error analyzing LIN.DE - ZAL.DE: exog contains inf or nans
Error analyzing MBG.DE - PUM.DE: exog contains inf or nans

Trading Performance Metrics for MBG.DE-SAP.DE:
Total Return: -4.04%
Maximum Drawdown: 7.51%
Calmar Ratio: -0.54
Number of Trades: 9
Successfully found pair trading opportunity:
Pair: MBG.DE - SAP.DE
Hedge Ratio: 0.47
Correlations:
Pearson

  if data[i-1] > bandDown[i-1] and data[i] < bandDown[i]:
  elif data[i-1] < bandUp[i-1] and data[i] > bandUp[i]:
  buyPrice.append(data[i])
  sellPrice.append(data[i])
  if data[i-1] > bandDown[i-1] and data[i] < bandDown[i]:
  elif data[i-1] < bandUp[i-1] and data[i] > bandUp[i]:
  buyPrice.append(data[i])
  sellPrice.append(data[i])


Error analyzing MRK.DE - ZAL.DE: exog contains inf or nans
Error analyzing MTX.DE - PUM.DE: exog contains inf or nans
Error analyzing MTX.DE - ZAL.DE: exog contains inf or nans
Error analyzing MUV2.DE - PUM.DE: exog contains inf or nans
Error analyzing MUV2.DE - ZAL.DE: exog contains inf or nans
Error analyzing PUM.DE - ZAL.DE: exog contains inf or nans
Error analyzing QIA.DE - ZAL.DE: exog contains inf or nans
Error analyzing RWE.DE - ZAL.DE: exog contains inf or nans
Error analyzing SAP.DE - ZAL.DE: exog contains inf or nans
Error analyzing SRT3.DE - ZAL.DE: exog contains inf or nans
Error analyzing SIE.DE - ZAL.DE: exog contains inf or nans
Error analyzing SHL.DE - ZAL.DE: exog contains inf or nans
Error analyzing SY1.DE - ZAL.DE: exog contains inf or nans
Error analyzing VOW3.DE - ZAL.DE: exog contains inf or nans
Error analyzing VNA.DE - ZAL.DE: exog contains inf or nans

Analysis Complete
Total Pairs Tested: 741
Successful Pairs: 6
Failed Pairs: 735
