# In [None]:
# Cell 1: Plot Control Settings
# Per-plot on/off switches. In the code visible in this file only the
# 'signals' and 'performance' entries are consulted; the other keys are kept
# unchanged so any lookup elsewhere keeps working.
SHOW_PLOTS = dict(
    initial_prices=True,
    returns=True,
    spread_vs_theo=True,
    spread=True,
    trading_bands=True,
    signals=True,      # spread / bands / entry markers chart for each top pair
    performance=True,  # equity curve, per-trade PnL bars and return histogram
)

# Cell 2: Imports
import pandas as pd
import numpy as np
from statsmodels.tsa.stattools import adfuller
import statsmodels.api as sm
import matplotlib.pyplot as plt
from itertools import combinations

# Cell 3: Analysis Functions
def adf_test(data, column):
    """Return the Augmented Dickey-Fuller p-value for ``data[column]``.

    A column that is empty, or that holds at most one distinct non-missing
    value, short-circuits to ``1.0`` without running the test. Otherwise
    missing values are dropped and the second element of the ``adfuller``
    result (the p-value) is returned.
    """
    series = data[column]
    is_degenerate = len(series) == 0 or series.nunique() <= 1
    if is_degenerate:
        return 1.0

    _, p_value, *_ = adfuller(series.dropna())
    return p_value

def analyze_pair(df, symbol1, symbol2, start_date='2021-01-01'):
    """Check whether two symbols form a stationary spread worth trading.

    Args:
        df: Long-format price table with 'symbol', 'date' and 'close' columns.
        symbol1: Symbol whose price is modelled (the dependent leg).
        symbol2: Symbol used to explain ``symbol1`` (the hedge leg).
        start_date: Only rows with ``date >= start_date`` are considered.

    Returns:
        ``None`` when the pair is rejected (a symbol is missing, fewer than
        252 dates on which both symbols have a close, a constant price
        series, an ADF p-value of 0.01 or more, or any exception). Otherwise
        a dict with keys 'pair', 'hedge_ratio', 'p_value', 'is_tradeable'
        and 'data' (the wide price frame extended with
        '<symbol1>_tPrice' and 'Spread' columns).

    Any exception is reported with ``print`` and turned into ``None`` so a
    single bad pair does not abort a scan over many pairs.
    """
    try:
        mask = df['symbol'].isin([symbol1, symbol2]) & (df['date'] >= start_date)
        pairs_data = df[mask].pivot(index='date', columns='symbol', values='close')

        # Both legs must exist before their columns are touched.
        if symbol1 not in pairs_data.columns or symbol2 not in pairs_data.columns:
            return None

        # Keep only dates where both symbols have a close. A single missing
        # value would otherwise turn the hedge-ratio sums (and with them the
        # whole spread) into NaN and silently reject the pair.
        pairs_data = pairs_data.dropna(subset=[symbol1, symbol2]).copy()

        if len(pairs_data) < 252:
            return None

        X = pairs_data[symbol2].values
        y = pairs_data[symbol1].values

        if np.all(X == X[0]) or np.all(y == y[0]):
            return None

        # Least-squares slope through the origin, rounded to two decimals.
        hedgeRatio = round(np.sum(X * y) / np.sum(X * X), 2)

        theo_column = f'{symbol1}_tPrice'
        pairs_data[theo_column] = pairs_data[symbol2] * hedgeRatio
        pairs_data['Spread'] = pairs_data[symbol1] - pairs_data[theo_column]

        p_value = adf_test(pairs_data, "Spread")
        # Compare the raw p-value: rounding it to two decimals first would
        # only let values below roughly 0.005 pass the 0.01 threshold.
        is_tradeable = bool(p_value < 0.01)
        if not is_tradeable:
            return None

        return {
            'pair': (symbol1, symbol2),
            'hedge_ratio': hedgeRatio,
            'p_value': p_value,
            'is_tradeable': is_tradeable,
            'data': pairs_data
        }

    except Exception as e:
        print(f"Error analyzing {symbol1}-{symbol2}: {str(e)}")
        return None

# Cell 4: Trading Band Function
def getTradeBands(prices, rate=50, num_std=1.5):
    """Build rolling upper and lower bands around a series.

    Args:
        prices: pandas Series to build the bands for.
        rate: Rolling window length used for both the mean and the
            standard deviation.
        num_std: Band half-width in rolling standard deviations. The
            default of 1.5 reproduces the previously hard-coded width.

    Returns:
        ``(bandUp, bandDown)`` - rolling mean plus / minus ``num_std``
        rolling standard deviations. The first ``rate - 1`` entries are NaN
        because the window is not yet full.
    """
    window = prices.rolling(rate)
    sma = window.mean()
    half_width = window.std() * num_std
    return sma + half_width, sma - half_width

# Cell 5: Trading Signal Generation
def pairsTradeStrategy(data, bandDown, bandUp):
    """Turn band crossings of ``data`` into alternating buy / sell signals.

    A buy (+1) is recorded when the series moves from above the lower band
    to below it; a sell (-1) when it moves from below the upper band to
    above it. A crossing in the same direction as the most recent recorded
    signal is ignored, and the lower-band test takes precedence when both
    would match. Index 0 never carries a signal.

    Returns:
        ``(buyPrice, sellPrice, spreadSignal)`` - three lists as long as
        ``data``. The price lists hold the series value at the signal row
        and NaN elsewhere; ``spreadSignal`` holds 1, -1 or 0.
    """
    n_obs = len(data)
    buyPrice = [np.nan] * n_obs
    sellPrice = [np.nan] * n_obs
    spreadSignal = [0] * n_obs
    last_side = 0

    for i in range(1, n_obs):
        previous, current = data.iloc[i - 1], data.iloc[i]

        if previous > bandDown.iloc[i - 1] and current < bandDown.iloc[i]:
            side, price_marks = 1, buyPrice
        elif previous < bandUp.iloc[i - 1] and current > bandUp.iloc[i]:
            side, price_marks = -1, sellPrice
        else:
            continue

        # Repeated crossing on the side we already signalled: leave defaults.
        if side == last_side:
            continue

        price_marks[i] = current
        spreadSignal[i] = side
        last_side = side

    return buyPrice, sellPrice, spreadSignal

# Cell 6: PnL Functions
def openLong(data, index, symbol1, symbol2, position, long):
    """Read both legs' prices at row ``index`` for opening a long trade.

    Returns ``(symbol1 price, symbol2 price, 1, 1)``. The ``position`` and
    ``long`` arguments are not read; the trailing ones are presumably the
    new position / long flags for the caller to store - confirm at call site.
    """
    price_a, price_b = (data[name].iloc[index] for name in (symbol1, symbol2))
    return price_a, price_b, 1, 1

def closeLong(data, index, symbol1, symbol2, position, long):
    """Read both legs' prices at row ``index`` for closing a long trade.

    Returns ``(symbol1 price, symbol2 price, 0, 0)``. The ``position`` and
    ``long`` arguments are not read; the trailing zeros are presumably the
    reset position / long flags - confirm at call site.
    """
    exit_prices = [data[name].iloc[index] for name in (symbol1, symbol2)]
    return exit_prices[0], exit_prices[1], 0, 0

def openShort(data, index, symbol1, symbol2, position, short):
    """Read both legs' prices at row ``index`` for opening a short trade.

    Returns ``(symbol1 price, symbol2 price, 1, 1)``. The ``position`` and
    ``short`` arguments are not read; the trailing ones are presumably the
    new position / short flags - confirm at call site.
    """
    def price_at(symbol):
        return data[symbol].iloc[index]

    return price_at(symbol1), price_at(symbol2), 1, 1

def closeShort(data, index, symbol1, symbol2, position, short):
    """Read both legs' prices at row ``index`` for closing a short trade.

    Returns ``(symbol1 price, symbol2 price, 0, 0)``. The ``position`` and
    ``short`` arguments are not read; the trailing zeros are presumably the
    reset position / short flags - confirm at call site.
    """
    return (*(data[name].iloc[index] for name in (symbol1, symbol2)), 0, 0)

# Cell 7: Calculate PnL for a pair (revised)
def calculate_pair_pnl(pairs_data, symbol1, symbol2, spreadSignal):
    """Replay a signal sequence and collect per-trade PnL and margin.

    The position is always one unit of each leg. Signal ``1`` means long
    ``symbol1`` / short ``symbol2``; signal ``-1`` means short ``symbol1`` /
    long ``symbol2``. An opposite signal closes the open trade at the
    current row's prices and immediately opens the reverse trade at the same
    prices. Signals of 0, unrecognised values, and signals matching the
    current position are ignored. A trade still open at the end of the data
    is not closed.

    Args:
        pairs_data: DataFrame holding one price column per symbol.
        symbol1: Column name of the first leg.
        symbol2: Column name of the second leg.
        spreadSignal: One signal per row of ``pairs_data``.

    Returns:
        ``(pnl, marginReq)`` - ``pnl`` has one entry per closed trade;
        ``marginReq`` has one entry per opened trade (the sum of both entry
        prices), so it can be one element longer than ``pnl``.
    """
    tradeFrame = pairs_data[[symbol1, symbol2]].copy()
    tradeFrame['Signal'] = spreadSignal

    pnl = []
    marginReq = []
    position = 0
    entry_a = entry_b = 0

    rows = zip(
        tradeFrame[symbol1].to_numpy(),
        tradeFrame[symbol2].to_numpy(),
        tradeFrame['Signal'].to_numpy(),
    )
    for price_a, price_b, current_signal in rows:
        if current_signal not in (1, -1) or current_signal == position:
            continue

        if position != 0:
            # Long spread earns (dA - dB); short spread earns the exact
            # negative, -(dA) + dB. The earlier short-close formula
            # subtracted the gain of the long symbol2 leg instead of adding
            # it, which flipped that leg's contribution.
            spread_move = (price_a - entry_a) - (price_b - entry_b)
            pnl.append(position * spread_move)

        # Open (or reverse into) the new position at the current prices.
        entry_a, entry_b = price_a, price_b
        position = int(current_signal)
        marginReq.append(price_a + price_b)

    return pnl, marginReq

# Cell 8: Calculate Performance Metrics
def calculate_performance_metrics(pnl, marginReq):
    """Summarise a sequence of closed trades.

    Each trade's return is its PnL as a percentage of the margin recorded
    for that trade; returns are compounded into an equity curve that starts
    from 100.

    Args:
        pnl: Per-trade profit and loss values.
        marginReq: Per-trade margin; only the first ``len(pnl)`` entries
            are used.

    Returns:
        Dict with keys 'total_return' (final equity minus 100),
        'max_drawdown' (percent, measured against the running equity peak
        including the starting 100), 'calmar_ratio' (total return divided
        by max drawdown, 0 when there is no drawdown), 'num_trades',
        'win_rate' (percent of trades with positive PnL) and
        'trade_results' (DataFrame with PnL / Returns / Equity / Margin
        columns). All numeric entries are 0 and the frame is empty when
        either input is empty.
    """
    result_columns = ['PnL', 'Returns', 'Equity', 'Margin']

    if not pnl or not marginReq:
        return {
            'total_return': 0,
            'max_drawdown': 0,
            'calmar_ratio': 0,
            'num_trades': 0,
            'win_rate': 0,
            # Keep the column layout so column lookups on the empty result
            # do not raise.
            'trade_results': pd.DataFrame(columns=result_columns)
        }

    pnl_array = np.asarray(pnl, dtype=float)
    margin_array = np.asarray(marginReq[:len(pnl)], dtype=float)

    # Per-trade percentage returns compounded from a starting equity of 100.
    returns = pnl_array / margin_array * 100
    equity = (1 + returns / 100).cumprod() * 100

    total_return = equity[-1] - 100

    # The peak must include the starting equity of 100; otherwise a losing
    # first trade is measured against itself and shows no drawdown.
    running_peak = np.maximum(np.maximum.accumulate(equity), 100.0)
    drawdowns = 1 - equity / running_peak
    max_drawdown = drawdowns.max() * 100

    win_rate = (pnl_array > 0).mean() * 100

    # Keep the sign: taking the absolute value would make a losing pair
    # look as good as a winning one.
    calmar_ratio = total_return / max_drawdown if max_drawdown != 0 else 0

    trade_results = pd.DataFrame({
        'PnL': pnl_array,
        'Returns': returns,
        'Equity': equity,
        'Margin': margin_array
    })

    return {
        'total_return': total_return,
        'max_drawdown': max_drawdown,
        'calmar_ratio': calmar_ratio,
        'num_trades': len(pnl),
        'win_rate': win_rate,
        'trade_results': trade_results
    }

# In [None]:
# Cell 9: Main Analysis
# Read the daily price table and list the distinct symbols it contains.
df = pd.read_parquet('nasdaq_daily.parquet')
symbols = df['symbol'].unique().tolist()
results = []
total_pairs = 0
best_pairs = []

print("Starting pair analysis...")

# Visit every unordered symbol pair; the enumerate counter doubles as the
# running total of pairs examined (it stays 0 when there are no pairs).
for total_pairs, (symbol1, symbol2) in enumerate(combinations(symbols, 2), start=1):
    result = analyze_pair(df, symbol1, symbol2)

    if result:
        pairs_data = result['data']
        symbol1, symbol2 = result['pair']

        # Rolling bands around the spread, then band-crossing signals.
        spreadPrices = pairs_data['Spread']
        bandUp, bandDown = getTradeBands(spreadPrices)
        buyPrice, sellPrice, spreadSignal = pairsTradeStrategy(
            spreadPrices, bandDown, bandUp
        )

        # Replay the signals into per-trade PnL and margin.
        pnl, marginReq = calculate_pair_pnl(pairs_data, symbol1, symbol2, spreadSignal)

        # Only pairs with at least one closed trade are kept.
        if pnl:
            metrics = calculate_performance_metrics(pnl, marginReq)
            result['metrics'] = metrics
            result['signals'] = {
                'spread': spreadPrices,
                'bandUp': bandUp,
                'bandDown': bandDown,
                'buyPrice': buyPrice,
                'sellPrice': sellPrice
            }
            best_pairs.append(result)

    # Progress heartbeat every 100 pairs.
    if total_pairs % 100 == 0:
        print(f"Processed {total_pairs} pairs...")

# Sort pairs by total return, best first. The sort is in place, so the list
# stays ordered for everything that follows and can simply be sliced below.
best_pairs.sort(key=lambda x: x['metrics']['total_return'], reverse=True)

print(f"\nTop {min(5, len(best_pairs))} Performing Pairs:")
for idx, pair in enumerate(best_pairs[:5], 1):
    symbol1, symbol2 = pair['pair']
    metrics = pair['metrics']
    print(f"\n{idx}. {symbol1}-{symbol2}")
    print(f"Total Return: {metrics['total_return']:.2f}%")
    print(f"Max Drawdown: {metrics['max_drawdown']:.2f}%")
    print(f"Calmar Ratio: {metrics['calmar_ratio']:.2f}")
    print(f"Win Rate: {metrics['win_rate']:.2f}%")
    print(f"Number of Trades: {metrics['num_trades']}")

    if SHOW_PLOTS['signals']:
        # Spread with its bands and the buy / sell markers.
        signals = pair['signals']
        dates = signals['spread'].index
        plt.figure(figsize=(15,7))
        plt.plot(dates, signals['spread'], label='Spread', c='b')
        plt.plot(dates, signals['bandUp'], label='Upper Band', c='g')
        plt.plot(dates, signals['bandDown'], label='Lower Band', c='r')
        plt.scatter(dates, signals['buyPrice'],
                    marker='^', color='g', label='Buy Signal', s=100)
        plt.scatter(dates, signals['sellPrice'],
                    marker='v', color='r', label='Sell Signal', s=100)
        plt.title(f'Trading Signals for {symbol1}-{symbol2}')
        plt.legend()
        plt.show()

    # Checked independently of the 'signals' switch: previously this block
    # was nested inside it, so turning signal plots off also suppressed the
    # performance plots.
    if SHOW_PLOTS['performance']:
        trade_results = metrics['trade_results']
        trade_numbers = range(len(trade_results))
        plt.figure(figsize=(15,7))

        # Equity curve
        plt.subplot(2,1,1)
        plt.plot(trade_numbers, trade_results['Equity'], label='Equity Curve')
        plt.title(f'Equity Curve {symbol1}-{symbol2}')
        plt.legend()

        # PnL per trade
        plt.subplot(2,1,2)
        plt.bar(trade_numbers, trade_results['PnL'],
                color='maroon', width=0.4)
        plt.title('PnL per Trade')
        plt.tight_layout()
        plt.show()

        # Return distribution
        plt.figure(figsize=(10,5))
        plt.hist(trade_results['Returns'], bins=30, color='blue', alpha=0.7)
        plt.title(f'Return Distribution {symbol1}-{symbol2}')
        plt.xlabel('Return (%)')
        plt.ylabel('Frequency')
        plt.show()


# Aggregate statistics over every pair kept in best_pairs; runs just before
# the closing "Analysis complete" line.

print("\n=== OVERALL STRATEGY ANALYSIS ===")

# Per-pair metrics
total_returns = [p['metrics']['total_return'] for p in best_pairs]
win_rates = [p['metrics']['win_rate'] for p in best_pairs]
drawdowns = [p['metrics']['max_drawdown'] for p in best_pairs]
trade_counts = [p['metrics']['num_trades'] for p in best_pairs]
calmar_ratios = [p['metrics']['calmar_ratio'] for p in best_pairs]

# Pool every individual trade across all pairs
all_trades_pnl = []
all_returns = []
for pair in best_pairs:
    trade_results = pair['metrics']['trade_results']
    all_trades_pnl.extend(trade_results['PnL'].tolist())
    all_returns.extend(trade_results['Returns'].tolist())

if not best_pairs:
    # np.max / np.min raise on an empty sequence, so with no pairs the
    # summary is skipped instead of aborting before the final line.
    print("\nNo pairs with completed trades; skipping summary statistics.")
else:
    print("\n--- Performance Metrics ---")
    print(f"Number of Trading Pairs: {len(best_pairs)}")
    print(f"Average Return: {np.mean(total_returns):.2f}%")
    print(f"Median Return: {np.median(total_returns):.2f}%")
    print(f"Best Return: {np.max(total_returns):.2f}%")
    print(f"Worst Return: {np.min(total_returns):.2f}%")
    print(f"Average Win Rate: {np.mean(win_rates):.2f}%")
    print(f"Average Calmar Ratio: {np.mean(calmar_ratios):.2f}")

    # The 5th percentile is computed once and reused for the tail mean
    # rather than being recomputed for every element.
    var_95 = np.percentile(all_returns, 5)
    tail_returns = [r for r in all_returns if r <= var_95]

    print("\n--- Risk Metrics ---")
    print(f"Average Max Drawdown: {np.mean(drawdowns):.2f}%")
    print(f"Worst Drawdown: {np.max(drawdowns):.2f}%")
    print(f"Value at Risk (95%): {var_95:.2f}%")
    print(f"Expected Shortfall (95%): {np.mean(tail_returns):.2f}%")
    print(f"Return Volatility: {np.std(all_returns):.2f}%")

    # Gross profit over gross loss; infinite when there are gains but no
    # losing trades, undefined (nan) when there are neither.
    gross_profit = np.sum([x for x in all_trades_pnl if x > 0])
    gross_loss = abs(np.sum([x for x in all_trades_pnl if x < 0]))
    if gross_loss > 0:
        profit_factor = gross_profit / gross_loss
    else:
        profit_factor = float('inf') if gross_profit > 0 else float('nan')

    print("\n--- Trading Statistics ---")
    print(f"Total Number of Trades: {sum(trade_counts)}")
    print(f"Average Trades per Pair: {np.mean(trade_counts):.1f}")
    print(f"Profit Factor: {profit_factor:.2f}")
    print(f"Average Profit per Trade: {np.mean(all_trades_pnl):.2f}")
    print(f"Median Profit per Trade: {np.median(all_trades_pnl):.2f}")

    print("\n--- Return Distribution ---")
    percentiles = [1, 5, 10, 25, 50, 75, 90, 95, 99]
    for p in percentiles:
        print(f"{p}th Percentile Return: {np.percentile(all_returns, p):.2f}%")

    # Optional: Portfolio Analysis
    if len(best_pairs) > 1:
        print("\n--- Portfolio Analysis ---")
        # NOTE(review): columns are aligned on each pair's trade_results row
        # index (trade sequence number), not on calendar dates - confirm
        # this is the intended basis for the correlation.
        pair_returns = pd.DataFrame({
            f"{p['pair'][0]}-{p['pair'][1]}": p['metrics']['trade_results']['Returns']
            for p in best_pairs
        })
        corr_matrix = pair_returns.corr().values
        upper_corr = corr_matrix[np.triu_indices_from(corr_matrix, k=1)]
        # Some entries can be NaN (too few overlapping trades); average only
        # the defined ones so a single NaN does not wipe out the result.
        avg_corr = np.nanmean(upper_corr) if np.isfinite(upper_corr).any() else float('nan')
        print(f"Average Correlation between Pairs: {avg_corr:.2f}")

        # Diversification ratio: mean stand-alone volatility over the
        # volatility of the equal-weight average return.
        portfolio_vol = np.std(pair_returns.mean(axis=1))
        weighted_vol = np.mean([np.std(pair_returns[col]) for col in pair_returns.columns])
        div_ratio = weighted_vol / portfolio_vol if portfolio_vol > 0 else float('nan')
        print(f"Diversification Ratio: {div_ratio:.2f}")

print(f"\nAnalysis complete. Found {len(best_pairs)} tradeable pairs out of {total_pairs} combinations.")