In [11]:
# Import Required Libraries
import backtrader as bt
import backtrader.feeds as btfeeds
import pandas as pd
import datetime
import numpy as np
import os
import talib

In [12]:
# Install TA-Lib if not available
try:
    import talib
    print("✓ TA-Lib is available")
except ImportError:
    print("Installing TA-Lib...")
    import subprocess
    import sys
    
    # Try to install TA-Lib
    try:
        subprocess.check_call([sys.executable, "-m", "pip", "install", "TA-Lib"])
        import talib
        print("✓ TA-Lib installed successfully")
    except Exception as e:
        print(f"❌ Failed to install TA-Lib: {e}")
        print("💡 Please install TA-Lib manually:")
        print("   conda install -c conda-forge ta-lib")
        print("   or download from: https://www.lfd.uci.edu/~gohlke/pythonlibs/#ta-lib")

✓ TA-Lib is available


In [13]:
long_conditions = {
        'bullish_reversal_patterns': [
            'CDLHAMMER',              # Hammer
            'CDLINVERTEDHAMMER',      # Inverted Hammer
            'CDLMORNINGSTAR',         # Morning Star
            'CDLMORNINGDOJISTAR',     # Morning Doji Star
            'CDLENGULFING',           # Bullish Engulfing
            'CDLPIERCING',            # Piercing Pattern
            'CDLHARAMI',              # Bullish Harami
            'CDLHARAMICROSS',         # Bullish Harami Cross
            'CDLTAKURI',              # Takuri (Dragonfly Doji)
        ],
        
        'bullish_continuation_patterns': [
            'CDL3WHITESOLDIERS',      # Three White Soldiers
            'CDLRISEFALL3METHODS',    # Rising Three Methods
            'CDLMATHOLD',             # Mat Hold
            'CDLSEPARATINGLINES',     # Bullish Separating Lines
            'CDLTASUKIGAP',           # Bullish Tasuki Gap uptrend
        ],
        
        'bullish_bottom_patterns': [
            'CDLABANDONEDBABY',       # Abandoned Baby
            'CDLLADDERBOTTOM',        # Ladder Bottom
            'CDLMATCHINGLOW',         # Matching Low
            'CDLUNIQUE3RIVER',        # Unique Three River
        ],
        
        'bullish_special_patterns': [
            'CDL3INSIDE',             # Three Inside Up
            'CDL3OUTSIDE',            # Three Outside Up
            'CDLBELTHOLD',            # Belt Hold
            'CDLBREAKAWAY',           # Breakaway
            'CDLKICKING',             # Kicking
            'CDLKICKINGBYLENGTH',     # Kicking By Length
            'CDLSTICKSANDWICH',       # Stick Sandwich
        ]
    }

In [14]:
# sell_conditions = {
#         'bearish_reversal_patterns': [
#             'CDLSHOOTINGSTAR',        # Shooting Star
#             'CDLHANGINGMAN',          # Hanging Man
#             'CDLEVENINGSTAR',         # Evening Star
#             'CDLEVENINGDOJISTAR',     # Evening Doji Star
#             'CDLENGULFING',           # Bearish Engulfing (negative signal)
#             'CDLDARKCLOUDCOVER',      # Dark Cloud Cover
#             'CDLGRAVESTONEDOJI',      # Gravestone Doji
#             'CDLHARAMI',              # Bearish Harami (negative signal)
#             'CDLHARAMICROSS',         # Bearish Harami Cross (negative signal)
#         ],
        
#         'bearish_continuation_patterns': [
#             'CDL3BLACKCROWS',         # Three Black Crows
#             'CDLFALL3METHODS',        # Falling Three Methods
#             'CDLIDENTICAL3CROWS',     # Identical Three Crows
#             'CDLADVANCEBLOCK',        # Advance Block
#             'CDLSTALLEDPATTERN',      # Stalled Pattern
#         ],
        
#         'bearish_top_patterns': [
#             'CDL2CROWS',              # Two Crows
#             'CDLUPSIDEGAP2CROWS',     # Upside Gap Two Crows
#             'CDLXSIDEGAP3METHODS',    # Upside/Downside Gap Three Methods
#         ],
        
#         'bearish_special_patterns': [
#             'CDL3INSIDE',             # Three Inside Down
#             'CDL3OUTSIDE',            # Three Outside Down
#             'CDL3LINESTRIKE',         # Three Line Strike (bearish context)
#             'CDLINNECK',              # In-Neck Line
#             'CDLONNECK',              # On-Neck Line
#             'CDLTHRUSTING',           # Thrusting Pattern
#             'CDLBELTHOLD',            # Belt Hold (bearish context)
#             'CDLBREAKAWAY',           # Breakaway (bearish context)
#         ]
#     }

In [None]:
# # Strategy Candlestick Patterns - Practical Implementation

# class Long_Candlesticks(bt.Strategy):
#     """
#     Strategy focusing on long candlestick patterns for trade entries
#     Entry signals based on bullish patterns using TA-Lib candlestick functions
    
#     Stop-loss and take-profit based on ATR

#     Ideally suited for swing trading with signals on 1H, 4H charts

#     """
    
#     params = (
#         ('printlog', False),
#         ('atr_period', 14),
#         ('atr_sl_multiplier', 2.0),
#         ('atr_tp_multiplier', 3.0),
#         ('fixed_atr', 40),  # Fixed ATR value in points

#         # to check here
#         ('lookback_period', 50),  # Number of bars to look back for pattern detection
#     )

#     def __init__(self):
#         # ATR for stop-loss and take-profit calculations
#         self.atr = self.p.fixed_atr

#         self.stop_loss = None
#         self.take_profit = None
        
#         self.order = None
#         self.trade_count = 0
        
#         # Store OHLC data for TA-Lib calculations
#         self.open_data = []
#         self.high_data = []
#         self.low_data = []
#         self.close_data = []

#     def log(self, txt, dt=None, doprint=False):
#         if self.params.printlog or doprint:
#             dt = dt or self.datas[0].datetime.date(0)
#             print(f'{dt.isoformat()}, {txt}')

#     def notify_order(self, order):
#         if order.status in [order.Completed]:
#             if order.isbuy():
#                 self.log(f'BUY EXECUTED, Price: {order.executed.price:.2f}')
#             else:
#                 self.log(f'SELL EXECUTED, Price: {order.executed.price:.2f}')
#         self.order = None

#     def notify_trade(self, trade):
#         if not trade.isclosed:
#             return
#         self.trade_count += 1
#         self.log(f'TRADE #{self.trade_count}: PROFIT {trade.pnl:.2f}', doprint=True)

#     def check_bullish_patterns(self, open_prices, high_prices, low_prices, close_prices):
#         """Check for bullish candlestick patterns using TA-Lib"""
#         bullish_signals = []
        
#         try:
#             # Convert to numpy arrays
#             open_arr = np.array(open_prices, dtype=np.float64)
#             high_arr = np.array(high_prices, dtype=np.float64)
#             low_arr = np.array(low_prices, dtype=np.float64)
#             close_arr = np.array(close_prices, dtype=np.float64)
            
#             # Check bullish reversal patterns
#             hammer = talib.CDLHAMMER(open_arr, high_arr, low_arr, close_arr)
#             inverted_hammer = talib.CDLINVERTEDHAMMER(open_arr, high_arr, low_arr, close_arr)
#             morning_star = talib.CDLMORNINGSTAR(open_arr, high_arr, low_arr, close_arr)
#             morning_doji = talib.CDLMORNINGDOJISTAR(open_arr, high_arr, low_arr, close_arr)
#             engulfing = talib.CDLENGULFING(open_arr, high_arr, low_arr, close_arr)
#             piercing = talib.CDLPIERCING(open_arr, high_arr, low_arr, close_arr)
#             harami = talib.CDLHARAMI(open_arr, high_arr, low_arr, close_arr)

#             # added patterns
#             takuri = talib.CDLTAKURI(open_arr, high_arr, low_arr, close_arr)  # Takuri (Dragonfly Doji)
#             rise_fall = talib.CDLRISEFALL3METHODS(open_arr, high_arr, low_arr, close_arr)  # Rising Three Methods
#             mat_hold = talib.CDLMATHOLD(open_arr, high_arr, low_arr, close_arr)  # Mat Hold
#             separating_lines = talib.CDLSEPARATINGLINES(open_arr, high_arr, low_arr, close_arr)  # Bullish Separating Lines
#             tasuki_gap = talib.CDLTASUKIGAP(open_arr, high_arr, low_arr, close_arr)  # Bullish Tasuki Gap uptrend
#             abandoned_baby = talib.CDLABANDONEDBABY(open_arr, high_arr, low_arr, close_arr)  # Abandoned Baby
#             ladder_bottom = talib.CDLLADDERBOTTOM(open_arr, high_arr, low_arr, close_arr)  # Ladder Bottom
#             matching_low = talib.CDLMATCHINGLOW(open_arr, high_arr, low_arr, close_arr)  # Matching Low
#             unique_three_river = talib.CDLUNIQUE3RIVER(open_arr, high_arr, low_arr, close_arr)  # Unique Three River
#             three_inside = talib.CDL3INSIDE(open_arr, high_arr, low_arr, close_arr)  # Three Inside Up
#             three_outside = talib.CDL3OUTSIDE(open_arr, high_arr, low_arr, close_arr)  # Three Outside Up
#             belt_hold = talib.CDLBELTHOLD(open_arr, high_arr, low_arr, close_arr)  # Belt Hold
#             breakaway = talib.CDLBREAKAWAY(open_arr, high_arr, low_arr, close_arr)  # Breakaway
#             kicking = talib.CDLKICKING(open_arr, high_arr, low_arr, close_arr)  # Kicking
#             kicking_by_length = talib.CDLKICKINGBYLENGTH(open_arr, high_arr, low_arr, close_arr)  # Kicking By Length
#             sticks_sandwich = talib.CDLSTICKSANDWICH(open_arr, high_arr, low_arr, close_arr)  # Stick Sandwich

#             # Check bullish continuation patterns
#             three_white_soldiers = talib.CDL3WHITESOLDIERS(open_arr, high_arr, low_arr, close_arr)
            
#             # Combine all bullish signals (positive values indicate bullish patterns)
#             patterns = [hammer, inverted_hammer, morning_star, morning_doji, 
#                        engulfing, piercing, harami, three_white_soldiers, 
#                        takuri, rise_fall, mat_hold, separating_lines, tasuki_gap, 
#                        abandoned_baby, ladder_bottom, matching_low, unique_three_river, 
#                        three_inside, three_outside, belt_hold, breakaway, kicking, 
#                        kicking_by_length, sticks_sandwich]

#             # Check if any pattern shows a bullish signal (> 0) in the last bar
#             for pattern in patterns:
#                 if len(pattern) > 0 and pattern[-1] > 0:
#                     bullish_signals.append(pattern[-1])
            
#             return len(bullish_signals) > 0
            
#         except Exception as e:
#             self.log(f"Pattern detection error: {e}")
#             return False

#     def next(self):
#         # Store current OHLC data
#         self.open_data.append(float(self.data.open[0]))
#         self.high_data.append(float(self.data.high[0]))
#         self.low_data.append(float(self.data.low[0]))
#         self.close_data.append(float(self.data.close[0]))
        
#         # Keep only the last lookback_period bars
#         if len(self.close_data) > self.p.lookback_period:
#             self.open_data = self.open_data[-self.p.lookback_period:]
#             self.high_data = self.high_data[-self.p.lookback_period:]
#             self.low_data = self.low_data[-self.p.lookback_period:]
#             self.close_data = self.close_data[-self.p.lookback_period:]
#         # Wait for enough data for pattern detection
#         # Wait for enough data and ATR to stabilize
#         if len(self.close_data) < 30 or len(self.data) < 60:
#             return

#         if self.order:
#             return

#         if not self.position:
#             # Check for bullish candlestick patterns
#             if self.check_bullish_patterns(self.open_data, self.high_data, 
#                                          self.low_data, self.close_data):
#                 self.log('BUY SIGNAL: Bullish candlestick pattern detected')
#                 self.order = self.buy()
#                 # Set stop-loss and take-profit based on fixed ATR
#                 self.stop_loss = self.data.close[0] - (self.atr * self.p.atr_sl_multiplier)
#                 self.take_profit = self.data.close[0] + (self.atr * self.p.atr_tp_multiplier)
#                 self.log(f'SET SL: {self.stop_loss:.2f}, TP: {self.take_profit:.2f}')
#         else:
#             # Manage existing position with ATR-based stop-loss and take-profit
#             if self.data.close[0] <= self.stop_loss:
#                 self.log('STOP-LOSS HIT')
#                 self.order = self.sell()
#             elif self.data.close[0] >= self.take_profit:
#                 self.log('TAKE-PROFIT HIT')
#                 self.order = self.sell()

#     def stop(self):
#         self.log(f'Long_Candlesticks Strategy: {self.trade_count} trades, Final Value: ${self.broker.getvalue():.2f}', doprint=True)

# print("✓ Long_Candlesticks Strategy created - using TA-Lib candlestick patterns!")

✓ Long_Candlesticks Strategy created - using TA-Lib candlestick patterns!


In [None]:
# # Backtest Function for Long_Candlesticks Strategy

# def test_long_candlesticks_strategy():
#     """Test the Long_Candlesticks strategy with S&P 500 data"""
#     print("=== TESTING Long_Candlesticks ===\n")
    
#     cerebro = bt.Cerebro()
    
#     # Load S&P 500 data
#     spy_file = r'../10_backtrader\01_CDL_Patterns_Strategy\01_data\SP500_generating_synthetic_financial_return_series\SP500_bootstrap_prices.csv'

#     df = pd.read_csv(spy_file, skiprows=3, header=None)
#     df.columns = ['Date', 'Open', 'High', 'Low', 'Close', 'Volume']
#     df['Date'] = pd.to_datetime(df['Date'])
#     df = df.sort_values('Date').dropna()
    
#     # Use last 20 years for testing
#     end_date = df['Date'].max()
#     start_date = end_date - pd.DateOffset(years=20)
#     df_recent = df[df['Date'] >= start_date].copy()
    
#     print(f"📊 Data: {len(df_recent)} trading days from {start_date.date()} to {end_date.date()}")
    
#     # Create temporary file for backtrader
#     temp_file = 'temp_simplified_test.csv'
#     df_recent.to_csv(temp_file, index=False)
    
#     # Create data feed
#     data = btfeeds.GenericCSVData(
#         dataname=temp_file,
#         dtformat=('%Y-%m-%d'),
#         datetime=0, open=1, high=2, low=3, close=4, volume=5,
#         openinterest=-1, headers=True,
#     )

#     # Add data and strategy
#     cerebro.adddata(data)
#     cerebro.addstrategy(Long_Candlesticks, printlog=False)
    
#     # Set initial capital and commission
#     initial_capital = 100000.0
#     cerebro.broker.setcash(initial_capital)
#     cerebro.broker.setcommission(commission=0.001)  # 0.1% commission
    
#     # Add analyzers
#     cerebro.addanalyzer(bt.analyzers.TradeAnalyzer, _name="trades")
#     cerebro.addanalyzer(bt.analyzers.SharpeRatio, _name="sharpe")
#     cerebro.addanalyzer(bt.analyzers.DrawDown, _name="drawdown")

#     print(f"💰 Starting Capital: ${initial_capital:,.2f}")
#     print("🚀 Running backtest...\n")
    
#     # Run backtest
#     results = cerebro.run()
    
#     # Clean up temporary file
#     if os.path.exists(temp_file):
#         os.remove(temp_file)
    
#     # Calculate results
#     final_value = cerebro.broker.getvalue()
#     total_return = ((final_value - initial_capital) / initial_capital) * 100
    
#     print(f"\n=== BACKTEST RESULTS ===")
#     print(f"💰 Final Portfolio Value: ${final_value:,.2f}")
#     print(f"📈 Total Return: {total_return:.2f}%")
#     print(f"💵 Net Profit: ${final_value - initial_capital:,.2f}")
    
#     # Analyze trades
#     strat = results[0]
#     trade_analysis = strat.analyzers.trades.get_analysis()
    
#     if trade_analysis and hasattr(trade_analysis, 'total'):
#         total_trades = getattr(trade_analysis.total, 'total', 0)
#         print(f"\n=== TRADE ANALYSIS ===")
#         print(f"📊 Total Trades: {total_trades}")
        
#         # Winning trades
#         if hasattr(trade_analysis, 'won'):
#             won_trades = getattr(trade_analysis.won, 'total', 0)
#             won_pnl = getattr(trade_analysis.won, 'pnl', {}).get('total', 0)
#             win_rate = (won_trades / total_trades * 100) if total_trades > 0 else 0
#             print(f"✅ Winning Trades: {won_trades} ({win_rate:.1f}%)")
#             if won_pnl:
#                 print(f"💰 Winning PnL: ${won_pnl:.2f}")
        
#         # Losing trades  
#         if hasattr(trade_analysis, 'lost'):
#             lost_trades = getattr(trade_analysis.lost, 'total', 0)
#             lost_pnl = getattr(trade_analysis.lost, 'pnl', {}).get('total', 0)
#             print(f"❌ Losing Trades: {lost_trades}")
#             if lost_pnl:
#                 print(f"💸 Losing PnL: ${lost_pnl:.2f}")
    
#     # Performance metrics
#     print(f"\n=== PERFORMANCE METRICS ===")
    
#     # Sharpe Ratio
#     sharpe_analysis = strat.analyzers.sharpe.get_analysis()
#     if sharpe_analysis and 'sharperatio' in sharpe_analysis:
#         sharpe_ratio = sharpe_analysis['sharperatio'] or 0
#         print(f"📊 Sharpe Ratio: {sharpe_ratio:.4f}")

#     # Drawdown Analysis
#     drawdown_analysis = strat.analyzers.drawdown.get_analysis()
#     if drawdown_analysis:
#         max_dd = drawdown_analysis.get('max', {}).get('drawdown', 0)
#         print(f"📉 Max Drawdown: {max_dd:.2f}%")
    
#     return results, total_trades, final_value

# print("✓ Backtest function ready for Long_Candlesticks")

✓ Backtest function ready for Long_Candlesticks


In [None]:
# # Execute Long_Candlesticks Backtest
# try:
#     results, trade_count, final_value = test_long_candlesticks_strategy()
    
#     print(f"\n🎉 BACKTEST COMPLETED SUCCESSFULLY!")
#     print(f"📊 Summary: {trade_count} trades, Final Value: ${final_value:,.2f}")
    
#     if trade_count > 10:
#         print("✅ Strategy shows active trading with multiple trades")
#     elif trade_count > 1:
#         print("📈 Strategy executed multiple trades")
#     else:
#         print("⚠️ Strategy was very selective with few trades")
        
# except Exception as e:
#     print(f"❌ Error running backtest: {str(e)}")
#     print("💡 Make sure the S&P 500 data file exists at the specified path")

=== TESTING Long_Candlesticks ===

❌ Error running backtest: [Errno 2] No such file or directory: '../10_backtrader\\01_CDL_Patterns_Strategy\\01_data\\SP500_generating_synthetic_financial_return_series\\SP500_bootstrap_prices.csv'
💡 Make sure the S&P 500 data file exists at the specified path


# Step 1: Synthetic Data Preparation for Multiple Backtests

This section prepares synthetic price data for running multiple backtests on randomly selected price series.

In [18]:
# Step 1: Load and Prepare Synthetic Data for Multiple Backtests

import random
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

def load_synthetic_data():
    """Load synthetic price data and prepare for backtesting"""
    print("=== STEP 1: SYNTHETIC DATA PREPARATION ===\n")
    
    # Load the synthetic data file - using absolute path from current location
    spy_file = r'01_data\SP500_generating_synthetic_financial_return_series\SP500_bootstrap_prices.csv'
    
    try:
        # Read the CSV file
        df = pd.read_csv(spy_file)
        print(f"✅ Successfully loaded synthetic data: {df.shape[0]} rows, {df.shape[1]} columns")
        
        # Display basic info about the data
        print(f"📊 Data shape: {df.shape}")
        print(f"🔢 Available price series: {df.shape[1]} columns")
        print(f"📅 Time period: {df.shape[0]} days")
        
        # Show first few column names to verify structure
        print(f"\n📋 First 10 columns: {list(df.columns[:10])}")
        print(f"📋 Last 5 columns: {list(df.columns[-5:])}")
        
        return df
        
    except FileNotFoundError:
        print(f"❌ Error: Could not find file at {spy_file}")
        print("💡 Please make sure the synthetic data file exists at the specified path")
        return None
    except Exception as e:
        print(f"❌ Error loading data: {str(e)}")
        return None

def select_random_columns(df, num_series=10, seed=42):
    """Randomly select columns from the synthetic data"""
    if df is None:
        return None, None
    
    # Set random seed for reproducibility
    random.seed(seed)
    np.random.seed(seed)
    
    # Get all price columns (excluding index if present)
    price_columns = [col for col in df.columns if col.startswith('price_boot_')]
    
    if len(price_columns) < num_series:
        print(f"⚠️ Warning: Only {len(price_columns)} price columns available, selecting all")
        selected_columns = price_columns
    else:
        # Randomly select specified number of columns
        selected_columns = random.sample(price_columns, num_series)
    
    print(f"\n🎲 Randomly selected {len(selected_columns)} price series:")
    for i, col in enumerate(selected_columns, 1):
        # Extract series number from column name
        series_num = col.replace('price_boot_', '')
        print(f"   {i:2d}. {col} (Series #{series_num})")
    
    return selected_columns, price_columns

def create_date_index(num_days, start_date='2024-09-30'):
    """Create synthetic date index starting from specified date"""
    start = datetime.strptime(start_date, '%Y-%m-%d')
    dates = [start + timedelta(days=i) for i in range(num_days)]
    return dates

def prepare_synthetic_data():
    """Main function to prepare all synthetic data"""
    print("🚀 Starting synthetic data preparation...\n")
    
    # Load the data
    df = load_synthetic_data()
    if df is None:
        return None, None, None
    
    # Select random columns
    selected_columns, all_price_columns = select_random_columns(df, num_series=10)
    if selected_columns is None:
        return None, None, None
    
    # Create date index
    dates = create_date_index(len(df))
    print(f"\n📅 Created date index from {dates[0].strftime('%Y-%m-%d')} to {dates[-1].strftime('%Y-%m-%d')}")
    
    # Extract selected price series with date index
    selected_data = df[selected_columns].copy()
    selected_data.index = dates
    
    print(f"\n✅ Data preparation completed successfully!")
    print(f"📊 Final dataset shape: {selected_data.shape}")
    print(f"📈 Price range across all series:")
    print(f"   Min: ${selected_data.min().min():.2f}")
    print(f"   Max: ${selected_data.max().max():.2f}")
    print(f"   Mean: ${selected_data.mean().mean():.2f}")
    
    return selected_data, selected_columns, dates

# Execute Step 1
synthetic_data, selected_series, date_index = prepare_synthetic_data()

🚀 Starting synthetic data preparation...

=== STEP 1: SYNTHETIC DATA PREPARATION ===

✅ Successfully loaded synthetic data: 2160 rows, 1001 columns
📊 Data shape: (2160, 1001)
🔢 Available price series: 1001 columns
📅 Time period: 2160 days

📋 First 10 columns: ['Unnamed: 0', 'price_boot_0', 'price_boot_1', 'price_boot_2', 'price_boot_3', 'price_boot_4', 'price_boot_5', 'price_boot_6', 'price_boot_7', 'price_boot_8']
📋 Last 5 columns: ['price_boot_995', 'price_boot_996', 'price_boot_997', 'price_boot_998', 'price_boot_999']

🎲 Randomly selected 10 price series:
    1. price_boot_654 (Series #654)
    2. price_boot_114 (Series #114)
    3. price_boot_25 (Series #25)
    4. price_boot_759 (Series #759)
    5. price_boot_281 (Series #281)
    6. price_boot_250 (Series #250)
    7. price_boot_228 (Series #228)
    8. price_boot_142 (Series #142)
    9. price_boot_754 (Series #754)
   10. price_boot_104 (Series #104)

📅 Created date index from 2024-09-30 to 2030-08-29

✅ Data preparation comp

# Step 2: OHLCV Generation from Single Price Series

This section creates realistic OHLCV (Open, High, Low, Close, Volume) data from single closing price series for candlestick pattern analysis.

In [19]:
# Step 2: Generate OHLCV Data from Single Price Series

def generate_ohlcv_from_prices(price_series, volatility_factor=0.02, volume_base=1000000):
    """
    Generate realistic OHLCV data from a single price series
    
    Parameters:
    -----------
    price_series : pandas.Series
        Series of closing prices with datetime index
    volatility_factor : float
        Maximum intraday volatility as percentage (default: 2%)
    volume_base : int
        Base volume for generating synthetic volume data
    
    Returns:
    --------
    pandas.DataFrame
        DataFrame with Date, Open, High, Low, Close, Volume columns
    """
    np.random.seed(42)  # For reproducible results
    
    prices = price_series.values
    dates = price_series.index
    n_days = len(prices)
    
    # Initialize arrays
    opens = np.zeros(n_days)
    highs = np.zeros(n_days)
    lows = np.zeros(n_days)
    closes = prices.copy()  # Use actual prices as closes
    volumes = np.zeros(n_days)
    
    # Generate OHLCV data
    for i in range(n_days):
        current_close = closes[i]
        
        # Generate Open price
        if i == 0:
            # First day: open slightly different from close
            open_noise = np.random.uniform(-0.005, 0.005)  # ±0.5%
            opens[i] = current_close * (1 + open_noise)
        else:
            # Subsequent days: open near previous close with gap
            gap_factor = np.random.uniform(-0.01, 0.01)  # ±1% gap
            opens[i] = closes[i-1] * (1 + gap_factor)
        
        # Generate intraday range
        daily_volatility = np.random.uniform(0.005, volatility_factor)  # 0.5% to volatility_factor%
        
        # Calculate high and low ensuring they respect OHLC constraints
        range_size = current_close * daily_volatility
        
        # High must be >= max(Open, Close)
        min_high = max(opens[i], current_close)
        high_extension = np.random.uniform(0, range_size)
        highs[i] = min_high + high_extension
        
        # Low must be <= min(Open, Close)
        max_low = min(opens[i], current_close)
        low_extension = np.random.uniform(0, range_size)
        lows[i] = max_low - low_extension
        
        # Ensure low is not negative
        lows[i] = max(lows[i], current_close * 0.9)  # Safety: low can't be more than 10% below close
        
        # Generate volume (higher volume on days with larger price movements)
        price_change = abs(current_close - opens[i]) / opens[i] if opens[i] > 0 else 0
        volume_multiplier = 1 + (price_change * 5)  # Higher volume on volatile days
        volume_noise = np.random.uniform(0.5, 1.5)
        volumes[i] = int(volume_base * volume_multiplier * volume_noise)
    
    # Create DataFrame
    ohlcv_df = pd.DataFrame({
        'Date': dates,
        'Open': opens,
        'High': highs,
        'Low': lows,
        'Close': closes,
        'Volume': volumes.astype(int)
    })
    
    return ohlcv_df

def validate_ohlcv_data(df):
    """Validate OHLCV data for proper candlestick constraints"""
    errors = []
    
    for i, row in df.iterrows():
        # Check basic OHLC constraints
        if row['High'] < max(row['Open'], row['Close']):
            errors.append(f"Row {i}: High < max(Open, Close)")
        
        if row['Low'] > min(row['Open'], row['Close']):
            errors.append(f"Row {i}: Low > min(Open, Close)")
        
        if row['High'] < row['Low']:
            errors.append(f"Row {i}: High < Low")
        
        if row['Volume'] <= 0:
            errors.append(f"Row {i}: Invalid volume")
    
    return errors

def generate_all_ohlcv_series(synthetic_data, selected_series):
    """Generate OHLCV data for all selected price series"""
    print("=== STEP 2: OHLCV GENERATION ===\n")
    
    if synthetic_data is None or selected_series is None:
        print("❌ Error: No synthetic data available. Please run Step 1 first.")
        return None
    
    ohlcv_datasets = {}
    validation_results = {}
    
    print(f"🔧 Generating OHLCV data for {len(selected_series)} price series...\n")
    
    for i, column in enumerate(selected_series, 1):
        series_num = column.replace('price_boot_', '')
        print(f"   {i:2d}. Processing Series #{series_num}...")
        
        # Extract price series
        price_series = synthetic_data[column]
        
        # Generate OHLCV data
        ohlcv_df = generate_ohlcv_from_prices(price_series)
        
        # Validate the data
        validation_errors = validate_ohlcv_data(ohlcv_df)
        
        if validation_errors:
            print(f"       ⚠️  Found {len(validation_errors)} validation errors")
            validation_results[column] = validation_errors
        else:
            print(f"       ✅ OHLCV data generated successfully")
            validation_results[column] = []
        
        # Store the dataset
        ohlcv_datasets[column] = ohlcv_df
        
        # Print basic statistics
        price_range = ohlcv_df['Close'].max() - ohlcv_df['Close'].min()
        avg_volume = ohlcv_df['Volume'].mean()
        print(f"       📊 Price range: ${price_range:.2f}, Avg volume: {avg_volume:,.0f}")
    
    print(f"\n✅ OHLCV generation completed for all {len(selected_series)} series!")
    
    # Summary statistics
    total_errors = sum(len(errors) for errors in validation_results.values())
    if total_errors == 0:
        print("🎉 All datasets passed validation checks!")
    else:
        print(f"⚠️  Total validation errors across all series: {total_errors}")
    
    return ohlcv_datasets, validation_results

# Execute Step 2
if 'synthetic_data' in globals() and synthetic_data is not None:
    ohlcv_data, validation_summary = generate_all_ohlcv_series(synthetic_data, selected_series)
    
    # Display sample data from first series
    if ohlcv_data:
        first_series = list(ohlcv_data.keys())[0]
        print(f"\n📋 Sample OHLCV data from {first_series}:")
        print(ohlcv_data[first_series].head())
else:
    print("❌ Error: Please run Step 1 first to load synthetic data.")

=== STEP 2: OHLCV GENERATION ===

🔧 Generating OHLCV data for 10 price series...

    1. Processing Series #654...
       ✅ OHLCV data generated successfully
       📊 Price range: $2215.34, Avg volume: 1,016,696
    2. Processing Series #114...
       ✅ OHLCV data generated successfully
       📊 Price range: $708.45, Avg volume: 1,017,168
    3. Processing Series #25...
       ✅ OHLCV data generated successfully
       📊 Price range: $1169.83, Avg volume: 1,016,609
    4. Processing Series #759...
       ✅ OHLCV data generated successfully
       📊 Price range: $1130.11, Avg volume: 1,016,834
    5. Processing Series #281...
       ✅ OHLCV data generated successfully
       📊 Price range: $1647.76, Avg volume: 1,017,184
    6. Processing Series #250...
       ✅ OHLCV data generated successfully
       📊 Price range: $1169.83, Avg volume: 1,016,609
    4. Processing Series #759...
       ✅ OHLCV data generated successfully
       📊 Price range: $1130.11, Avg volume: 1,016,834
    5. Pro

# Step 3: Individual Backtest Execution

This section runs separate backtests on each of the 10 synthetic OHLCV datasets and collects detailed performance metrics.

In [20]:
# Step 3: Individual Backtest Execution on Multiple Series

import backtrader as bt
import backtrader.feeds as btfeeds
import os
import numpy as np

def run_single_series_backtest(ohlcv_df, series_name, initial_capital=100000.0):
    """
    Run backtest on a single OHLCV dataset
    
    Parameters:
    -----------
    ohlcv_df : pandas.DataFrame
        DataFrame with Date, Open, High, Low, Close, Volume columns
    series_name : str
        Name identifier for the series (e.g., 'price_boot_654')
    initial_capital : float
        Starting capital for backtest
    
    Returns:
    --------
    dict
        Dictionary containing all performance metrics and results
    """
    
    # Create Cerebro instance
    cerebro = bt.Cerebro()
    
    # Create temporary CSV file for backtrader
    temp_filename = f'temp_{series_name}_backtest.csv'
    ohlcv_df.to_csv(temp_filename, index=False)
    
    try:
        # Create data feed
        data = btfeeds.GenericCSVData(
            dataname=temp_filename,
            dtformat=('%Y-%m-%d'),
            datetime=0, open=1, high=2, low=3, close=4, volume=5,
            openinterest=-1, headers=True,
        )
        
        # Add data and strategy
        cerebro.adddata(data)
        cerebro.addstrategy(Long_Candlesticks, printlog=False)
        
        # Set broker parameters
        cerebro.broker.setcash(initial_capital)
        cerebro.broker.setcommission(commission=0.001)  # 0.1% commission
        
        # Add analyzers
        cerebro.addanalyzer(bt.analyzers.TradeAnalyzer, _name="trades")
        cerebro.addanalyzer(bt.analyzers.SharpeRatio, _name="sharpe")
        cerebro.addanalyzer(bt.analyzers.DrawDown, _name="drawdown")
        cerebro.addanalyzer(bt.analyzers.Returns, _name="returns")
        
        # Run backtest
        results = cerebro.run()
        
        # Extract results
        strategy = results[0]
        final_value = cerebro.broker.getvalue()
        total_return = ((final_value - initial_capital) / initial_capital) * 100
        
        # Extract analyzer results
        trade_analysis = strategy.analyzers.trades.get_analysis()
        sharpe_analysis = strategy.analyzers.sharpe.get_analysis()
        drawdown_analysis = strategy.analyzers.drawdown.get_analysis()
        returns_analysis = strategy.analyzers.returns.get_analysis()
        
        # Compile results
        results_dict = {
            'series_name': series_name,
            'series_num': series_name.replace('price_boot_', ''),
            'initial_capital': initial_capital,
            'final_value': final_value,
            'total_return_pct': total_return,
            'net_profit': final_value - initial_capital,
            'total_trades': 0,
            'winning_trades': 0,
            'losing_trades': 0,
            'win_rate_pct': 0,
            'winning_pnl': 0,
            'losing_pnl': 0,
            'sharpe_ratio': 0,
            'max_drawdown_pct': 0,
            'avg_return': 0,
            'backtest_success': True,
            'error_message': None
        }
        
        # Extract trade statistics
        if trade_analysis and hasattr(trade_analysis, 'total'):
            results_dict['total_trades'] = getattr(trade_analysis.total, 'total', 0)
            
            if hasattr(trade_analysis, 'won'):
                results_dict['winning_trades'] = getattr(trade_analysis.won, 'total', 0)
                results_dict['winning_pnl'] = getattr(trade_analysis.won, 'pnl', {}).get('total', 0)
                
            if hasattr(trade_analysis, 'lost'):
                results_dict['losing_trades'] = getattr(trade_analysis.lost, 'total', 0)
                results_dict['losing_pnl'] = getattr(trade_analysis.lost, 'pnl', {}).get('total', 0)
            
            # Calculate win rate
            if results_dict['total_trades'] > 0:
                results_dict['win_rate_pct'] = (results_dict['winning_trades'] / results_dict['total_trades']) * 100
        
        # Extract performance metrics
        if sharpe_analysis and 'sharperatio' in sharpe_analysis:
            results_dict['sharpe_ratio'] = sharpe_analysis['sharperatio'] or 0
            
        if drawdown_analysis:
            results_dict['max_drawdown_pct'] = drawdown_analysis.get('max', {}).get('drawdown', 0)
            
        if returns_analysis:
            results_dict['avg_return'] = returns_analysis.get('ravg', 0)
        
        # Handle case where no trades occurred (this is actually a valid result)
        if results_dict['total_trades'] == 0:
            results_dict['error_message'] = "No trades executed - strategy found no bullish patterns"
        
        return results_dict
        
    except Exception as e:
        # Return error results
        return {
            'series_name': series_name,
            'series_num': series_name.replace('price_boot_', ''),
            'initial_capital': initial_capital,
            'final_value': initial_capital,
            'total_return_pct': 0,
            'net_profit': 0,
            'total_trades': 0,
            'winning_trades': 0,
            'losing_trades': 0,
            'win_rate_pct': 0,
            'winning_pnl': 0,
            'losing_pnl': 0,
            'sharpe_ratio': 0,
            'max_drawdown_pct': 0,
            'avg_return': 0,
            'backtest_success': False,
            'error_message': str(e)
        }
        
    finally:
        # Clean up temporary file
        if os.path.exists(temp_filename):
            os.remove(temp_filename)

def run_all_backtests(ohlcv_datasets, initial_capital=100000.0):
    """Run backtests on all OHLCV datasets"""
    print("=== STEP 3: INDIVIDUAL BACKTEST EXECUTION ===\n")
    
    if not ohlcv_datasets:
        print("❌ Error: No OHLCV datasets available. Please run Steps 1 and 2 first.")
        return None
    
    print(f"🚀 Running {len(ohlcv_datasets)} individual backtests...")
    print(f"💰 Initial Capital: ${initial_capital:,.2f}")
    print(f"🔧 Strategy: Long_Candlesticks with Fixed ATR (40 points)")
    print(f"📊 Commission: 0.1%")
    print(f"⏱️  Data Period: 90 days (Short-term testing)")
    print()
    
    all_results = []
    
    for i, (series_name, ohlcv_df) in enumerate(ohlcv_datasets.items(), 1):
        series_num = series_name.replace('price_boot_', '')
        print(f"   {i:2d}. Testing Series #{series_num}...", end=" ")
        
        # Run backtest for this series
        result = run_single_series_backtest(ohlcv_df, series_name, initial_capital)
        all_results.append(result)
        
        # Display quick results
        if result['backtest_success']:
            if result['total_trades'] > 0:
                print(f"✅ Return: {result['total_return_pct']:+6.2f}% | Trades: {result['total_trades']:2d} | Win Rate: {result['win_rate_pct']:5.1f}%")
            else:
                print(f"⚪ No Trades | Return: {result['total_return_pct']:+6.2f}% | Reason: No patterns found")
        else:
            print(f"❌ Error: {result['error_message']}")
    
    print(f"\n✅ All {len(ohlcv_datasets)} backtests completed!")
    
    return all_results

def display_backtest_summary(all_results):
    """Display comprehensive summary of all backtest results"""
    if not all_results:
        print("❌ No results to display")
        return
    
    # Filter successful results
    successful_results = [r for r in all_results if r['backtest_success']]
    failed_results = [r for r in all_results if not r['backtest_success']]
    trading_results = [r for r in successful_results if r['total_trades'] > 0]
    no_trade_results = [r for r in successful_results if r['total_trades'] == 0]
    
    print(f"\n{'='*85}")
    print(f"🎉 COMPREHENSIVE BACKTEST RESULTS SUMMARY")
    print(f"{'='*85}")
    
    print(f"\n📊 EXECUTION SUMMARY:")
    print(f"   • Total Backtests: {len(all_results)}")
    print(f"   • Successful: {len(successful_results)} ✅")
    print(f"   • Failed: {len(failed_results)} ❌")
    print(f"   • With Trades: {len(trading_results)} 🔄")
    print(f"   • No Trades (No Patterns): {len(no_trade_results)} ⚪")
    
    if not successful_results:
        print("⚠️ No successful backtests to analyze")
        return
    
    # Show all results (including no-trade scenarios)
    print(f"\n📋 INDIVIDUAL RESULTS:")
    print(f"{'Series':>8} {'Status':>12} {'Return %':>10} {'Profit $':>12} {'Trades':>8} {'Win %':>8} {'Sharpe':>8} {'DD %':>8}")
    print(f"{'-'*80}")
    
    for result in successful_results:
        status = "Active" if result['total_trades'] > 0 else "No Trades"
        win_rate = result['win_rate_pct'] if result['total_trades'] > 0 else 0
        sharpe = result['sharpe_ratio'] if result['sharpe_ratio'] != 0 else 0
        
        print(f"#{result['series_num']:>7} "
              f"{status:>11} "
              f"{result['total_return_pct']:>9.2f} "
              f"${result['net_profit']:>11,.0f} "
              f"{result['total_trades']:>7d} "
              f"{win_rate:>7.1f} "
              f"{sharpe:>7.3f} "
              f"{result['max_drawdown_pct']:>7.2f}")
    
    # Analysis for trading results only
    if trading_results:
        print(f"\n📈 TRADING STATISTICS (Active Series Only):")
        returns = [r['total_return_pct'] for r in trading_results]
        profits = [r['net_profit'] for r in trading_results]
        trades = [r['total_trades'] for r in trading_results]
        win_rates = [r['win_rate_pct'] for r in trading_results]
        sharpe_ratios = [r['sharpe_ratio'] for r in trading_results if r['sharpe_ratio'] != 0]
        drawdowns = [r['max_drawdown_pct'] for r in trading_results]
        
        print(f"   • Active Trading Series: {len(trading_results)}/{len(successful_results)}")
        print(f"   • Average Return: {np.mean(returns):+.2f}% (Range: {min(returns):+.2f}% to {max(returns):+.2f}%)")
        print(f"   • Average Profit: ${np.mean(profits):,.0f} (Total: ${sum(profits):,.0f})")
        print(f"   • Average Trades: {np.mean(trades):.1f} (Range: {min(trades)}-{max(trades)})")
        print(f"   • Average Win Rate: {np.mean(win_rates):.1f}% (Range: {min(win_rates):.1f}%-{max(win_rates):.1f}%)")
        if sharpe_ratios:
            print(f"   • Average Sharpe: {np.mean(sharpe_ratios):.3f} (Range: {min(sharpe_ratios):.3f}-{max(sharpe_ratios):.3f})")
        print(f"   • Average Drawdown: {np.mean(drawdowns):.2f}% (Range: {min(drawdowns):.2f}%-{max(drawdowns):.2f}%)")
        
        # Performance Rankings
        profitable_series = [r for r in trading_results if r['total_return_pct'] > 0]
        loss_series = [r for r in trading_results if r['total_return_pct'] < 0]
        
        print(f"\n🏆 PERFORMANCE RANKINGS:")
        print(f"   • Profitable Trading Series: {len(profitable_series)}/{len(trading_results)} ({len(profitable_series)/len(trading_results)*100:.1f}%)")
        print(f"   • Loss-Making Trading Series: {len(loss_series)}/{len(trading_results)} ({len(loss_series)/len(trading_results)*100:.1f}%)")
        
        if profitable_series:
            best_performer = max(profitable_series, key=lambda x: x['total_return_pct'])
            print(f"   • Best Performer: Series #{best_performer['series_num']} ({best_performer['total_return_pct']:+.2f}%)")
        
        if loss_series:
            worst_performer = min(loss_series, key=lambda x: x['total_return_pct'])
            print(f"   • Worst Performer: Series #{worst_performer['series_num']} ({worst_performer['total_return_pct']:+.2f}%)")
    
    else:
        print(f"\n⚪ PATTERN DETECTION ANALYSIS:")
        print(f"   • No bullish candlestick patterns were detected in any of the {len(successful_results)} series")
        print(f"   • This could indicate:")
        print(f"     - Short time period (90 days) limits pattern opportunities")
        print(f"     - Strategy parameters may need adjustment for synthetic data")
        print(f"     - Generated OHLCV patterns may not trigger TA-Lib detection")
        print(f"     - Market conditions in synthetic data may be neutral/bearish")
    
    # Failed backtests
    if failed_results:
        print(f"\n❌ FAILED BACKTESTS:")
        for result in failed_results:
            print(f"   • Series #{result['series_num']}: {result['error_message']}")
    
    print(f"\n{'='*85}")

# Execute Step 3
if 'ohlcv_data' in globals() and ohlcv_data is not None:
    backtest_results = run_all_backtests(ohlcv_data, initial_capital=100000.0)
    
    if backtest_results:
        display_backtest_summary(backtest_results)
    
else:
    print("❌ Error: Please run Steps 1 and 2 first to generate OHLCV data.")

=== STEP 3: INDIVIDUAL BACKTEST EXECUTION ===

🚀 Running 10 individual backtests...
💰 Initial Capital: $100,000.00
🔧 Strategy: Long_Candlesticks with Fixed ATR (40 points)
📊 Commission: 0.1%
⏱️  Data Period: 90 days (Short-term testing)

    1. Testing Series #654... 2024-12-28, TRADE #1: PROFIT -85.62
2025-03-04, TRADE #2: PROFIT -124.18
2025-07-17, TRADE #3: PROFIT -39.57
2025-08-31, TRADE #4: PROFIT 167.72
2025-11-01, TRADE #5: PROFIT 124.40
2026-01-29, TRADE #6: PROFIT 169.57
2026-03-18, TRADE #7: PROFIT -42.56
2026-11-13, TRADE #8: PROFIT 162.61
2026-12-26, TRADE #9: PROFIT 70.98
2027-01-12, TRADE #10: PROFIT 81.97
2027-04-21, TRADE #11: PROFIT -9.81
2027-06-04, TRADE #12: PROFIT -16.01
2027-09-02, TRADE #13: PROFIT 240.02
2027-09-22, TRADE #14: PROFIT 128.30
2027-10-26, TRADE #15: PROFIT 177.62
2028-01-05, TRADE #16: PROFIT 172.18
2028-02-04, TRADE #17: PROFIT -80.04
2028-05-20, TRADE #18: PROFIT 218.54
2028-09-18, TRADE #19: PROFIT 124.86
2028-11-03, TRADE #20: PROFIT 165.50
202