In [None]:
import concurrent.futures
import math
import os
import re
import time
from abc import ABC, abstractmethod
from dataclasses import dataclass
from itertools import combinations
from urllib.parse import quote_plus

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns
from backtesting import Backtest, Strategy
from dotenv import load_dotenv
from scipy.cluster.hierarchy import dendrogram, linkage, fcluster
from sqlalchemy import create_engine, inspect, text
# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# API key read from the environment; None when "api_key" is not set.
api_key = os.getenv("api_key")

##### Database


In [None]:
class PostgresManager:
    """Convenience wrapper around a SQLAlchemy engine for per-ticker tables.

    Tables are addressed by the naming scheme ``{TICKER}_{suffix}`` where the
    suffix is one of ``30MinData``, ``EOD_Data``, ``FundamentalsData`` or
    ``CombinedData``.
    """

    def __init__(self, host, port, dbname, user, password):
        """Store the connection parameters and create the engine.

        Args:
            host: Database host name.
            port: Database port.
            dbname: Database name.
            user: Login role.
            password: Login password.
        """
        self.db_params = {
            'host': host,
            'port': port,
            'dbname': dbname,
            'user': user,
            'password': password
        }
        # Percent-encode the credentials: characters such as '@', '/' or ':'
        # in a password would otherwise be parsed as URL delimiters.
        self.engine_str = (
            f"postgresql://{quote_plus(str(user))}:{quote_plus(str(password))}"
            f"@{host}:{port}/{dbname}"
        )
        self.engine = create_engine(self.engine_str)
        print("✅ PostgreSQL connection initialized.")

    def upload_dataframe(self, df: pd.DataFrame, table_name: str, if_exists='replace'):
        """
        Uploads a DataFrame to PostgreSQL.
        - if_exists: 'replace', 'append', or 'fail'

        Failures are reported with a printed message instead of being raised.
        """
        try:
            df.to_sql(table_name, self.engine, if_exists=if_exists, index=False, method='multi')
            print(f"✅ Data uploaded to table '{table_name}'.")
        except Exception as e:
            print(f"❌ Failed to upload to '{table_name}': {e}")

    def _fetch_ticker_table(self, ticker: str, suffix: str, label: str) -> pd.DataFrame:
        """Read the whole ``{TICKER}_{suffix}`` table into a date-indexed frame.

        Args:
            ticker: Ticker symbol; upper-cased to build the table name.
            suffix: Table-name suffix (e.g. ``"EOD_Data"``).
            label: Human-readable data kind used in the success message.

        Returns:
            The table contents. When a ``date`` column exists it is parsed,
            set as the index and sorted ascending; otherwise the frame is
            returned as read. An empty DataFrame is returned on any read error.
        """
        table_name = f"{ticker.upper()}_{suffix}"
        try:
            # Double any embedded quote so the name stays a single quoted identifier.
            quoted_name = table_name.replace('"', '""')
            query = f"SELECT * FROM \"{quoted_name}\""
            df = pd.read_sql_query(query, self.engine)

            if 'date' in df.columns:
                df['date'] = pd.to_datetime(df['date'])
                df = df.set_index('date').sort_index()
                print(f"✅ Retrieved {len(df)} rows of {label} for {ticker}")
            else:
                print(f"❌ 'date' column missing in table {table_name}")
            return df
        except Exception as e:
            print(f"❌ Failed to fetch data for {ticker}: {e}")
            return pd.DataFrame()

    def _tickers_with_suffix(self, suffix: str):
        """Return upper-cased tickers for every table named ``{ticker}_{suffix}``.

        The suffix match is case-insensitive. Returns an empty list (after
        printing the error) when the database cannot be inspected.
        """
        try:
            inspector = inspect(self.engine)
            pattern = re.compile(rf'^(.*)_{re.escape(suffix)}$', re.IGNORECASE)
            return [
                match.group(1).upper()
                for table in inspector.get_table_names()
                if (match := pattern.match(table))
            ]
        except Exception as e:
            print(f"❌ Failed to inspect tables: {e}")
            return []

    def getTicker30MinData(self, ticker: str) -> pd.DataFrame:
        """
        Retrieves 30-minute interval data for a given ticker from PostgreSQL
        """
        return self._fetch_ticker_table(ticker, "30MinData", "30-min data")

    def getTickerEODData(self, ticker: str) -> pd.DataFrame:
        """
        Retrieves EOD data for a given ticker from PostgreSQL
        """
        return self._fetch_ticker_table(ticker, "EOD_Data", "EOD data")

    def getTickerFundamentalsData(self, ticker: str) -> pd.DataFrame:
        """
        Retrieves fundamentals data for a given ticker from PostgreSQL
        """
        return self._fetch_ticker_table(ticker, "FundamentalsData", "fundamentals data")

    def getTickerCombinedData(self, ticker: str) -> pd.DataFrame:
        """
        Retrieves combined data for a given ticker from PostgreSQL
        """
        return self._fetch_ticker_table(ticker, "CombinedData", "combined data")

    def get_tickers_from_30min_tables(self):
        """
        Extracts all tickers from tables that match the {ticker}_30MinData format.
        """
        return self._tickers_with_suffix("30MinData")

    def get_tickers_from_EOD_tables(self):
        """
        Extracts all tickers from tables that match the {ticker}_EOD_Data format.
        """
        return self._tickers_with_suffix("EOD_Data")

    def get_tickers_from_Fundamentals_tables(self):
        """
        Extracts all tickers from tables that match the {ticker}_FundamentalsData format.
        """
        return self._tickers_with_suffix("FundamentalsData")

    def get_tickers_from_combined_tables(self):
        """
        Extracts all tickers from tables that match the {ticker}_CombinedData format.
        """
        return self._tickers_with_suffix("CombinedData")
        
# Example usage: connection settings are taken from environment variables.
_connection_keys = ('host', 'port', 'dbname', 'user', 'password')
pg = PostgresManager(**{key: os.getenv(key) for key in _connection_keys})

# Discover every ticker that has a 30-minute table.
tickers = pg.get_tickers_from_30min_tables()
print("Tickers found:", tickers)

# Pull the 30-minute bars for one ticker and preview them.
ticker_data = pg.getTicker30MinData('AAPL')
print(ticker_data.head())

##### Getting EOD Data


In [None]:
# 1. Get valid symbols from CompanyProfiles (positive, non-null market cap)
query = """
SELECT symbol FROM "CompanyProfiles"
WHERE "marketCap" IS NOT NULL AND "marketCap" > 0
"""
symbols_df = pd.read_sql_query(text(query), pg.engine)
tickers = symbols_df['symbol'].tolist()

print(f"✅ Retrieved {len(tickers)} tickers with market cap > 0")

# 2. Keep only the symbols that have both an EOD table and a Fundamentals table
existing_EOD_tables = pg.get_tickers_from_EOD_tables()
existing_Fundamentals_tables = pg.get_tickers_from_Fundamentals_tables()

# Sets make the membership checks O(1) instead of scanning a list per symbol.
eod_table_set = set(existing_EOD_tables)
fundamentals_table_set = set(existing_Fundamentals_tables)

valid_EOD_tickers = [t for t in tickers if t in eod_table_set]
valid_tickers = [t for t in valid_EOD_tickers if t in fundamentals_table_set]

# Each line reports a distinct quantity; the labels match what is counted.
print(f"✅ Found {len(existing_EOD_tables)} existing EODData tables")
print(f"✅ Found {len(existing_Fundamentals_tables)} existing FundamentalsData tables")
print(f"✅ {len(valid_EOD_tickers)} tickers with available EODData")
print(f"✅ {len(valid_tickers)} tickers with both EODData and FundamentalsData")



In [None]:
# Fundamentals columns carried into the combined per-ticker frame.
columnsOfInterest = ['filingDate_income', 'ebitda', 'ebit', 'grossProfit', 'revenue', 'researchAndDevelopmentExpenses',
                     'costAndExpenses', 'totalCurrentLiabilities', 'weightedAverageShsOut', 'epsDiluted', 'totalLiabilities','totalAssets']

combinedDataDict = {}
# NOTE(review): fetched but not consulted below — every ticker is rebuilt and its table replaced.
existing_tables = pg.get_tickers_from_combined_tables()

for ticker in valid_tickers:
    try:
        dataframeName = f"{ticker}_CombinedData"

        fundamentals_df = pg.getTickerFundamentalsData(ticker)
        if fundamentals_df.empty:
            print(f"❌ No fundamentals data for {ticker}")
            continue

        # Take an explicit copy: assigning into a column slice of fundamentals_df
        # would otherwise hit pandas' SettingWithCopy behaviour.
        filtered_df = fundamentals_df[columnsOfInterest].copy()

        # Parse the filing date; unparseable values become NaT and are dropped.
        filtered_df['filingDate_income'] = pd.to_datetime(filtered_df['filingDate_income'], errors='coerce')
        filtered_df = filtered_df.dropna(subset=['filingDate_income'])

        # NOTE(review): the join below aligns fundamentals on their existing index,
        # while 'filingDate_income' stays a plain column. If figures only become
        # public on the filing date this may introduce look-ahead — confirm.

        # Getting the EOD data for the same ticker
        eod_df = pg.getTickerEODData(ticker)
        if eod_df.empty:
            print(f"❌ No EOD data for {ticker}")
            continue

        # Outer-join prices with fundamentals, carry the last known values
        # forward, then drop any row that still has a gap.
        combined_df = eod_df.join(filtered_df, how='outer', lsuffix='_eod', rsuffix='_fundamentals')
        combined_df = combined_df.ffill().dropna()
        if combined_df.empty:
            print(f"❌ Combined data is empty for {ticker}")
            continue

        # Derived valuation columns. Market cap is 'weightedAverageShsOut' * 'close'.
        combined_df['marketCap'] = combined_df['weightedAverageShsOut'] * combined_df['close']
        combined_df['bookValue'] = combined_df['totalAssets'] - combined_df['totalLiabilities']
        combined_df['debtToEquity'] = combined_df['totalLiabilities'] / ((combined_df['totalAssets'] - combined_df['totalLiabilities']))
        combined_df['bookToMarket'] = combined_df['bookValue'] / combined_df['marketCap']
        combined_df['enterpriseValue'] = combined_df['marketCap'] + combined_df['totalLiabilities'] - combined_df['totalAssets']
        # NOTE(review): 'ebitValue' is computed from 'ebitda', not 'ebit' — confirm this is intended.
        combined_df['ebitValue'] = combined_df['ebitda'] / combined_df['enterpriseValue']
        combined_df['ebitdaValue'] = combined_df['ebitda'] / combined_df['marketCap']
        combined_df['profitValue'] = combined_df['grossProfit'] / combined_df['marketCap']

        # Keep the date-indexed version in memory; upload with 'date' as a column.
        combinedDataDict[ticker] = combined_df.copy()
        combined_df = combined_df.reset_index()

        pg.upload_dataframe(combined_df, dataframeName, if_exists='replace')
        print(f"✅ Processed {ticker} successfully with {len(combined_df)} rows of combined data")
    except Exception as e:
        print(f"❌ Error processing {ticker}: {e}")

In [None]:
# Display the combined frame stored for CVS (KeyError if CVS was not processed above).
combinedDataDict['CVS']

In [None]:
# List the tickers that currently have a {ticker}_CombinedData table.
pg.get_tickers_from_combined_tables()

##### Example Backtest


In [None]:
def SIGNAL():
    """Return the ``signal`` attribute of the module-level ``df``.

    NOTE(review): ``df`` is not defined in this function, so it relies on a
    global named ``df`` existing when called — confirm this helper is still needed.
    """
    return df.signal

class SignalStrategy(Strategy):
    """Trade on a precomputed ``signal`` column: 1 = long, -1 = short, 0 = flat.

    ``stopLoss`` and ``takeProfit`` are percentages of the latest close
    (e.g. 1 means 1%) and are used to place the SL/TP levels of each order.
    """
    stopLoss = 1
    takeProfit = 15

    def init(self):
        """Register the data frame's ``signal`` column as an indicator."""
        self.signal_indicator = self.I(lambda: self.data.df['signal'])

    def next(self):
        """React to the latest signal value; IndexError is ignored."""
        try:
            signal_now = self.signal_indicator[-1]
            last_close = self.data.Close[-1]

            # Percent settings converted to fractions of price.
            sl_frac = self.stopLoss * 0.01
            tp_frac = self.takeProfit * 0.01

            # Flat signal: exit whatever is open and stop here.
            if signal_now == 0 and self.position:
                self.position.close()
                return

            if signal_now == 1:
                # Going long: leave any short first.
                if self.position.is_short:
                    self.position.close()
                if not self.position.is_long:
                    # Stop below the price, target above it.
                    stop_level = last_close * (1 - sl_frac)
                    target_level = last_close * (1 + tp_frac)
                    self.buy(sl=stop_level, tp=target_level)
                return

            if signal_now == -1:
                # Going short: leave any long first.
                if self.position.is_long:
                    self.position.close()
                if not self.position.is_short:
                    # Stop above the price, target below it.
                    stop_level = last_close * (1 + sl_frac)
                    target_level = last_close * (1 - tp_frac)
                    self.sell(sl=stop_level, tp=target_level)

        except IndexError:
            pass
        
        
# bt = Backtest(
#     df,
#     SignalStrategy,
#     cash=10_000,
#     commission=.002
# )

# stats, heatmap = bt.optimize(
#     stopLoss = range(1, 5, 1),
#     takeProfit = range(12, 20, 1),
#     maximize='Sharpe Ratio',
#     return_heatmap=True,
#     max_tries= 50,
# )
# self.backTests[ticker] = bt
# self.tickerResults[ticker] = stats

##### Trading Strategy Abstract


In [None]:

class TradingStrategy(ABC):
    """Abstract base for strategies that run a four-step pipeline via ``go()``.

    Deriving from ``ABC`` is what makes the ``@abstractmethod`` markers
    effective: a subclass that omits one of the four steps cannot be
    instantiated, instead of silently running a no-op step.

    Attributes set by ``__init__``:
        rawData: The ``data`` argument, stored as given.
        window: The ``window`` argument (default 30).
        workingData: An initially empty DataFrame.
        processed: Flag initialised to False.
        tickerResults: Initially empty dict.
        backTests: Initially empty dict.
    """

    def __init__(self, data, window=30):
        self.rawData = data
        self.window = window
        self.workingData = pd.DataFrame()
        self.processed = False
        self.tickerResults = {}
        self.backTests = {}

    @abstractmethod
    def createProcessedData(self):
        """Preprocess raw data into working format"""
        pass

    @abstractmethod
    def calculateMetrics(self):
        """Calculate strategy-specific metrics and signals"""
        pass

    @abstractmethod
    def addSignalsToDataFrames(self):
        """Add generated signals to individual asset DataFrames"""
        pass

    @abstractmethod
    def executeBacktests(self):
        """Execute backtests for all assets"""
        pass

    def go(self):
        """Main execution workflow: run the four steps in their fixed order."""
        self.createProcessedData()
        self.calculateMetrics()
        self.addSignalsToDataFrames()
        self.executeBacktests()

##### Value Strategy


In [None]:
class EbitdaValueStrategy(TradingStrategy):
    """Long/short strategy driven by a per-date ranking of ``ebitdaValue``.

    ``self.rawData`` is expected to map ticker -> date-indexed DataFrame that
    carries price columns and an ``ebitdaValue`` column. Tickers are ranked
    per date (rank 1 = highest value); the first ``window`` ranks produce a
    long signal and the last ``window`` ranks a short signal.
    """

    def _earliest_raw_date(self):
        """Earliest index value over the non-empty frames in ``self.rawData``.

        Returns None when there is no non-empty frame, so the progress prints
        that use it cannot raise on an empty universe.
        """
        return min(
            (frame.index.min() for frame in self.rawData.values() if not frame.empty),
            default=None,
        )

    def createProcessedData(self):
        """Preprocess raw data into working format.

        Filters the universe via ``filterRawData`` and then normalises each
        remaining frame: drops ``symbol``, renames the OHLCV columns to the
        capitalised names, and creates ``High``/``Low`` from ``Close`` when
        they are missing.
        """
        self.filterRawData()
        column_map = {
            'open': 'Open',
            'high': 'High',
            'low': 'Low',
            'close': 'Close',
            'volume': 'Volume'
        }

        for ticker in self.rawData.keys():
            df = self.rawData[ticker].copy()
            # Drop the symbol column if it exists
            if 'symbol' in df.columns:
                df.drop(columns=['symbol'], inplace=True)
            # Rename columns using the mapping
            df.rename(columns=column_map, inplace=True)
            # If High or Low columns don't exist, create them as a copy of Close
            if 'High' not in df.columns:
                df['High'] = df['Close']
            if 'Low' not in df.columns:
                df['Low'] = df['Close']
            self.rawData[ticker] = df

        # Reported once after the loop (it was previously recomputed per ticker).
        print(f"Minimum date after processing: {self._earliest_raw_date()}")

    def createValueData(self):
        """Build ``self.value_df``: dates x tickers of ``ebitdaValue``.

        Tickers without an ``ebitdaValue`` column are skipped. Dates a ticker
        does not cover are filled with 0.
        """
        value_df = pd.DataFrame()
        for ticker, df in self.rawData.items():
            if 'ebitdaValue' not in df.columns:
                continue
            # Ensure we have a datetime index
            temp = df[['ebitdaValue']].copy()
            temp.index = pd.to_datetime(temp.index)

            # Rename column to ticker name
            temp = temp.rename(columns={'ebitdaValue': ticker})

            if value_df.empty:
                value_df = temp
            else:
                value_df = value_df.join(temp, how='outer')

        # Make the NaN values 0
        value_df = value_df.fillna(0)

        self.value_df = value_df

    def getTopBottomTickers(self):
        """Rank tickers per date and collect every ticker that is ever top/bottom.

        Stores the rank table in ``self.rank_df`` (rank 1 = highest value).
        For each date, tickers whose value is 0 (the fill used for missing
        data) are excluded; dates with fewer than ``2 * window`` remaining
        tickers are skipped.

        Returns:
            Set of tickers that appear in the first or last ``window``
            positions on at least one date.
        """
        value_df = self.value_df.copy()

        # Rank values row-wise (date-wise)
        rank_df = value_df.rank(axis=1, method='min', ascending=False)
        rank_df.dropna(axis=1, how='all', inplace=True)  # Drop columns with all NaN values

        self.rank_df = rank_df

        # Ranks are always >= 1, so the zero filter has to look at the values.
        has_value = value_df[rank_df.columns].ne(0)

        top_bottom_tickers = set()

        # Positional pairing keeps the rank row and the value row in step
        # even if the index contains repeated dates.
        for position, (date, row) in enumerate(rank_df.iterrows()):
            valid = has_value.iloc[position]
            # Removing the tickers that have 0 as the value
            sorted_tickers = [ticker for ticker in row.sort_values().index if valid[ticker]]
            if len(sorted_tickers) < self.window * 2:
                continue

            top_bottom_tickers.update(sorted_tickers[:self.window])
            top_bottom_tickers.update(sorted_tickers[-self.window:])

        return top_bottom_tickers

    def filterRawData(self):
        """Restrict ``self.rawData`` to tickers that are ever top/bottom ranked.

        Also trims ``self.rank_df`` to the kept tickers and builds
        ``self.rankedDict`` as ``{date: {ticker: rank}}``.
        """
        print(f"Minimum date before filtration: {self._earliest_raw_date()}")

        self.createValueData()
        top_bottom_tickers = self.getTopBottomTickers()

        # Captured before rawData is replaced so the summary below is meaningful.
        original_count = len(self.rawData)

        # Filter rawData to keep only top/bottom tickers
        filtered_rawData = {
            ticker: df
            for ticker, df in self.rawData.items()
            if ticker in top_bottom_tickers
        }

        self.rawData = filtered_rawData

        print(f"Minimum date after filtration: {self._earliest_raw_date()}")

        # Only keep columns in rank_df that are in filtered_rawData
        filtered_columns = [col for col in self.rank_df.columns if col in filtered_rawData]
        self.rank_df = self.rank_df[filtered_columns]

        # Create the rankedDict {date: {ticker: rank}} with integer ranks
        self.rankedDict = {
            date: {ticker: int(rank_value) for ticker, rank_value in row.items()}
            for date, row in self.rank_df.iterrows()
        }

        print(f"Original tickers: {original_count}")
        print(f"Filtered tickers: {len(filtered_rawData)}")

    def mergedData(self):
        """Outer-join every ticker frame into ``self.workingData``.

        Each ticker's columns are prefixed with ``{ticker}_``; the result is
        sorted by its index.
        """
        print(f"Minimum date before margedData: {self._earliest_raw_date()}")

        for ticker in self.rawData.keys():
            df = self.rawData[ticker].copy()
            # Drop the symbol column if it exists
            if 'symbol' in df.columns:
                df.drop(columns=['symbol'], inplace=True)
            # Always add prefix for consistency
            df = df.add_prefix(f"{ticker}_")
            if self.workingData.empty:
                self.workingData = df.copy()
            else:
                self.workingData = self.workingData.join(df, how='outer')
            # If {ticker}_symbol column exists, drop it
            symbol_col = f"{ticker}_symbol"
            if symbol_col in self.workingData.columns:
                self.workingData.drop(columns=[symbol_col], inplace=True)
        print(f"Minimum date of workingData after mergedData: {self.workingData.index.min()}")
        print(f"Minimum date after mergedData: {self._earliest_raw_date()}")

        # Ensure the workingData DataFrame is sorted by date index
        self.workingData.sort_index(inplace=True)

    def getRanks(self, ticker):
        """Build the signal series for one ticker from ``self.rankedDict``.

        For each date in ``self.workingData.index``: 1 when the ticker's rank
        is within ``1..window``, -1 when it is within the last ``window``
        ranks counted against the number of tickers stored for that date,
        otherwise 0 (including dates or tickers missing from the dict).

        NOTE(review): ranks are computed over the full universe (zero-filled
        values included) while the short test uses the number of *kept*
        tickers as the total — confirm this is the intended bottom bucket.
        """
        index = self.workingData.index
        print(f"Minimum date of signal_series: {index.min()}")

        # Collect values first and build the Series once; assigning into a
        # Series element-by-element is much slower for the same result.
        signals = []
        for date in index:
            ranks_on_date = self.rankedDict.get(date)
            if ranks_on_date is None:
                signals.append(0)
                continue
            rank = ranks_on_date.get(ticker, 0)
            ranked_count = len(ranks_on_date)
            if 1 <= rank <= self.window:
                signals.append(1)
            elif ranked_count - self.window < rank <= ranked_count:
                signals.append(-1)
            else:
                signals.append(0)
        return pd.Series(signals, index=index, dtype='int64')

    def calculateMetrics(self):
        """Calculate strategy-specific metrics and signals.

        Merges all frames into ``self.workingData`` and adds one
        ``{ticker}_signal`` column per ticker, shifted by one row so a signal
        is only acted on after the date it was computed for.
        """
        print(f"Minimum date before calculateMetrics: {self._earliest_raw_date()}")

        self.mergedData()
        print(f"Minimum date of workingData before calculateMetrics: {self.workingData.index.min()}")
        for ticker in self.rawData.keys():
            signal_col = f"{ticker}_signal"
            # Shift the signal by 1 to avoid lookahead bias
            self.workingData[signal_col] = self.getRanks(ticker).shift(1)

        print(f"Minimum date after calculateMetrics: {self._earliest_raw_date()}")
        print(f"Minimum date of workingData after calculateMetrics: {self.workingData.index.min()}")

    def addSignalsToDataFrames(self):
        """Add generated signals to individual asset DataFrames.

        Duplicate index entries are removed (keeping the last) from both the
        working frame and each ticker frame; rows without a signal are dropped.
        """
        # First ensure all indexes are unique
        self.workingData = self.workingData[~self.workingData.index.duplicated(keep='last')]

        for ticker, df in self.rawData.items():
            # Ensure current DF has unique index
            df_clean = df[~df.index.duplicated(keep='last')]

            # Get intersection of dates
            common_dates = df_clean.index.intersection(self.workingData.index)

            # Get signals only for existing common dates
            if not common_dates.empty and f'{ticker}_signal' in self.workingData.columns:
                signal_series = self.workingData.loc[common_dates, f'{ticker}_signal']

                # Add to a copy to avoid SettingWithCopyWarning
                df_clean = df_clean.copy()
                df_clean['signal'] = signal_series

                # Drop NA signals and update rawData
                df_clean.dropna(subset=['signal'], inplace=True)
                self.rawData[ticker] = df_clean
                print(f"✅ Signals added to {ticker} DataFrame.")
            else:
                print(f"⚠️ No signals available for {ticker}")

        # Verify dates after processing
        print(f"Minimum date after adding signals: {self._earliest_raw_date()}")

    def calculateRollingSharpe(self, returns, window=90, risk_free_rate=0.0):
        """Rolling mean/std of excess returns over ``window`` rows, scaled by sqrt(252)."""
        excess_returns = returns - risk_free_rate
        rolling_mean = excess_returns.rolling(window).mean()
        rolling_std = excess_returns.rolling(window).std()
        return (rolling_mean / rolling_std) * np.sqrt(252)

    def calculateBacktestMetrics(self):
        """Derive per-ticker and strategy-level return series from the backtests.

        Adds ``Return`` and ``rolling_sharpe_90`` columns to each ticker's
        equity-curve frame, stores ``self.strategy_daily_returns`` and
        ``self.strategy_rolling_sharpe``, then fills
        ``self.performance_metrics``. Does nothing (beyond a warning) when no
        backtest results are available.
        """
        if not self.tickerResults:
            print("⚠️ No backtest results available; run the backtests before calculating metrics.")
            return

        all_returns = {}

        for ticker in self.tickerResults:
            tickerEquityCurve = self.tickerResults[ticker]['_equity_curve']  # a DataFrame
            tickerEquityCurve['Return'] = tickerEquityCurve['Equity'].pct_change().fillna(0)
            # Stored on the ticker's own equity curve (not on an unrelated global frame).
            tickerEquityCurve['rolling_sharpe_90'] = self.calculateRollingSharpe(
                tickerEquityCurve['Return'],
                window=90,
            )

            all_returns[ticker] = tickerEquityCurve['Return']

        returns_df = pd.DataFrame(all_returns)

        # Fill missing values with 0 (dates a ticker has no equity curve for)
        returns_df = returns_df.fillna(0)

        # Strategy returns are the sum across tickers.
        # NOTE(review): summing per-ticker returns is not an equal-weighted
        # portfolio return — confirm sum (vs. mean) is intended.
        strategy_daily_returns = returns_df.sum(axis=1)

        strategy_rolling_sharpe = self.calculateRollingSharpe(
            strategy_daily_returns,
            window=90,
            risk_free_rate=0.0
        )

        # Store results in the object
        self.strategy_daily_returns = strategy_daily_returns
        self.strategy_rolling_sharpe = strategy_rolling_sharpe

        self.calculatePerformanceMetrics()

    def calculatePerformanceMetrics(self):
        """Calculate performance metrics from strategy returns.

        Reads ``self.strategy_daily_returns`` and stores a dict of annualised
        return, annualised volatility, Sharpe ratio (0% risk-free rate),
        maximum drawdown and total return in ``self.performance_metrics``.
        """
        returns = self.strategy_daily_returns

        # Annualized return
        annualized_return = (1 + returns).prod() ** (252/len(returns)) - 1

        # Annualized volatility
        annualized_vol = returns.std() * np.sqrt(252)

        # Sharpe ratio (assuming 0% risk-free rate)
        sharpe_ratio = annualized_return / annualized_vol

        # Max drawdown
        equity = (1 + returns).cumprod()
        peak = equity.expanding(min_periods=1).max()
        drawdown = (equity/peak) - 1
        max_drawdown = drawdown.min()

        # Store metrics
        self.performance_metrics = {
            'Annualized Return': annualized_return,
            'Annualized Volatility': annualized_vol,
            'Sharpe Ratio': sharpe_ratio,
            'Max Drawdown': max_drawdown,
            'Total Return': equity.iloc[-1] / equity.iloc[0] - 1
        }

    def executeBacktests(self):
        """Execute backtests for all assets.

        Runs an optimised ``SignalStrategy`` backtest per ticker that has a
        usable ``signal`` column, storing the ``Backtest`` in
        ``self.backTests`` and the stats in ``self.tickerResults``. The
        elapsed time is stored in ``self.BackTestTime`` (and
        ``self.BacktestTime``).
        """
        print(f"Minimum date before backtest: {self._earliest_raw_date()}")
        startTime = time.perf_counter()
        for ticker, df in self.rawData.items():
            if 'signal' not in df.columns:
                continue
            if df.empty or df['signal'].isnull().all():
                print(f"❌ No valid data for backtest on {ticker}. Skipping...")
                continue
            print(f"Mimimum date of {ticker} DataFrame: {df.index.min()}")
            bt = Backtest(df, SignalStrategy, cash=10_000, commission=.002)
            stats, heatmap = bt.optimize(
                stopLoss = range(1, 5, 1),
                takeProfit = range(12, 25, 1),
                maximize='Sharpe Ratio',
                return_heatmap=True,
                max_tries= 50,
            )
            self.backTests[ticker] = bt
            self.tickerResults[ticker] = stats
            print(f"✅ Backtest executed for {ticker}.")
        endTime = time.perf_counter()
        # Both spellings are kept in sync; the two backtest methods used different ones.
        self.BackTestTime = endTime - startTime
        self.BacktestTime = self.BackTestTime
        print(f"Total backtest execution time: {self.BackTestTime:.2f} seconds")

    def executeMultiThreadedBacktests(self):
        """Run the per-ticker backtests on a thread pool.

        Same outputs as ``executeBacktests`` (``self.backTests``,
        ``self.tickerResults``), except that a failing ticker is reported and
        skipped instead of aborting the run. The elapsed time is stored in
        ``self.BacktestTime`` (and ``self.BackTestTime``).
        """
        print(f"Minimum date before backtest: {self._earliest_raw_date()}")

        # Start the stopwatch
        start_time = time.perf_counter()

        def process_ticker(ticker):
            """Process a single ticker for backtesting"""
            df = self.rawData[ticker]
            if 'signal' not in df.columns or df.empty or df['signal'].isnull().all():
                print(f"❌ No valid data for backtest on {ticker}. Skipping...")
                return ticker, None, None

            print(f"Minimum date of {ticker} DataFrame: {df.index.min()}")

            try:
                bt = Backtest(df, SignalStrategy, cash=10_000, commission=.002)
                stats, heatmap = bt.optimize(
                    stopLoss=range(1, 5, 1),
                    takeProfit=range(12, 15, 1),
                    maximize='Sharpe Ratio',
                    return_heatmap=True,
                    max_tries=50,
                )
                print(f"✅ Backtest executed for {ticker}.")
                return ticker, bt, stats
            except Exception as e:
                print(f"❌ Error during backtest for {ticker}: {str(e)}")
                return ticker, None, None

        # At most 6 threads, and at least 1: ThreadPoolExecutor rejects
        # max_workers=0, which an empty rawData would otherwise produce.
        max_workers = max(1, min(6, len(self.rawData)))
        print(f"Starting backtests with {max_workers} threads...")

        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
            # Submit all tasks to the thread pool
            future_to_ticker = {
                executor.submit(process_ticker, ticker): ticker
                for ticker in self.rawData.keys()
            }

            # Process completed tasks as they finish
            for future in concurrent.futures.as_completed(future_to_ticker):
                ticker = future_to_ticker[future]
                try:
                    # The ticker returned by the worker is ignored; the mapping is authoritative.
                    _, bt, stats = future.result()

                    # Check explicitly for None instead of truthiness
                    if bt is not None and stats is not None:
                        self.backTests[ticker] = bt
                        self.tickerResults[ticker] = stats
                    else:
                        print(f"⚠️ Backtest for {ticker} returned no results. Skipping...")
                except Exception as e:
                    print(f"❌ Unexpected error processing {ticker}: {str(e)}")

        # Stop the stopwatch and calculate duration
        end_time = time.perf_counter()
        self.BacktestTime = end_time - start_time
        self.BackTestTime = self.BacktestTime

        # Convert to minutes and seconds for readability
        minutes = int(self.BacktestTime // 60)
        seconds = self.BacktestTime % 60

        print(f"\n⏱️ Total backtest time: {minutes} minutes {seconds:.2f} seconds")
        print(f"Number of tickers processed: {len(self.backTests)}")
        print(f"Tickers with errors: {len(self.rawData) - len(self.backTests)}")

In [None]:
# Run the full pipeline (processing, signals, backtests) on the combined frames.
valStr = EbitdaValueStrategy(combinedDataDict)
valStr.go()
# Keep a handle on the per-ticker frames held by the strategy after the run.
rawData = valStr.rawData

In [None]:
# Turn the per-ticker backtest results into strategy-level returns and metrics.
valStr.calculateBacktestMetrics()

In [None]:
from dotenv import load_dotenv
import os
load_dotenv()

# Report only whether the key is present: printing the value itself would
# store the secret in the notebook's saved output.
print("api_key is set" if os.getenv('api_key') else "api_key is NOT set")