In [1]:
import pandas as pd
import backtrader as bt
from sklearn.linear_model import LinearRegression
#https://github.com/dllllb/btrade/blob/master/bt-strategy-test.ipynb
%matplotlib inline

# Load the data
df = pd.read_csv("equities_biotech_pull_2020-01-01_2024-01-01.csv")
df.drop(columns=["Unnamed: 0"], inplace=True)

# Convert date to datetime
df['date'] = pd.to_datetime(df['date'])

# Sort by date and drop duplicates
df = df.sort_values('date').drop_duplicates(subset='date')

# Set the date as the index
df.set_index('date', inplace=True)

# Split the data
train_df = df['2020-01-01':'2021-12-31']  # Training set
val_df = df['2022-01-01':'2022-12-31']    # Validation set
test_df = df['2023-01-01':'2023-12-31']   # Backtest set

# Create lagged features
def create_features(data):
    data['ret_lag1'] = data['ret'].shift(1)
    data['vol_lag1'] = data['vol'].shift(1)
    return data.dropna()

train_df = create_features(train_df)
val_df = create_features(val_df)
test_df = create_features(test_df)

# Define and train the regression model
X_train = train_df[['ret_lag1', 'vol_lag1']]
y_train = train_df['ret']

model = LinearRegression()
model.fit(X_train, y_train)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['ret_lag1'] = data['ret'].shift(1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['vol_lag1'] = data['vol'].shift(1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['ret_lag1'] = data['ret'].shift(1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row

In [2]:
import datetime
import matplotlib as mpl
import backtrader as bt
%matplotlib inline

import yfinance as yf

# Fetch data using yfinance directly
nvda_df = yf.download('NVDA', start='2014-01-01', end='2020-05-01')

# Make sure the index is a datetime index
nvda_df.index = pd.to_datetime(nvda_df.index)

from bt_strategy import close_vs_sma_strategy

cerebro = close_vs_sma_strategy()
cerebro.broker.setcash(100000.0)

# Use the data fetched from yfinance
data = bt.feeds.PandasData(dataname=nvda_df)
cerebro.adddata(data)

# Run the backtest
cerebro.run()

# Set the figure size for plotting
mpl.rc("figure", figsize=(12, 10))
cerebro.plot(iplot=False);


  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')
[*********************100%%**********************]  1 of 1 completed


In [1]:
import bt_strategy as bs
import yfinance as yf
import logging
import pandas as pd
from datetime import datetime
import quantstats as qs

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Define constants
START_DATE = '2020-01-01'
END_DATE = '2023-12-31'
#TICKERS = pd.read_csv("biotech_tickers.csv").Ticker.unique().tolist()
TICKERS = pd.read_csv("biotech_tickers.csv").head(5).Ticker.unique().tolist()

def fetch_data(tickers, start, end):
    """Fetch stock data from yfinance with error handling."""
    logs = {}
    for ticker in tickers:
        try:
            logging.info(f"Fetching data for {ticker}")
            data = yf.download(ticker, start=start, end=end)
            if data.empty:
                logging.warning(f"No data returned for {ticker}.")
            else:
                logs[ticker] = data
        except ValueError as ve:
            logging.error(f"Value error for {ticker}: {ve}")
        except KeyError as ke:
            logging.error(f"Key error for {ticker}: {ke}")
        except Exception as e:
            logging.error(f"Error fetching data for {ticker}: {e}")
    return logs

def evaluate_strategies(logs, strategies):
    """Evaluate strategies and return stats."""
    if not logs:
        logging.error("No data available for strategy evaluation.")
        return None
    try:
        stats = bs.evaluate_strategies(strategies, logs, n_trials=50, n_jobs=-1)
        logging.info("Strategy evaluation completed successfully.")
        return stats
    except Exception as e:
        logging.error(f"Error during strategy evaluation: {e}")
        return None

def summarize_stats(stats):
    """Calculate and print summary statistics."""
    if stats is None or stats.empty:
        logging.error("No statistics available to summarize.")
        return

    try:
        # Calculate mean values and max drop for each strategy and ticker
        mean_values = stats.groupby(['strategy', 'ticker']).value.mean().unstack()
        max_drops = stats.groupby(['strategy', 'ticker']).dropdown.max().unstack()

        # Calculate overall mean values and max drops for each strategy
        overall_mean = stats.groupby('strategy').value.mean()
        overall_max_drop = stats.groupby('strategy').dropdown.max()

        # Create a summary DataFrame for better visualization
        summary = pd.DataFrame({
            'Mean Value': overall_mean,
            'Max Drop': overall_max_drop
        })

        # Display results
        print("Mean Values by Strategy and Ticker:")
        print(mean_values)
        print("\nMax Drops by Strategy and Ticker:")
        print(max_drops)
        print("\nOverall Strategy Performance Summary:")
        print(summary)

    except Exception as e:
        logging.error(f"Error summarizing stats: {e}")

def generate_quantstats_report(logs, strategies):
    """Generate and save one QuantStats report per strategy."""
    if not logs:
        logging.warning("No logs available to generate reports.")
        return

    for strategy in strategies:
        try:
            combined_data = pd.DataFrame()  # To store combined data for each strategy
            for ticker, data in logs.items():
                # For demonstration, we're just copying the close prices
                combined_data[ticker] = data['Close']

            # Create a QuantStats report for the combined data of this strategy
            qs.extend_pandas()
            qs.reports.html(combined_data, output=f'quantstats_report_{strategy.__name__}.html', title=f'QuantStats Report for {strategy.__name__}')
            logging.info(f"QuantStats report generated for strategy: {strategy.__name__}")

        except Exception as e:
            logging.error(f"Error generating QuantStats report for strategy {strategy.__name__}: {e}")

def main():
    """Main function to run the strategy evaluations."""
    # Fetch data
    logs = fetch_data(TICKERS, START_DATE, END_DATE)

    # Filter out tickers with no data
    logs = {ticker: data for ticker, data in logs.items() if not data.empty}
    if not logs:
        logging.error("No valid data available for strategy evaluation.")
        return

    # Define the strategies to evaluate
    strategies = [
        bs.buy_and_hold_strategy,
        bs.close_vs_sma_strategy,
        bs.mean_reversion_strategy,
        bs.prev_peak_strategy,
        bs.random_strategy,
        bs.prev_peak_nodrop_strategy,
        bs.mean_reversion_nodrop_strategy,
        bs.close_vs_sma_nodrop_strategy,
        bs.anti_drop_strategy
    ]

    # Evaluate the strategies
    stats = evaluate_strategies(logs, strategies)

    # If stats are available, summarize them
    if stats is not None:
        summarize_stats(stats)
        generate_quantstats_report(logs, strategies)

if __name__ == "__main__":
    main()


2024-12-28 22:03:37,003 - INFO - Fetching data for NVO
  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')
[*********************100%%**********************]  1 of 1 completed
2024-12-28 22:03:37,697 - INFO - Fetching data for VRTX
  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')
[*********************100%%**********************]  1 of 1 completed
2024-12-28 22:03:37,837 - INFO - Fetching data for REGN
  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')
[*********************100%%**********************]  1 of 1 completed
2024-12-28 22:03:37,963 - INFO - Fetching data for ALNY
  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')
[*********************100%%**********************]  1 of 1 completed
2024-12-28 22:03:38,102 - INFO - Fetching data for ARGX
  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')
[*********************100%%**********************]  1 of 1 completed
100%|██████████| 2250/2250 [00:39<00:00, 56.46it/s]
2024-12-28 22:04:18,687 - INFO - Strategy evaluat

Mean Values by Strategy and Ticker:
ticker                              ALNY      ARGX       NVO      REGN  \
strategy                                                                 
anti_drop_strategy              1.448959  1.187060  1.147313  1.206842   
buy_and_hold_strategy           1.342077  1.941225  1.964565  1.377792   
close_vs_sma_nodrop_strategy    1.339909  1.325364  1.022766  1.316171   
close_vs_sma_strategy           1.837184  1.803746  1.164529  1.492939   
mean_reversion_nodrop_strategy  1.084073  1.136390  1.345302  1.218323   
mean_reversion_strategy         1.162148  1.288352  1.508820  1.282065   
prev_peak_nodrop_strategy       1.215197  1.121747  1.224393  1.210781   
prev_peak_strategy              1.274682  1.520053  1.421377  1.310059   
random_strategy                 1.090344  1.384209  1.276284  1.184064   

ticker                              VRTX  
strategy                                  
anti_drop_strategy              1.000849  
buy_and_hold_strateg

  return reduction(axis=axis, out=out, **passkwargs)
  return reduction(axis=axis, out=out, **passkwargs)
  return reduction(axis=axis, out=out, **passkwargs)
  returns = _utils._prepare_returns(returns, rf).resample(resolution).sum()
  returns = returns.resample("A").apply(_stats.comp)
  returns = returns.resample("A").last()
  returns.fillna(0).resample(resample).apply(apply_fnc).resample(resample).last()
  data_subset = grouped_data.get_group(pd_key)
  data_subset = grouped_data.get_group(pd_key)
  data_subset = grouped_data.get_group(pd_key)
  data_subset = grouped_data.get_group(pd_key)
  data_subset = grouped_data.get_group(pd_key)
  data_subset = grouped_data.get_group(pd_key)
  data_subset = grouped_data.get_group(pd_key)
  data_subset = grouped_data.get_group(pd_key)
  data_subset = grouped_data.get_group(pd_key)
  data_subset = grouped_data.get_group(pd_key)
  data_subset = grouped_data.get_group(pd_key)
The behavior will change in pandas 3.0. This inplace method will never w

In [None]:
import bt_strategy as bs
import logging,os
import pandas as pd
from datetime import datetime
import quantstats as qs
import sqlite3


# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Constants from Code 2
DB_PATH = '/content/drive/MyDrive/AFP/Code/Download_This_Folder/1_financial_data.db'
REPORTS_FOLDER = '/content/drive/MyDrive/AFP/Code/QuantStats_Reports/'
START_DATE = '2020-01-01'
END_DATE = '2023-12-31'
MINI_START_DATE = '2020-01-01'
MINI_END_DATE = '2020-06-30'
MINI_LIVE_START_DATE = '2020-08-01'

# Mount Google Drive to access files
drive.mount('/content/drive')

def load_data_from_db():
    """Load all datasets from the database."""
    conn = sqlite3.connect(DB_PATH)
    merged_data = pd.read_sql(f"SELECT * FROM merged_data WHERE date BETWEEN '{MINI_START_DATE}' AND '{MINI_END_DATE}'", conn)
    conn.close()
    return merged_data

# Load the data
merged_data = load_data_from_db()

# Convert 'date' column to datetime format
merged_data['date'] = pd.to_datetime(merged_data['date'], errors='coerce')

# Ensure 'date' column is properly formatted
merged_data = merged_data.sort_values('date')
merged_data.set_index('date', inplace=True)

# Ensure no missing values
merged_data.ffill(inplace=True)
merged_data.fillna(0, inplace=True)

# Define constants
TICKERS = merged_data['ticker'].unique().tolist()

# Define the strategies from Code 1
strategies = [
    bs.buy_and_hold_strategy,
    bs.close_vs_sma_strategy,
    bs.mean_reversion_strategy,
    bs.prev_peak_strategy,
    bs.random_strategy,
    bs.prev_peak_nodrop_strategy,
    bs.mean_reversion_nodrop_strategy,
    bs.close_vs_sma_nodrop_strategy,
    bs.anti_drop_strategy
]

def evaluate_strategies(logs, strategies):
    """Evaluate strategies and return stats."""
    if not logs:
        logging.error("No data available for strategy evaluation.")
        return None
    try:
        stats = bs.evaluate_strategies(strategies, logs, n_trials=50, n_jobs=-1)
        logging.info("Strategy evaluation completed successfully.")
        return stats
    except Exception as e:
        logging.error(f"Error during strategy evaluation: {e}")
        return None

def summarize_stats(stats):
    """Calculate and print summary statistics."""
    if stats is None or stats.empty:
        logging.error("No statistics available to summarize.")
        return

    try:
        # Calculate mean values and max drop for each strategy and ticker
        mean_values = stats.groupby(['strategy', 'ticker']).value.mean().unstack()
        max_drops = stats.groupby(['strategy', 'ticker']).dropdown.max().unstack()

        # Calculate overall mean values and max drops for each strategy
        overall_mean = stats.groupby('strategy').value.mean()
        overall_max_drop = stats.groupby('strategy').dropdown.max()

        # Create a summary DataFrame for better visualization
        summary = pd.DataFrame({
            'Mean Value': overall_mean,
            'Max Drop': overall_max_drop
        })

        # Display results
        print("Mean Values by Strategy and Ticker:")
        print(mean_values)
        print("\nMax Drops by Strategy and Ticker:")
        print(max_drops)
        print("\nOverall Strategy Performance Summary:")
        print(summary)

    except Exception as e:
        logging.error(f"Error summarizing stats: {e}")

def generate_quantstats_report(logs, strategies):
    """Generate and save one QuantStats report per strategy."""
    if not logs:
        logging.warning("No logs available to generate reports.")
        return

    for strategy in strategies:
        try:
            combined_data = pd.DataFrame()  # To store combined data for each strategy
            for ticker in TICKERS:
                ticker_data = merged_data[merged_data['ticker'] == ticker]
                combined_data[ticker] = ticker_data['close']

            # Create a QuantStats report for the combined data of this strategy
            qs.extend_pandas()
            report_filename = f'quantstats_report_{strategy.__name__}.html'
            report_path = os.path.join(REPORTS_FOLDER, report_filename)
            qs.reports.html(combined_data, output=report_path, title=f'QuantStats Report for {strategy.__name__}')
            logging.info(f"QuantStats report generated for strategy: {strategy.__name__}")
            logging.info(f"Report saved to: {report_path}")

        except Exception as e:
            logging.error(f"Error generating QuantStats report for strategy {strategy.__name__}: {e}")

def main():
    """Main function to run the strategy evaluations."""
    # Prepare data as a dictionary to match the format expected by evaluate_strategies
    logs = {}
    for ticker in TICKERS:
        ticker_data = merged_data[merged_data['ticker'] == ticker]
        logs[ticker] = ticker_data

    # Evaluate the strategies
    stats = evaluate_strategies(logs, strategies)

    # If stats are available, summarize them
    if stats is not None:
        summarize_stats(stats)
        generate_quantstats_report(logs, strategies)

if __name__ == "__main__":
    main()
