In [1]:
#import libraries
import pandas as pd
import numpy as np
import yfinance as yf
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import datetime
import warnings
warnings.filterwarnings("ignore")



# Define the Transformer model
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    # Normalization and Attention
    x = tf.keras.layers.LayerNormalization(epsilon=1e-6)(inputs)
    x = tf.keras.layers.MultiHeadAttention(key_dim=head_size, num_heads=num_heads, dropout=dropout)(x, x)
    x = tf.keras.layers.Dropout(dropout)(x)
    res = x + inputs

    # Feed Forward Part
    x = tf.keras.layers.LayerNormalization(epsilon=1e-6)(res)
    x = tf.keras.layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(x)
    x = tf.keras.layers.Dropout(dropout)(x)
    x = tf.keras.layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(x)
    return x + res

# Build and train the model
input_shape = (7,1)
head_size = 46
num_heads = 60
ff_dim = 55
num_transformer_blocks = 5
mlp_units = [256]
dropout = 0.14
mlp_dropout = 0.4

def build_model(
    input_shape,
    head_size,
    num_heads,
    ff_dim,
    num_transformer_blocks,
    mlp_units,
    dropout=0,
    mlp_dropout=0,
):
    inputs = tf.keras.Input(shape=input_shape)
    x = inputs
    

    for _ in range(num_transformer_blocks):  # This is what stacks our transformer blocks
        x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)

    x = tf.keras.layers.GlobalAveragePooling1D(data_format="channels_first")(x)

    for dim in mlp_units:
        x = tf.keras.layers.Dense(dim, activation="relu")(x)
        x = tf.keras.layers.Dropout(mlp_dropout)(x)

    outputs = tf.keras.layers.Dense(1, activation="softmax")(x)  # this is a pass-through

    return tf.keras.Model(inputs, outputs)

# Define the learning rate scheduler
def lr_scheduler(epoch, lr, warmup_epochs=30, decay_epochs=100, initial_lr=1e-6, base_lr=1e-3, min_lr=5e-5):
    if epoch <= warmup_epochs:
        pct = epoch / warmup_epochs
        return ((base_lr - initial_lr) * pct) + initial_lr

    if epoch > warmup_epochs and epoch < warmup_epochs+decay_epochs:
        pct = 1 - ((epoch - warmup_epochs) / decay_epochs)
        return ((base_lr - min_lr) * pct) + min_lr

    return min_lr

def fetch_ticker_data(symbol, start_date, end_date):
    """Fetches stock data for a given symbol using yfinance."""
    ticker = yf.Ticker(symbol)
    data = ticker.history(start='2000-01-01', end=end_date)
    return data

def label_data(data):
    # Calculate the percentage change in price from one day to the next
    data['Percentage Change'] = data['Close'].pct_change()
    data['Percentage Change'] = data['Percentage Change'].shift(-1)
    data['Sentiment'] = pd.Series(np.where(data['Percentage Change'] > 0.025, 1, np.where(data['Percentage Change'] < -0.025, -1, 0)), index=data.index)
    # Drop any rows with missing values
    data.dropna(inplace=True)
    data.drop('Percentage Change',axis=1 , inplace=True)
    return data

def train_transformer(symbol_to_fetch,start_date ,end_date,no_model = None):
    #fetching data 
    stock = fetch_ticker_data(symbol_to_fetch, start_date, end_date)
    stock = stock.fillna(method="ffill", axis=0)
    stock = stock.fillna(method="bfill", axis=0)
    # stock.index = stock.index.date

    # Split the data into training and test sets
    train_data_index = np.searchsorted(stock.index.values, np.datetime64(start_date))
    train_data = stock.iloc[:train_data_index]
    test_data = stock.loc[start_date:]
    train_data = label_data(train_data)
    test_data = label_data(test_data)

    #trian & test data
    X_train_data = train_data.iloc[:,:-1]
    y_train_data = train_data.iloc[:,-1]
    X_test_data = test_data.iloc[:,:-1]
    y_test_data = test_data.iloc[:,-1]
    print(len(X_test_data))
    # Normalize the data
    normalizer = MinMaxScaler()
    X_train_data_normalizer = normalizer.fit_transform(X_train_data)
    X_test_data_normalizer = normalizer.transform(X_test_data)

    # # Reshape X_train_data_normalizer
    X_train = X_train_data_normalizer.reshape(X_train_data_normalizer.shape[0], X_train_data_normalizer.shape[1], 1)
    X_test = X_test_data_normalizer.reshape(X_test_data_normalizer.shape[0], X_test_data_normalizer.shape[1], 1)
    if not no_model :
        model = build_model(
            input_shape,
            head_size=head_size,
            num_heads=num_heads,
            ff_dim=ff_dim,
            num_transformer_blocks=num_transformer_blocks,
            mlp_units=mlp_units,
            mlp_dropout=mlp_dropout,
            dropout=dropout,
        )

        model.compile(
            loss="mean_squared_error",
            optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
            metrics=["mean_squared_error"],
        )

        callbacks = [
            tf.keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True),
            tf.keras.callbacks.LearningRateScheduler(lr_scheduler)
        ]

        # model.summary()
        history = model.fit(
            X_train,
            y_train_data,
            validation_split=0.2,
            epochs=100,
            batch_size=20,
            callbacks=callbacks,
        )
        model.save('models/transformer_'+f"{symbol_to_fetch}"+"_model.h5")
        model.save('models/transformer_'+f"{symbol_to_fetch}"+"_model.keras")
        no_model = model
    return no_model,X_test,test_data
    #predictions
def prepare_sentiment_from_transformer(symbol_to_fetch,start_date,end_date):
    try :
        print("entered")
        model = tf.keras.models.load_model('models/transformer_'+f"{symbol_to_fetch}"+"_model.keras")
        print("passed")
        _,X_test,test_data = train_transformer(symbol_to_fetch = symbol_to_fetch, start_date = start_date, end_date = end_date,no_model=model)
    
    except:
        model,X_test,test_data = train_transformer(symbol_to_fetch = symbol_to_fetch, start_date = start_date, end_date = end_date)
    y_pred = model.predict(X_test) # this is the sentiment data 

    test_data['transformer_sentiment'] = y_pred

    test_data.index = test_data.index.date
    test_data.to_csv('data/transformer_sentiment.csv')
    return test_data,'data/transformer_sentiment.csv'
    """next steps :  we need to additionally train the model if model is already present
    or take nearly 30 stocks and train the model with the huge data 
    or take every 5 mins data nad trian with it, and at last mix the test data with day wise"""

In [2]:
# !pip install transformers
from transformers import pipeline
import sys
sys.path.append("D:\krishna\msdsm//trimister 6\Project\KrishnaProject\AlgoTrading")
from alpaca.client import AlpacaNewsFetcher
from openai import OpenAI


class NewsSentimentAnalysis:
    """
  A class for sentiment analysis of news articles using the Transformers library.

  Attributes:
  - classifier (pipeline): Sentiment analysis pipeline from Transformers.
  """

    def __init__(self):
        """
    Initializes the NewsSentimentAnalysis object.
    """
        self.classifier = pipeline('sentiment-analysis')

    def analyze_sentiment(self, news_article):
        """
    Analyzes the sentiment of a given news article.

    Args:
    - news_article (dict): Dictionary containing 'summary', 'headline', and 'created_at' keys.

    Returns:
    - dict: A dictionary containing sentiment analysis results.
    """
        summary = news_article['summary']
        title = news_article['title']
        timestamp = news_article['timestamp']

        relevant_text = summary + title
        sentiment_result = self.classifier(relevant_text)

        analysis_result = {
            'timestamp': timestamp,
            'title': title,
            'summary': summary,
            'sentiment': sentiment_result
        }

        return analysis_result
    
    def analyze_sentiment_using_mistral(self, symbol,news_article):
        summary = news_article['summary']
        title = news_article['title']
        timestamp = news_article['timestamp']
        relevant_text = summary + title
        prompt = f'''[INST]
        You are a sentiment analysis model that can classify news articles as having a positive or negative sentiment. You will be given a news article, and your task is to determine its sentiment based on the content of the article. Please provide a one-word answer, either "POSITIVE" or "NEGATIVE"
        [/INST]

        [userINST]
        {relevant_text}
        [userINST]

        "[Insert one-word sentiment classification here: "POSITIVE" or "NEGATIVE"]"
         '''


        completion = client.chat.completions.create(
            model="TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
            messages=[
                {"role": "system", "content": prompt},
                {"role": "user", "content": relevant_text}
            ],
            temperature=0.7,
        )

        assistant_response = completion.choices[0].message.content.strip()
        sentiment_result = assistant_response.split()[0]
        analysis_result = {
        'timestamp': timestamp,
        'title': title,
        'summary': summary,
        'sentiment': sentiment_result
    }

        return analysis_result



def do_sentiment_analysis(symbol,start_date,end_date):
    news_fetcher = AlpacaNewsFetcher()

    # Fetch news for AAPL from 2021-01-01 to 2021-12-31
    news_data = news_fetcher.fetch_news(symbol=symbol, start_date=start_date, end_date=end_date)

    
    import pandas as pd
    complete_news = []

    # Initialize the NewsSentimentAnalysis object
    news_sentiment_analyzer = NewsSentimentAnalysis()

    # Assume 'news_data' is a list of news articles (each as a dictionary)
    for article in news_data:

        sentiment_analysis_result = news_sentiment_analyzer.analyze_sentiment(article)
        complete_news.append({'timestamp': sentiment_analysis_result["timestamp"],'title': sentiment_analysis_result["title"],'sentiment': sentiment_analysis_result['sentiment'][0]['label']})
    pd.DataFrame(complete_news).set_index('timestamp').to_csv('data/sentiment_analysis.csv')

    return pd.DataFrame(complete_news),'data/sentiment_analysis.csv'

def do_sentiment_analysis_using_mistral(symbol,start_date,end_date):
    news_fetcher = AlpacaNewsFetcher()

    # Fetch news for AAPL from 2021-01-01 to 2021-12-31
    news_data = news_fetcher.fetch_news(symbol=symbol, start_date=start_date, end_date=end_date)

    import pandas as pd
    complete_news = []

    # Initialize the NewsSentimentAnalysis object
    news_sentiment_analyzer = NewsSentimentAnalysis()

    # Assume 'news_data' is a list of news articles (each as a dictionary)
    for article in news_data:

        sentiment_analysis_result = news_sentiment_analyzer.analyze_sentiment(article)
        complete_news.append({'timestamp': sentiment_analysis_result["timestamp"],'title': sentiment_analysis_result["title"],'sentiment': sentiment_analysis_result['sentiment'][0]['label']})
    pd.DataFrame(complete_news).set_index('timestamp').to_csv('data/sentiment_analysis.csv')

    return 'data/sentiment_analysis.csv'



In [3]:
import yfinance as yf
import pandas as pd
# from sentiment_analysis.sentiment_analysis_pipeline import do_sentiment_analysis
# from sentiment_analysis.sentiment_analysis_trasformer_model import prepare_sentiment_from_transformer

class StockDataProcessor:
    def __init__(self, stock_ticker, start_date, end_date, news_sentiment,ohlc_sentiment):
        self.stock_ticker = stock_ticker
        self.start_date = start_date
        self.end_date = end_date
        # self.sentiment_data_path = sentiment_data_path
        self.news_sentiment = news_sentiment
        self.ohlc_sentiment = ohlc_sentiment
        self.data = self.download_stock_data()

    def download_stock_data(self):
        """
        Download stock data from Yahoo Finance.

        Returns:
            pd.DataFrame: Stock data.
        """
        return yf.download(self.stock_ticker, start=self.start_date, end=self.end_date)

    def preprocess_sentiment_data(self):
        """
        Preprocess sentiment data and merge with stock data.

        Returns:
            pd.DataFrame: Merged DataFrame.
        """
        if self.news_sentiment :
            news_sentiment_data,_ = do_sentiment_analysis(self.stock_ticker, self.start_date, self.end_date)
            # Create a column for buy/sell signals based on sentiment
            news_sentiment_data['signal'] = 0
            news_sentiment_data.loc[news_sentiment_data['sentiment'] == 'POSITIVE', 'signal'] = 1
            news_sentiment_data.loc[news_sentiment_data['sentiment'] == 'NEGATIVE', 'signal'] = -1

            # Assuming df is your existing DataFrame
            news_sentiment_data['timestamp'] = pd.to_datetime(news_sentiment_data['timestamp']).dt.date

            # Group by day and sum up 'Signal' values
            sentiment_daily_sum = news_sentiment_data.groupby('timestamp')['signal'].sum().reset_index()
            sentiment_daily_sum = sentiment_daily_sum.rename(columns={'timestamp': 'date', 'signal': 'signal'})

            sentiment_daily_sum['date'] = pd.to_datetime(sentiment_daily_sum['date'])

            sentiment_daily_sum.to_csv('data/sentiment_daily_sum.csv')
            # Merge DataFrames on 'Date'
            merged_df = pd.merge(self.data, sentiment_daily_sum, left_index=True, right_on='date', how='left')
            merged_df.set_index('date', inplace=True)
        
        #sentiment of transformer model
        if self.ohlc_sentiment:
            transformer_sentiment_data,_ = prepare_sentiment_from_transformer(self.stock_ticker, self.start_date, self.end_date)
            print(transformer_sentiment_data.columns)
            transformer_sentiment_data = transformer_sentiment_data["transformer_sentiment"].copy()
            # transformer_sentiment_data['date'] = transformer_sentiment_data.iloc[:,0]
            if self.news_sentiment:
                merged_df = pd.merge(merged_df, transformer_sentiment_data,left_index=True, right_index = True, how ='left')
            else :
                merged_df = pd.merge(self.data, transformer_sentiment_data, left_index=True, right_index = True, how='left')
                # merged_df.set_index('date', inplace=True)
        
            

        merged_df.to_csv('data/merged_df.csv')

        return merged_df


In [4]:
import backtrader as bt

class Customstrategy(bt.Strategy):
    """
    Custom Backtrader strategy with advanced technical indicators and sentiment analysis.

    Parameters:
    - fast_ma (int): Period for the fast moving average.
    - slow_ma (int): Period for the slow moving average.
    - rsi_period (int): Period for the Relative Strength Index (RSI).
    - rsi_oversold (float): RSI level considered as oversold for buying.
    - rsi_overbought (float): RSI level considered as overbought for selling.
    - bollinger_window (int): Period for Bollinger Bands.
    - bollinger_dev (float): Standard deviation factor for Bollinger Bands.
    - ema_window (int): Period for Exponential Moving Average (EMA).
    - envelopes_ema_window (int): Period for EMA used in Envelopes indicator.
    - envelopes_percentage (float): Percentage for Envelopes indicator.
    - macd_short_window (int): Short window period for MACD.
    - macd_long_window (int): Long window period for MACD.
    - macd_signal_window (int): Signal window period for MACD.
    - stochastic_k_window (int): Window period for Stochastic Oscillator %K.
    - stochastic_d_window (int): Window period for Stochastic Oscillator %D.
    """

    params = (
        ("fast_ma", 20),
        ("slow_ma", 50),
        ("rsi_period", 14),
        ("rsi_oversold", 30),
        ("rsi_overbought", 70),
        ("bollinger_window", 20),
        ("bollinger_dev", 2),
        ("ema_window", 20),
        ("envelopes_ema_window", 20),
        ("envelopes_percentage", 5),
        ("macd_short_window", 12),
        ("macd_long_window", 26),
        ("macd_signal_window", 9),
        ("stochastic_k_window", 14),
        ("stochastic_d_window", 3),
    )

    def __init__(self, indicators=None):
        """
        Initializes the AdvancedStrategy.

        Creates and initializes the required technical indicators and sentiment data based on the selected indicators:
        - fast_ma: Fast Simple Moving Average (SMA)
        - slow_ma: Slow Simple Moving Average (SMA)
        - rsi: Relative Strength Index (RSI)
        - bollinger: Bollinger Bands
        - ema: Exponential Moving Average (EMA)
        - macd: Moving Average Convergence Divergence (MACD)
        - stochastic: Stochastic Oscillator
        - envelopes: Envelopes
        """
        self.indicators = indicators or {}
        
        if "ma" in self.indicators:
            self.fast_ma = bt.indicators.SimpleMovingAverage(self.data.close, period=self.params.fast_ma)
        if "ma" in self.indicators:
            self.slow_ma = bt.indicators.SimpleMovingAverage(self.data.close, period=self.params.slow_ma)
        if "rsi" in self.indicators:
            self.rsi = bt.indicators.RelativeStrengthIndex(self.data.close, period=self.params.rsi_period)
        if "bollinger" in self.indicators:
            self.bollinger = bt.indicators.BollingerBands(self.data.close, period=self.params.bollinger_window, devfactor=self.params.bollinger_dev)
        if "ema" in self.indicators:
            self.ema = bt.indicators.ExponentialMovingAverage(self.data.close, period=self.params.ema_window)
        if "macd" in self.indicators:
            self.macd = bt.indicators.MACD(self.data.close, period_me1=self.params.macd_short_window, period_me2=self.params.macd_long_window, period_signal=self.params.macd_signal_window)
        if "stochastic" in self.indicators:
            self.stochastic = bt.indicators.Stochastic(self.data, period=self.params.stochastic_k_window, period_dfast=self.params.stochastic_d_window)
        if "envelopes" in self.indicators:
            self.envelopes = bt.indicators.Envelope(self.data.close, period=self.params.envelopes_ema_window, devfactor=self.params.envelopes_percentage/100)

        self.sentiment = self.datas[0].signal if len(self.datas) > 0 else None

    def next(self):
        """
        Executes the trading logic on each iteration.
        """
        buy_signal = sell_signal = 0

        if "ma" in self.indicators and self.fast_ma > self.slow_ma:
            buy_signal += 1
        else:
            sell_signal += 1

        if "rsi" in self.indicators and self.rsi[0] < self.params.rsi_oversold:
            buy_signal += 1
        elif "rsi" in self.indicators and self.rsi[0] > self.params.rsi_overbought:
            sell_signal += 1

        if "bollinger" in self.indicators and self.data.close[0] < self.bollinger.lines.bot[0]:
            buy_signal += 1
        elif "bollinger" in self.indicators and self.data.close[0] > self.bollinger.lines.top[0]:
            sell_signal += 1

        if "ema" in self.indicators and self.data.close[0] > self.ema[0]:
            buy_signal += 1
        elif "ema" in self.indicators and self.data.close[0] < self.ema[0]:
            sell_signal += 1

        if "macd" in self.indicators and self.macd[0] > 0:
            buy_signal += 1
        elif "macd" in self.indicators and self.macd[0] < 0:
            sell_signal += 1

        if "stochastic" in self.indicators and self.stochastic[0] < self.stochastic.lines.d[0]:
            buy_signal += 1
        elif "stochastic" in self.indicators and self.stochastic[0] > self.stochastic.lines.d[0]:
            sell_signal += 1

        if "envelopes" in self.indicators and self.data.close[0] > self.envelopes.lines.erveh[0]:
            sell_signal += 1
        elif "envelopes" in self.indicators and self.data.close[0] < self.envelopes.lines.ervlo[0]:
            buy_signal += 1
        print(self.sentiment)
        if "news_sentiment" in self.indicators and self.sentiment is not None and self.sentiment[0] > 0:
            buy_signal += 1
        elif "news_sentiment" in self.indicators and self.sentiment is not None and self.sentiment[0] < 0:
            sell_signal += 1
        
        if "transformer_sentiment" in self.indicators and self.sentiment is not None and self.sentiment[1] > 0:
            buy_signal += 1
        else :
            sell_signal += 1

        if buy_signal > sell_signal:
            self.buy()
        elif sell_signal > buy_signal:
            self.sell()


In [5]:
import backtrader as bt

# from strategies.technical_with_sentiment_strategy.optimized_strategy import OptimizedStrategy
# from strategies.technical_with_sentiment_strategy.custom_strategy import Customstrategy
# from strategies.technical_with_sentiment_strategy.sentiment_data import SentimentData
class SentimentData(bt.feeds.GenericCSVData):
    """
    Custom Backtrader data feed class for sentiment data.

    Parameters:
    - dtformat (str): Date format for parsing the date column.
    - date (int): Column index for the date in the CSV file.
    - signal (int): Column index for the sentiment signal in the CSV file.
    - transformer_sentiment (int): Column index for the sentiment signal from the transformer model in the CSV file.
    - openinterest (int): Column index for the open interest in the CSV file.
    """

    lines = ('signal', 'transformer_sentiment')

    params = (
        ('dtformat', '%Y-%m-%d'),
        ('date', 0),
        ('signal', 7),
        ('transformer_sentiment', 8),
        ('openinterest', -1)
    )



class BacktestRunner:
    @staticmethod
    def run_backtest(data, stock_ticker, start_date, end_date,indicators):
        """
        Run Backtrader backtest with the provided data.

        Args:
            data (pd.DataFrame): Merged stock and sentiment data.
            stock_ticker (str): Stock Ticker name.
            start_date (str): Start date for backtesting.
            end_date (str): End date for backtesting.
        """
        cerebro = bt.Cerebro()

        # Convert data to Backtrader format
        data_feed = SentimentData(dataname= data)


        # Add data to cerebro
        cerebro.adddata(data_feed)

        # # Add strategy with parameters
        # cerebro.addstrategy(OptimizedStrategy)
        # Add strategy with parameters
        cerebro.addstrategy(Customstrategy, indicators=indicators)

        # Set initial cash and commission
        cerebro.broker.set_cash(100000)
        cerebro.broker.setcommission(commission=0.001)

        # Add built-in analyzers
        cerebro.addanalyzer(bt.analyzers.Returns)
        cerebro.addanalyzer(bt.analyzers.SharpeRatio, riskfreerate=0.0)
        cerebro.addanalyzer(bt.analyzers.DrawDown)
        cerebro.addanalyzer(bt.analyzers.TradeAnalyzer)
        cerebro.addanalyzer(bt.analyzers.SQN)
        cerebro.addanalyzer(bt.analyzers.VWR)
        cerebro.addanalyzer(bt.analyzers.PyFolio)

        thestrats = cerebro.run()
        thestrat = thestrats[0]

        # Get results from analyzers
        returns = thestrat.analyzers.returns.get_analysis()
        # returns = returns - 0.005
        sharpe_ratio = thestrat.analyzers.sharperatio.get_analysis()
        drawdown = thestrat.analyzers.drawdown.get_analysis()
        trades = thestrat.analyzers.tradeanalyzer.get_analysis()
        sqn = thestrat.analyzers.sqn.get_analysis()
        vwr = thestrat.analyzers.vwr.get_analysis()
        pyfolio = thestrat.analyzers.getbyname('pyfolio')

        pyfolio_returns, positions, transactions, gross_lev = pyfolio.get_pf_items()

        # Print the backtesting report
        print("\n--- Backtesting Report ---")
        print(f"Indicators: {', '.join(indicators)}")
        print("Stock Ticker: {}".format(stock_ticker))
        print("Start Date: {}".format(start_date))
        print("End Date: {}".format(end_date))
        print("Initial Portfolio Value: ${:.2f}".format(cerebro.broker.startingcash))
        print("Final Portfolio Value: ${:.2f}".format(cerebro.broker.getvalue()))
        print("Total Return: {:.2f}%".format(returns['rtot'] * 100))
        print("Annualized Return: {:.2f}%".format(returns['ravg'] * 100 * 252))  # Assuming 252 trading days in a year
        print("Max Drawdown: {:.2f}%".format(drawdown['max']['drawdown'] * 100))

        # Print Additional Metrics
        print("\n--- Additional Metrics ---")
        print("{:<15} {:<15} {:<15}".format("Value at Risk", "VWR", "Total Trades"))
        print("{:<15.2f} {:<15.4f} {:<15}".format(vwr['vwr'], vwr['vwr'], trades.total.total))

        # Create a dictionary to store the results
        results = {
            'Indicators': ', '.join(indicators),
            'Stock Ticker': stock_ticker,
            'Start Date': start_date,
            'End Date': end_date,
            'Initial Portfolio Value': cerebro.broker.startingcash,
            'Final Portfolio Value': cerebro.broker.getvalue(),
            'Total Return': returns['rtot'] * 100,
            'Annualized Return': returns['ravg'] * 100 * 252,
            'Max Drawdown': drawdown['max']['drawdown'] * 100,
            'Value at Risk': vwr['vwr'],
            'VWR': vwr['vwr'],
            'Total Trades': trades.total.total
        }
        return results

In [11]:
import os
import pandas as pd
import itertools
import warnings
warnings.filterwarnings("ignore")
# from processor.stock_data_processor import StockDataProcessor
# from runner.backtest_runner import BacktestRunner
# from sentiment_analysis.sentiment_analysis_pipeline import do_sentiment_analysis
# from sentiment_analysis.sentiment_analysis_trasformer_model import preprae_sentiment_from_transformer

if __name__ == '__main__':
    # Configuration
    STOCK_TICKER = 'MSFT'
    START_DATE = '2023-01-01'
    END_DATE = '2023-12-12'
    news_sentiment = True
    ohlc_sentiment = True
    SENTIMENT_DATA_PATH = 'data/stock_sentiment_data.csv'



    # Create output directory
    os.makedirs('output', exist_ok=True)

    # Initialize the StockDataProcessor
    processor = StockDataProcessor(STOCK_TICKER, START_DATE, END_DATE, news_sentiment,ohlc_sentiment)

    # Download stock data
    stock_data = processor.download_stock_data()
    # Preprocess sentiment data and merge with stock data
    merged_df = processor.preprocess_sentiment_data()
    # Run backtest
    # BacktestRunner.run_backtest('data/merged_df.csv', STOCK_TICKER, START_DATE, END_DATE)


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


entered
passed
236
Index(['Open', 'High', 'Low', 'Close', 'Volume', 'Dividends', 'Stock Splits',
       'Sentiment', 'transformer_sentiment'],
      dtype='object')


In [15]:
df = merged_df.copy()

In [19]:
merged_df.fillna(0,inplace = True)

In [20]:
merged_df

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,signal,transformer_sentiment
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2023-01-03,243.080002,245.750000,237.399994,239.580002,237.036011,25740000,-2,1.0
2023-01-04,232.279999,232.869995,225.960007,229.100006,226.667282,50623400,-10,1.0
2023-01-05,227.199997,227.550003,221.759995,222.309998,219.949371,39585600,-9,1.0
2023-01-06,223.000000,225.759995,219.350006,224.929993,222.541550,43613600,-2,1.0
2023-01-09,226.449997,231.240005,226.410004,227.119995,224.708313,27369800,-6,1.0
...,...,...,...,...,...,...,...,...
2023-12-05,366.450012,373.079987,365.619995,372.519989,371.832367,23065000,4,1.0
2023-12-06,373.540009,374.179993,368.029999,368.799988,368.119232,21182100,2,1.0
2023-12-07,368.230011,371.450012,366.320007,370.950012,370.265289,23118900,-2,1.0
2023-12-08,369.200012,374.459991,368.230011,374.230011,373.539246,20144800,-1,1.0


In [21]:

# Define the list of indicators to use
indicators = ['rsi','bollinger','macd','ema','fast_ma',"news_sentiment",'transformer_sentiment']

# Generate all possible combinations of indicators
indicator_combinations = list(itertools.chain.from_iterable(itertools.combinations(indicators, r) for r in range(1, len(indicators)+1)))

results_df = []
# Run backtest for each combination of indicators
for indicators in indicator_combinations:
    print(f"Running backtest for indicators: {', '.join(indicators)}")
    results = BacktestRunner.run_backtest('data/merged_df.csv', STOCK_TICKER, START_DATE, END_DATE, indicators)
    
    # Convert the results dictionary to a DataFrame
    results_df.append(results)

    # Save the results DataFrame to an Excel file
    pd.DataFrame(results_df).to_excel('output/backtest_results_'+STOCK_TICKER+'_'+str(START_DATE)+'_'+str(END_DATE)+'.xlsx')


Running backtest for indicators: rsi
<backtrader.linebuffer.LineBuffer object at 0x0000027F5B641280>
<backtrader.linebuffer.LineBuffer object at 0x0000027F5B641280>
<backtrader.linebuffer.LineBuffer object at 0x0000027F5B641280>
<backtrader.linebuffer.LineBuffer object at 0x0000027F5B641280>
<backtrader.linebuffer.LineBuffer object at 0x0000027F5B641280>
<backtrader.linebuffer.LineBuffer object at 0x0000027F5B641280>
<backtrader.linebuffer.LineBuffer object at 0x0000027F5B641280>
<backtrader.linebuffer.LineBuffer object at 0x0000027F5B641280>
<backtrader.linebuffer.LineBuffer object at 0x0000027F5B641280>
<backtrader.linebuffer.LineBuffer object at 0x0000027F5B641280>
<backtrader.linebuffer.LineBuffer object at 0x0000027F5B641280>
<backtrader.linebuffer.LineBuffer object at 0x0000027F5B641280>
<backtrader.linebuffer.LineBuffer object at 0x0000027F5B641280>
<backtrader.linebuffer.LineBuffer object at 0x0000027F5B641280>
<backtrader.linebuffer.LineBuffer object at 0x0000027F5B641280>
<ba

IndexError: array index out of range