In [None]:
import pandas as pd
import numpy as np

# Assume df and dfm are already loaded

# Data processing module
def process_data(df, dfm, output_path='processed_data.xlsx'):
    """Attach to every event row the first price bar at or after it.

    Parameters
    ----------
    df : pandas.DataFrame
        Event rows. Must contain a ``createDate`` column parseable by
        ``pd.to_datetime``. Modified in place: ``time`` and ``trade_time``
        columns are added.
    dfm : pandas.DataFrame
        Price bars. Must contain ``asoftime`` and ``close``. Modified in
        place: ``asoftime`` is converted to datetime (later cells read the
        converted column, so this side effect is kept).
    output_path : str or None, default 'processed_data.xlsx'
        Where the result is written with ``DataFrame.to_excel``. Pass ``None``
        to skip writing.

    Returns
    -------
    pandas.DataFrame
        ``df`` plus ``time`` (``createDate`` rounded up to the minute),
        ``trade_time`` (smallest ``asoftime`` that is >= ``time``, ``NaT``
        when there is none) and ``close`` (the bar close at ``trade_time``).
    """
    # Align time: round every event up to the next whole minute.
    # 'min' is the supported spelling of the deprecated 'T' alias.
    df['time'] = pd.to_datetime(df['createDate']).dt.ceil('min')
    dfm['asoftime'] = pd.to_datetime(dfm['asoftime'])

    # Find the nearest trading time with one binary search over the sorted bar
    # times instead of rescanning every bar for every event.
    bar_times = dfm['asoftime'].dropna().drop_duplicates().sort_values(ignore_index=True)
    # One extra trailing slot (NaT) receives events that have no later bar.
    lookup = bar_times.reindex(range(len(bar_times) + 1))

    event_times = df['time']
    has_time = event_times.notna().to_numpy()
    slot = np.full(len(df), len(bar_times), dtype=np.intp)
    slot[has_time] = np.searchsorted(
        bar_times.to_numpy(), event_times.to_numpy()[has_time], side='left'
    )
    trade_time = lookup.iloc[slot]
    trade_time.index = df.index
    df['trade_time'] = trade_time

    # Merge price data
    df = pd.merge(df, dfm[['asoftime', 'close']], left_on='trade_time', right_on='asoftime', how='left')
    df = df.drop('asoftime', axis=1)

    # Save processed data to Excel
    if output_path is not None:
        df.to_excel(output_path, index=False)

    return df

# Execute data processing.
# `df` and `dfm` must already exist in the kernel. `df` is rebound to the
# processed frame, so re-running this cell passes the already-processed frame
# back into process_data.
df = process_data(df, dfm)
print("Data processing completed. Results saved to 'processed_data.xlsx'.")

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import timedelta
import plotly.graph_objects as go
from plotly.subplots import make_subplots

class SentimentBacktest:
    """Backtest a sentiment-threshold strategy that holds at most one position.

    The constructor runs the whole backtest immediately and stores:

    * ``trades``        -- one row per closed trade: entry/exit time and price,
      ``position`` (1 = long, -1 = short), ``exit_reason`` and ``return``.
    * ``metrics``       -- summary statistics over all trades.
    * ``long_metrics``  -- the same statistics over long trades only.
    * ``short_metrics`` -- the same statistics over short trades only.

    Rows of ``df`` are processed in order and rows with a null ``close`` are
    skipped. Entry and exit rules are evaluated only when a row of ``df`` is
    processed; a position still open after the last row is closed at the last
    row of ``dfm``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``time``, ``trade_time``, ``close`` and
        ``FinBERT_sentiment_title``.
    dfm : pandas.DataFrame
        Must contain ``asoftime`` and ``close``; only its last row is read.
    thres1 : float
        A position is opened long when sentiment > ``thres1``; a short is
        closed ("Reversal exit") when sentiment >= ``thres1``.
    thres2 : float
        A position is opened short when sentiment < ``thres2``; a long is
        closed ("Reversal exit") when sentiment <= ``thres2``.
    T : int or float
        Holding limit in minutes ("Time exit").
    r_pt : float
        Trade return at or above which the position is closed ("Profit exit").
    r_sl : float
        Trade return at or below which the position is closed
        ("Stop-loss exit"), e.g. ``-0.01``.
    """

    # Sentiment score column read from each row of ``df``.
    _SENTIMENT_COLUMN = 'FinBERT_sentiment_title'
    # Fixed column order so that a run without trades still yields every column.
    _TRADE_COLUMNS = ['entry_time', 'exit_time', 'entry_price', 'exit_price',
                      'position', 'exit_reason']

    def __init__(self, df, dfm, thres1, thres2, T, r_pt, r_sl):
        self.df = df
        self.dfm = dfm
        self.thres1 = thres1
        self.thres2 = thres2
        self.T = T
        self.r_pt = r_pt
        self.r_sl = r_sl

        self.trades = self._backtest()
        # Per-trade returns are stored once on the trade log; the plots and
        # trades.csv read this column.
        self.trades['return'] = self._trade_returns(self.trades)
        self.metrics = self._calculate_metrics(self.trades)
        self.long_metrics = self._calculate_metrics(self.trades[self.trades['position'] == 1])
        self.short_metrics = self._calculate_metrics(self.trades[self.trades['position'] == -1])

    def _backtest(self):
        """Walk through ``self.df`` and return the trade log as a DataFrame."""
        closed_trades = []
        position = 0  # 1 = long, -1 = short, 0 = flat
        entry_price = 0
        entry_time = None

        for _, row in self.df.iterrows():
            price = row['close']
            if pd.isnull(price):
                # Without a price the row can neither open nor close a trade.
                continue

            sentiment = row[self._SENTIMENT_COLUMN]

            # Check if we need to close the position
            if position != 0:
                reason = self._exit_reason(row, sentiment, position, entry_price, entry_time)
                if reason is None:
                    continue
                closed_trades.append(
                    self._make_trade(entry_time, row['trade_time'], entry_price, price, position, reason)
                )
                position = 0
                if reason != 'Reversal exit':
                    # Only a reversal may re-enter on the row that closed the trade.
                    continue

            # Open (or reverse into) a position; only rows whose 'time'
            # equals their 'trade_time' may enter.
            if row['time'] == row['trade_time']:
                signal = self._entry_signal(sentiment)
                if signal != 0:
                    position = signal
                    entry_price = price
                    entry_time = row['trade_time']

        # Close the last position at the final row of the price data
        if position != 0:
            closed_trades.append(
                self._make_trade(entry_time, self.dfm['asoftime'].iloc[-1], entry_price,
                                 self.dfm['close'].iloc[-1], position, 'End of backtest')
            )

        return pd.DataFrame(closed_trades, columns=self._TRADE_COLUMNS)

    def _exit_reason(self, row, sentiment, position, entry_price, entry_time):
        """Return the exit reason for an open position on ``row``, or None to keep holding.

        The checks run in priority order: time, profit, stop-loss, reversal.
        """
        if row['trade_time'] >= entry_time + timedelta(minutes=self.T):
            return 'Time exit'

        ratio = row['close'] / entry_price
        open_return = ratio - 1 if position == 1 else 1 - ratio
        if open_return >= self.r_pt:
            return 'Profit exit'
        if open_return <= self.r_sl:
            return 'Stop-loss exit'

        if (position == 1 and sentiment <= self.thres2) or \
           (position == -1 and sentiment >= self.thres1):
            return 'Reversal exit'
        return None

    def _entry_signal(self, sentiment):
        """Return 1 (long), -1 (short) or 0 (no entry) for a sentiment score."""
        if sentiment > self.thres1:
            return 1
        if sentiment < self.thres2:
            return -1
        return 0

    @staticmethod
    def _make_trade(entry_time, exit_time, entry_price, exit_price, position, exit_reason):
        """Build one trade-log record."""
        return {
            'entry_time': entry_time,
            'exit_time': exit_time,
            'entry_price': entry_price,
            'exit_price': exit_price,
            'position': position,
            'exit_reason': exit_reason
        }

    @staticmethod
    def _trade_returns(trades):
        """Return per-trade returns: exit/entry - 1 for longs, 1 - exit/entry otherwise."""
        ratio = trades['exit_price'].astype(float) / trades['entry_price'].astype(float)
        values = np.where(trades['position'] == 1, ratio - 1, 1 - ratio)
        return pd.Series(values, index=trades.index, dtype=float)

    def _calculate_metrics(self, trades):
        """Summarise a trade log; ``trades`` itself is not modified.

        Returns a dict with:

        * ``Total Return``  -- compounded return over all trades (0 with no trades).
        * ``Sharpe Ratio``  -- sqrt(252) * mean / std of per-trade returns; NaN
          when the std is zero or undefined (fewer than two trades).
        * ``Max Drawdown``  -- largest relative fall of the compounded equity
          curve below its running peak; the starting equity of 1.0 counts as
          the first peak, so a losing first trade is a drawdown.
        * ``Win Rate``      -- share of trades with a positive return.
        * ``Profit Factor`` -- summed gains / summed losses; inf with gains and
          no losses, NaN with neither.
        """
        returns = self._trade_returns(trades)

        total_return = (1 + returns).prod() - 1

        volatility = returns.std()
        if pd.notna(volatility) and volatility > 0:
            sharpe_ratio = np.sqrt(252) * returns.mean() / volatility
        else:
            sharpe_ratio = np.nan

        equity = (1 + returns).cumprod()
        if len(equity):
            running_peak = equity.cummax().clip(lower=1.0)
            max_drawdown = ((running_peak - equity) / running_peak).max()
        else:
            max_drawdown = 0.0

        win_rate = (returns > 0).mean()

        gross_profit = returns[returns > 0].sum()
        gross_loss = abs(returns[returns < 0].sum())
        if gross_loss > 0:
            profit_factor = gross_profit / gross_loss
        elif gross_profit > 0:
            profit_factor = np.inf
        else:
            profit_factor = np.nan

        return {
            'Total Return': total_return,
            'Sharpe Ratio': sharpe_ratio,
            'Max Drawdown': max_drawdown,
            'Win Rate': win_rate,
            'Profit Factor': profit_factor
        }

    def plot_cumulative_returns(self):
        """Plot compounded strategy returns with entry/exit markers and save a PNG.

        Adds a ``cumulative_return`` column to ``self.trades`` and writes
        'cumulative_returns.png'. Does nothing when there are no trades.
        """
        if self.trades.empty:
            print("No trades to plot.")
            return

        self.trades['cumulative_return'] = (1 + self.trades['return']).cumprod()

        plt.figure(figsize=(12, 6))
        sns.lineplot(x='exit_time', y='cumulative_return', data=self.trades)
        plt.title('Cumulative Returns')
        plt.xlabel('Date')
        plt.ylabel('Cumulative Return')

        # Green markers for long trades, red for short; '^' = entry, 'v' = exit.
        for _, trade in self.trades.iterrows():
            color = 'g' if trade['position'] == 1 else 'r'
            plt.scatter(trade['entry_time'], trade['cumulative_return'], color=color, marker='^')
            plt.scatter(trade['exit_time'], trade['cumulative_return'], color=color, marker='v')

        plt.tight_layout()
        plt.savefig('cumulative_returns.png')
        plt.show()
        plt.close()

    def plot_interactive_cumulative_returns(self):
        """Plot compounded strategy returns with plotly and save an HTML file.

        Adds a ``cumulative_return`` column to ``self.trades`` and writes
        'interactive_cumulative_returns.html'. Does nothing when there are no
        trades.
        """
        if self.trades.empty:
            print("No trades to plot.")
            return

        self.trades['cumulative_return'] = (1 + self.trades['return']).cumprod()

        fig = make_subplots(specs=[[{"secondary_y": True}]])

        fig.add_trace(
            go.Scatter(x=self.trades['exit_time'], y=self.trades['cumulative_return'], name="Cumulative Return"),
            secondary_y=False,
        )

        # One marker trace per trade so each gets its own legend entry and hover text.
        for number, (_, trade) in enumerate(self.trades.iterrows(), start=1):
            color = 'green' if trade['position'] == 1 else 'red'
            symbol = 'triangle-up' if trade['position'] == 1 else 'triangle-down'

            fig.add_trace(
                go.Scatter(
                    x=[trade['entry_time'], trade['exit_time']],
                    y=[trade['cumulative_return'], trade['cumulative_return']],
                    mode='markers',
                    marker=dict(color=color, symbol=symbol, size=10),
                    name=f"Trade {number}",
                    text=[f"Entry: {trade['entry_time']}<br>Price: {trade['entry_price']:.2f}",
                          f"Exit: {trade['exit_time']}<br>Price: {trade['exit_price']:.2f}<br>Return: {trade['return']:.2%}<br>Reason: {trade['exit_reason']}"],
                    hoverinfo='text'
                ),
                secondary_y=False,
            )

        fig.update_layout(
            title_text="Cumulative Returns with Trade Points",
            xaxis_title="Date",
            yaxis_title="Cumulative Return",
            hovermode="closest"
        )

        fig.write_html("interactive_cumulative_returns.html")
        fig.show()

    def save_results(self):
        """Write the trade log and the three metric tables to CSV, then draw both plots."""
        self.trades.to_csv('trades.csv', index=False)
        pd.DataFrame(self.metrics, index=[0]).to_csv('metrics.csv', index=False)
        pd.DataFrame(self.long_metrics, index=[0]).to_csv('long_metrics.csv', index=False)
        pd.DataFrame(self.short_metrics, index=[0]).to_csv('short_metrics.csv', index=False)
        self.plot_cumulative_returns()
        self.plot_interactive_cumulative_returns()

    def print_results(self):
        """Print the overall, long-only and short-only metric dictionaries."""
        print("Overall Metrics:")
        print(self.metrics)
        print("\nLong Trades Metrics:")
        print(self.long_metrics)
        print("\nShort Trades Metrics:")
        print(self.short_metrics)

In [None]:
# Reload the processed events from 'processed_data.xlsx'.
# NOTE(review): `dfm` is not loaded in this cell; it must still be in the kernel.
df = pd.read_excel('processed_data.xlsx')

# Initialize and run the backtest (the constructor runs it immediately).
# NOTE(review): this call passes `dfm` as the second positional argument; a
# later cell redefines SentimentBacktest with `model` in that position, so
# this cell has to run before that redefinition.
backtest = SentimentBacktest(df, dfm, thres1=0.5, thres2=-0.5, T=60, r_pt=0.02, r_sl=-0.01)

# Save and print results (CSV files, a PNG and an HTML plot are written to
# the working directory).
backtest.save_results()
backtest.print_results()

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import timedelta
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import time

class SentimentBacktest:
    """Backtest a sentiment-threshold strategy that holds at most one position.

    The constructor runs the whole backtest immediately (printing progress and
    the elapsed time) and stores:

    * ``trades``        -- one row per closed trade: entry/exit time and price,
      ``position`` (1 = long, -1 = short), ``exit_reason`` and ``return``.
    * ``metrics``       -- summary statistics over all trades.
    * ``long_metrics``  -- the same statistics over long trades only.
    * ``short_metrics`` -- the same statistics over short trades only.

    Rows of ``df`` are processed in order and rows with a null ``close`` are
    skipped. Entry and exit rules are evaluated only when a row of ``df`` is
    processed; a position still open after the last row is closed at the last
    row that has a ``close``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``time``, ``trade_time``, ``close`` and
        ``<model>_sentiment_title``; ``bid`` and ``ask`` as well when
        ``mode='bidask'``.
    model : str
        Prefix of the sentiment column, e.g. ``'FinBERT'``.
    thres1 : float
        A position is opened long when sentiment > ``thres1``; a short is
        closed ("Reversal exit") when sentiment >= ``thres1``.
    thres2 : float
        A position is opened short when sentiment < ``thres2``; a long is
        closed ("Reversal exit") when sentiment <= ``thres2``.
    T : int or float
        Holding limit in minutes ("Time exit").
    r_pt : float
        Trade return at or above which the position is closed ("Profit exit").
    r_sl : float
        Trade return at or below which the position is closed
        ("Stop-loss exit"), e.g. ``-0.01``.
    mode : {'mid', 'bidask'}, default 'mid'
        ``'mid'`` trades at ``close``. ``'bidask'`` buys at ``ask`` and sells
        at ``bid`` (long: enter ask / exit bid; short: enter bid / exit ask).
    show_trades : bool, default False
        Print every trade record as it is closed.

    Raises
    ------
    ValueError
        If ``mode`` is not ``'mid'`` or ``'bidask'``.
    """

    # Fixed column order so that a run without trades still yields every column.
    _TRADE_COLUMNS = ['entry_time', 'exit_time', 'entry_price', 'exit_price',
                      'position', 'exit_reason']
    _MODES = ('mid', 'bidask')

    def __init__(self, df, model, thres1, thres2, T, r_pt, r_sl, mode='mid', show_trades=False):
        if mode not in self._MODES:
            # Fail early: an unknown mode would otherwise yield None prices.
            raise ValueError(f"mode must be one of {self._MODES}, got {mode!r}")

        self.df = df
        self.model = model
        self.thres1 = thres1
        self.thres2 = thres2
        self.T = T
        self.r_pt = r_pt
        self.r_sl = r_sl
        self.mode = mode
        self.show_trades = show_trades

        print("Starting backtest...")
        start_time = time.time()
        self.trades = self._backtest()
        end_time = time.time()
        print(f"Backtest completed in {end_time - start_time:.2f} seconds.")

        # Per-trade returns are stored once on the trade log; the plots and
        # trades.csv read this column.
        self.trades['return'] = self._trade_returns(self.trades)
        self.metrics = self._calculate_metrics(self.trades)
        self.long_metrics = self._calculate_metrics(self.trades[self.trades['position'] == 1])
        self.short_metrics = self._calculate_metrics(self.trades[self.trades['position'] == -1])

    def _backtest(self):
        """Walk through ``self.df`` and return the trade log as a DataFrame."""
        trades = []
        position = 0  # 1 = long, -1 = short, 0 = flat
        entry_price = 0
        entry_time = None

        sentiment_col = f"{self.model}_sentiment_title"
        total_rows = len(self.df)
        # At least 1, so frames with fewer than 100 rows do not divide by zero.
        progress_step = max(1, total_rows // 100)

        # Progress is based on the row position, not the index label, so it
        # also works for non-integer or non-contiguous indexes.
        for row_number, (_, row) in enumerate(self.df.iterrows()):
            if pd.isnull(row['close']):
                continue

            if row_number % progress_step == 0:
                print(f"Progress: {row_number/total_rows*100:.2f}%")

            sentiment = row[sentiment_col]

            # Check if we need to close the position
            if position != 0:
                # Price the exit first: the profit and stop-loss tests compare
                # the price we could actually close at on this row.
                exit_price = self._get_exit_price(row, position)
                reason = self._exit_reason(row, sentiment, position, entry_price, entry_time, exit_price)
                if reason is None:
                    continue
                trades.append(
                    self._record_trade(entry_time, row['trade_time'], entry_price, exit_price, position, reason)
                )
                position = 0
                if reason != 'Reversal exit':
                    # Only a reversal may re-enter on the row that closed the trade.
                    continue

            # Open (or reverse into) a position; only rows whose 'time'
            # equals their 'trade_time' may enter.
            if row['time'] == row['trade_time']:
                signal = self._entry_signal(sentiment)
                if signal != 0:
                    position = signal
                    entry_price = self._get_entry_price(row, position)
                    entry_time = row['trade_time']

        # Close the last position on the last row that actually has a price.
        # An open position implies at least one such row exists.
        if position != 0:
            last_priced = self.df[self.df['close'].notna()].iloc[-1]
            exit_price = self._get_exit_price(last_priced, position)
            trades.append(
                self._record_trade(entry_time, last_priced['trade_time'], entry_price, exit_price,
                                   position, 'End of backtest')
            )

        return pd.DataFrame(trades, columns=self._TRADE_COLUMNS)

    def _exit_reason(self, row, sentiment, position, entry_price, entry_time, exit_price):
        """Return the exit reason for an open position on ``row``, or None to keep holding.

        The checks run in priority order: time, profit, stop-loss, reversal.
        """
        if row['trade_time'] >= entry_time + timedelta(minutes=self.T):
            return 'Time exit'

        ratio = exit_price / entry_price
        open_return = ratio - 1 if position == 1 else 1 - ratio
        if open_return >= self.r_pt:
            return 'Profit exit'
        if open_return <= self.r_sl:
            return 'Stop-loss exit'

        if (position == 1 and sentiment <= self.thres2) or \
           (position == -1 and sentiment >= self.thres1):
            return 'Reversal exit'
        return None

    def _entry_signal(self, sentiment):
        """Return 1 (long), -1 (short) or 0 (no entry) for a sentiment score."""
        if sentiment > self.thres1:
            return 1
        if sentiment < self.thres2:
            return -1
        return 0

    def _get_entry_price(self, row, position):
        """Return the price paid to open ``position`` on ``row`` for the current mode."""
        if self.mode == 'mid':
            return row['close']
        elif self.mode == 'bidask':
            return row['ask'] if position == 1 else row['bid']
        raise ValueError(f"mode must be one of {self._MODES}, got {self.mode!r}")

    def _get_exit_price(self, row, position):
        """Return the price received when closing ``position`` on ``row`` for the current mode."""
        if self.mode == 'mid':
            return row['close']
        elif self.mode == 'bidask':
            return row['bid'] if position == 1 else row['ask']
        raise ValueError(f"mode must be one of {self._MODES}, got {self.mode!r}")

    def _record_trade(self, entry_time, exit_time, entry_price, exit_price, position, exit_reason):
        """Build one trade-log record, printing it when ``show_trades`` is set."""
        trade = {
            'entry_time': entry_time,
            'exit_time': exit_time,
            'entry_price': entry_price,
            'exit_price': exit_price,
            'position': position,
            'exit_reason': exit_reason
        }
        if self.show_trades:
            print(trade)
        return trade

    @staticmethod
    def _trade_returns(trades):
        """Return per-trade returns: exit/entry - 1 for longs, 1 - exit/entry otherwise."""
        ratio = trades['exit_price'].astype(float) / trades['entry_price'].astype(float)
        values = np.where(trades['position'] == 1, ratio - 1, 1 - ratio)
        return pd.Series(values, index=trades.index, dtype=float)

    def _calculate_metrics(self, trades):
        """Summarise a trade log; ``trades`` itself is not modified.

        Returns a dict with:

        * ``Total Return``  -- compounded return over all trades (0 with no trades).
        * ``Sharpe Ratio``  -- sqrt(252) * mean / std of per-trade returns; NaN
          when the std is zero or undefined (fewer than two trades).
        * ``Max Drawdown``  -- largest relative fall of the compounded equity
          curve below its running peak; the starting equity of 1.0 counts as
          the first peak, so a losing first trade is a drawdown.
        * ``Win Rate``      -- share of trades with a positive return.
        * ``Profit Factor`` -- summed gains / summed losses; inf with gains and
          no losses, NaN with neither.
        """
        returns = self._trade_returns(trades)

        total_return = (1 + returns).prod() - 1

        volatility = returns.std()
        if pd.notna(volatility) and volatility > 0:
            sharpe_ratio = np.sqrt(252) * returns.mean() / volatility
        else:
            sharpe_ratio = np.nan

        equity = (1 + returns).cumprod()
        if len(equity):
            running_peak = equity.cummax().clip(lower=1.0)
            max_drawdown = ((running_peak - equity) / running_peak).max()
        else:
            max_drawdown = 0.0

        win_rate = (returns > 0).mean()

        gross_profit = returns[returns > 0].sum()
        gross_loss = abs(returns[returns < 0].sum())
        if gross_loss > 0:
            profit_factor = gross_profit / gross_loss
        elif gross_profit > 0:
            profit_factor = np.inf
        else:
            profit_factor = np.nan

        return {
            'Total Return': total_return,
            'Sharpe Ratio': sharpe_ratio,
            'Max Drawdown': max_drawdown,
            'Win Rate': win_rate,
            'Profit Factor': profit_factor
        }

    def _buy_and_hold(self):
        """Return the buy-and-hold curve: ``close`` relative to the first priced row.

        Rows without a ``close`` and repeated ``trade_time`` values are
        dropped so each bar appears once. Columns: ``trade_time``, ``close``,
        ``cumulative_return``.
        """
        curve = (
            self.df[['trade_time', 'close']]
            .dropna(subset=['close'])
            .drop_duplicates(subset='trade_time')
            .copy()
        )
        first_close = curve['close'].iloc[0] if len(curve) else np.nan
        curve['cumulative_return'] = curve['close'] / first_close
        return curve

    def plot_cumulative_returns(self):
        """Plot strategy vs. buy-and-hold cumulative returns and save a PNG.

        Adds a ``cumulative_return`` column to ``self.trades`` and writes
        'cumulative_returns.png'. Does nothing when there are no trades.
        """
        if self.trades.empty:
            print("No trades to plot.")
            return

        self.trades['cumulative_return'] = (1 + self.trades['return']).cumprod()
        buy_and_hold = self._buy_and_hold()

        plt.figure(figsize=(12, 6))
        sns.lineplot(x='exit_time', y='cumulative_return', data=self.trades, label='Strategy')
        sns.lineplot(x='trade_time', y='cumulative_return', data=buy_and_hold, label='Buy and Hold')
        plt.title('Cumulative Returns')
        plt.xlabel('Date')
        plt.ylabel('Cumulative Return')

        # Green markers for long trades, red for short; '^' = entry, 'v' = exit.
        for _, trade in self.trades.iterrows():
            color = 'g' if trade['position'] == 1 else 'r'
            plt.scatter(trade['entry_time'], trade['cumulative_return'], color=color, marker='^')
            plt.scatter(trade['exit_time'], trade['cumulative_return'], color=color, marker='v')

        plt.legend()
        plt.tight_layout()
        plt.savefig('cumulative_returns.png')
        plt.show()
        plt.close()

    def plot_interactive_cumulative_returns(self):
        """Plot strategy vs. buy-and-hold with plotly and save an HTML file.

        Adds a ``cumulative_return`` column to ``self.trades`` and writes
        'interactive_cumulative_returns.html'. Does nothing when there are no
        trades.
        """
        if self.trades.empty:
            print("No trades to plot.")
            return

        self.trades['cumulative_return'] = (1 + self.trades['return']).cumprod()
        buy_and_hold = self._buy_and_hold()

        fig = make_subplots(specs=[[{"secondary_y": True}]])

        fig.add_trace(
            go.Scatter(x=self.trades['exit_time'], y=self.trades['cumulative_return'], name="Strategy"),
            secondary_y=False,
        )

        fig.add_trace(
            go.Scatter(x=buy_and_hold['trade_time'], y=buy_and_hold['cumulative_return'], name="Buy and Hold"),
            secondary_y=False,
        )

        # One marker trace per trade so each gets its own legend entry and hover text.
        for number, (_, trade) in enumerate(self.trades.iterrows(), start=1):
            color = 'green' if trade['position'] == 1 else 'red'
            symbol = 'triangle-up' if trade['position'] == 1 else 'triangle-down'

            fig.add_trace(
                go.Scatter(
                    x=[trade['entry_time'], trade['exit_time']],
                    y=[trade['cumulative_return'], trade['cumulative_return']],
                    mode='markers',
                    marker=dict(color=color, symbol=symbol, size=10),
                    name=f"Trade {number}",
                    text=[f"Entry: {trade['entry_time']}<br>Price: {trade['entry_price']:.2f}",
                          f"Exit: {trade['exit_time']}<br>Price: {trade['exit_price']:.2f}<br>Return: {trade['return']:.2%}<br>Reason: {trade['exit_reason']}"],
                    hoverinfo='text'
                ),
                secondary_y=False,
            )

        fig.update_layout(
            title_text="Cumulative Returns with Trade Points",
            xaxis_title="Date",
            yaxis_title="Cumulative Return",
            hovermode="closest"
        )

        fig.write_html("interactive_cumulative_returns.html")
        fig.show()

    def save_results(self):
        """Write the trade log and the three metric tables to CSV, then draw both plots."""
        self.trades.to_csv('trades.csv', index=False)
        pd.DataFrame(self.metrics, index=[0]).to_csv('metrics.csv', index=False)
        pd.DataFrame(self.long_metrics, index=[0]).to_csv('long_metrics.csv', index=False)
        pd.DataFrame(self.short_metrics, index=[0]).to_csv('short_metrics.csv', index=False)
        self.plot_cumulative_returns()
        self.plot_interactive_cumulative_returns()

    def print_results(self):
        """Print the overall, long-only and short-only metric dictionaries."""
        print("Overall Metrics:")
        print(self.metrics)
        print("\nLong Trades Metrics:")
        print(self.long_metrics)
        print("\nShort Trades Metrics:")
        print(self.short_metrics)

In [None]:
# Reload the processed events from 'processed_data.xlsx'.
df = pd.read_excel('processed_data.xlsx')

# Initialize and run the backtest (the constructor runs it immediately).
# model='FinBERT' selects the 'FinBERT_sentiment_title' column.
# NOTE(review): mode='bidask' reads row['bid'] and row['ask']; process_data
# only merges 'close' from the price data, so confirm both columns are
# present in the workbook.
# show_trades=True prints every trade record as it is closed.
backtest = SentimentBacktest(df, model='FinBERT', thres1=0.5, thres2=-0.5, T=60, r_pt=0.02, r_sl=-0.01, mode='bidask', show_trades=True)

# Save and print results (CSV files, a PNG and an HTML plot are written to
# the working directory).
backtest.save_results()
backtest.print_results()