In [None]:
import pandas as pd
import numpy as np

# Assume df and dfm are already loaded

# Data processing module
def process_data(df, dfm):
    """Align news rows with market quotes and add aggregate sentiment columns.

    Both inputs are modified in place before the merge: ``df`` gains ``time``
    and ``trade_time`` columns, and ``dfm['asoftime']`` is converted to
    datetime.

    Args:
        df: News frame with a ``createDate`` column and the three
            ``*_sentiment_title`` score columns (FinBERT, Vader, ABSA).
        dfm: Market frame with ``asoftime``, ``close``, ``bid`` and ``ask``.

    Returns:
        A new DataFrame: ``df`` left-joined with the quote found at each row's
        ``trade_time``, plus ``Max_``/``Min_``/``Mean_sentiment_title``.
        Rows with no quote at or after their ``time`` get ``NaT`` as
        ``trade_time`` and missing prices.
    """
    sentiment_inputs = ['FinBERT_sentiment_title', 'Vader_sentiment_title', 'ABSA_sentiment_title']

    # Round every news timestamp up to the next whole minute. 'min' is used
    # because the 'T' alias is deprecated in recent pandas releases.
    df['time'] = pd.to_datetime(df['createDate']).dt.ceil('min')
    dfm['asoftime'] = pd.to_datetime(dfm['asoftime'])

    # First quote time >= news time, found by binary search on the sorted
    # distinct quote times instead of scanning dfm once per news row.
    market_times = pd.DatetimeIndex(dfm['asoftime'].dropna().unique()).sort_values()
    positions = market_times.searchsorted(pd.DatetimeIndex(df['time']), side='left')
    # A trailing NaT sentinel absorbs positions that fall past the last quote.
    lookup = market_times.append(pd.DatetimeIndex([pd.NaT], tz=market_times.tz))
    df['trade_time'] = pd.Series(lookup.take(positions), index=df.index)

    # Attach the prices quoted at trade_time; the duplicated key is dropped.
    df = pd.merge(df, dfm[['asoftime', 'close', 'bid', 'ask']],
                  left_on='trade_time', right_on='asoftime', how='left')
    df = df.drop('asoftime', axis=1)

    # Row-wise aggregates across the three sentiment models.
    scores = df[sentiment_inputs]
    df['Max_sentiment_title'] = scores.max(axis=1)
    df['Min_sentiment_title'] = scores.min(axis=1)
    df['Mean_sentiment_title'] = scores.mean(axis=1)

    return df

def calculate_moving_averages(df, period, MA='SMA'):
    """Add a moving-average column for every sentiment column of ``df``.

    New columns are named ``f'{col}_{MA}_{period}'`` and are written into
    ``df`` in place.

    Args:
        df: Frame holding the six ``*_sentiment_title`` columns. A ``time``
            column is also required when ``period`` is a string.
        period: Either an integer number of rows, or a time span string that
            ``pd.to_timedelta`` understands (e.g. ``'1D'``). A plain
            ``rolling(window='1D')`` / ``ewm(span='1D')`` on a frame without
            a datetime index raises, so string periods are evaluated against
            the ``time`` column instead.
        MA: ``'SMA'`` for a simple rolling mean or ``'EMA'`` for an
            exponentially weighted mean. Any other value adds no columns.

    Returns:
        The same ``df`` object with the new columns attached.

    Notes:
        For a string ``period`` with ``MA='EMA'`` the period is used as the
        half-life of the decay (``ewm(halflife=..., times=...)``), because a
        span expressed in rows has no direct meaning for irregularly spaced
        timestamps. ``time`` must not contain ``NaT`` in that case.
    """
    sentiment_columns = ['FinBERT_sentiment_title', 'Vader_sentiment_title', 'ABSA_sentiment_title',
                         'Max_sentiment_title', 'Min_sentiment_title', 'Mean_sentiment_title']

    time_based = isinstance(period, str)
    if time_based:
        window = pd.to_timedelta(period)
        times = pd.to_datetime(df['time']).to_numpy(dtype='datetime64[ns]')
        # Time-based windows need monotonic timestamps; a stable sort keeps
        # the original relative order of rows sharing the same minute.
        order = np.argsort(times, kind='stable')
        sorted_times = times[order]

    for col in sentiment_columns:
        target = f'{col}_{MA}_{period}'

        if not time_based:
            if MA == 'SMA':
                df[target] = df[col].rolling(window=period).mean()
            elif MA == 'EMA':
                df[target] = df[col].ewm(span=period).mean()
            continue

        ordered = pd.Series(df[col].to_numpy()[order], index=pd.DatetimeIndex(sorted_times))
        if MA == 'SMA':
            smoothed = ordered.rolling(window=window).mean()
        elif MA == 'EMA':
            smoothed = ordered.ewm(halflife=window, times=sorted_times).mean()
        else:
            continue

        # Scatter the results back to the original row order.
        restored = np.empty(len(df), dtype=float)
        restored[order] = smoothed.to_numpy()
        df[target] = restored

    return df
    
# Run the alignment/merge step on the pre-loaded frames.
df = process_data(df, dfm)

# Add one set of moving-average columns per (MA kind, window) pair.
for MA, period in [(kind, window) for kind in ('SMA', 'EMA') for window in ('1D', '1H')]:
    df = calculate_moving_averages(df, period, MA)

# Persist the enriched frame (without the index) and report completion.
df.to_excel('processed_data.xlsx', index=False)
print("Data processing completed. Results saved to 'processed_data.xlsx'.")

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import timedelta
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import time

class SentimentBacktest:
    """Row-by-row backtest of a sentiment-threshold trading rule.

    The frame is walked in order. When flat, a row whose ``time`` equals its
    ``trade_time`` opens a long if the sentiment is above ``thres1`` or a
    short if it is below ``thres2``. An open position is closed by the first
    rule that fires, checked in this order: holding time of at least ``T``
    minutes, profit target ``r_pt``, stop-loss ``r_sl``, sentiment reversal.
    After a reversal exit a new position may be opened on the same row.

    Construction runs the backtest immediately and stores:
        trades: DataFrame with one row per closed trade.
        metrics / long_metrics / short_metrics: dicts from
            ``_calculate_metrics`` for all, long-only and short-only trades.

    Args:
        df: Frame with ``close``, ``time``, ``trade_time`` and the selected
            sentiment column (plus ``bid``/``ask`` for ``mode='bidask'``).
        model: Prefix of the sentiment column, e.g. ``'FinBERT'``.
        thres1: Long-entry threshold (sentiment must be strictly above).
        thres2: Short-entry threshold (sentiment must be strictly below).
        T: Maximum holding time in minutes.
        r_pt: Profit-target return (exit when return >= r_pt).
        r_sl: Stop-loss return (exit when return <= r_sl).
        mode: ``'mid'`` (or its alias ``'close'``) trades at ``close``;
            ``'bidask'`` buys at ``ask`` and sells at ``bid``.
        show_trades: Print every trade as it is recorded.
        period, MA: When both are non-empty, the column
            ``f'{model}_sentiment_title_{MA}_{period}'`` is used instead of
            the raw ``f'{model}_sentiment_title'``.

    Raises:
        ValueError: If ``mode`` is not one of the supported values.
        KeyError: If a column required by the backtest is missing from ``df``.
    """

    # Column layout of the trade log; also gives an empty log its columns.
    _TRADE_COLUMNS = ['entry_time', 'exit_time', 'entry_price', 'exit_price', 'position', 'exit_reason']
    # Modes that price both entries and exits at the 'close' column.
    _CLOSE_PRICE_MODES = ('mid', 'close')

    def __init__(self, df, model, thres1, thres2, T, r_pt, r_sl, mode='mid', show_trades=False, period='', MA=''):
        if mode not in self._CLOSE_PRICE_MODES and mode != 'bidask':
            # Previously an unknown mode produced None prices and failed later
            # with an unrelated TypeError.
            raise ValueError(f"Unsupported mode {mode!r}; expected 'mid', 'close' or 'bidask'.")

        self.df = df
        self.model = model
        self.thres1 = thres1
        self.thres2 = thres2
        self.T = T
        self.r_pt = r_pt
        self.r_sl = r_sl
        self.mode = mode
        self.show_trades = show_trades
        self.period = period
        self.MA = MA

        print("Starting backtest...")
        start_time = time.time()
        self.trades = self._backtest()
        end_time = time.time()
        print(f"Backtest completed in {end_time - start_time:.2f} seconds.")

        # The first call also writes the 'return' column onto self.trades,
        # which the plotting methods rely on.
        self.metrics = self._calculate_metrics(self.trades)
        # Copies keep the per-side metrics from writing into a filtered view.
        self.long_metrics = self._calculate_metrics(self.trades[self.trades['position'] == 1].copy())
        self.short_metrics = self._calculate_metrics(self.trades[self.trades['position'] == -1].copy())

    def _backtest(self):
        """Walk ``self.df`` once and return the trade log as a DataFrame.

        Rows with a missing ``close`` are skipped. A position still open after
        the last row is closed at the last row that had a ``close``, with
        reason ``'End of backtest'``.
        """
        if self.period and self.MA:
            sentiment_column = f"{self.model}_sentiment_title_{self.MA}_{self.period}"
        else:
            sentiment_column = f"{self.model}_sentiment_title"

        missing = [name for name in ('close', 'time', 'trade_time', sentiment_column)
                   if name not in self.df.columns]
        if missing:
            raise KeyError(f"Backtest input is missing required columns: {missing}")

        trades = []
        position = 0
        entry_price = 0
        entry_time = None
        last_valid_row = None

        total_rows = len(self.df)
        # At least 1 so frames with fewer than 100 rows do not divide by zero.
        progress_step = max(total_rows // 100, 1)

        # Progress is based on the row's position, not its index label, so it
        # stays meaningful for frames with a non-default index.
        for row_number, (_, row) in enumerate(self.df.iterrows()):
            if pd.isnull(row['close']):
                continue
            last_valid_row = row

            if row_number % progress_step == 0:
                print(f"Progress: {row_number/total_rows*100:.2f}%")

            sentiment = row[sentiment_column]
            # Entries are only taken on rows whose news minute is itself a
            # quoted minute.
            can_enter = row['time'] == row['trade_time']

            if position != 0:
                exit_price = self._get_exit_price(row, position)
                reason = self._exit_reason(row, position, entry_price, entry_time, exit_price, sentiment)
                may_enter = False
                if reason is not None:
                    trades.append(self._record_trade(entry_time, row['trade_time'], entry_price,
                                                     exit_price, position, reason))
                    position = 0
                    # Only a reversal exit may flip into a new position on the
                    # same row.
                    may_enter = can_enter and reason == 'Reversal exit'
            else:
                may_enter = can_enter

            if may_enter:
                position = self._entry_signal(sentiment)
                if position != 0:
                    entry_price = self._get_entry_price(row, position)
                    entry_time = row['trade_time']

        # Close whatever is still open at the last priced row.
        if position != 0:
            exit_price = self._get_exit_price(last_valid_row, position)
            trades.append(self._record_trade(entry_time, last_valid_row['trade_time'], entry_price,
                                             exit_price, position, 'End of backtest'))

        # Explicit columns keep an empty log usable by the metric/plot code.
        return pd.DataFrame(trades, columns=self._TRADE_COLUMNS)

    def _entry_signal(self, sentiment):
        """Return 1 (long), -1 (short) or 0 (no trade) for a sentiment value."""
        if sentiment > self.thres1:
            return 1
        if sentiment < self.thres2:
            return -1
        return 0

    def _exit_reason(self, row, position, entry_price, entry_time, exit_price, sentiment):
        """Return the name of the first exit rule that fires, or None."""
        if row['trade_time'] >= entry_time + timedelta(minutes=self.T):
            return 'Time exit'

        # Return of the open position if it were closed at exit_price.
        if position == 1:
            open_return = exit_price / entry_price - 1
        else:
            open_return = 1 - exit_price / entry_price

        if open_return >= self.r_pt:
            return 'Profit exit'
        if open_return <= self.r_sl:
            return 'Stop-loss exit'
        if (position == 1 and sentiment <= self.thres1) or \
           (position == -1 and sentiment >= self.thres2):
            return 'Reversal exit'
        return None

    def _get_entry_price(self, row, position):
        """Price paid to open ``position`` on ``row``."""
        if self.mode in self._CLOSE_PRICE_MODES:
            return row['close']
        if self.mode == 'bidask':
            # Longs buy at the ask, shorts sell at the bid.
            return row['ask'] if position == 1 else row['bid']
        raise ValueError(f"Unsupported mode {self.mode!r}")

    def _get_exit_price(self, row, position):
        """Price received when closing ``position`` on ``row``."""
        if self.mode in self._CLOSE_PRICE_MODES:
            return row['close']
        if self.mode == 'bidask':
            # Longs sell at the bid, shorts buy back at the ask.
            return row['bid'] if position == 1 else row['ask']
        raise ValueError(f"Unsupported mode {self.mode!r}")

    def _record_trade(self, entry_time, exit_time, entry_price, exit_price, position, exit_reason):
        """Build the dict for one closed trade, printing it if ``show_trades``."""
        trade = {
            'entry_time': entry_time,
            'exit_time': exit_time,
            'entry_price': entry_price,
            'exit_price': exit_price,
            'position': position,
            'exit_reason': exit_reason
        }
        if self.show_trades:
            print(f"Trade: {trade['entry_time']} -> {trade['exit_time']}, Entry: {trade['entry_price']:.4f}, Exit: {trade['exit_price']:.4f}, Position: {trade['position']}, Reason: {trade['exit_reason']}")
        return trade

    def _calculate_metrics(self, trades):
        """Compute summary statistics for a trade log.

        Adds a ``return`` column to ``trades`` in place (long:
        ``exit/entry - 1``; otherwise ``1 - exit/entry``).

        Returns:
            Dict with 'Total Return' (compounded), 'Sharpe Ratio'
            (``sqrt(252) * mean / std`` of per-trade returns), 'Max Drawdown'
            (largest absolute drop of the compounded curve below its running
            maximum), 'Win Rate', 'PL Ratio' (mean win / |mean loss|) and
            'Risk Return Ratio' (``win_rate / (1 - win_rate) * PL Ratio``).
            Ratios that are undefined for the given trades come back as
            NaN or inf; an empty log yields a total return of 0.0 and NaN
            for everything else.
        """
        if trades.empty:
            trades['return'] = np.array([], dtype=float)
            return {
                'Total Return': 0.0,
                'Sharpe Ratio': np.nan,
                'Max Drawdown': np.nan,
                'Win Rate': np.nan,
                'PL Ratio': np.nan,
                'Risk Return Ratio': np.nan
            }

        price_ratio = trades['exit_price'] / trades['entry_price']
        trades['return'] = np.where(trades['position'] == 1, price_ratio - 1, 1 - price_ratio)
        returns = trades['return']

        growth = (1 + returns).cumprod()
        total_return = (1 + returns).prod() - 1
        max_drawdown = (growth.cummax() - growth).max()

        # Zero variance, no losing trades or a 100% win rate make the ratios
        # below undefined; report NaN/inf quietly instead of warning.
        with np.errstate(divide='ignore', invalid='ignore'):
            sharpe_ratio = np.sqrt(252) * np.float64(returns.mean()) / np.float64(returns.std())
            win_rate = np.float64((returns > 0).mean())
            mean_win = np.float64(returns[returns > 0].mean())
            mean_loss = np.float64(returns[returns < 0].mean())
            pl_ratio = mean_win / abs(mean_loss)
            risk_return_ratio = win_rate / (1 - win_rate) * pl_ratio

        return {
            'Total Return': total_return,
            'Sharpe Ratio': sharpe_ratio,
            'Max Drawdown': max_drawdown,
            'Win Rate': win_rate,
            'PL Ratio': pl_ratio,
            'Risk Return Ratio':risk_return_ratio
        }

    def _buy_and_hold_curve(self):
        """Return trade_time/close rows with the compounded buy-and-hold curve.

        Rows without a close are dropped first so ``pct_change`` does not have
        to rely on implicit forward-filling.
        """
        curve = self.df[['trade_time', 'close']].dropna(subset=['close']).copy()
        curve['return'] = curve['close'].pct_change()
        curve['cumulative_return'] = (1 + curve['return']).cumprod()
        return curve

    def plot_cumulative_returns(self):
        """Plot strategy vs. buy-and-hold with matplotlib/seaborn.

        Saves the figure to 'cumulative_returns.png' and shows it. Also writes
        a ``cumulative_return`` column onto ``self.trades``.
        """
        self.trades['cumulative_return'] = (1 + self.trades['return']).cumprod()
        buy_and_hold = self._buy_and_hold_curve()

        plt.figure(figsize=(12, 6))
        sns.lineplot(x='exit_time', y='cumulative_return', data=self.trades, label='Strategy')
        sns.lineplot(x='trade_time', y='cumulative_return', data=buy_and_hold, label='Buy and Hold')
        plt.title('Cumulative Returns')
        plt.xlabel('Date')
        plt.ylabel('Cumulative Return')

        # Entry (^) and exit (v) markers, green for longs and red for shorts;
        # both are drawn at the trade's post-exit cumulative return.
        for _, trade in self.trades.iterrows():
            color = 'g' if trade['position'] == 1 else 'r'
            plt.scatter(trade['entry_time'], trade['cumulative_return'], color=color, marker='^')
            plt.scatter(trade['exit_time'], trade['cumulative_return'], color=color, marker='v')

        plt.legend()
        plt.tight_layout()
        plt.savefig('cumulative_returns.png')
        plt.show()
        plt.close()

    def plot_interactive_cumulative_returns(self):
        """Plot strategy vs. buy-and-hold with plotly.

        Writes 'interactive_cumulative_returns.html' and shows the figure.
        Also writes a ``cumulative_return`` column onto ``self.trades``.
        """
        self.trades['cumulative_return'] = (1 + self.trades['return']).cumprod()
        buy_and_hold = self._buy_and_hold_curve()

        fig = make_subplots(specs=[[{"secondary_y": True}]])

        fig.add_trace(
            go.Scatter(x=self.trades['exit_time'], y=self.trades['cumulative_return'], name="Strategy"),
            secondary_y=False,
        )

        fig.add_trace(
            go.Scatter(x=buy_and_hold['trade_time'], y=buy_and_hold['cumulative_return'], name="Buy and Hold"),
            secondary_y=False,
        )

        # One two-point marker trace per trade, with hover text for the entry
        # and the exit.
        for i, trade in self.trades.iterrows():
            color = 'green' if trade['position'] == 1 else 'red'
            symbol = 'triangle-up' if trade['position'] == 1 else 'triangle-down'

            fig.add_trace(
                go.Scatter(
                    x=[trade['entry_time'], trade['exit_time']],
                    y=[trade['cumulative_return'], trade['cumulative_return']],
                    mode='markers',
                    marker=dict(color=color, symbol=symbol, size=10),
                    name=f"Trade {i+1}",
                    text=[f"Entry: {trade['entry_time']}<br>Price: {trade['entry_price']:.2f}",
                          f"Exit: {trade['exit_time']}<br>Price: {trade['exit_price']:.2f}<br>Return: {trade['return']:.2%}<br>Reason: {trade['exit_reason']}"],
                    hoverinfo='text'
                ),
                secondary_y=False,
            )

        fig.update_layout(
            title_text="Cumulative Returns with Trade Points",
            xaxis_title="Date",
            yaxis_title="Cumulative Return",
            hovermode="closest"
        )

        fig.write_html("interactive_cumulative_returns.html")
        fig.show()

    def save_results(self):
        """Write trades and metrics to CSV files, then produce both plots."""
        self.trades.to_csv('trades.csv', index=False)
        pd.DataFrame(self.metrics, index=[0]).to_csv('metrics.csv', index=False)
        pd.DataFrame(self.long_metrics, index=[0]).to_csv('long_metrics.csv', index=False)
        pd.DataFrame(self.short_metrics, index=[0]).to_csv('short_metrics.csv', index=False)
        self.plot_cumulative_returns()
        self.plot_interactive_cumulative_returns()

    def print_results(self):
        """Print the overall, long-only and short-only metric dicts."""
        print("Overall Metrics:")
        print(self.metrics)
        print("\nLong Trades Metrics:")
        print(self.long_metrics)
        print("\nShort Trades Metrics:")
        print(self.short_metrics)

In [None]:
# Assume df is loaded from 'processed_data.xlsx'
df = pd.read_excel('processed_data.xlsx')

# Initialize and run the backtest.
# mode='mid' prices entries and exits at the 'close' column; the other
# supported value is 'bidask'. period='' and MA='' select the raw
# 'FinBERT_sentiment_title' column rather than a moving average.
backtest = SentimentBacktest(df, model='FinBERT', thres1=0.1, thres2=-0.1, T=60, r_pt=0.005, r_sl=-0.005, mode='mid', show_trades=True, period='', MA='')

# Save and print results
backtest.save_results()
backtest.print_results()

## Parameter optimization (tuning)

In [None]:
import pandas as pd
import numpy as np
from datetime import timedelta
import matplotlib.pyplot as plt
from itertools import product
import time
import multiprocessing as mp

class SentimentBacktest:
    """Parameter-grid driver that runs many backtests and plots the best ones.

    NOTE(review): only part of this class is visible in this cell (the
    trailing comment says the remaining methods are unchanged). The
    ``__init__`` shown here accepts only ``df``, while ``run_single_backtest``
    constructs ``SentimentBacktest`` with many keyword arguments and calls
    ``run()``, which is not defined in the visible code — confirm against the
    full class.
    """

    def __init__(self, df):
        self.df = df

    def run_optimization(self, models, thres1s, thres2s, r_pts, r_sls, Ts, periods, MAs, keywords):
        """Run every valid parameter combination and plot the ten best.

        Each combination is evaluated with ``run_single_backtest`` in a
        process pool. Results whose trade log is missing or empty are ignored
        for ranking; the rest are ranked by 'Total Return' (descending) and
        the top ten are drawn and printed.

        Returns:
            DataFrame with one row per result whose ``trades`` is not None.
        """
        param_grid = self.generate_valid_combinations(
            models, thres1s, thres2s, r_pts, r_sls, Ts, periods, MAs, keywords)

        fig, ax = plt.subplots(figsize=(15, 10))

        # Buy-and-hold return over the whole frame, drawn as a reference line.
        closes = self.df['close']
        buy_and_hold_return = (closes.iloc[-1] / closes.iloc[0]) - 1
        ax.axhline(y=buy_and_hold_return, color='black', linestyle='--', label='Buy and Hold')

        start_time = time.time()

        # Fan the combinations out over one worker per CPU.
        with mp.Pool(processes=mp.cpu_count()) as pool:
            results = pool.map(self.run_single_backtest, param_grid)

        end_time = time.time()
        print(f"总运行时间: {end_time - start_time:.2f} 秒")

        def has_trades(entry):
            log = entry['trades']
            return log is not None and not log.empty

        def total_return_of(entry):
            return entry['performance']['Total Return']

        # Drop unusable results, then rank by Total Return, best first.
        ranked = sorted(filter(has_trades, results), key=total_return_of, reverse=True)

        # Only the ten best configurations are plotted and printed.
        for result in ranked[:10]:
            trades = result['trades']
            trades['cumulative_return'] = (1 + trades['return']).cumprod() - 1
            label = (f"{result['model']}, t1={result['thres1']}, t2={result['thres2']}, "
                     f"pt={result['r_pt']}, sl={result['r_sl']}, T={result['T']}, "
                     f"p={result['period']}, MA={result['MA']}, kw={result['keyword']}")
            ax.plot(trades['exit_time'], trades['cumulative_return'], label=label)

            print(f"配置: {label}")
            print(f"Win Ratio: {result['performance']['Win Rate']:.4f}")
            print(f"PL Ratio: {result['performance']['PL Ratio']:.4f}")
            print(f"Risk Return Ratio: {result['performance']['Risk Return Ratio']:.4f}")
            print("---")

        ax.set_xlabel('Time')
        ax.set_ylabel('Cumulative Return')
        ax.set_title('Backtest Results')
        ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)
        plt.tight_layout()
        plt.show()

        kept = [entry for entry in results if entry['trades'] is not None]
        return pd.DataFrame(kept)

    def generate_valid_combinations(self, models, thres1s, thres2s, r_pts, r_sls, Ts, periods, MAs, keywords):
        """List the parameter tuples to backtest.

        For every combination of the base parameters, one tuple without a
        moving average (``period`` and ``MA`` both ``''``) is emitted,
        followed by one tuple per (period, MA) pair in which both values are
        truthy.

        Returns:
            List of ``(model, thres1, thres2, r_pt, r_sl, T, period, MA,
            keyword)`` tuples.
        """
        combos = []
        for *base, keyword in product(models, thres1s, thres2s, r_pts, r_sls, Ts, keywords):
            # Raw-sentiment variant first, then every moving-average variant.
            combos.append((*base, '', '', keyword))
            combos.extend(
                (*base, period, MA, keyword)
                for period, MA in product(periods, MAs)
                if period and MA
            )
        return combos

    def run_single_backtest(self, params):
        """Run one backtest for a parameter tuple and return its result dict.

        The result holds the nine parameters under their own names plus
        ``trades`` and ``performance`` as returned by ``backtest.run()``.
        """
        model, thres1, thres2, r_pt, r_sl, T, period, MA, keyword = params

        # Restrict the data to the requested keyword; a falsy keyword keeps
        # the full frame.
        if keyword:
            df_filtered = self.df[self.df['keyword'] == keyword]
        else:
            df_filtered = self.df

        backtest = SentimentBacktest(
            df=df_filtered,
            model=model,
            thres1=thres1,
            thres2=thres2,
            r_pt=r_pt,
            r_sl=r_sl,
            T=T,
            show_trades=False,
            period=period,
            MA=MA
        )

        trades, performance = backtest.run()

        field_names = ('model', 'thres1', 'thres2', 'r_pt', 'r_sl', 'T', 'period', 'MA', 'keyword')
        outcome = dict(zip(field_names, params))
        outcome['trades'] = trades
        outcome['performance'] = performance
        return outcome

    # ... (other methods remain unchanged)

# Usage example
if __name__ == '__main__':
    df = pd.read_csv('your_data.csv')  # replace with your own data file
    df['trade_time'] = pd.to_datetime(df['trade_time'])
    # read_csv yields strings; if a 'time' column is present, parse it too so
    # it can be compared with the parsed 'trade_time' values.
    if 'time' in df.columns:
        df['time'] = pd.to_datetime(df['time'])

    backtest = SentimentBacktest(df)

    optimization_results = backtest.run_optimization(
        models=['FinBERT', 'Max', 'Mean'],
        thres1s=[0.3, 0.5],
        thres2s=[-0.3, -0.5],
        r_pts=[0.01, 0.02],
        r_sls=[-0.005, -0.01],
        Ts=[5, 10, 30, 60],
        periods=['1D', '1H'],
        MAs=['SMA', 'EMA'],
        keywords=['A', 'B']  # keyword filters to evaluate
    )

    # run_optimization returns the metrics nested in the 'performance' dict
    # column, so the total return is pulled out before sorting on it.
    if optimization_results.empty:
        print(optimization_results)
    else:
        optimization_results['total_return'] = optimization_results['performance'].apply(
            lambda performance: performance['Total Return'])
        print(optimization_results.sort_values('total_return', ascending=False))

In [None]:
# Compute the cumulative return of a buy-and-hold strategy and draw it as a
# dashed black line.
# NOTE(review): this cell uses `self` and `ax` at top level, so it cannot run
# on its own; it looks like a snippet meant to be pasted into a method that
# has both in scope (presumably the optimization plot) — confirm.
# It writes 'daily_return' and 'buy_and_hold_return' columns onto self.df.
self.df['daily_return'] = self.df['close'].pct_change()
self.df['buy_and_hold_return'] = (1 + self.df['daily_return']).cumprod() - 1
ax.plot(self.df.index, self.df['buy_and_hold_return'], color='black', linestyle='--', label='Buy and Hold')