In [12]:
import requests
import pandas as pd
from datetime import datetime, timedelta
import time
import pytz

BACKTEST_DATA_FOLDER = "backtest_data"


def get_binance_klines(symbol, interval, start_time, end_time):
    url = "https://api.binance.com/api/v3/klines"
    params = {
        "symbol": symbol,
        "interval": interval,
        "startTime": int(start_time.timestamp() * 1000),
        "endTime": int(end_time.timestamp() * 1000),
        "limit": 1000,
    }
    response = requests.get(url, params=params)
    data = response.json()
    df = pd.DataFrame(
        data,
        columns=[
            "timestamp",
            "open",
            "high",
            "low",
            "close",
            "volume",
            "close_time",
            "quote_asset_volume",
            "number_of_trades",
            "taker_buy_base_asset_volume",
            "taker_buy_quote_asset_volume",
            "ignore",
        ],
    )
    df["timestamp"] = pd.to_datetime(df["timestamp"], unit="ms", utc=True)
    df["timestamp"] = df["timestamp"].dt.tz_convert("Asia/Tokyo")  # UTCからJSTに変換
    return df[["timestamp", "open", "high", "low", "close", "volume"]]


def get_historical_data(symbol, interval, start_date, end_date):
    all_data = []
    current_date = start_date
    while current_date < end_date:
        next_date = min(current_date + timedelta(days=1), end_date)
        print(f"Fetching data from {current_date} to {next_date}")
        df = get_binance_klines(symbol, interval, current_date, next_date)
        if df.empty:
            print(f"No data found for {symbol} in the specified time range.")
            exit()
        all_data.append(df)
        current_date = next_date
        time.sleep(1)  # APIレート制限を考慮
    return pd.concat(all_data)


# パラメータ設定
symbol = "SOLUSDT"
interval = "5m"
end_date = datetime(
    2024, 8, 11, tzinfo=pytz.timezone("Asia/Tokyo")
)  # 固定の終了日（JST）
start_date = end_date - timedelta(days=180)

# データ取得
historical_data = get_historical_data(symbol, interval, start_date, end_date)

# CSVに保存
historical_data.to_csv(
    f"{BACKTEST_DATA_FOLDER}/{symbol}_{interval}_{start_date.date()}_{end_date.date()}_JST.csv", index=False
)

print(f"Total rows: {len(historical_data)}")

Fetching data from 2024-02-13 00:00:00+09:19 to 2024-02-14 00:00:00+09:19
Fetching data from 2024-02-14 00:00:00+09:19 to 2024-02-15 00:00:00+09:19
Fetching data from 2024-02-15 00:00:00+09:19 to 2024-02-16 00:00:00+09:19
Fetching data from 2024-02-16 00:00:00+09:19 to 2024-02-17 00:00:00+09:19
Fetching data from 2024-02-17 00:00:00+09:19 to 2024-02-18 00:00:00+09:19
Fetching data from 2024-02-18 00:00:00+09:19 to 2024-02-19 00:00:00+09:19
Fetching data from 2024-02-19 00:00:00+09:19 to 2024-02-20 00:00:00+09:19
Fetching data from 2024-02-20 00:00:00+09:19 to 2024-02-21 00:00:00+09:19
Fetching data from 2024-02-21 00:00:00+09:19 to 2024-02-22 00:00:00+09:19
Fetching data from 2024-02-22 00:00:00+09:19 to 2024-02-23 00:00:00+09:19
Fetching data from 2024-02-23 00:00:00+09:19 to 2024-02-24 00:00:00+09:19
Fetching data from 2024-02-24 00:00:00+09:19 to 2024-02-25 00:00:00+09:19
Fetching data from 2024-02-25 00:00:00+09:19 to 2024-02-26 00:00:00+09:19
Fetching data from 2024-02-26 00:00:00

In [10]:
historical_data = pd.read_csv("ETHUSDT_5m_2024-02-13_2024-08-11_JST.csv")

In [14]:
import pandas as pd
import numpy as np
from src.utils.indicators import calculate_rsi, calculate_bollinger_bands


def calculate_performance(trades):
    if not trades:
        return 0, 0, 0, 0
    total_return = sum(trades)
    avg_return = total_return / len(trades)
    win_rate = sum(1 for t in trades if t > 0) / len(trades)
    returns = np.array(trades)
    sharpe_ratio = (returns.mean() / returns.std()) * np.sqrt(252)  # Annualized
    return total_return, avg_return, win_rate, sharpe_ratio


def prepare_data(df):
    # Reset index to ensure it's unique
    df = df.reset_index(drop=True)

    # Convert relevant columns to numeric types
    numeric_columns = ["open", "high", "low", "close", "volume"]
    for col in numeric_columns:
        df[col] = pd.to_numeric(df[col], errors="coerce")

    # Remove existing RSI column if it exists
    if "rsi" in df.columns:
        df = df.drop("rsi", axis=1)

    # Drop any rows with NaN values after conversion
    df = df.dropna(subset=numeric_columns)

    return df


def backtest_rsi_bollinger_strategy(
    df, rsi_period=14, bb_period=20, bb_std=2, rsi_oversold=35, rsi_overbought=70
):
    # Prepare the data
    df = prepare_data(df)

    # Calculate indicators
    df["rsi"] = calculate_rsi(df["close"].values, period=rsi_period)

    # Calculate Bollinger Bands
    bb_upper, bb_middle, bb_lower = calculate_bollinger_bands(
        df["close"].values, period=bb_period, num_std_dev=bb_std
    )

    # Add Bollinger Bands to the DataFrame, aligning with the index
    df["bb_upper"] = pd.Series(bb_upper, index=df.index[: len(bb_upper)])
    df["bb_middle"] = pd.Series(bb_middle, index=df.index[: len(bb_middle)])
    df["bb_lower"] = pd.Series(bb_lower, index=df.index[: len(bb_lower)])

    # Drop rows with NaN values
    df = df.dropna()

    # Initialize variables
    position = None
    entry_price = 0
    trades = []

    for i in range(len(df)):
        if i < max(rsi_period, bb_period):
            continue  # Skip until we have enough data for indicators

        row = df.iloc[i]
        if position is None:
            if row["rsi"] < rsi_oversold and row["close"] < row["bb_lower"]:
                position = "long"
                entry_price = row["close"]
        elif position == "long":
            if row["rsi"] > rsi_overbought or row["close"] > row["bb_upper"]:
                profit = (row["close"] - entry_price) / entry_price
                trades.append(profit)
                position = None

    return trades


# Use the function
trades = backtest_rsi_bollinger_strategy(historical_data)

# Calculate performance metrics
total_return, avg_return, win_rate, sharpe_ratio = calculate_performance(trades)

print(historical_data.describe())

# トレード詳細の表示
print("\nFirst 10 trades:")
print(trades[:10])
print("\nLast 10 trades:")
print(trades[-10:])

print(f"\nBacktesting Results:")
print(f"Total Return: {total_return:.2%}")
print(f"Average Trade Return: {avg_return:.2%}")
print(f"Win Rate: {win_rate:.2%}")
print(f"Sharpe Ratio: {sharpe_ratio:.2f}")
print(f"Number of Trades: {len(trades)}")

                                  timestamp          open          high  \
count                                 51840         51840         51840   
unique                                  NaN          9295          9264   
top                                     NaN  145.00000000  145.00000000   
freq                                    NaN            32            37   
mean    2024-05-12 23:42:29.999999744+09:00           NaN           NaN   
min               2024-02-12 23:45:00+09:00           NaN           NaN   
25%               2024-03-28 23:43:45+09:00           NaN           NaN   
50%               2024-05-12 23:42:30+09:00           NaN           NaN   
75%               2024-06-26 23:41:15+09:00           NaN           NaN   
max               2024-08-10 23:40:00+09:00           NaN           NaN   

                 low         close          volume  
count          51840         51840           51840  
unique          9259          9322           51725  
top     146.000

In [10]:
print(historical_data.info())
print(historical_data.head())
print(historical_data.index.is_unique)

<class 'pandas.core.frame.DataFrame'>
Index: 51840 entries, 0 to 287
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype                     
---  ------     --------------  -----                     
 0   timestamp  51840 non-null  datetime64[ns, Asia/Tokyo]
 1   open       51840 non-null  object                    
 2   high       51840 non-null  object                    
 3   low        51840 non-null  object                    
 4   close      51840 non-null  float64                   
 5   volume     51840 non-null  object                    
 6   rsi        51840 non-null  float64                   
dtypes: datetime64[ns, Asia/Tokyo](1), float64(2), object(4)
memory usage: 3.2+ MB
None
                  timestamp            open            high             low  \
0 2024-02-12 23:45:00+09:00  48556.30000000  48719.17000000  48455.51000000   
1 2024-02-12 23:50:00+09:00  48606.20000000  48663.20000000  48520.00000000   
2 2024-02-12 23:55:00+09:00  48623.66000000