# 融资利率套利

In [1]:
import os
import sys
import datetime as dt

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.ticker as mticker
import ffn

plt.style.use("ggplot")

In [15]:
def read_raw_data(data_dir: str, token: str) -> tuple:
    """Load the spot OHLCV, perpetual OHLCV and funding-rate CSVs for a token.

    The token is lower-cased to build the file names
    ``binance_{token}_4h.csv``, ``binanceusdm_{token}_4h.csv`` and
    ``funding_rates_{token}.csv`` inside ``data_dir``.

    Returns:
        A ``(spot_ohlcv, perp_ohlcv, funding_rates)`` tuple of DataFrames.
        The two OHLCV frames are indexed by their ``timestamp`` column and
        the funding frame by its ``funding_time`` column, each parsed as
        dates.
    """
    symbol = token.lower()

    # (file name, column to use as the parsed date index), in return order.
    sources = (
        (f"binance_{symbol}_4h.csv", "timestamp"),
        (f"binanceusdm_{symbol}_4h.csv", "timestamp"),
        (f"funding_rates_{symbol}.csv", "funding_time"),
    )

    frames = []
    for file_name, index_column in sources:
        csv_path = os.path.join(data_dir, file_name)
        frames.append(pd.read_csv(csv_path, index_col=index_column, parse_dates=True))

    return tuple(frames)


def preprocess_data(
    spot_ohlcv: pd.DataFrame, perp_ohlcv: pd.DataFrame, funding_rates: pd.DataFrame
) -> pd.DataFrame:
    """Merge spot prices, perpetual prices and funding rates into one frame.

    Args:
        spot_ohlcv: Spot candles with ``open`` and ``close`` columns and an
            index named ``timestamp``.
        perp_ohlcv: Perpetual candles with the same layout as ``spot_ohlcv``.
        funding_rates: Funding history with a ``funding_rate`` column and a
            datetime index. This frame is NOT modified.

    Returns:
        A frame with columns ``open_spot``, ``close_spot``, ``open_perp``,
        ``close_perp`` and ``funding_rate``. Only timestamps present in both
        price frames are kept; bars without a funding record get a funding
        rate of 0.
    """
    # Merge spot and perp prices, keeping only the open and close columns.
    df_joined = pd.merge(
        spot_ohlcv[["open", "close"]],
        perp_ohlcv[["open", "close"]],
        on="timestamp",
        suffixes=("_spot", "_perp"),
    )

    # Build the merge key on a copy so the caller's funding_rates frame keeps
    # its original index and index name. Funding timestamps are floored to the
    # second so they can match the candle timestamps.
    funding = funding_rates["funding_rate"].copy()
    funding.index = funding_rates.index.floor("s").rename("timestamp")

    df_joined = pd.merge(df_joined, funding, on="timestamp", how="left")

    # Funding records are sparser than the price bars (the original note says
    # funding is computed every 8 hours); bars with no record are filled with 0.
    df_joined["funding_rate"] = df_joined["funding_rate"].fillna(0)

    return df_joined


def read_and_preprocess_data(data_dir: str, token: str) -> pd.DataFrame:
    """Load a token's raw CSV data from ``data_dir`` and return the merged frame."""
    raw_frames = read_raw_data(data_dir, token)
    return preprocess_data(*raw_frames)

In [28]:
class FundingRateArbitrage:
    """Backtest a funding-rate arbitrage strategy.

    Strategy logic:
    - Open when the rolling cumulative funding rate exceeds ``entry_threshold``:
      buy spot and short the perpetual contract.
    - Close when the cumulative funding rate falls below ``exit_threshold``.
    - Close when the loss on the short perpetual leg reaches
      ``max_short_position_loss`` (as a fraction of the perp entry price).

    Model notes (as implemented here):
    - Signals are read from the previous bar and executed at the current
      bar's open prices.
    - Account capital changes only through fees, funding payments and the
      pnl realised on close; an open position is not marked to market.
    - ``leverage`` is stored but not used by any calculation.
    """

    # Columns the backtest reads from the input frame.
    _REQUIRED_COLUMNS = ("open_spot", "open_perp", "funding_rate")

    def __init__(
        self,
        cumulative_funding_days: int,
        entry_threshold: float,
        exit_threshold: float,
        max_short_position_loss: float,
        capital: float = 10000,
        leverage: float = 1,
        spot_fee: float = 0.001,
        perp_fee: float = 0.0005,
        verbose: bool = True,
    ):
        """Store strategy parameters and backtest settings.

        Args:
            cumulative_funding_days: Length, in days, of the rolling window
                over which funding rates are summed.
            entry_threshold: Open when cumulative funding is above this value.
            exit_threshold: Close when cumulative funding is below this value.
            max_short_position_loss: Close when the short perp leg has lost at
                least this fraction of its entry price.
            capital: Starting account capital.
            leverage: Stored for reference; currently unused.
            spot_fee: Fee rate applied to spot notional on entry and exit.
            perp_fee: Fee rate applied to perp notional on entry and exit.
            verbose: When true, ``log`` prints its messages.
        """
        # Strategy parameters
        self.cumulative_funding_days = cumulative_funding_days
        self.entry_threshold = entry_threshold
        self.exit_threshold = exit_threshold
        self.max_short_position_loss = max_short_position_loss

        # Backtest settings
        self.capital = capital
        self.leverage = leverage
        self.spot_fee = spot_fee
        self.perp_fee = perp_fee

        # Per-run state; all three stay None until initialize() is called.
        self.data: "pd.DataFrame | None" = None
        self.positions: "np.ndarray | None" = None
        self.capitals: "np.ndarray | None" = None

        self.spot_entry_price: float = 0
        self.spot_exit_price: float = 0
        self.spot_amount: float = 0
        self.spot_entry_fee: float = 0
        self.spot_exit_fee: float = 0

        self.perp_entry_price: float = 0
        self.perp_exit_price: float = 0
        self.perp_amount: float = 0
        self.perp_entry_fee: float = 0
        self.perp_exit_fee: float = 0

        # Controls whether log() prints
        self.verbose = verbose

    def log(self, message: str) -> None:
        """Print ``message`` when verbose output is enabled."""
        if self.verbose:
            print(message)

    def initialize(self, data: pd.DataFrame) -> None:
        """Copy the input data and allocate the per-bar result arrays.

        The input must contain the columns ``open_spot``, ``open_perp`` and
        ``funding_rate``. The cumulative funding rate is computed with a
        time-based rolling window, which pandas only supports on a
        datetime-like index.

        Raises:
            KeyError: If any required column is missing.
        """
        missing = [col for col in self._REQUIRED_COLUMNS if col not in data.columns]
        if missing:
            # Fail before the run starts rather than at the first trade.
            raise KeyError(f"input data is missing required columns: {missing}")

        self.data = data.copy()
        self.data["cumulative_funding"] = (
            self.data["funding_rate"]
            .rolling(dt.timedelta(days=self.cumulative_funding_days))
            .sum()
        )
        self.positions = np.zeros(len(self.data))
        self.capitals = np.zeros(len(self.data))

    def open_position(self, idx: int, row: pd.Series) -> None:
        """Buy spot and short the perp at bar ``idx``'s open prices.

        Entry fees are deducted from ``capitals[idx]`` and ``positions[idx]``
        is set to 1.
        """
        self.log("=== Open Position ===")

        available_capital = self.capitals[idx]

        # Buy spot. Each leg is sized at 95% of half the available capital;
        # per the original note, the remaining 5% is kept to pay fees.
        self.spot_entry_price = row["open_spot"]
        self.spot_amount = available_capital / 2 * 0.95 / self.spot_entry_price
        self.spot_entry_fee = -(
            self.spot_entry_price * self.spot_amount * self.spot_fee
        )
        self.log(
            f"Buy spot: price={self.spot_entry_price:.2f}, amount={self.spot_amount:.2f}, fee={self.spot_entry_fee:.2f}"
        )

        # Short the perpetual contract
        self.perp_entry_price = row["open_perp"]
        self.perp_amount = available_capital / 2 * 0.95 / self.perp_entry_price
        self.perp_entry_fee = -(
            self.perp_entry_price * self.perp_amount * self.perp_fee
        )
        self.log(
            f"Sell perp: price={self.perp_entry_price:.2f}, amount={self.perp_amount:.2f}, fee={self.perp_entry_fee:.2f}"
        )

        # Update position and capital (fees are stored as negative numbers)
        total_fee = self.spot_entry_fee + self.perp_entry_fee
        self.positions[idx] = 1
        self.capitals[idx] += total_fee
        self.log("Position: 0 -> 1")
        self.log(f"Total fee: {total_fee:.2f}")
        self.log(f"Capital: {available_capital:.2f} -> {self.capitals[idx]:.2f}")

        self.log("=====================")

    def close_position(self, idx: int, row: pd.Series) -> None:
        """Sell spot and buy back the perp at bar ``idx``'s open prices.

        Realised pnl of both legs plus exit fees is added to
        ``capitals[idx]``, ``positions[idx]`` is set to 0 and the per-trade
        variables are reset.
        """
        self.log("=== Close Position ===")

        # Sell spot
        self.spot_exit_price = row["open_spot"]
        self.spot_exit_fee = -(self.spot_exit_price * self.spot_amount * self.spot_fee)
        spot_pnl = (self.spot_exit_price - self.spot_entry_price) * self.spot_amount
        self.log(
            f"Sell spot: price={self.spot_exit_price:.2f}, amount={self.spot_amount:.2f}, fee={self.spot_exit_fee:.2f}, pnl={spot_pnl:.2f}"
        )

        # Buy back the perpetual contract (a short gains when price falls)
        self.perp_exit_price = row["open_perp"]
        self.perp_exit_fee = -(self.perp_exit_price * self.perp_amount * self.perp_fee)
        perp_pnl = (self.perp_entry_price - self.perp_exit_price) * self.perp_amount
        self.log(
            f"Buy perp: price={self.perp_exit_price:.2f}, amount={self.perp_amount:.2f}, fee={self.perp_exit_fee:.2f}, pnl={perp_pnl:.2f}"
        )

        # Update position and capital
        prev_capital = self.capitals[idx]
        total_fee = self.spot_exit_fee + self.perp_exit_fee
        trading_pnl = spot_pnl + perp_pnl
        self.positions[idx] = 0
        self.capitals[idx] += trading_pnl + total_fee
        self.log("Position: 1 -> 0")
        self.log(f"Trading pnl: {trading_pnl:.2f}")
        self.log(f"Total fee: {total_fee:.2f}")
        self.log(f"Capital: {prev_capital:.2f} -> {self.capitals[idx]:.2f}")

        # Reset per-trade variables once the position is closed
        self.reset_position_variables()

        self.log("=====================")

    def reset_position_variables(self) -> None:
        """Zero every per-trade price, amount and fee field."""
        self.spot_entry_price = 0
        self.spot_exit_price = 0
        self.spot_amount = 0
        self.spot_entry_fee = 0
        self.spot_exit_fee = 0
        self.perp_entry_price = 0
        self.perp_exit_price = 0
        self.perp_amount = 0
        self.perp_entry_fee = 0
        self.perp_exit_fee = 0

    def run_backtest(self, data: pd.DataFrame) -> None:
        """Run the strategy over ``data`` and fill ``positions`` / ``capitals``.

        Each call starts from a clean state, so one instance can be reused
        for several data sets. The first bar only seeds the starting capital.

        Raises:
            KeyError: If ``data`` lacks a required column (see ``initialize``).
        """
        self.initialize(data)
        self.reset_position_variables()

        # Loop-invariant column lookups, hoisted out of the per-bar loop.
        funding_rates = self.data["funding_rate"].to_numpy()
        cumulative_funding = self.data["cumulative_funding"].to_numpy()

        for idx, (ts, row) in enumerate(self.data.iterrows()):
            if idx == 0:
                self.capitals[idx] = self.capital
                continue

            # Values known after the previous bar
            prev_position = self.positions[idx - 1]
            prev_capital = self.capitals[idx - 1]
            prev_funding = funding_rates[idx - 1]
            prev_cumulative_funding = cumulative_funding[idx - 1]

            # Carry position and capital forward to the current bar
            self.positions[idx] = prev_position
            self.capitals[idx] = prev_capital

            # Guarded so the message is not formatted on every bar when
            # verbose output is off.
            if self.verbose:
                self.log(
                    f"{ts}: PrevPos={prev_position:.0f}, PrevCapital={prev_capital:.2f}, PrevFunding={prev_funding:.3%}, PrevCumFunding={prev_cumulative_funding:.3%}"
                )

            # Funding income: accrued on the perp entry notional using the
            # previous bar's funding rate (negative rates reduce capital).
            if prev_position > 0 and abs(prev_funding) > 0:
                funding_income = self.perp_entry_price * self.perp_amount * prev_funding
                self.capitals[idx] += funding_income
                self.log(
                    f"FundingIncome={funding_income:.3f}, Capital={self.capitals[idx]:.2f}"
                )

            # Entry
            if prev_position == 0 and prev_cumulative_funding > self.entry_threshold:
                self.open_position(idx, row)

            # Exit: cumulative funding fell below the exit threshold
            if prev_position > 0 and prev_cumulative_funding < self.exit_threshold:
                self.close_position(idx, row)

            # Exit: short perp loss reached the limit. The positions[idx]
            # check skips this when the position was already closed above.
            if prev_position > 0 and self.positions[idx] != 0:
                perp_pnl_pct = (
                    self.perp_entry_price - row["open_perp"]
                ) / self.perp_entry_price
                if perp_pnl_pct <= -self.max_short_position_loss:
                    self.log(f"Perp pnl pct: {perp_pnl_pct:.1%}")
                    self.close_position(idx, row)

    def _equity_series(self) -> pd.Series:
        """Return per-bar capital as a Series indexed like the input data."""
        if self.data is None or self.capitals is None:
            raise RuntimeError(
                "run_backtest() must be called before requesting results"
            )
        return pd.Series(self.capitals, index=self.data.index)

    def plot_equity_curve(self) -> None:
        """Plot the per-bar capital series.

        Raises:
            RuntimeError: If no backtest has been run yet.
        """
        capitals = self._equity_series()
        capitals.plot(figsize=(12, 6), title="Equity Curve")

    def calculate_performance(self) -> "ffn.core.PerformanceStats":
        """Compute performance statistics for the capital series.

        ``calc_stats`` is not a built-in pandas method; it is presumably
        attached to ``pd.Series`` by the ``ffn`` import at the top of the file.

        Raises:
            RuntimeError: If no backtest has been run yet.
        """
        capitals = self._equity_series()
        return capitals.calc_stats()

    def stats(self) -> dict:
        """Return a summary dict of selected performance statistics.

        Keys: ``start``, ``end``, ``total_return``, ``cagr``,
        ``max_drawdown`` and ``sharpe`` (taken from ``daily_sharpe``).
        """
        stats = self.calculate_performance().stats
        return {
            "start": stats["start"],
            "end": stats["end"],
            "total_return": stats["total_return"],
            "cagr": stats["cagr"],
            "max_drawdown": stats["max_drawdown"],
            "sharpe": stats["daily_sharpe"],
        }

In [32]:
# Symbols to backtest. Each entry is used as given to build the data file
# names, so the spelling must match the files on disk.
tokens = [
    "BTC", "ETH", "DOGE", "SOL", "XRP",
    "XLM", "1000PEPE", "ENS", "PNUT", "SUI",
    "TIA", "SAND", "AVAX", "FTM", "UNI",
    "WIF", "NEIRO", "ETC", "LINK", "ENA",
    "OP", "GOAT", "EIGEN", "ARB", "NEAR",
    "WLD", "1000SHIBUSDT", "FIL", "LTC", "LDO",
    "FET", "MASK", "BAN", "INJ", "ACT",
    "ZEC", "TAO", "TON", "STX",
]  # fmt: skip

# Directory holding the per-token price and funding-rate CSV files.
data_dir = "/users/scofield/quant-research/data/funding_arbitrage"

In [33]:
# Strategy parameters shared by every token
cumulative_funding_days = 30
entry_threshold = 0.005
exit_threshold = 0
max_short_position_loss = 0.7
capital = 10000

# Apply the strategy to every token, collecting one stats dict per success.
all_stats = []
strategy = FundingRateArbitrage(
    cumulative_funding_days=cumulative_funding_days,
    entry_threshold=entry_threshold,
    exit_threshold=exit_threshold,
    max_short_position_loss=max_short_position_loss,
    capital=capital,
    verbose=False,
)

for token in tokens:
    # A failure to load or backtest one token is reported and skipped so the
    # remaining tokens still run.
    try:
        data = read_and_preprocess_data(data_dir, token)
        strategy.run_backtest(data)
    except Exception as e:
        print(f"Error: {token}, {e}")
        continue

    stats = strategy.stats()
    stats["token"] = token
    all_stats.append(stats)
    print(f"{token}: backtest completed")

BTC: backtest completed
ETH: backtest completed
DOGE: backtest completed
SOL: backtest completed
XRP: backtest completed
XLM: backtest completed
1000PEPE: backtest completed
ENS: backtest completed
PNUT: backtest completed
SUI: backtest completed
TIA: backtest completed
SAND: backtest completed
AVAX: backtest completed
FTM: backtest completed
UNI: backtest completed
WIF: backtest completed
NEIRO: backtest completed
ETC: backtest completed
LINK: backtest completed
ENA: backtest completed
OP: backtest completed
GOAT: backtest completed
EIGEN: backtest completed
ARB: backtest completed
NEAR: backtest completed
WLD: backtest completed
1000SHIBUSDT: backtest completed
FIL: backtest completed
LTC: backtest completed
LDO: backtest completed
FET: backtest completed
MASK: backtest completed
BAN: backtest completed
INJ: backtest completed
ACT: backtest completed
ZEC: backtest completed
TAO: backtest completed
TON: backtest completed
STX: backtest completed


In [45]:
# Collect the per-token stats into one table.
stats_df = pd.DataFrame(all_stats)

# Reset the start timestamp's hour, minute and second to zero.
stats_df["start"] = stats_df["start"].map(
    lambda ts: ts.replace(hour=0, minute=0, second=0)
)

# Rank by CAGR and keep only tokens whose data starts before 2024.
stats_df = stats_df.sort_values("cagr", ascending=False).query(
    "start < '2024-01-01'"
)

# Render percentages and draw in-cell bars for the CAGR and Sharpe columns.
stats_df.style.format(
    dict(
        total_return="{:.1%}",
        cagr="{:.1%}",
        max_drawdown="{:.1%}",
        sharpe="{:.1f}",
    )
).bar(subset=["cagr", "sharpe"], color="lightgreen", width=60, height=80)

Unnamed: 0,start,end,total_return,cagr,max_drawdown,sharpe,token
35,2020-02-05 00:00:00,2024-11-20 00:00:00,48.8%,8.6%,-0.3%,8.3,ZEC
5,2020-01-20 00:00:00,2024-11-20 00:00:00,49.2%,8.6%,-0.4%,6.8,XLM
4,2020-01-06 00:00:00,2024-11-20 00:00:00,48.4%,8.4%,-0.7%,6.8,XRP
13,2020-09-24 00:00:00,2024-11-20 00:00:00,40.0%,8.4%,-2.4%,4.8,FTM
28,2020-01-09 00:00:00,2024-11-20 00:00:00,47.6%,8.3%,-0.3%,7.9,LTC
1,2020-01-01 00:00:00,2024-11-20 00:00:00,45.2%,7.9%,-0.3%,8.4,ETH
14,2020-09-18 00:00:00,2024-11-20 00:00:00,36.4%,7.7%,-0.6%,7.0,UNI
18,2020-01-17 00:00:00,2024-11-20 00:00:00,42.0%,7.5%,-1.2%,6.8,LINK
6,2023-05-05 00:00:00,2024-11-20 00:00:00,10.9%,6.9%,-0.3%,7.9,1000PEPE
27,2020-10-16 00:00:00,2024-11-20 00:00:00,31.4%,6.9%,-0.8%,6.4,FIL


测试结果分析

- 所有资产都实现盈利
- CAGR维持在3-9%之间，均没有超过10%
- 最大回撤普遍低于1%（注意：回测中的账户资本只在支付手续费、结算融资利率收入和平仓时更新，持仓期间没有按市价逐K线盯市，因此该回撤数字可能被低估）
- 策略收益并不随市值变小而提高：BTC和ETH的表现远胜于很多山寨币

总结

- 基于测试结果，没有理由交易小市值山寨币，交易主流币已经可以获得6-8%的年化收益率