In [2]:
import pandas as pd
import numpy as np

In [9]:
# 读取数据
output_file = 'D:\\FutureData\\ricequant\\1d_2017to2024_noadjust.h5'
df0 = pd.read_hdf(output_file, key='/MA').reset_index()
df1 = pd.read_hdf(output_file, key='/PP').reset_index()


In [17]:
import numpy as np

class KalmanFilter:
    def __init__(self):
        self.x = np.array([1.0])  # 初始系数（假设1:1配比）
        self.P = np.eye(1)        # 状态协方差
        self.Q = 0.01             # 过程噪声
        self.R = 0.1              # 观测噪声

    def update(self, z):
        # 预测步骤
        x_pred = self.x
        P_pred = self.P + self.Q

        # 更新步骤
        K = P_pred / (P_pred + self.R)
        self.x = x_pred + K * (z - x_pred)
        self.P = (1 - K) * P_pred
        return self.x[0]

def kalman_ratio(df1, df2):
    kf = KalmanFilter()
    spreads = []
    for p1, p2 in zip(df1, df2):
        if p2 != 0:
            ratio = p1 / p2  # 实时价格比
            beta = kf.update(ratio)
        spreads.append(p1 - beta * p2)

    # 取末段均值确定整数配比
    final_beta = np.mean(kf.x[-30:]) if len(df1) >30 else round(kf.x[-1])
    return simplify_ratio(final_beta), np.array(spreads)

kalman_ratio(df1.close,df0.close)

((25, 9),
 array([[531.71171171],
        [241.63814015],
        [180.71105   ],
        ...,
        [-31.38092881],
        [-81.91067103],
        [-57.64115829]]))

In [14]:
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller
def simplify_ratio(ratio, max_denominator=10):
    """
    将浮点比例转换为最简整数比
    :param ratio: 浮点比例值
    :param max_denominator: 最大允许的分母值
    :return: (分子, 分母) 的元组
    """
    from fractions import Fraction
    frac = Fraction(ratio).limit_denominator(max_denominator)
    return (frac.numerator, frac.denominator)

def cointegration_ratio(df1, df2):

    # 协整回归
    X = sm.add_constant(df2)
    model = sm.OLS(df1, X).fit()
    beta = model.params[1] # 回归系数整数化
    spread = df1 - beta * df2  # 价差序列

    return simplify_ratio(beta), spread  # 配比格式(资产1单位:资产β单位)

cointegration_ratio(df1.close,df0.close)

  beta = model.params[1] # 回归系数整数化


((13, 8),
 0       4250.271689
 1       4208.413727
 2       4242.987613
 3       4153.277424
 4       4246.924975
            ...     
 1938    3105.942179
 1939    3112.561500
 1940    3099.851310
 1941    3062.936444
 1942    3099.976144
 Name: close, Length: 1943, dtype: float64)

In [8]:
import statsmodels.api as sm
def cointegration_ratio(df1, df2):

    # 协整回归
    X = sm.add_constant(df2)
    model = sm.OLS(df1, X).fit()
    beta = model.params[1] # 回归系数整数化
    spread = df1 - beta * df2  # 价差序列

    return simplify_ratio(beta), spread  # 配比格式(资产1单位:资产β单位)

def simplify_ratio(ratio, max_denominator=10):
    """
    将浮点比例转换为最简整数比
    :param ratio: 浮点比例值
    :param max_denominator: 最大允许的分母值
    :return: (分子, 分母) 的元组
    """
    from fractions import Fraction
    frac = Fraction(ratio).limit_denominator(max_denominator)
    return (frac.numerator, frac.denominator)


In [23]:

def calculate_rolling_spread(df0, df1, window: int = 60):
    """滚动计算 β 和价差（spread）"""
    # 1. 对齐并合并价格
    df = (df0.set_index('date')['close']
              .rename('close0')
              .to_frame()
              .join(df1.set_index('date')['close']
                        .rename('close1'),
                    how='inner'))

    # 2. 计算滚动 β（向量化做法，比 rolling-apply 快很多）
    cov  = df['close0'].rolling(window).cov(df['close1'])
    var1 = df['close1'].rolling(window).var()
    beta = (cov / var1).round(1)

    # 3. 计算价差
    spread = df['close0'] - beta * df['close1']

    # 4. 整理输出
    out = (pd.DataFrame({'date': df.index,
                         'beta': beta,
                         'close': spread})
             .dropna()
             .reset_index(drop=True))
    return out

# 读取数据
output_file = 'D:\\FutureData\\ricequant\\1d_2017to2024_noadjust.h5'
df0 = pd.read_hdf(output_file, key='/J').reset_index()
df1 = pd.read_hdf(output_file, key='/JM').reset_index()

# 计算滚动价差
df_spread = calculate_rolling_spread(df0, df1, window=60)
print("滚动价差计算完成，系数示例：")
print(df_spread.head())


滚动价差计算完成，系数示例：
        date  beta   close
0 2017-04-05   1.9 -620.05
1 2017-04-06   1.9 -636.70
2 2017-04-07   1.9 -620.65
3 2017-04-10   1.9 -605.55
4 2017-04-11   1.9 -574.40


In [24]:
df_spread

Unnamed: 0,date,beta,close
0,2017-04-05,1.9,-620.05
1,2017-04-06,1.9,-636.70
2,2017-04-07,1.9,-620.65
3,2017-04-10,1.9,-605.55
4,2017-04-11,1.9,-574.40
...,...,...,...
1879,2024-12-25,1.1,549.20
1880,2024-12-26,1.1,550.40
1881,2024-12-27,1.1,526.00
1882,2024-12-30,1.0,641.00


In [11]:
df0

Unnamed: 0,underlying_symbol,date,dominant_id,open,close,high,low,total_turnover,volume,prev_close,settlement,prev_settlement,open_interest,limit_up,limit_down,day_session_open
0,J,2017-01-03,J1705,1519.0,1480.5,1595.0,1476.0,1.487567e+10,97848.0,1518.0,1520.0,1538.0,143064.0,1676.0,1400.0,1519.0
1,J,2017-01-04,J1705,1488.0,1495.0,1505.5,1442.0,1.926395e+10,130410.0,1480.5,1477.0,1520.0,145968.0,1656.5,1383.5,1468.0
2,J,2017-01-05,J1705,1493.0,1506.5,1527.0,1485.0,1.343434e+10,89294.0,1495.0,1504.5,1477.0,132964.0,1609.5,1344.5,1502.0
3,J,2017-01-06,J1705,1501.0,1532.0,1568.0,1486.0,1.882877e+10,123202.0,1506.5,1528.0,1504.5,134196.0,1639.5,1369.5,1507.0
4,J,2017-01-09,J1705,1540.0,1589.0,1589.5,1512.0,1.605840e+10,103720.0,1532.0,1548.0,1528.0,123670.0,1665.5,1390.5,1518.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1938,J,2024-12-25,J2505,1824.5,1817.5,1839.0,1805.0,3.159435e+09,17369.0,1824.5,1819.0,1810.0,24904.0,1954.5,1665.5,1830.0
1939,J,2024-12-26,J2505,1824.5,1805.5,1830.0,1803.0,2.712152e+09,14927.0,1817.5,1816.5,1819.0,26321.0,1964.5,1673.5,1821.0
1940,J,2024-12-27,J2505,1800.0,1780.0,1806.0,1773.0,3.577917e+09,20046.0,1805.5,1784.5,1816.5,30107.0,1961.5,1671.5,1788.0
1941,J,2024-12-30,J2505,1781.0,1798.5,1799.0,1766.0,2.765209e+09,15476.0,1780.0,1786.5,1784.5,29103.0,1927.0,1642.0,1782.0


In [17]:
df0.date

0      2017-01-03
1      2017-01-04
2      2017-01-05
3      2017-01-06
4      2017-01-09
          ...    
1938   2024-12-25
1939   2024-12-26
1940   2024-12-27
1941   2024-12-30
1942   2024-12-31
Name: date, Length: 1943, dtype: datetime64[ns]