In [4]:
import scipy as sp
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats
import statsmodels.api as sm
from pathlib import Path
import os
from rqdatac import *
from rqfactor import *
from rqfactor import Factor
from rqfactor.extension import *

init("13522652015", "123456")
import rqdatac

from tqdm import *

import matplotlib.pyplot as plt

plt.rcParams["font.sans-serif"] = [
    "Arial Unicode MS",
    "PingFang SC",
    "Hiragino Sans GB",
    "STHeiti",
    "DejaVu Sans",
]
plt.rcParams["axes.unicode_minus"] = False

import warnings

warnings.filterwarnings("ignore")

In [12]:
start_date = "2000-01-01"
end_date = "2025-09-02"
stock_symbol = ["688196.XSHG"]

In [14]:
factors = [
    "market_cap_3",
    "a_share_market_val_3",
    "market_cap_2",
    "a_share_market_val_in_circulation",
]
get_factor(stock_symbol, factors, start_date, end_date)

Unnamed: 0_level_0,Unnamed: 1_level_0,market_cap_3,a_share_market_val_3,market_cap_2,a_share_market_val_in_circulation
order_book_id,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
688196.XSHG,2000-01-04,,,,
688196.XSHG,2000-01-05,,,,
688196.XSHG,2000-01-06,,,,
688196.XSHG,2000-01-07,,,,
688196.XSHG,2000-01-10,,,,
688196.XSHG,...,...,...,...,...
688196.XSHG,2025-08-27,6.205200e+09,6.205200e+09,,6.205200e+09
688196.XSHG,2025-08-28,5.874000e+09,5.874000e+09,,5.874000e+09
688196.XSHG,2025-08-29,5.707200e+09,5.707200e+09,,5.707200e+09
688196.XSHG,2025-09-01,5.580000e+09,5.580000e+09,,5.580000e+09


In [None]:
stacked_universe = (
    stock_universe.stack("order_book_id").swaplevel().sort_index().to_frame("signal")
)
stacked_universe

In [None]:
# csv文件总计包含特征如下（32 in total）：
# 1.交易日期、股票代码、股票简称
# 2.开盘价、最高价、最低价、收盘价、昨收价、涨跌额、涨跌幅、振幅
# 3.成交量、成交额、复权因子、换手率（%）、换手率（自由流通股）、量比
# 4.市盈率（静态，TTM，动态），市净率，市销率TTM，股息率（%），股息率TTM
# 5.总股本，流动股本，自由流通股本、总市值、流通市值、股东人数、行业、地域

# 已经取到的因子：
# 1. 交易日期（未和米筐完全对齐，米筐数据称历史日行情数据最早从2005年开始，
# 尝试了000001，从2000年开始，但2000年前000001数据缺失，csv文件从1991年开始），会根据csv文件中的tikcer逐一过滤米筐数据
# 2. 未对齐：开盘价、最高价、最低价、收盘价、昨收价、涨跌额 || 对齐：涨跌幅、振幅
# 3. 未对齐：成交量、复权因子 || 对齐：成交额、换手率（%） || 待确认：换手率（自由流通股）、量比
# 4. 未对齐：股息率TTM(有的股票可以对齐，有的股票无法对齐但差别不大) || 股息率（%）
# 4. 对齐：市盈率（静态，TTM，csv缺的数据用rqdata补齐）、市净率、市销率TTM || 待确认：市盈率（动态）
# 5. 未对齐：自由流通股本 (有区别，但幅度有限，估计各家定义略有不同) || 对齐：总股本、流动股本 总市值、流通市值、股东人数、行业、地域

In [2]:
stock_symbol = "000001.XSHE"
start_date = instruments(stock_symbol).listed_date
end_date = "20250815"
print(start_date, instruments(stock_symbol).symbol)

1991-04-03 平安银行


In [3]:
daily_price = get_price(
    stock_symbol,
    start_date,
    end_date,
    fields=[
        "open",
        "high",
        "low",
        "close",
        "prev_close",
        "volume",
        "total_turnover",
        "num_trades",
    ],
    adjust_type="none",
    skip_suspended=False,
).sort_index()
daily_price

Unnamed: 0_level_0,Unnamed: 1_level_0,open,high,low,close,prev_close,volume,total_turnover,num_trades
order_book_id,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
000001.XSHE,2000-01-04,17.50,18.55,17.20,18.29,17.45,8216000.0,1.473254e+08,11288.0
000001.XSHE,2000-01-05,18.35,18.85,18.00,18.06,18.29,9399300.0,1.734752e+08,14591.0
000001.XSHE,2000-01-06,18.02,19.05,17.75,18.78,18.06,12022200.0,2.211925e+08,15029.0
000001.XSHE,2000-01-07,19.00,19.77,18.90,19.54,18.78,22934600.0,4.435924e+08,29217.0
000001.XSHE,2000-01-10,19.79,20.48,19.77,20.14,19.54,18521000.0,3.722945e+08,26004.0
000001.XSHE,...,...,...,...,...,...,...,...,...
000001.XSHE,2025-08-11,12.40,12.42,12.27,12.30,12.40,93435315.0,1.150305e+09,69873.0
000001.XSHE,2025-08-12,12.30,12.40,12.30,12.33,12.30,68005619.0,8.396356e+08,48750.0
000001.XSHE,2025-08-13,12.37,12.40,12.23,12.26,12.33,111981305.0,1.376778e+09,69878.0
000001.XSHE,2025-08-14,12.27,12.34,12.19,12.20,12.26,124104129.0,1.523448e+09,73812.0


In [28]:
# 获取daily_vol的起始日期
daily_vol = daily_price.volume
daily_vol_start_date = daily_vol.index.get_level_values("date").min()

# 使用daily_vol的起始日期获取stock_free_circulation的截面
stock_free_circulation = get_shares(
    stock_symbol, daily_vol_start_date, end_date
).free_circulation
stock_free_circulation

order_book_id  date      
000001.XSHE    2000-01-04    0.000000e+00
               2000-01-05    0.000000e+00
               2000-01-06    0.000000e+00
               2000-01-07    0.000000e+00
               2000-01-10    0.000000e+00
                                 ...     
               2025-08-11    8.160452e+09
               2025-08-12    8.160452e+09
               2025-08-13    8.160452e+09
               2025-08-14    8.160452e+09
               2025-08-15    8.160452e+09
Name: free_circulation, Length: 6209, dtype: float64

In [30]:
free_turnover = daily_vol / stock_free_circulation
free_turnover

order_book_id  date      
000001.XSHE    2000-01-04         inf
               2000-01-05         inf
               2000-01-06         inf
               2000-01-07         inf
               2000-01-10         inf
                               ...   
               2025-08-11    0.011450
               2025-08-12    0.008334
               2025-08-13    0.013722
               2025-08-14    0.015208
               2025-08-15    0.023877
Length: 6209, dtype: float64

In [14]:
get_turnover_rate(stock_symbol, start_date, end_date)

Unnamed: 0_level_0,Unnamed: 1_level_0,today,week,month,year,current_year
order_book_id,tradedate,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
000001.XSHE,2000-01-04,0.7667,0.7667,0.7667,0.7667,0.7667
000001.XSHE,2000-01-05,0.8771,0.8219,0.8219,0.8219,0.8219
000001.XSHE,2000-01-06,1.1219,0.9219,0.9219,0.9219,0.9219
000001.XSHE,2000-01-07,2.1402,1.2264,1.2264,1.2264,1.2264
000001.XSHE,2000-01-10,1.7283,1.3268,1.3268,1.3268,1.3268
000001.XSHE,...,...,...,...,...,...
000001.XSHE,2025-08-11,0.4815,0.4441,0.6689,0.6548,0.5918
000001.XSHE,2025-08-12,0.3504,0.3975,0.6544,0.6555,0.5902
000001.XSHE,2025-08-13,0.5771,0.4417,0.6511,0.6565,0.5901
000001.XSHE,2025-08-14,0.6395,0.4952,0.6369,0.6582,0.5904


In [12]:
factor_list = [
    "pe_ratio_lyr",
    "pe_ratio_ttm",
    "pb_ratio_lyr",
    "pb_ratio_ttm",
    "pb_ratio_lf",
    "ps_ratio_lyr",
    "ps_ratio_ttm",
    "dividend_yield_ttm",
    "market_cap_3",
    "market_cap_2",
]
get_factor(
    stock_symbol,
    factor_list,
    start_date,
    end_date,
)

Unnamed: 0_level_0,Unnamed: 1_level_0,pe_ratio_lyr,pe_ratio_ttm,pb_ratio_lyr,pb_ratio_ttm,pb_ratio_lf,ps_ratio_lyr,ps_ratio_ttm,dividend_yield_ttm,market_cap_3,market_cap_2
order_book_id,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
830779.BJSE,2023-06-01,22.316800,20.380891,7.664162,8.281664,7.195818,9.438705,8.836830,0.0,1.576432e+09,379727220.0
830779.BJSE,2023-06-02,22.810750,20.831992,7.833797,8.464966,7.355087,9.647617,9.032420,0.0,1.611324e+09,388131922.0
830779.BJSE,2023-06-05,21.685193,19.804074,7.447251,8.047277,6.992163,9.171572,8.586731,0.0,1.531816e+09,368980223.0
830779.BJSE,2023-06-06,21.336999,19.486084,7.327673,7.918064,6.879892,9.024307,8.448856,0.0,1.507220e+09,363055596.0
830779.BJSE,2023-06-07,21.523242,19.656172,7.391633,7.987178,6.939944,9.103077,8.522603,0.0,1.520376e+09,366224583.0
830779.BJSE,...,...,...,...,...,...,...,...,...,...,...
830779.BJSE,2025-08-11,47.534920,50.067008,6.834512,6.893068,6.680977,19.949539,20.396075,0.0,3.498465e+09,875250552.0
830779.BJSE,2025-08-12,46.718916,49.207538,6.717188,6.774739,6.566289,19.607077,20.045948,0.0,3.438408e+09,860225651.0
830779.BJSE,2025-08-13,46.370755,48.840830,6.667130,6.724252,6.517355,19.460960,19.896560,0.0,3.412785e+09,853815027.0
830779.BJSE,2025-08-14,45.587392,48.015739,6.554499,6.610656,6.407254,19.132197,19.560438,0.0,3.355131e+09,839391122.0


In [73]:
get_instrument_industry("000001.XSHE")

Unnamed: 0_level_0,first_industry_code,first_industry_name
order_book_id,Unnamed: 1_level_1,Unnamed: 2_level_1
000001.XSHE,40,银行


In [77]:
get_holder_number("000002.XSHE", start_date, end_date)

Unnamed: 0_level_0,Unnamed: 1_level_0,end_date,share_holders,a_share_holders,avg_circulation_share_holders,avg_share_holders,avg_a_share_holders
order_book_id,info_date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
000002.XSHE,1995-03-05,1994-12-31,97468.0,97025.0,1404.0,2492.67,1404.68
000002.XSHE,1996-04-25,1995-12-31,51490.0,50982.0,3074.0,5597.69,3074.28
000002.XSHE,1997-04-10,1996-12-31,128726.0,126592.0,1353.0,2462.97,1353.19
000002.XSHE,1998-04-08,1997-12-31,225083.0,222433.0,1106.0,2003.07,1106.34
000002.XSHE,1999-04-14,1998-12-31,242817.0,240080.0,1186.0,2042.46,1186.27
000002.XSHE,...,...,...,...,...,...,...
000002.XSHE,2024-08-31,2024-07-31,585818.0,585746.0,16588.0,20365.90,16601.39
000002.XSHE,2024-10-31,2024-09-30,627280.0,627209.0,15492.0,19019.75,15503.92
000002.XSHE,2025-04-01,2024-12-31,572979.0,572906.0,16960.0,20822.25,16973.46
000002.XSHE,2025-04-01,2025-02-28,571412.0,571342.0,17007.0,20879.35,17019.92


In [76]:
cfoa_mrq = Factor("cash_flow_from_operating_activities_mrq_0") / Factor(
    "total_assets_mrq_0"
)
## 资产周转率变动 = 当期营业收入/当期总资产 - 去年同期营业收入/去年同期总资产
atdy_mrq = Factor("operating_revenue_mrq_0") / Factor("total_assets_mrq_0") - Factor(
    "operating_revenue_mrq_4"
) / Factor("total_assets_mrq_4")
## 现金流动负债率比 = 经营性现金流/流动负债
ccr_mrq = Factor("cash_flow_from_operating_activities_mrq_0") / Factor(
    "current_liabilities_mrq_0"
)

factor_dict = {
    "cfoa_mrq": cfoa_mrq,
    "atdy_mrq": atdy_mrq,
    "ccr_mrq": ccr_mrq,
}
# 准备N个因子
factor_list = []

for factor_name in factor_dict.keys():
    # 构建因子
    raw_factor = execute_factor(
        factor_dict[factor_name], stock_list, start_date, end_date
    )
    raw_factor.to_pickle(
        f"factor_lib/raw/{factor_name}_{index_item}_{start_date}_{end_date}.pkl"
    )
    # 因子预处理
    processed_factor = preprocess_factor(raw_factor, stock_universe, index_item)
    factor_stacked = (
        processed_factor.stack("order_book_id")
        .swaplevel()
        .sort_index()
        .to_frame(factor_name)
    )
    factor_list.append(factor_stacked)
    print(factor_name)

# 一次性拼接所有因子
all_factors = pd.concat(factor_list, axis=1)

# 最后与价格数据拼接
df = pd.concat([daily_price, all_factors], axis=1)

cfoa_mrq
atdy_mrq
ccr_mrq


In [69]:
all_factors

Unnamed: 0_level_0,Unnamed: 1_level_0,cfoa_mrq,atdy_mrq,ccr_mrq
order_book_id,datetime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
000005.XSHE,2016-12-12,-1.523261,1.539114,-0.809689
000005.XSHE,2016-12-13,-1.540992,1.483022,-0.818220
000005.XSHE,2016-12-14,-1.535448,1.468800,-0.816904
000005.XSHE,2016-12-15,-1.537001,1.468507,-0.815230
000005.XSHE,2016-12-16,-1.537387,1.472740,-0.819964
...,...,...,...,...
688800.XSHG,2024-12-09,1.082750,1.302645,0.384973
688800.XSHG,2024-12-10,1.085509,1.309257,0.387863
688800.XSHG,2024-12-11,1.085693,1.304568,0.390075
688800.XSHG,2024-12-12,1.081204,1.305440,0.384955


In [77]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,open,close,high,low,limit_up,limit_down,total_turnover,volume,cfoa_mrq,atdy_mrq,ccr_mrq
order_book_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
000005.XSHE,2015-01-05,4.10,4.10,4.10,4.10,0.00,0.00,0.0,0.0,,,
000005.XSHE,2015-01-06,4.10,4.10,4.10,4.10,0.00,0.00,0.0,0.0,,,
000005.XSHE,2015-01-07,4.10,4.10,4.10,4.10,0.00,0.00,0.0,0.0,,,
000005.XSHE,2015-01-08,4.10,4.10,4.10,4.10,0.00,0.00,0.0,0.0,,,
000005.XSHE,2015-01-09,4.10,4.10,4.10,4.10,0.00,0.00,0.0,0.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
688800.XSHG,2025-06-25,46.44,46.00,46.97,44.88,54.78,36.52,500082286.0,10907621.0,,,
688800.XSHG,2025-06-26,46.21,46.89,47.80,46.20,55.20,36.80,688699198.0,14631866.0,,,
688800.XSHG,2025-06-27,47.00,48.89,49.50,45.72,56.27,37.51,765640681.0,15909310.0,,,
688800.XSHG,2025-06-30,48.80,49.20,49.47,48.28,58.67,39.11,628471273.0,12850167.0,,,


In [78]:
df_inner = pd.concat([daily_price, all_factors], axis=1, join="inner")

In [79]:
df_inner

Unnamed: 0_level_0,Unnamed: 1_level_0,open,close,high,low,limit_up,limit_down,total_turnover,volume,cfoa_mrq,atdy_mrq,ccr_mrq
order_book_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
000005.XSHE,2016-12-12,7.21,6.83,7.22,6.81,7.91,6.47,1.039433e+08,14877116.0,-1.523261,1.539114,-0.809689
000005.XSHE,2016-12-13,6.83,6.85,6.91,6.68,7.51,6.15,7.449941e+07,11002313.0,-1.540992,1.483022,-0.818220
000005.XSHE,2016-12-14,6.83,6.76,6.89,6.72,7.54,6.17,4.338016e+07,6381301.0,-1.535448,1.468800,-0.816904
000005.XSHE,2016-12-15,6.77,6.82,6.88,6.73,7.44,6.08,4.517998e+07,6627005.0,-1.537001,1.468507,-0.815230
000005.XSHE,2016-12-16,6.82,6.93,6.99,6.82,7.50,6.14,6.067851e+07,8746542.0,-1.537387,1.472740,-0.819964
...,...,...,...,...,...,...,...,...,...,...,...,...
688800.XSHG,2024-12-09,41.78,40.82,42.77,40.78,50.53,33.69,1.607842e+08,3868924.0,1.082750,1.302645,0.384973
688800.XSHG,2024-12-10,42.17,41.08,42.84,40.60,48.98,32.66,2.681991e+08,6450906.0,1.085509,1.309257,0.387863
688800.XSHG,2024-12-11,41.08,43.29,43.94,40.62,49.30,32.86,3.236192e+08,7630302.0,1.085693,1.304568,0.390075
688800.XSHG,2024-12-12,43.70,43.82,45.00,42.87,51.95,34.63,3.537051e+08,8014597.0,1.081204,1.305440,0.384955


In [73]:
df_inner.cfoa_mrq.unstack("order_book_id")

order_book_id,000005.XSHE,000006.XSHE,000007.XSHE,000008.XSHE,000010.XSHE,000011.XSHE,000012.XSHE,000014.XSHE,000016.XSHE,000018.XSHE,...,688733.XSHG,688739.XSHG,688766.XSHG,688776.XSHG,688778.XSHG,688779.XSHG,688788.XSHG,688789.XSHG,688798.XSHG,688800.XSHG
2015-01-05,,,-1.901475,0.358935,,1.035917,,-1.250619,-1.217686,,...,,,,,,,,,,
2015-01-06,,,-1.899383,0.359349,,1.038206,,-1.251646,-1.226516,,...,,,,,,,,,,
2015-01-07,,,-1.876891,0.357655,,1.070860,,-1.215603,-1.223794,,...,,,,,,,,,,
2015-01-08,,,-1.876460,0.353034,,1.073892,,-1.217722,-1.181417,,...,,,,,,,,,,
2015-01-09,,,-1.874269,0.344252,,1.068797,,-1.203635,-1.176900,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-06-25,,,,,,,0.548543,,,,...,,-0.064672,-2.146696,-0.124559,,-0.673004,,0.120509,0.266852,
2025-06-26,,,,,,,0.552994,,,,...,,-0.066103,-2.148109,-0.119819,,-0.673106,,0.123740,0.271824,
2025-06-27,,,,,,,0.552465,,,,...,,-0.064831,-2.145358,-0.122865,,-0.675159,,0.123201,0.273465,
2025-06-30,,,,,,,0.550153,,,,...,,-0.066648,-2.141739,-0.124489,,-0.672259,,0.125520,0.273148,
