In [4]:
import tushare as ts
from typing import Any

import numpy as np
import pandas as pd

# Read the Tushare token from the TUSHARE_TOKEN environment variable so the credential
# never lives in the notebook (the token that used to be written here should be rotated).
# When the variable is unset the empty string is passed, which lets tushare fall back to
# the token previously saved with ts.set_token().
import os
pro = ts.pro_api(os.environ.get('TUSHARE_TOKEN', ''))

def code_to_secid(df : pd.DataFrame , code_col = 'ts_code' , retain = False):
    '''
    Add an integer `secid` column built from the first six characters of `code_col`.

    The frame is modified in place and is also returned. When `code_col` is not a
    column of `df`, the frame is returned untouched. The legacy symbol 'T00018' is
    mapped to '600018', and any six-character prefix that is not purely digits
    becomes -1. `code_col` is removed afterwards unless `retain` is true.
    '''
    has_code_col = code_col in df.columns.values
    if not has_code_col:
        return df

    legacy_symbols = {'T00018' : '600018'}
    symbol = df[code_col].astype(str).str.slice(0, 6).replace(legacy_symbols)
    # non-numeric prefixes cannot be cast to int, so they are flagged as -1
    df['secid'] = symbol.where(symbol.str.isdigit() , '-1').astype(int)

    if not retain:
        df.pop(code_col)
    return df

# adj0 = pro.query('adj_factor',  trade_date='20240705')


In [17]:
# Build the daily trade table for one date straight from the Tushare endpoints.
date = 19970103
date_str = str(date)
adj = pro.query('adj_factor',  trade_date=date_str).rename(columns={'adj_factor':'adjfactor'})

quote = pro.daily(trade_date=date_str).rename(columns={'pct_chg':'pctchange','pre_close':'preclose','vol':'volume'})
quote['volume'] = quote['volume'] / 10. # to 10^3
# vwap falls back to the close price when nothing traded (avoids the 0/0 result)
quote['vwap'] = np.where(quote['volume'] == 0 , quote['close'] , quote['amount'] / quote['volume'])

shr = pro.daily_basic(trade_date=date_str).loc[:,['ts_code','trade_date' , 'total_share','float_share','free_share']]
shr.loc[:,['total_share','float_share','free_share']] *= 1e4
# use float_share wherever free_share is missing
shr.loc[shr['free_share'].isna() , 'free_share'] = shr.loc[shr['free_share'].isna() , 'float_share']

limit = pro.stk_limit(trade_date=date_str)
if len(limit) == 0:
    # No limit prices returned for this date: approximate them with a +/-10% band.
    # The band must be anchored on the PREVIOUS close; anchoring it on the same day's
    # close (as before) makes close >= up_limit / close <= down_limit impossible,
    # so the `limit` flag below would always be 0.
    limit = quote.loc[:,['ts_code' , 'trade_date' , 'preclose']].copy()
    limit['up_limit'] = (limit['preclose'] * 1.1).round(2)
    limit['down_limit'] = (limit['preclose'] * 0.9).round(2)
    limit = limit.drop(columns=['preclose'])

susp = pro.suspend_d(suspend_type='S', trade_date=date_str)

mutual_col = ['ts_code' , 'trade_date']

# left joins keep every quoted security even if a side table has no row for it
trade = quote.merge(adj,on=mutual_col,how='left').\
    merge(limit,on=mutual_col,how='left').\
    merge(shr,on=mutual_col,how='left')
# status: 1.0 unless the code appears in the suspension list
trade['status'] = 1.0 * ~trade['ts_code'].isin(susp['ts_code']).fillna(0)
# limit: +1.0 when close is at/above up_limit, -1.0 when at/below down_limit, else 0.0
trade['limit'] = 1.0 * (trade['close'] >= trade['up_limit']).fillna(0) - 1.0 * (trade['close'] <= trade['down_limit']).fillna(0)
# turnover ratios against total / float / free share counts; missing share data gives 0
trade['turn_tt'] = (trade['volume'] / trade['total_share'] * 1e5).fillna(0)
trade['turn_fl'] = (trade['volume'] / trade['float_share'] * 1e5).fillna(0)
trade['turn_fr'] = (trade['volume'] / trade['free_share'] * 1e5).fillna(0)

# key by secid, sort, and keep only the output columns
trade = code_to_secid(trade).set_index('secid').sort_index().reset_index().loc[
    :,['secid', 'adjfactor', 'open', 'high', 'low', 'close', 'amount','volume', 'vwap', 
    'status', 'limit', 'pctchange', 'preclose', 'turn_tt','turn_fl', 'turn_fr']]
trade

Unnamed: 0,secid,adjfactor,open,high,low,close,amount,volume,vwap,status,limit,pctchange,preclose,turn_tt,turn_fl,turn_fr
0,1,13.968,16.50,16.59,16.18,16.30,118729.5778,7246.2,16.385082,1.0,0.0,-1.21,16.50,0.700411,1.014273,27.344151
1,2,5.121,10.60,11.20,10.38,10.45,66558.8782,6157.9,10.808697,1.0,0.0,0.00,10.45,1.942263,3.557413,21.992500
2,3,3.833,8.74,9.30,8.70,8.80,89765.3515,9964.3,9.008696,1.0,0.0,1.27,8.69,2.988391,5.196384,41.270681
3,4,3.112,6.15,6.20,5.87,6.00,9643.5748,1585.7,6.081588,1.0,0.0,-3.54,6.22,1.888262,3.806542,31.714000
4,5,2.275,9.96,10.30,9.60,9.70,44827.8666,4502.3,9.956659,1.0,0.0,-1.02,9.80,1.694824,3.574279,3.574279
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
511,600896,1.000,7.66,7.88,7.35,7.60,9195.5540,1203.1,7.643217,1.0,0.0,-0.78,7.66,0.721716,2.885132,2.885132
512,600897,1.000,17.40,17.40,16.80,16.84,12697.5670,746.7,17.004911,1.0,0.0,-3.22,17.40,0.691389,2.765556,2.765556
513,600898,1.320,8.00,8.05,7.47,7.60,11454.2910,1471.7,7.783034,1.0,0.0,-5.00,8.00,1.094004,2.198635,2.858225
514,600899,1.000,11.20,11.75,11.10,11.23,8995.8720,793.3,11.339811,1.0,0.0,0.45,11.18,0.717269,2.644333,2.644333


In [1]:
from src.data.tushare.daily import DailyQuote
# Instantiate the project's DailyQuote updater and run its update; the saved log below
# shows one "Updating trade_ts/day at <date>" line per processed date.
updater = DailyQuote()
updater.update()

DailyQuote Updating trade_ts/day at 20080814
DailyQuote Updating trade_ts/day at 20080815
DailyQuote Updating trade_ts/day at 20080816
DailyQuote Updating trade_ts/day at 20080817
DailyQuote Updating trade_ts/day at 20080818
DailyQuote Updating trade_ts/day at 20080819
DailyQuote Updating trade_ts/day at 20080820
DailyQuote Updating trade_ts/day at 20080821
DailyQuote Updating trade_ts/day at 20080822
DailyQuote Updating trade_ts/day at 20080823
DailyQuote Updating trade_ts/day at 20080824
DailyQuote Updating trade_ts/day at 20080825
DailyQuote Updating trade_ts/day at 20080826
DailyQuote Updating trade_ts/day at 20080827
DailyQuote Updating trade_ts/day at 20080828
DailyQuote Updating trade_ts/day at 20080829
DailyQuote Updating trade_ts/day at 20080830
DailyQuote Updating trade_ts/day at 20080831
DailyQuote Updating trade_ts/day at 20080901
DailyQuote Updating trade_ts/day at 20080902
DailyQuote Updating trade_ts/day at 20080903
DailyQuote Updating trade_ts/day at 20080904
DailyQuote

In [2]:
# Ask the updater for the 19970103 table; the saved output shows only the column header (no rows).
updater.get_data(19970103)

Unnamed: 0,secid,adjfactor,open,high,low,close,amount,volume,vwap,status,limit,pctchange,preclose,turn_tt,turn_fl,turn_fr


In [3]:
# Request the weights of index 000300.SH for 2018-09-01..2018-09-30;
# every row visible in the saved output is dated 20180928.
pro.index_weight(index_code='000300.SH', start_date='20180901', end_date='20180930')

Unnamed: 0,index_code,con_code,trade_date,weight
0,000300.SH,601318.SH,20180928,7.092
1,000300.SH,600519.SH,20180928,3.506
2,000300.SH,600036.SH,20180928,3.026
3,000300.SH,601166.SH,20180928,1.900
4,000300.SH,000651.SZ,20180928,1.849
...,...,...,...,...
295,000300.SH,002625.SZ,20180928,0.037
296,000300.SH,601838.SH,20180928,0.035
297,000300.SH,601108.SH,20180928,0.034
298,000300.SH,601212.SH,20180928,0.031


In [2]:
# Display `df`. NOTE(review): `df` is not assigned in this cell, so the output depends on
# kernel state left by another cell and will not reproduce on a fresh kernel by itself.
df

Unnamed: 0,secid,adjfactor,open,high,low,close,amount,volume,vwap,status,limit,pctchange,preclose,turn_tt,turn_fl,turn_fr
0,1,125.0490,10.26,10.29,9.92,9.97,1721256.576,171335.386,10.046124,1.0,0.0,-2.8265,10.26,0.882903,0.882920,2.099588
1,2,181.7040,6.94,7.01,6.83,6.88,807585.336,117078.328,6.897821,1.0,0.0,-0.8646,6.94,0.981319,1.204889,1.979952
2,4,4.0640,10.40,10.89,10.10,10.39,119598.778,11447.553,10.447541,1.0,0.0,-2.8972,10.70,8.647476,9.064634,13.363287
3,6,39.7400,3.65,3.71,3.59,3.71,44108.657,12081.172,3.651025,1.0,0.0,1.3661,3.66,0.894905,0.894910,1.420313
4,7,8.2840,5.01,5.42,4.82,5.20,128925.424,25289.607,5.097961,1.0,0.0,0.7752,5.16,7.299682,8.185715,10.991913
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5329,873706,1.1319,8.90,8.98,8.70,8.87,2795.544,318.811,8.768656,1.0,0.0,-0.4489,8.91,0.345014,1.503295,1.503295
5330,873726,1.0428,16.03,16.10,15.51,15.82,3734.164,236.085,15.817032,1.0,0.0,-0.3778,15.88,0.287638,1.186034,1.434622
5331,873806,1.0000,8.81,8.99,8.60,8.71,82485.607,9398.561,8.776408,1.0,0.0,-4.6002,9.13,3.125180,12.375746,12.375746
5332,873833,1.1148,7.79,7.79,7.61,7.73,1444.943,187.865,7.691390,1.0,0.0,0.2594,7.71,0.228102,0.593852,0.741376


In [None]:
import numpy as np  
import statsmodels.api as sm  
  

# Demonstration: weighted least squares gives the same coefficients as ordinary least
# squares run on data (constant column included) pre-multiplied by sqrt(weights).
rng = np.random.default_rng(0)  # fixed seed so the printed summaries are reproducible
x = rng.random(20)
y = rng.random(20)

l = 4  # half-life in observations: the weight halves every `l` steps
lambda_ = np.log(2) / l
weights = np.exp(-lambda_ * np.arange(len(x)))  # largest weight on the first observation
weights = weights / weights.mean()  # normalise to mean 1
x_ = (x - x.mean())
y_ = (y - y.mean())

model = sm.WLS(y_, sm.add_constant(x_), weights=weights)
results = model.fit()
wls_params = results.params  # kept for the equivalence check at the end

# print the results
print(results.summary())

# Same regression through OLS: scale response, regressor and constant by sqrt(weights)
x_ = (x - x.mean()) * np.sqrt(weights)
y_ = (y - y.mean()) * np.sqrt(weights)
x_ = np.vstack([np.sqrt(weights),x_]).T

model = sm.OLS(y_, x_)
results = model.fit()

# print the results
print(results.summary())

# The two parameter vectors ([const, slope] in both fits) must coincide
np.testing.assert_allclose(results.params, wls_params, rtol=1e-6, atol=1e-9)

In [13]:
# Display `x`. NOTE(review): the saved output is a 5-element array, which does not match
# the 20-element `x` created in the WLS cell, so it presumably reflects other kernel state.
x

array([-3.55788298, -1.25790159,  0.        ,  0.6289508 ,  0.88947075])

In [27]:
def get_daily_quote(trade_date):
    '''
    Download the raw Tushare tables for one trading day, each keyed by `secid`.

    trade_date : value whose str() is the query date, e.g. 20240705
    returns    : tuple (adj , quote , basic , limit , susp)
    '''
    day = str(trade_date)

    adj = code_to_secid(pro.query('adj_factor',  trade_date=day))
    adj = adj.rename(columns={'adj_factor':'adjfactor'})

    quote_renames = {'pct_chg':'pctchange','pre_close':'preclose','vol':'volume'}
    quote = code_to_secid(pro.daily(trade_date=day)).rename(columns=quote_renames)
    quote['volume'] = quote['volume'].div(10.) # to 10^3
    # vwap is amount / volume, except that untraded rows take the close price
    traded = quote['volume'] != 0
    quote['vwap'] = np.where(traded , quote['amount'] / quote['volume'] , quote['close'])

    basic = code_to_secid(pro.daily_basic(trade_date=day))
    scaled_cols = ['total_share','float_share','free_share','total_mv','circ_mv']
    basic.loc[:,scaled_cols] *= 1e4

    limit = code_to_secid(pro.stk_limit(trade_date=day))
    susp = code_to_secid(pro.suspend_d(suspend_type='S', trade_date=day))
    return adj , quote , basic , limit , susp

# Fetch the raw tables for 20240705; `a` is the tuple (adj , quote , basic , limit , susp).
a = get_daily_quote(20240705)

In [28]:
# Unpack the tuple held in `a` into its five frames.
adj , quote , basic , limit , susp = a

In [29]:
# Assemble the daily trade table from the frames returned by get_daily_quote.
shr = basic.loc[:,['secid','trade_date' , 'total_share','float_share','free_share']].copy()
# use float_share wherever free_share is missing, matching the ts_code-based build cell
free_missing = shr['free_share'].isna()
shr.loc[free_missing , 'free_share'] = shr.loc[free_missing , 'float_share']
mutual_col = ['secid' , 'trade_date']

# Left joins keep every quoted security; the default inner join silently dropped any
# security missing from adj, limit or shr.
trade = quote.merge(adj,on=mutual_col,how='left').\
    merge(limit,on=mutual_col,how='left').\
    merge(shr,on=mutual_col,how='left')
trade = trade.set_index('secid').sort_index().reset_index()

# status: 1.0 unless the secid appears in the suspension list
trade['status'] = 1.0 * ~trade['secid'].isin(susp['secid'])
# limit: +1.0 at/above up_limit, -1.0 at/below down_limit, else 0.0 (NaN limits compare False)
trade['limit'] = 1.0 * (trade['close'] >= trade['up_limit']) - 1.0 * (trade['close'] <= trade['down_limit'])
# turnover ratios; rows without share data get 0 instead of NaN
trade['turn_tt'] = (trade['volume'] / trade['total_share'] * 1e5).fillna(0)
trade['turn_fl'] = (trade['volume'] / trade['float_share'] * 1e5).fillna(0)
trade['turn_fr'] = (trade['volume'] / trade['free_share'] * 1e5).fillna(0)

trade = trade.loc[
    :,['secid', 'adjfactor', 'open', 'high', 'low', 'close', 'amount','volume', 'vwap', 
       'status', 'limit', 'pctchange', 'preclose', 'turn_tt','turn_fl', 'turn_fr']].copy()
trade

Unnamed: 0,secid,adjfactor,open,high,low,close,amount,volume,vwap,status,limit,pctchange,preclose,turn_tt,turn_fl,turn_fr
0,1,125.0490,10.26,10.29,9.92,9.97,1721256.576,171335.386,10.046124,1.0,0.0,-2.8265,10.26,0.882903,0.882920,2.099588
1,2,181.7040,6.94,7.01,6.83,6.88,807585.336,117078.328,6.897821,1.0,0.0,-0.8646,6.94,0.981319,1.204889,1.979952
2,4,4.0640,10.40,10.89,10.10,10.39,119598.778,11447.553,10.447541,1.0,0.0,-2.8972,10.70,8.647476,9.064634,13.363287
3,6,39.7400,3.65,3.71,3.59,3.71,44108.657,12081.172,3.651025,1.0,0.0,1.3661,3.66,0.894905,0.894910,1.420313
4,7,8.2840,5.01,5.42,4.82,5.20,128925.424,25289.607,5.097961,1.0,0.0,0.7752,5.16,7.299682,8.185715,10.991913
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5329,873706,1.1319,8.90,8.98,8.70,8.87,2795.544,318.811,8.768656,1.0,0.0,-0.4489,8.91,0.345014,1.503295,1.503295
5330,873726,1.0428,16.03,16.10,15.51,15.82,3734.164,236.085,15.817032,1.0,0.0,-0.3778,15.88,0.287638,1.186034,1.434622
5331,873806,1.0000,8.81,8.99,8.60,8.71,82485.607,9398.561,8.776408,1.0,0.0,-4.6002,9.13,3.125180,12.375746,12.375746
5332,873833,1.1148,7.79,7.79,7.61,7.73,1444.943,187.865,7.691390,1.0,0.0,0.2594,7.71,0.228102,0.593852,0.741376


In [17]:
# Display `trade`. NOTE(review): the saved output still contains trade_date, up_limit and the
# share-count columns, so it apparently predates the final column selection applied to `trade`.
trade


Unnamed: 0,secid,trade_date,open,high,low,close,preclose,change,pctchange,vol,...,up_limit,down_limit,total_share,float_share,free_share,status,limit,turn_tt,turn_fl,turn_fr
0,1,20240705,10.26,10.29,9.92,9.97,10.26,-0.29,-2.8265,171335.386,...,11.29,9.23,1.940592e+06,1.940555e+06,816042.7512,1.0,0.0,0.088290,0.088292,0.209959
1,2,20240705,6.94,7.01,6.83,6.88,6.94,-0.06,-0.8646,117078.328,...,7.63,6.25,1.193071e+06,9.716936e+05,591319.0256,1.0,0.0,0.098132,0.120489,0.197995
2,4,20240705,10.40,10.89,10.10,10.39,10.70,-0.31,-2.8972,11447.553,...,11.77,9.63,1.323803e+04,1.262881e+04,8566.4199,1.0,0.0,0.864748,0.906463,1.336329
3,6,20240705,3.65,3.71,3.59,3.71,3.66,0.05,1.3661,12081.172,...,4.03,3.29,1.349995e+05,1.349987e+05,85059.9248,1.0,0.0,0.089490,0.089491,0.142031
4,7,20240705,5.01,5.42,4.82,5.20,5.16,0.04,0.7752,25289.607,...,5.68,4.64,3.464480e+04,3.089480e+04,23007.4655,1.0,0.0,0.729968,0.818572,1.099191
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5329,873706,20240705,8.90,8.98,8.70,8.87,8.91,-0.04,-0.4489,318.811,...,11.58,6.24,9.240524e+03,2.120749e+03,2120.7485,1.0,0.0,0.034501,0.150329,0.150329
5330,873726,20240705,16.03,16.10,15.51,15.82,15.88,-0.06,-0.3778,236.085,...,20.64,11.12,8.207725e+03,1.990542e+03,1645.6256,1.0,0.0,0.028764,0.118603,0.143462
5331,873806,20240705,8.81,8.99,8.60,8.71,9.13,-0.42,-4.6002,9398.561,...,11.86,6.40,3.007367e+04,7.594339e+03,7594.3387,1.0,0.0,0.312518,1.237575,1.237575
5332,873833,20240705,7.79,7.79,7.61,7.73,7.71,0.02,0.2594,187.865,...,10.02,5.40,8.236000e+03,3.163500e+03,2534.0032,1.0,0.0,0.022810,0.059385,0.074138


In [13]:
# Preview the share-count columns of `basic` (the ones used to build `shr` for the turnover ratios).
basic.loc[:,['secid','trade_date' , 'total_share','float_share','free_share']]

Unnamed: 0,secid,trade_date,total_share,float_share,free_share
0,605028,20240705,16000.0000,4000.0000,4000.0000
1,832491,20240705,14114.8348,11414.7977,9652.7032
2,300968,20240705,41338.0000,41337.9925,17312.5673
3,688563,20240705,45000.0000,7326.3074,7326.3074
4,2775,20240705,61276.7053,44826.1053,28972.8653
...,...,...,...,...,...
5329,301139,20240705,12158.0800,7172.3517,6814.9799
5330,301218,20240705,11404.0000,5604.0468,5604.0468
5331,870866,20240705,18020.5900,6685.9807,5343.9237
5332,2484,20240705,85027.4941,82022.7015,51457.8049


In [26]:
# Load the stored daily table for 20240705 from the local feather database and display it.
# NOTE(review): presumably loaded to compare against the Tushare-derived `trade` frame
# (the column names match) — confirm.
df = pd.read_feather('./data/DataBase/DB_trade/day/2024/day.20240705.feather')
df

Unnamed: 0,secid,adjfactor,open,high,low,close,amount,volume,vwap,status,limit,pctchange,preclose,turn_tt,turn_fl,turn_fr
0,1,125.049332,10.260000,10.290000,9.920000,9.970000,1.721257e+06,1.713354e+06,10.046100,1.0,0.0,-2.8265,10.260000,0.8829,0.8829,2.0996
1,2,181.703781,6.940000,7.010000,6.830000,6.880000,8.075853e+05,1.170783e+06,6.897800,1.0,0.0,-0.8646,6.940000,1.2049,1.2049,1.9800
2,4,4.063862,10.400000,10.890000,10.100000,10.390000,1.195988e+05,1.144755e+05,10.447500,1.0,0.0,-2.8972,10.700000,9.0646,9.0646,13.3633
3,6,39.739674,3.650000,3.710000,3.590000,3.710000,4.410866e+04,1.208117e+05,3.651000,1.0,0.0,1.3661,3.660000,0.8949,0.8949,1.4203
4,7,8.283528,5.010000,5.420000,4.820000,5.200000,1.289254e+05,2.528961e+05,5.098000,1.0,0.0,0.7752,5.160000,8.1857,8.1857,10.9919
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5108,688799,1.046135,41.200001,42.580002,40.110001,42.279999,3.193936e+04,7.683520e+03,41.568699,1.0,0.0,3.0968,41.009998,1.3602,1.3602,1.8792
5109,688800,1.419317,25.790001,25.860001,24.940001,25.440001,4.471424e+04,1.770236e+04,25.258900,1.0,0.0,-1.0117,25.700001,1.6608,1.6608,1.9899
5110,688819,1.076469,22.799999,22.910000,22.549999,22.830000,3.544032e+04,1.558031e+04,22.746901,1.0,0.0,0.1316,22.799999,0.1603,0.1603,1.1550
5111,688981,1.000000,46.020000,46.669998,45.209999,46.639999,1.057957e+06,2.298842e+05,46.021301,1.0,0.0,1.0399,46.160000,1.1648,1.1648,1.4318


In [64]:
def complete_calendar():
    '''
    Load the calendar table and return its trading days with the prior trading day attached.

    Only rows with trade == 1 are kept (re-indexed from 0). The added `pre` column holds
    the `calendar` value of the previous kept row, and -1 for the first row.
    '''
    calendar = pd.read_feather('./data/DataBase/DB_information_ts/calendar.feather')
    is_trading_day = calendar['trade'].eq(1)
    trading_days = calendar.loc[is_trading_day].reset_index(drop=True)
    return trading_days.assign(pre = trading_days['calendar'].shift(1, fill_value=-1))



Unnamed: 0,calendar,trade,pre
0,19901219,1,-1
1,19901220,1,19901219
2,19901221,1,19901220
3,19901224,1,19901221
4,19901225,1,19901224
...,...,...,...
8307,20241225,1,20241224
8308,20241226,1,20241225
8309,20241227,1,20241226
8310,20241230,1,20241227


In [12]:
# Query daily quotes with only start_date given; every row visible in the saved output is dated 20240708.
pro.daily(start_date='20240708')

Unnamed: 0,ts_code,trade_date,open,high,low,close,pre_close,change,pct_chg,vol,amount
0,003032.SZ,20240708,9.47,9.50,8.74,8.88,9.47,-0.59,-6.2302,195958.79,177127.325
1,002688.SZ,20240708,3.68,3.70,3.59,3.60,3.71,-0.11,-2.9650,66932.00,24218.867
2,688238.SH,20240708,4.56,4.56,4.30,4.32,4.56,-0.24,-5.2632,54944.70,24271.305
3,605090.SH,20240708,28.76,28.86,28.07,28.45,28.87,-0.42,-1.4548,43069.40,122560.860
4,600611.SH,20240708,2.77,3.04,2.77,2.95,2.78,0.17,6.1151,466175.26,135351.973
...,...,...,...,...,...,...,...,...,...,...,...
5328,002873.SZ,20240708,8.58,8.63,8.21,8.23,8.53,-0.30,-3.5170,26403.99,21990.333
5329,001373.SZ,20240708,23.85,24.15,23.12,23.26,23.91,-0.65,-2.7185,6656.00,15626.912
5330,603018.SH,20240708,8.52,8.55,8.25,8.35,8.56,-0.21,-2.4533,171655.07,143505.269
5331,000797.SZ,20240708,2.35,2.36,2.27,2.28,2.37,-0.09,-3.7975,176813.00,40648.298


In [11]:
import tushare as ts
from typing import Any

import numpy as np
import pandas as pd

# Read the Tushare token from the TUSHARE_TOKEN environment variable so the credential
# never lives in the notebook (the token that used to be written here should be rotated).
# When the variable is unset the empty string is passed, which lets tushare fall back to
# the token previously saved with ts.set_token().
import os
pro = ts.pro_api(os.environ.get('TUSHARE_TOKEN', ''))

def code_to_secid(df : pd.DataFrame , code_col = 'ts_code' , retain = False):
    '''
    Add an integer `secid` column built from the first six characters of `code_col`.

    The frame is modified in place and is also returned. When `code_col` is not a
    column of `df`, the frame is returned untouched. The legacy symbol 'T00018' is
    mapped to '600018', and any six-character prefix that is not purely digits
    becomes -1. `code_col` is removed afterwards unless `retain` is true.
    '''
    has_code_col = code_col in df.columns.values
    if not has_code_col:
        return df

    legacy_symbols = {'T00018' : '600018'}
    symbol = df[code_col].astype(str).str.slice(0, 6).replace(legacy_symbols)
    # non-numeric prefixes cannot be cast to int, so they are flagged as -1
    df['secid'] = symbol.where(symbol.str.isdigit() , '-1').astype(int)

    if not retain:
        df.pop(code_col)
    return df

# Key the frame by secid, then cast the date columns to integers:
# missing start/announcement dates become -1, missing end dates become 99991231.
# NOTE(review): `df` is not created in this cell; it must already hold
# start_date / ann_date / end_date columns from an earlier step.
df = code_to_secid(df)
df['start_date'] = df['start_date'].fillna(-1).astype(int)
df['ann_date'] = df['ann_date'].fillna(-1).astype(int)
df['end_date'] = df['end_date'].fillna(99991231).astype(int)
# Effective start: the earlier of start_date and ann_date when an announcement date exists,
# otherwise start_date itself. The fallback used to be ann_date, which is never positive on
# that branch, so a known start_date was thrown away and the np.where had no effect.
df['st'] = np.where(df['ann_date'] > 0 , np.minimum(df['start_date'] , df['ann_date']) , df['start_date'])
df['ed'] = df['end_date']