In [2]:
import pandas as pd

# Load the reference tables used by the cells below: security descriptions,
# industry classification and the name-change history.
desc = pd.read_feather('./data/DataBase/DB_information_ts/description.feather')
indus = pd.read_feather('./data/DataBase/DB_information_ts/industry.feather')
cname = pd.read_feather('./data/DataBase/DB_information_ts/change_name.feather')
# Display the description table.
desc

Unnamed: 0,ts_code,sec_name,industry,exchange_name,list_dt,delist_dt,secid
0,000001.SZ,平安银行,银行,SZSE,19910403,99991231,1
1,000002.SZ,万科A,全国地产,SZSE,19910129,99991231,2
2,000004.SZ,国华网安,软件服务,SZSE,19910114,99991231,4
3,000006.SZ,深振业A,区域地产,SZSE,19920427,99991231,6
4,000007.SZ,全新好,其他商业,SZSE,19920413,99991231,7
...,...,...,...,...,...,...,...
5615,688555.SH,退泽达(退),,SSE,20200623,20230707,688555
5616,832317.BJ,观典防务(退),,BSE,20200727,20220426,832317
5617,833874.BJ,泰祥股份(退),,BSE,20200727,20220718,833874
5618,833994.BJ,翰博高新(退),,BSE,20200727,20220725,833994


In [3]:
import numpy as np
from typing import Any , Literal
from src.data import load_target_file

class TradeCalendar:
    """
    Lookup tables between calendar dates and trading dates (dates are stored as integers).

    The constructor reads the `calendar` and `trade` columns of calendar.feather and
    derives, for every calendar row:
        td       : the row's own date when trade == 1, otherwise carried forward from the row above
        pre      : the date of the preceding trade == 1 row (-1 for the first one), carried forward the same way
        cd_index : 0-based row position in the full calendar
        td_index : cumulative sum of `trade` minus one

    Attributes:
        calendar : the table indexed by calendar date
        cal_cal  : the table indexed by cd_index
        cal_trd  : only the trade == 1 rows, indexed by td_index
    """

    def __init__(self) -> None:
        source = pd.read_feather('./data/DataBase/DB_information_ts/calendar.feather').loc[:,['calendar' , 'trade']]

        # trading rows only, each paired with the trading date that precedes it
        open_days = source.loc[source['trade'] == 1 , ['calendar']].reset_index(drop=True)
        open_days = open_days.assign(
            td = open_days['calendar'] ,
            pre = open_days['calendar'].shift(1, fill_value=-1) ,
        )

        # attach to the full calendar; rows without a match inherit td / pre from the row above
        table = source.merge(open_days , on = 'calendar' , how = 'left').ffill()
        table = table.assign(
            cd_index = np.arange(len(table)) ,
            td_index = table['trade'].cumsum() - 1 ,
        ).astype(int)

        self.calendar = table.set_index('calendar')
        self.cal_cal = table.set_index('cd_index')
        self.cal_trd = table.loc[table['trade'] == 1].set_index('td_index')

    def __td_transform(self , td , as_numpy : bool) -> Any:
        """Convert a Series to a numpy array; wrap a Python int in a 1-element array when `as_numpy` is set."""
        if isinstance(td , pd.Series):
            return td.to_numpy()
        # NOTE(review): scalar lookups on the integer table presumably yield numpy integer
        # scalars, which do not satisfy isinstance(td , int) and are then returned as-is --
        # confirm whether that is intended before changing it, callers may rely on it.
        if as_numpy and isinstance(td , int):
            return np.array([td])
        return td

    def _lookup(self , date , column : str , as_numpy : bool) -> Any:
        """Read `column` of the date-indexed table for `date` (a single label or a collection of labels)."""
        return self.__td_transform(self.calendar.loc[date , column] , as_numpy)

    def td(self , date , as_numpy = True):
        """Return the `td` value recorded for `date`."""
        return self._lookup(date , 'td' , as_numpy)

    def pre(self , date , as_numpy = True):
        """Return the `pre` value recorded for `date`."""
        return self._lookup(date , 'pre' , as_numpy)

    def offset(self , date , n : Any = 0 , type : Literal['t' , 'c'] = 't' , as_numpy = True):
        """
        Shift `date` by `n` rows and return the resulting calendar date(s).

        type == 't' counts in td_index over the trade == 1 rows; any other value counts in
        cd_index over all rows. The shifted position is limited to the valid index range,
        so overshooting returns the last / first available date instead of raising.
        """
        if type == 't':
            index_col , table = 'td_index' , self.cal_trd
        else:
            index_col , table = 'cd_index' , self.cal_cal

        shifted = self.calendar.loc[date , index_col] + n
        capped = np.minimum(shifted , len(table) - 1)
        clamped = np.maximum(capped , 0)
        return self.__td_transform(table.loc[clamped , 'calendar'] , as_numpy)

    @property
    def calendar_start(self):
        """Smallest date in the calendar index."""
        return self.calendar.index.min()

    @property
    def calendar_end(self):
        """Largest date in the calendar index."""
        return self.calendar.index.max()
        
# Build the calendar once; risk_universe below reads it as a notebook-level global.
calendar = TradeCalendar()
# First date covered by the calendar file.
calendar.calendar_start

19901219

In [6]:

# Sample date (integer in YYYYMMDD form) used to try out risk_universe.
date = 20120105

def risk_universe(date : int , list_days : int = 252 , redempt_tmv_pct : float = 0.8 , status_lookback : int = 21):
    """
    Flag the estimation universe and attach a weight for every security listed on `date`.

    Relies on the notebook-level objects `desc`, `cname` and `calendar`, and on `load_target_file`.

    A security gets estuniv == 1 when all three rules hold:
        rule0 : `status` equals 1 on `date` or `status_lookback` trading days earlier
                (securities missing from the `date` file count as 0 for both)
        rule1 : the date `list_days` trading days after listing is on or before `date`,
                or the `total_mv` percentile rank is at least `redempt_tmv_pct`
        rule2 : the latest name-change record starting on or before `date` does not carry
                one of the delisting / suspension / ST reasons listed in the body

    Args:
        date            : evaluation date as an integer (YYYYMMDD)
        list_days       : trading days a security must have been listed to pass rule1 by age
        redempt_tmv_pct : percentile of `total_mv` at or above which a security passes rule1 regardless of age
        status_lookback : trading days to look back for the second status check of rule0

    Returns:
        DataFrame with the rows of `desc` listed on `date` plus the columns
        `estuniv` (0 / 1) and `weight` (circ_mv / 1e8 , 0 where no valuation row exists).
    """
    # securities listed on or before `date` and not yet delisted
    new_desc = desc[(desc['list_dt'] <= date) & (desc['delist_dt'] > date)].copy()
    # listing dates before the first calendar date are lifted to it so the calendar lookup below can find them
    new_desc['list_dt'] = np.maximum(new_desc['list_dt'] , calendar.calendar_start)
    new_desc = new_desc.set_index('secid')

    # valuation rows aligned to the universe; `assign` leaves the loaded frame untouched
    val = load_target_file('trade_ts' , 'day_val' , date)
    val = val.assign(weight = val['circ_mv'] / 1e8).set_index('secid').reindex(new_desc.index)

    # date reached `list_days` trading days after listing, one entry per row of new_desc
    cal_after = calendar.offset(new_desc['list_dt'] , list_days , 't')

    # status on `date` (status_x) and `status_lookback` trading days earlier (status_y)
    past_date = calendar.offset(date , -status_lookback , 't' , as_numpy = False)
    trd = load_target_file('trade_ts' , 'day' , past_date).loc[:,['secid','status']]
    trd = load_target_file('trade_ts' , 'day' , date).loc[:,['secid','status']].merge(trd , on = 'secid' , how = 'left').\
        set_index('secid').reindex(new_desc.index).fillna(0)

    # latest name-change record per security that started on or before `date`, kept only if it is a flagged reason
    new_cname = cname[cname['secid'] >= 0].sort_values(['secid','ann_date','start_date'])
    new_cname = new_cname[new_cname['start_date'] <= date].drop_duplicates('secid' , keep = 'last')
    new_cname = new_cname[new_cname['change_reason'].isin(['终止上市', '暂停上市' , 'ST', '*ST', ])]

    # trade status is 1 on this day or `status_lookback` trading days earlier
    rule0 = ((trd['status_x'] == 1) | (trd['status_y'] == 1))

    # listed for at least `list_days` trading days and not delisted, or total mv at / above the `redempt_tmv_pct` percentile
    # na_option='keep' : securities without total_mv get a NaN rank (never >= threshold) and do not
    # distort the percentiles of the others; 'bottom' would hand them the highest ranks instead
    tmv_pct = val['total_mv'].rank(pct = True , na_option='keep')
    rule1 = ((new_desc['delist_dt'] > date) & (cal_after <= date)) | (tmv_pct >= redempt_tmv_pct)

    # not st
    rule2 = ~new_desc.index.isin(new_cname['secid'])

    new_desc['estuniv'] = 1 * (rule0 & rule1 & rule2)
    new_desc['weight'] = val['weight'].fillna(0).values

    # NOTE(review): drop=True discards the secid index, so the result carries no secid column -- confirm this is wanted
    return new_desc.reset_index(drop=True)
# Run for the sample date and preview the first ten rows.
df = risk_universe(date)
df.iloc[:10]

Unnamed: 0,ts_code,sec_name,industry,exchange_name,list_dt,delist_dt,estuniv,weight
0,000001.SZ,平安银行,银行,SZSE,19910403,99991231,1,477.9147
1,000002.SZ,万科A,全国地产,SZSE,19910129,99991231,1,700.246929
2,000004.SZ,国华网安,软件服务,SZSE,19910114,99991231,1,6.148198
3,000006.SZ,深振业A,区域地产,SZSE,19920427,99991231,1,38.810776
4,000007.SZ,全新好,其他商业,SZSE,19920413,99991231,1,12.133728
5,000008.SZ,神州高铁,运输设备,SZSE,19920507,99991231,1,4.676973
6,000009.SZ,中国宝安,电气设备,SZSE,19910625,99991231,1,102.978356
7,000010.SZ,美丽生态,建筑工程,SZSE,19951027,99991231,0,0.0
8,000011.SZ,深物业A,房产服务,SZSE,19920330,99991231,1,7.925437
9,000012.SZ,南玻A,玻璃,SZSE,19920228,99991231,1,105.766239


In [5]:
# Stored risk-exposure file for 20120105; presumably the reference that `df` above is compared against.
rsk = pd.read_feather('./data/DataBase/DB_models/risk_exp/2012/risk_exp.20120105.feather')
rsk.iloc[:10]

Unnamed: 0,secid,estuniv,weight,market,petro,coal,nonferrous,utility,public,steel,...,size,beta,momentum,residual_volatility,non_linear_size,book_to_price,liquidity,earnings_yield,growth,leverage
0,1,1,477.914703,1,0,0,0,0,0,0,...,0.840386,-0.47212,0.159113,-0.20982,-0.489287,-0.258295,0.289957,1.660876,0.515678,1.064224
1,2,1,700.246948,1,0,0,0,0,0,0,...,0.840386,0.070228,-0.061737,-0.308938,-0.489287,0.262514,0.211596,0.569209,0.165846,0.906542
2,4,1,6.148198,1,0,0,0,0,0,0,...,-3.698964,0.77785,0.192959,0.182589,-2.320993,-1.490264,-0.591618,-2.070229,1.005509,-1.183045
3,5,0,0.0,1,0,0,0,0,0,0,...,-1.891241,-2.04266,0.418196,-1.018986,1.03436,-1.195547,-0.173172,-1.92643,-2.014068,-1.423251
4,6,1,38.810776,1,0,0,0,0,0,0,...,-1.770511,0.254027,-0.421254,-0.710314,1.121449,0.52363,-0.668045,1.660876,0.137248,0.495918
5,7,0,0.0,1,0,0,0,0,0,0,...,-2.766564,-0.258178,0.367667,1.050261,-0.570277,-2.076923,-0.698384,-2.086478,0.018083,-1.693052
6,8,0,0.0,1,0,0,0,0,0,0,...,-3.98331,0.999235,-0.753704,-0.283624,-2.320993,-1.324957,-1.073261,-2.247055,-0.179757,-1.921547
7,9,1,102.978355,1,0,0,0,0,0,0,...,-0.771988,1.893352,0.567452,1.831254,0.98129,-1.001495,1.299373,-1.602258,-0.275433,0.73175
8,10,0,0.0,1,0,0,0,0,0,0,...,-2.403883,-2.04266,0.9917,-1.485509,0.322576,-1.87646,0.122572,-2.234237,-0.211353,-1.192546
9,11,1,7.925437,1,0,0,0,0,0,0,...,-1.975303,1.398808,-1.387071,1.212768,0.956871,-0.885509,2.55669,0.353014,1.721017,0.179217


In [9]:
# Inspect the `market` column of the stored exposures.
rsk['market']

0       1
1       1
2       1
3       1
4       1
       ..
2315    1
2316    1
2317    1
2318    1
2319    1
Name: market, Length: 2320, dtype: int64

In [None]:
# Display the stored risk-exposure frame.
rsk

In [None]:
# Read the day_val file for 20120105 directly from disk for inspection.
val = pd.read_feather('./data/DataBase/DB_trade_ts/day_val/2012/day_val.20120105.feather')

In [None]:
# Display the valuation frame.
val

In [None]:
# Percentile rank of total_mv. With the default ascending order,
# na_option='bottom' assigns the highest ranks to missing values.
val['total_mv'].rank(pct = True , na_option='bottom')

In [None]:
# Names whose change_reason is delisting ('终止上市'), listing suspension ('暂停上市'), 'ST' or '*ST'
# and whose entry_dt / remove_dt interval covers the date below.
# NOTE(review): this rebinds the notebook-level `date` set for the risk_universe call earlier,
# so re-running earlier cells afterwards uses 20240105 instead of 20120105.
# NOTE(review): risk_universe filters `cname` on start_date / ann_date while this cell uses
# entry_dt / remove_dt / name -- confirm these columns exist in change_name.feather.
date = 20240105
cname[(cname['change_reason'].isin(['终止上市', '暂停上市' , 'ST', '*ST', ]) ) & 
      (cname['entry_dt'] <= date) & (cname['remove_dt'] >= date)]['name'].unique()

In [None]:
# Shape of the array of distinct l1_name values, i.e. how many distinct names there are
# (presumably level-1 industries).
indus['l1_name'].unique().shape

In [None]:
# Distinct l2_name values (presumably level-2 industries).
indus['l2_name'].unique()