In [39]:
import pandas as pd
import numpy as np
import statsmodels.api as sm

from typing import Literal

from src.factor.calculator import StockFactorCalculator
from src.data import DATAVENDOR

def get_latest(numerator : str , date : int , **kwargs):
    """Delegate to ``DATAVENDOR.get_fin_latest`` for one field and one date.

    Args:
        numerator: field key handed to the data vendor; this notebook passes
            strings such as ``'is@qtr@n_income_attr_p'``.
        date: integer date (the notebook uses values like ``20241203``).
        **kwargs: forwarded unchanged to ``DATAVENDOR.get_fin_latest``.

    Returns:
        Whatever ``DATAVENDOR.get_fin_latest`` returns.
    """
    latest = DATAVENDOR.get_fin_latest(numerator , date , **kwargs)
    return latest

def get_hist(numerator : str , date : int , n_last : int = 1 , **kwargs):
    """Delegate to ``DATAVENDOR.get_fin_hist`` for one field and one date.

    Args:
        numerator: field key handed to the data vendor; this notebook passes
            strings such as ``'bs@qtr@cip'``.
        date: integer date (the notebook uses values like ``20241203``).
        n_last: passed positionally as the third argument of
            ``DATAVENDOR.get_fin_hist`` (presumably the number of trailing
            periods to return -- confirm against the vendor API).
        **kwargs: forwarded unchanged to ``DATAVENDOR.get_fin_hist``.

    Returns:
        Whatever ``DATAVENDOR.get_fin_hist`` returns.
    """
    history = DATAVENDOR.get_fin_hist(numerator , date , n_last , **kwargs)
    return history

def hist_zscore(data : pd.Series | pd.DataFrame):
    if isinstance(data , pd.DataFrame):
        print(data)
        assert data.shape[1] == 1 , 'data must be a single column'
        data = data.iloc[:,0]
    grp = data.groupby('secid')
    return (grp.last() - grp.mean()) / grp.std()

def ts_std(ts : pd.Series | pd.DataFrame):
    return (ts - ts.mean()) / ts.std()

def ts_reg_resid(ts : pd.DataFrame , y_col : str , x_cols : list[str]) -> float:
    """Residual of the last row from an OLS fit of ``y_col`` on ``x_cols``.

    Args:
        ts: frame holding the dependent and explanatory columns.
        y_col: dependent column; it is standardized with ``ts_std`` first.
        x_cols: explanatory columns; missing values are replaced with 0 and a
            constant is added via ``sm.add_constant``.

    Returns:
        The last element (by position) of the fitted model's residuals.
    """
    response = ts_std(ts[y_col])
    regressors = ts[x_cols].fillna(0)
    design = sm.add_constant(regressors)
    fitted = sm.OLS(response , design).fit()
    return fitted.resid.iloc[-1]

class lpnp(StockFactorCalculator):
    """Factor 'linearly purified net profit' (see ``description``).

    For every ``secid`` the quarterly net profit history is standardized and
    regressed on non-operating revenue and the ``bs@qtr@cip`` balance-sheet
    item; the factor value is the residual of the most recent row.
    """
    init_date = 20070101
    category0 = 'fundamental'
    category1 = 'earning'
    description = '线性提纯净利润'

    def calc_factor(self, date: int):
        """Compute the factor for ``date``.

        Args:
            date: integer date passed through to ``get_hist``.

        Returns:
            The result of ``groupby('secid').apply(...)``: one last-row
            regression residual per ``secid`` (same computation as the
            prototype cell further down in this notebook).
        """
        # 20 is the ``n_last`` argument of get_hist for every input series.
        # The regression looks the dependent variable up by the name 'npro',
        # so the first column is renamed explicitly.
        npro = get_hist('is@qtr@n_income_attr_p' , date , 20).iloc[:,0].rename('npro')
        # Non-operating revenue = total revenue minus (operating) revenue.
        nonop_sales = get_hist('is@qtr@total_revenue' , date , 20).iloc[:,0] - get_hist('is@qtr@revenue' , date , 20).iloc[:,0]
        inprocess   = get_hist('bs@qtr@cip' , date , 20).iloc[:,0]

        # Index-aligned left joins onto the net profit history.
        df = npro.to_frame().join(nonop_sales.rename('nonop')).join(inprocess.rename('inprocess'))

        def last_resid(group):
            # One OLS per security; keep only the latest residual.
            return ts_reg_resid(group , y_col='npro' , x_cols=['nonop' , 'inprocess'])

        return df.groupby('secid').apply(last_resid)


class ocfa(StockFactorCalculator):
    """Placeholder for the 'capacity utilisation improvement' factor (not implemented)."""
    description = '产能利用率提升'
    category1 = 'earning'
    category0 = 'fundamental'
    init_date = 20070101

    def calc_factor(self, date: int):
        """Stub: no computation yet, yields ``None`` for every ``date``."""
        return None

class op_equ(StockFactorCalculator):
    """Placeholder for the 'TTM operating profit / net assets' factor (not implemented)."""
    description = 'TTM营业利润/净资产'
    category1 = 'earning'
    category0 = 'fundamental'
    init_date = 20070101

    def calc_factor(self, date: int):
        """Stub: no computation yet, yields ``None`` for every ``date``."""
        return None

class op_equ_zscore(StockFactorCalculator):
    """Placeholder for the 'TTM operating profit / net assets, Z-Score' factor (not implemented)."""
    description = 'TTM营业利润/净资产,Z-Score'
    category1 = 'earning'
    category0 = 'fundamental'
    init_date = 20070101

    def calc_factor(self, date: int):
        """Stub: no computation yet, yields ``None`` for every ``date``."""
        return None

class op_q_equ(StockFactorCalculator):
    """Placeholder for the 'single-quarter operating profit / net assets' factor (not implemented)."""
    description = '单季度营业利润/净资产'
    category1 = 'earning'
    category0 = 'fundamental'
    init_date = 20070101

    def calc_factor(self, date: int):
        """Stub: no computation yet, yields ``None`` for every ``date``."""
        return None

class periodexp_q_sale(StockFactorCalculator):
    """Placeholder for the 'single-quarter expenses / operating revenue' factor (not implemented)."""
    description = '单季度费用/营业收入'
    category1 = 'earning'
    category0 = 'fundamental'
    init_date = 20070101

    def calc_factor(self, date: int):
        """Stub: no computation yet, yields ``None`` for every ``date``."""
        return None

class periodexp_sale(StockFactorCalculator):
    """Placeholder for the 'TTM expenses / operating revenue' factor (not implemented)."""
    description = 'TTM费用/营业收入'
    category1 = 'earning'
    category0 = 'fundamental'
    init_date = 20070101

    def calc_factor(self, date: int):
        """Stub: no computation yet, yields ``None`` for every ``date``."""
        return None

class roic(StockFactorCalculator):
    """Placeholder for the 'TTM, EBIT(1 - tax rate) / invested capital' factor (not implemented)."""
    description = 'TTM,EBIT(1-税率)/投入资本'
    category1 = 'earning'
    category0 = 'fundamental'
    init_date = 20070101

    def calc_factor(self, date: int):
        """Stub: no computation yet, yields ``None`` for every ``date``."""
        return None

class rotc(StockFactorCalculator):
    """Placeholder for the 'TTM, EBIT / tangible assets' factor (not implemented)."""
    description = 'TTM,EBIT/有形资产'
    category1 = 'earning'
    category0 = 'fundamental'
    init_date = 20070101

    def calc_factor(self, date: int):
        """Stub: no computation yet, yields ``None`` for every ``date``."""
        return None

class rroc(StockFactorCalculator):
    """Placeholder for the 'operating capability improvement' factor (not implemented)."""
    description = '营业能力改善'
    category1 = 'earning'
    category0 = 'fundamental'
    init_date = 20070101

    def calc_factor(self, date: int):
        """Stub: no computation yet, yields ``None`` for every ``date``."""
        return None

class sales_ta_zscore(StockFactorCalculator):
    """Placeholder for the 'operating revenue / total assets, Z-Score' factor (not implemented)."""
    description = '营业收入/总资产,Z-Score'
    category1 = 'earning'
    category0 = 'fundamental'
    init_date = 20070101

    def calc_factor(self, date: int):
        """Stub: no computation yet, yields ``None`` for every ``date``."""
        return None

class tax_equ(StockFactorCalculator):
    """Placeholder for the 'TTM income tax / net assets' factor (not implemented)."""
    description = 'TTM所得税/净资产'
    category1 = 'earning'
    category0 = 'fundamental'
    init_date = 20070101

    def calc_factor(self, date: int):
        """Stub: no computation yet, yields ``None`` for every ``date``."""
        return None

class tax_equ_zscore(StockFactorCalculator):
    """Placeholder for the 'TTM income tax / net assets, Z-Score' factor (not implemented)."""
    description = 'TTM所得税/净资产,Z-Score'
    category1 = 'earning'
    category0 = 'fundamental'
    init_date = 20070101

    def calc_factor(self, date: int):
        """Stub: no computation yet, yields ``None`` for every ``date``."""
        return None

class tax_q_equ(StockFactorCalculator):
    """Placeholder for the 'single-quarter income tax / net assets' factor (not implemented)."""
    description = '单季度所得税/净资产'
    category1 = 'earning'
    category0 = 'fundamental'
    init_date = 20070101

    def calc_factor(self, date: int):
        """Stub: no computation yet, yields ``None`` for every ``date``."""
        return None

class tp_equ(StockFactorCalculator):
    """Placeholder for the 'TTM total profit / net assets' factor (not implemented)."""
    description = 'TTM利润总额/净资产'
    category1 = 'earning'
    category0 = 'fundamental'
    init_date = 20070101

    def calc_factor(self, date: int):
        """Stub: no computation yet, yields ``None`` for every ``date``."""
        return None

class tp_equ_zscore(StockFactorCalculator):
    """Placeholder for the 'TTM total profit / net assets, Z-Score' factor (not implemented)."""
    description = 'TTM利润总额/净资产,Z-Score'
    category1 = 'earning'
    category0 = 'fundamental'
    init_date = 20070101

    def calc_factor(self, date: int):
        """Stub: no computation yet, yields ``None`` for every ``date``."""
        return None

class tp_q_equ(StockFactorCalculator):
    """Placeholder for the 'single-quarter total profit / net assets' factor (not implemented)."""
    description = '单季度利润总额/净资产'
    category1 = 'earning'
    category0 = 'fundamental'
    init_date = 20070101

    def calc_factor(self, date: int):
        """Stub: no computation yet, yields ``None`` for every ``date``."""
        return None

In [40]:
# Prototype of the lpnp factor for one date: per-secid OLS of standardized net
# profit on non-operating revenue and the 'bs@qtr@cip' item, keeping the
# residual of the last row of every group.
date = 20241203

def func(df):
    """Last-row regression residual of 'npro' on 'nonop' and 'inprocess'."""
    return ts_reg_resid(df , y_col='npro' , x_cols=['nonop' , 'inprocess'])

npro = get_hist('is@qtr@n_income_attr_p' , date , 20).iloc[:,0].rename('npro')
# Non-operating revenue = total revenue minus (operating) revenue.
nonop_sales = get_hist('is@qtr@total_revenue' , date , 20).iloc[:,0].sub(
    get_hist('is@qtr@revenue' , date , 20).iloc[:,0]
)
inprocess = get_hist('bs@qtr@cip' , date , 20).iloc[:,0]

# Index-aligned left joins onto the net profit history.
df = (
    npro.to_frame()
    .join(nonop_sales.rename('nonop'))
    .join(inprocess.rename('inprocess'))
)

df.groupby('secid').apply(func)


secid
1         1.144538
2        -0.801277
3         0.052021
4         0.280636
5        -0.136739
            ...   
920060         nan
920066         nan
920088         nan
920099         0.0
920118   -0.263365
Length: 5784, dtype: object

In [33]:
# Fit the regression once on the whole `df` (no per-secid grouping) and show
# the residual of its last row.
# NOTE(review): execution count 33 predates the In [40] cell that builds `df`
# above, so this output may come from an older `df` -- re-run in order to confirm.
ts_reg_resid(df , y_col = 'npro' , x_cols = ['nonop' , 'inprocess'])

-0.05235245622236135

In [29]:
# Total of the 'nonop' column over every row of `df` (quick magnitude check).
df['nonop'].sum()

881891668549.7501

In [25]:
# NOTE(review): `npro_q_equ` is not defined in the visible part of this
# notebook; presumably it comes from an earlier kernel state or another
# module -- this cell will fail on Restart & Run All unless it is imported.
npro_q_equ().calc_factor(20241203)

secid
1         0.028238
2        -0.034558
3        -0.022491
4        -0.105303
5        -0.012094
            ...   
920060    0.144784
920066    0.130947
920088    0.070960
920099    0.022330
920118    0.014440
Length: 5755, dtype: float64

In [32]:
# Cross-sectional experiment: regress the latest net profit on the latest
# revenue and book equity across securities and inspect the residuals.
date = 20241203
npro = get_latest('is@qtr@n_income_attr_p' , date)
sales = get_latest('is@qtr@revenue' , date)
bv = get_latest('bs@qtr@total_hldr_eqy_exc_min_int' , date)

# Regressors are aligned to the index of `npro` before the constant is added.
x = sm.add_constant(pd.concat([sales , bv] , axis = 1).reindex(npro.index))
# missing='drop' makes statsmodels discard rows containing NaN before fitting.
model = sm.OLS(npro, x , missing = 'drop').fit()
model.resid


secid
1         1.188522e+09
2        -1.439021e+10
3         4.953072e+07
4         3.019981e+07
5         1.335730e+07
              ...     
920060    1.246787e+08
920066    7.484909e+07
920088    7.189899e+07
920099    4.832514e+07
920118    4.292733e+07
Length: 5733, dtype: float64

In [23]:
# NOTE(review): `gp_ta_zscore` is not defined in the visible part of this
# notebook; presumably defined in an earlier kernel state or another module.
date = 20241203
gp_ta_zscore().calc_factor(date)

secid
1              NaN
2        -0.848494
3         0.277415
4         1.638457
5         0.294666
            ...   
920098         NaN
920099   -0.707107
920111         NaN
920118   -0.633026
920128         NaN
Length: 5800, dtype: float64

In [16]:
# NOTE(review): `gp_q_ta` is not defined in the visible part of this
# notebook; presumably defined in an earlier kernel state or another module.
date = 20241203
gp_q_ta().calc_factor(date)

secid
1              NaN
2         0.006858
3         0.019682
4         0.065962
5         0.006246
            ...   
920098         NaN
920099    0.021257
920111         NaN
920118    0.026899
920128         NaN
Length: 5766, dtype: float64

In [1]:
# Build the factor hierarchy and list the distinct values of its 'file_name'
# column. `hier` is reused by several later cells.
from src.api import FactorAPI
hier = FactorAPI.factor_hierarchy()
hier.factor_df()['file_name'].unique()




src.basic.INSTANCE_RECORD can be accessed to check ['trainer', 'account']
Basic module imported!


array(['behavior\\behavior_anndt', 'behavior\\behavior_ff3',
       'behavior\\behavior_slice', 'behavior\\correlation_beta',
       'behavior\\correlation_vp', 'behavior\\liquidity_cap',
       'behavior\\liquidity_cv', 'behavior\\liquidity_illiquid',
       'behavior\\liquidity_moneyflow', 'behavior\\liquidity_turnover',
       'behavior\\momentum_classic', 'behavior\\momentum_mdr',
       'behavior\\momentum_phigh', 'behavior\\momentum_weivol',
       'behavior\\volatility_exret', 'behavior\\volatility_ret',
       'behavior\\volatility_skew', 'fundamental\\earning_classic',
       'fundamental\\earning_sue', 'fundamental\\growth_earnings_acc',
       'fundamental\\quality', 'fundamental\\valuation'], dtype=object)

In [2]:
# Run the hierarchy's test calculation for the factors of one file.
# NOTE(review): the names listed by the previous cell use a backslash
# separator ('fundamental\\earning_sue') while a forward slash is passed here;
# the recorded output shows it still matched -- confirm how file_name is compared.
hier.test_calc_all_factors(file_name = 'fundamental/earning_sue')

sue_gp calculated
sue_gp_reg calculated
sue_npro calculated
sue_npro_reg calculated
sue_op calculated
sue_op_reg calculated
sue_sales calculated
sue_sales_reg calculated
sue_tax calculated
sue_tax_reg calculated
sue_tp calculated
sue_tp_reg calculated
no abnormal factor variation
no abnormal factor diffs


{'sue_gp': secid
 1              NaN
 2        -0.621375
 3         0.015947
 4         0.703756
 5         1.295163
             ...   
 920098         NaN
 920099   -0.352183
 920111   -0.364047
 920118   -0.538792
 920128         NaN
 Name: gross_margin, Length: 5769, dtype: float64,
 'sue_gp_reg': secid
 1              NaN
 2         0.396976
 3         0.815989
 4         0.591274
 5         1.362929
             ...   
 920098         NaN
 920099     0.38813
 920111    0.383489
 920118    0.363405
 920128         NaN
 Name: gross_margin, Length: 5769, dtype: object,
 'sue_npro': secid
 1         0.656500
 2        -1.284456
 3        -0.319064
 4         0.403735
 5         0.427382
             ...   
 920060         NaN
 920066         NaN
 920088         NaN
 920099         NaN
 920118   -0.951940
 Name: n_income_attr_p, Length: 5754, dtype: float64,
 'sue_npro_reg': secid
 1         0.355321
 2        -0.077522
 3        -0.226876
 4        -0.399623
 5         0.354042
     

In [29]:
# Inspect the 'ocfps' indicator returned by ttm_latest for `date`; the
# recorded output is indexed by (secid, end_date).
DATAVENDOR.INDI.ttm_latest('ocfps' , date)

secid   end_date
1       20240930    5.8560
2       20240930   -0.0981
3       20240630    0.0009
4       20240930   -0.1546
5       20240630    0.0104
                     ...  
920098  20240930    0.2531
920099  20240930    4.2904
920111  20240630    0.5100
920118  20240930   -9.8545
920128  20240930    0.1865
Name: ocfps, Length: 5785, dtype: float64

In [25]:
# Build `cetop`: the 'ocfps' indicator divided by the closing price.
# (DATAVENDOR is already imported in the first cell; this re-import is redundant.)
from src.data import DATAVENDOR
date = 20241203

# Closing price per secid for `date`.
cp = DATAVENDOR.TRADE.get_trd(date , ['secid' , 'close']).set_index('secid')['close']
# NOTE(review): the display of `cetop.fillna(0)` further down shows a
# (secid, end_date) index, i.e. the result keeps the end_date level.
cetop = DATAVENDOR.INDI.ttm_latest('ocfps' , date) / cp


In [26]:
# Feed `cetop` (NaN replaced by 0) to the CNE5 model's descriptor routine.
from src.factor.calculator import TuShareCNE5_Calculator

model = TuShareCNE5_Calculator()
# NOTE(review): this call raised "ValueError: Buffer dtype mismatch, expected
# 'Python object' but got 'long'" in the recorded run. `cetop` is displayed
# with a (secid, end_date) index; presumably `descriptor` expects a plain
# secid index -- confirm, and drop the end_date level upstream if so.
model.descriptor(cetop.fillna(0) , date , 'cetop' , 'median')

ValueError: Buffer dtype mismatch, expected 'Python object' but got 'long'

In [28]:
# Inspect the exact series passed to `descriptor`: `cetop` with NaN set to 0.
# NOTE(review): the name `v` is reassigned to an unrelated object in a later cell.
v = cetop.fillna(0)
v

secid   end_date
1       20240930    0.509661
2       20240930   -0.011224
3       20240630    0.000000
4       20240930   -0.008077
5       20240630    0.000000
                      ...   
920098  20240930    0.000000
920099  20240930    0.124721
920111  20240630    0.016452
920118  20240930   -0.297719
920128  20240930    0.004585
Length: 5785, dtype: float64

In [27]:
# Fetch the model's estimation universe and industry grouping for `date`.
univ = model.get_estuniv(date)
indus = model.ind_grp.get(date)
# If no (or an empty) industry grouping is stored for `date`, compute it and
# read it again.
if indus is None or indus.empty:
    model.calc_indus(date)
    indus = model.ind_grp.get(date)


In [8]:
# Look the factor class up by name, instantiate it and compute it for one date.
hier.get_factor('ff_alpha_1m')().calc_factor(20241031)


secid
1         0.000371
2         0.004179
4         0.001460
6         0.035622
7        -0.000548
            ...   
920016   -0.011799
920019   -0.013624
920088   -0.008201
920099   -0.011923
920118   -0.002821
Length: 5360, dtype: float64

secid
1         0.673100
2         3.570594
4         0.036688
5              NaN
6         0.513091
            ...   
920019         NaN
920066         NaN
920088         NaN
920099         NaN
920118         NaN
Length: 5635, dtype: float64

In [10]:
# NOTE(review): `etop_q` is not defined in the visible part of this notebook;
# presumably defined in an earlier kernel state or another module.
etop_q().calc_factor(date)

secid
1         0.062715
2        -0.073076
3              NaN
4        -0.005856
5              NaN
            ...   
920060         NaN
920066    0.013726
920088    0.013299
920099    0.020798
920118    0.005907
Length: 5731, dtype: float64

In [45]:
# Profile one factor calculation with cProfile and print the statistics
# sorted by internal time ('tottime').
from cProfile import Profile
# NOTE(review): start_date / end_date are assigned but not used by the
# profiled statement in this cell.
start_date , end_date = DATAVENDOR.CALENDAR.td_start_end(date , 1 , 'y')
p = Profile()
# NOTE(review): `ebit_ev1_rank1y` is not defined in the visible part of this notebook.
p.run('ebit_ev1_rank1y().calc_factor(date)')
p.print_stats('tottime')


         440288 function calls (433633 primitive calls) in 1.774 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
      162    0.242    0.001    0.257    0.002 take.py:120(_take_nd_ndarray)
       44    0.105    0.002    0.105    0.002 {method 'argsort' of 'numpy.ndarray' objects}
        1    0.079    0.079    0.079    0.079 algorithms.py:1067(rank)
        9    0.073    0.008    0.182    0.020 sorting.py:687(compress_group_index)
       90    0.066    0.001    0.066    0.001 {method 'copy' of 'numpy.ndarray' objects}
        3    0.064    0.021    0.064    0.021 sorting.py:246(_decons_group_index)
       16    0.060    0.004    0.061    0.004 algorithms.py:548(factorize_array)
     5659    0.056    0.000    0.200    0.000 rolling.py:601(calc)
       81    0.050    0.001    0.050    0.001 {method 'take' of 'numpy.ndarray' objects}
        8    0.044    0.005    0.044    0.005 {method 'put' of 'numpy.ndarray' objects}
     5

In [3]:
# Date window from td_start_end(date , 1 , 'y') (presumably one year ending at
# `date` -- confirm), then the reciprocal of the 'pb' valuation data over it.
start_date , end_date = DATAVENDOR.CALENDAR.td_start_end(date , 1 , 'y')
# NOTE(review): with pivot=True the result is presumably dates x secid -- confirm.
# This rebinds `v`, which an earlier-listed cell used for `cetop.fillna(0)`.
v = 1 / DATAVENDOR.TRADE.get_val_data(start_date , end_date , 'pb' , prev=False , pivot=True)


In [6]:
# Percentile-rank every column of `v` down its rows, forward-fill gaps, then
# collapse the last row into a Series via sum() over that single row
# (sum() skips NaN, so an all-NaN last entry shows up as 0 rather than NaN).
v.rank(pct=True).ffill().tail(1).sum()

secid
1         0.130952
2         0.265873
4         0.007937
5         0.903226
6         0.003968
            ...   
920019    0.533333
920066    1.000000
920088    1.000000
920099    0.136364
920118    0.090909
Length: 5402, dtype: float64

In [7]:
# Restrict the hierarchy's update jobs to one factor file and fetch the job table.
# NOTE(review): the cell ends in an assignment, which displays nothing, so the
# array shown under this cell presumably belongs to a different run.
hier.update_jobs(file_name = 'liquidity_illiq')
df = hier.jobs()


array([illiq_12m(from20070101,behavior,liquidity)[0dates],
       illiq_1m(from20070101,behavior,liquidity)[0dates],
       illiq_2m(from20070101,behavior,liquidity)[0dates],
       illiq_3m(from20070101,behavior,liquidity)[0dates],
       illiq_6m(from20070101,behavior,liquidity)[0dates]], dtype=object)

In [5]:
# List the distinct factors present in the pending job table.
# NOTE(review): rebinds `df`, which earlier cells use for the regression frame.
df = hier.jobs()
df['factor'].unique()


array([illiq_12m(from20070101,behavior,liquidity)[0dates],
       illiq_1m(from20070101,behavior,liquidity)[0dates],
       illiq_2m(from20070101,behavior,liquidity)[0dates],
       illiq_3m(from20070101,behavior,liquidity)[0dates],
       illiq_6m(from20070101,behavior,liquidity)[0dates]], dtype=object)

In [6]:
# Run the queued jobs; the recorded output logs one "deploy successful" line
# per factor and date.
hier.Update()

Factor : illiq_12m at date 20070104 deploy successful
Factor : illiq_1m at date 20070104 deploy successful
Factor : illiq_2m at date 20070104 deploy successful
Factor : illiq_3m at date 20070104 deploy successful
Factor : illiq_6m at date 20070104 deploy successful
Factor : illiq_12m at date 20070105 deploy successful
Factor : illiq_1m at date 20070105 deploy successful
Factor : illiq_2m at date 20070105 deploy successful
Factor : illiq_3m at date 20070105 deploy successful
Factor : illiq_6m at date 20070105 deploy successful
Factor : illiq_12m at date 20070108 deploy successful
Factor : illiq_1m at date 20070108 deploy successful
Factor : illiq_2m at date 20070108 deploy successful
Factor : illiq_3m at date 20070108 deploy successful
Factor : illiq_6m at date 20070108 deploy successful
Factor : illiq_12m at date 20070109 deploy successful
Factor : illiq_1m at date 20070109 deploy successful
Factor : illiq_2m at date 20070109 deploy successful
Factor : illiq_3m at date 20070109 deploy 