In [1]:
# %%writefile alpha_base10.py

import sys
import os 
module_path = os.path.abspath(os.path.join('..')) 
if module_path not in sys.path: 
    sys.path.append(module_path)
    
import numpy as np
import pandas as pd
from scipy import stats
from sklearn import preprocessing
from sklearn import linear_model
import tools.Pretreat_Tools as pretreat

from base.JuUnits import excute_for_multidates


def STD(data, windows):
    """Rolling standard deviation of ``data``.

    A value is produced only once ``windows`` observations are available
    (``min_periods`` equals the window length); earlier positions are NaN.
    """
    full_window = data.rolling(window=windows, min_periods=windows)
    return full_window.std()
def MEAN(data, windows):
    """Rolling arithmetic mean of ``data``.

    A value is produced only once ``windows`` observations are available
    (``min_periods`` equals the window length); earlier positions are NaN.
    """
    full_window = data.rolling(window=windows, min_periods=windows)
    return full_window.mean()
def DELTA(data, windows):
    """Difference between each value and the value ``windows`` rows earlier.

    Parameters
    ----------
    data : pandas Series or DataFrame
        Values to difference.
    windows : int
        Lag, in rows, passed to ``diff``.

    Returns
    -------
    Same type as ``data``; the first ``windows`` rows are NaN.
    """
    # The lag was previously hard-coded to 4, silently ignoring ``windows``.
    return data.diff(windows)
def SEQUENCE(n):
    """Return the Series 1, 2, ..., n on a default integer index."""
    counting = np.arange(1, n + 1)
    return pd.Series(counting)

def SMA(data,windows,alpha):
    """Exponentially weighted mean with smoothing factor ``alpha / windows``.

    Uses the recursive (``adjust=False``) form and emits values only after
    ``windows`` observations have been seen.
    """
    smoothing = float(alpha) / windows
    weighted = data.ewm(
        alpha=smoothing,
        min_periods=windows,
        adjust=False,
        ignore_na=False,
    )
    return weighted.mean()

def REGBETA(xs, y, n):
    """Rolling no-intercept regression coefficient of ``y`` on windows of ``xs``.

    For every window of ``n`` consecutive rows of ``xs`` (a full window is
    required), a ``LinearRegression(fit_intercept=False)`` is fitted with the
    window as the single feature and ``y`` as the target, and the fitted
    coefficient is returned as the value for that window.

    Parameters
    ----------
    xs : object supporting ``.rolling(...).apply(...)``
        Regressor values.
    y : indexable, sized object
        Regression target. Must contain at least ``n`` elements.
    n : int
        Rolling window length.

    Raises
    ------
    AssertionError
        If ``len(y) < n``.
    """
    assert len(y)>=n,  'len(y)!>=n !!!'+ str(y.index[0])
    # One estimator instance is reused (re-fitted) for every window.
    regress = linear_model.LinearRegression(fit_intercept=False)
    def reg(X,Y):
        # X is the current window handed over by rolling.apply; Y is the full ``y``.
        try:
            if len(Y)>len(X):
                # Y is longer than the window: pick the entries of Y at the window's index.
                Y_ =  Y[X.index]
                # Any missing target inside the window yields NaN for that window.
                if Y_.isnull().any():
                    return np.nan
                res = regress.fit(X.values.reshape(-1, 1), Y_.values.reshape(-1, 1)).coef_[0]
            else:
                # Y is not longer than the window: fit against Y as-is (the null check is disabled).
                # if Y.isnull().any():
                #     return np.nan
                res = regress.fit(X.values.reshape(-1, 1), Y.values.reshape(-1, 1)).coef_[0]
        except Exception as e:
            # Best effort: any failure is printed and the window yields NaN.
            print(e)
            return np.nan
        return res
    return xs.rolling(window=n, min_periods=n).apply(lambda x:reg(x,y))


def COVIANCE(A,B,d):
    """Rolling covariance between ``A`` and ``B`` over ``d`` rows.

    Parameters
    ----------
    A, B : pandas.Series
        Series compared position by position; the result carries ``A``'s index.
    d : int
        Rolling window length. Positions before a full window are NaN.

    Returns
    -------
    pandas.Series
        ``A.iloc[window].cov(B.iloc[window])`` for every full window.
    """
    # Roll over row positions so each window can slice both series positionally.
    positions = pd.Series(np.arange(len(A.index)), index=A.index)

    def window_cov(pos):
        # rolling hands positions over as floats; iloc needs integers.
        idx = pos.astype(np.intp)
        return A.iloc[idx].cov(B.iloc[idx])

    # The window was previously hard-coded to 5, ignoring ``d``.
    return positions.rolling(d).apply(window_cov, raw=True)

def CORR(A,B,d):
    """Rolling correlation between ``A`` and ``B`` over ``d`` rows.

    Parameters
    ----------
    A, B : pandas.Series
        Series compared position by position; the result carries ``A``'s index.
    d : int
        Rolling window length. Positions before a full window are NaN.

    Returns
    -------
    pandas.Series
        ``A.iloc[window].corr(B.iloc[window])`` for every full window.
    """
    # Roll over row positions so each window can slice both series positionally.
    positions = pd.Series(np.arange(len(A.index)), index=A.index)

    def window_corr(pos):
        # rolling hands positions over as floats; iloc needs integers.
        idx = pos.astype(np.intp)
        return A.iloc[idx].corr(B.iloc[idx])

    # The window was previously hard-coded to 5, ignoring ``d``.
    return positions.rolling(d).apply(window_corr, raw=True)




In [2]:
from tools.Cacher import (CACHE_TYPE, load_cache,load_caches_adv)
import tools.Sample_Tools as smpl
import tools.Pretreat_Tools as pretreat
import QUANTAXIS as QA


%load_ext autoreload
%autoreload 2
%aimport tools.Cacher

# Load the cached 'all_train_qfq' stock panel and order it by its index.
stock_df = load_cache('all_train_qfq',cache_type=CACHE_TYPE.STOCK).sort_index()
# Alternative kept for reference: concatenate the train / tail / older caches.
# stock_df = pd.concat(list(map(lambda file:load_cache(file,cache_type=CACHE_TYPE.STOCK),['all_train_qfq','all_tail_qfq','all_older_qfq']))).sort_index()
# 10-year government bond series from a static CSV, indexed by its 'date' column.
ret_fs = pd.read_csv(module_path+'/data/static/china10yearbond.csv').set_index('date').sort_index()
# Midpoint of 'high' and 'low', scaled by 0.01 (presumably percent -> fraction; TODO confirm).
ret_fs = (ret_fs['high']+ret_fs['low'])/2 * 0.01
# Divide by 252 (presumably trading days per year, giving a daily rate; TODO confirm).
ret_fs_daily = ret_fs/252
# Bare expression: displayed as the cell output.
ret_fs_daily

date
2002-07    0.000114
2002-08    0.000113
2002-09    0.000122
2002-10    0.000122
2002-11    0.000132
             ...   
2022-09    0.000108
2022-10    0.000109
2022-11    0.000111
2022-12    0.000116
2023-01    0.000116
Length: 247, dtype: float64

In [3]:
# codes = smpl.get_codes_by_market(sse='all',only_main=True,filter_st=True)
# 

# Per-row returns computed from the 'close' column by the project helper.
ret_t = smpl.get_current_return(stock_df,'close')

# Excess return: for each calendar-month group, subtract the daily risk-free rate
# looked up by the group's 'YYYY-MM' key. Months missing from ret_fs_daily fall back
# to its last entry; .iloc[-1] makes that positional lookup explicit instead of
# relying on the deprecated integer fallback of [] on a string-labelled Series.
ret_t_excess = ret_t.groupby(pd.Grouper(level='date', freq='1M')).apply(
        lambda x:x-ret_fs_daily.get(x.index[0][0].strftime('%Y-%m'),default=ret_fs_daily.iloc[-1]))
# Market value: total capital times close price.
market_value_t = stock_df['totalCapital']*stock_df['close']
market_value_t.name='market_value'

# Compute the market (average) return.
def cal_ret_market(market_value,ret_excess_data):
    """Weighted sum of excess returns, weighted by normalised log market value.

    Weights are ``log(market_value) / sum(log(market_value))``; the result is
    ``sum(ret_excess_data * weights)``.
    """
    # Without the log there are occasional precision problems and the weights
    # do not sum to exactly 1.
    log_cap = np.log(market_value)
    weights = log_cap / log_cap.sum()
    weighted_ret = ret_excess_data * weights
    return weighted_ret.sum()

# Market return: weighted average of excess returns across the whole market.
# The callback takes the first level-0 index value of the slice it receives and uses it
# to look up that day's market values (presumably one slice per date, given level=0;
# confirm against excute_for_multidates).
ret_market_t = excute_for_multidates(ret_t_excess,
                                     lambda ret: cal_ret_market(market_value_t.loc[ret.index[0][0]],ret), 
                                     level=0)


def camp_beta_alpha(ret_excess,ret_market):
    """Rolling, half-life-weighted regression of excess returns on market return.

    For every group of ``ret_excess`` (grouped on index level 1) and every full
    252-row window, fits ``ret_excess ~ alpha + beta * ret_market`` with
    exponentially decaying sample weights (half-life of 63 rows, newest row
    weighted most).

    Parameters
    ----------
    ret_excess : pandas.Series
        Excess returns on a two-level index; level 0 is used to look up
        ``ret_market`` and level 1 is the grouping key. NaNs are dropped first.
    ret_market : pandas.Series
        Market return, indexable by the level-0 values of ``ret_excess``.

    Returns
    -------
    pandas.DataFrame
        Columns ``beta`` and ``alpha`` indexed by (``date``, ``code``), sorted.
        Empty, with the same layout, when no full window exists.

    Notes
    -----
    Roughly 35 minutes on a single core for 5 years of daily data.
    """
    window = 252
    half_life_window = 63
    # Weight 0.5 ** (age / half_life) with age running window..1, so the last
    # (most recent) row of each window carries the largest weight.
    decay = 0.5 ** (np.arange(window, 0, -1) / half_life_window)
    # The original referenced an undefined name ``half_life`` here (NameError).
    half_life_weight = decay / decay.sum()

    model = linear_model.LinearRegression(fit_intercept=True)
    res_tmp = []

    def reg(ret_t_ex):
        # rolling.apply can only return a scalar, so the fitted parameters are
        # collected as a side effect and a dummy 0 is returned.
        dates = ret_t_ex.index.get_level_values(0)
        res = model.fit(ret_market[dates].values.reshape(-1, 1),
                        ret_t_ex.values.reshape(-1, 1),
                        sample_weight=half_life_weight)

        res_tmp.append({'date':ret_t_ex.index[-1][0],
                        'code':ret_t_ex.index[-1][1],
                        # coef_/intercept_ are arrays; take the single element
                        # explicitly rather than calling float() on an array.
                        'beta':float(np.ravel(res.coef_)[0]),
                        'alpha':float(np.ravel(res.intercept_)[0])})
        return 0

    # raw=False: ``reg`` needs each window as an indexed Series.
    ret_excess.dropna().groupby(level=1,group_keys=False).apply(
            lambda x:x.rolling(window).apply(reg, raw=False))

    # Explicit columns keep set_index working when no window was fitted.
    res_final = pd.DataFrame(res_tmp, columns=['date', 'code', 'beta', 'alpha'])
    return res_final.set_index(['date', 'code']).sort_index()

def generat_tmpxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx():
    """Write the 'beta' and 'alpha' columns of ``res_final`` to feather files.

    Each column is wrapped in a DataFrame, its index is turned into ordinary
    columns with ``reset_index()``, and the result is written to
    ``./beta_camp.feather`` / ``./alpha_camp.feather``.

    NOTE(review): ``res_final`` is neither a parameter nor a local, so it is
    resolved as a global at call time. In the code visible in this notebook it
    only exists as a local of ``camp_beta_alpha`` -- confirm that a global with
    this name is defined before calling.
    """
    pd.DataFrame(res_final['beta']).reset_index().to_feather('./beta_camp.feather')
    pd.DataFrame(res_final['alpha']).reset_index().to_feather('./alpha_camp.feather')

def momentum(ret,ret_fs):
    """Lagged, exponentially weighted momentum of log excess returns.

    Parameters
    ----------
    ret : pandas.Series
        Returns on a MultiIndex whose 'date' level is datetime-like and which
        has a 'code' level.
    ret_fs : pandas.Series
        Risk-free rate keyed by 'YYYY-MM' strings. Months that are missing fall
        back to the last entry.

    Returns
    -------
    Whatever ``excute_for_multidates`` assembles from the per-code results.
    Codes with fewer than ``window + 1`` rows contribute ``None``.
    """
    # Log excess return per calendar month: log(1 + r) - log(1 + rf).
    # .iloc[-1] makes the positional fallback explicit instead of relying on the
    # deprecated integer fallback of [] on a string-labelled Series.
    ret_excess = ret.groupby(pd.Grouper(level='date', freq='1M')).apply(
            lambda x:np.log(1+x)-np.log(1+ret_fs.get(x.index[0][0].strftime('%Y-%m'),default=ret_fs.iloc[-1])))

    def calc_(data,window=252,half_life_window=126):
        # These thresholds were hard-coded (253 / 126) and ignored the
        # parameters; the defaults reproduce the previous behaviour exactly.
        if len(data) < window + 1:
            return None
        # Last value of the recursive EWMA computed inside each rolling window.
        ewma = data.rolling(window).apply(
                        lambda xx:xx.ewm(adjust=False,halflife=half_life_window).mean().iloc[-1])
        # Smooth over 11 rows, then lag by 11 rows.
        return ewma.rolling(11).mean().shift(11)

    mom = excute_for_multidates(ret_excess.dropna(), lambda x:calc_(x), level='code')

    return mom

def size(stock_data):
    """Natural log of market value (``totalCapital`` times ``close``)."""
    total_capital = stock_data['totalCapital']
    close_price = stock_data['close']
    return np.log(total_capital * close_price)
    


In [34]:

# Market value series; the trailing comment is an optional restriction to four sample codes.
mv = market_value_t#.loc[(slice(None),['000001','000008','600600','000729'])]
codes = mv.index.get_level_values(1).unique().tolist()
date_ = mv.index.get_level_values(0)
# Fetch reports from one year before the first price date through the last price year.
date_start = str(int(date_.min().strftime("%Y"))-1)
date_end = date_.max().strftime("%Y")

# Total profit and net cash flow from operating activities.
report_df = QA.QA_fetch_financial_report_adv(codes, date_start, date_end,ltype='EN').data[['totalProfit','netCashFlowsFromOperatingActivities']]
  

# Convert cumulative (year-to-date) report figures into single-quarter figures:
# within each calendar year of 'report_date', take first differences; rows where the
# difference is NaN (e.g. the first report of the year) keep their original value.
report_df = excute_for_multidates(report_df,
                                  lambda stock:stock.groupby(pd.Grouper(level='report_date', freq='1Y')).apply(
                                  lambda x:x.diff(1).fillna(x)),level='code')

# Rolling sum over four quarters (one year), i.e. "trailing twelve months".
report_df = excute_for_multidates(report_df,lambda x:x.rolling(4).sum(),level='code')

# EARNYILD = 0.68*EPIBS + 0.11*ETOP + 0.21*CETOP
# EPIBS: analyst-forecast EP (earnings to price).
# ETOP: TTM EP, total earnings over the last 12 months divided by current total market value.
# CETOP: operating cash flow over the last 12 months divided by current total market value.
# Align report figures with the price index: forward-fill the latest report values per
# code, then keep only the rows that exist in mv. ffill() replaces the deprecated
# fillna(method='ffill') spelling and behaves the same.
data_ = excute_for_multidates(pd.concat([mv,report_df], axis=1),lambda x:x.ffill(),level='code').loc[mv.index].sort_index()
ETOP = data_['totalProfit']/data_['market_value']
CETOP = data_['netCashFlowsFromOperatingActivities']/data_['market_value']


# EPIBS: analyst expectations are proxied, for now, by the quarterly return slope
# plus the industry quarterly return slope.
m = linear_model.LinearRegression(fit_intercept=True)
def ret_cum_reg(ret,window=63):
    """Rolling slope of log(1 + ret) against an evenly spaced axis.

    Parameters
    ----------
    ret : pandas Series or DataFrame
        Returns; ``log(1 + ret)`` is regressed window by window.
    window : int, default 63
        Rolling window length.

    Returns
    -------
    Same shape as ``ret``: the fitted slope for each full window, NaN before.
    """
    # Regressor 0.01, 0.02, ..., 0.01 * window. It is built from an integer range
    # because np.arange with a float step does not guarantee the number of
    # elements, and a length mismatch would make the fit fail. It is also
    # invariant across windows, so it is built once here.
    time_axis = (np.arange(1, window + 1) * 0.01).reshape(-1, 1)

    def reg(window_slice):
        res = m.fit(time_axis, window_slice.values.reshape(-1, 1))
        # coef_ is a 2-D array here; take the single element explicitly.
        return float(np.ravel(res.coef_)[0])

    k = np.log(1+ret).rolling(window).apply(reg)
    return k
    
# Per-code rolling return slope for the four sample codes.
# This statement used to appear twice in a row, running the expensive rolling
# regression twice for an identical result.
ret_expect = excute_for_multidates(ret_t.loc[(slice(None),['000001','000008','600600','000729'])], lambda x:ret_cum_reg(x),level='code').sort_index()


# Attach each row's industry, average returns per (date, industry), then compute the
# same rolling slope per industry.
ret_industry = pd.concat([ret_t,stock_df['industry']], axis=1).loc[(slice(None),['000001','000008','600600','000729']),:].sort_index()
ret_industry_meam = ret_industry.reset_index().set_index(['date','industry']).groupby(level=[0,1]).mean()
ret_industry_expect = excute_for_multidates(ret_industry_meam, lambda x:ret_cum_reg(x),level='industry')

# Own slope plus the slope of the stock's industry on the same date. The industry values
# are added positionally (.values), so this relies on ret_expect and ret_industry being
# in the same row order.
EPIBS = ret_expect + ret_industry_expect.loc[list(zip(ret_industry.index.get_level_values(0),ret_industry['industry']))]['ret'].values

# # # Test
# with pd.option_context("display.max_rows", None, "display.max_columns", None):
#     x = pd.DataFrame(ret_industry_expect.loc[list(zip(ret_industry.index.get_level_values(0),ret_industry['industry']))].values,index=ret_industry.index)
#     display(pd.concat([ret_industry,ret_expect,x],axis=1))

EARNYILD = 0.68*EPIBS + 0.11*ETOP + 0.21*CETOP

In [35]:
# with pd.option_context("display.max_rows", None, "display.max_columns", None):
#     display(data_.sort_index(level=1))
# pd.concat(
#     [data_['totalProfit']/data_['market_value'],data_['netCashFlowsFromOperatingActivities']/data_['market_value']],axis=1).iloc[1210:1250]


# ret_expect
# xx = pd.concat([ret_industry,ret_expect],axis=1)
# 
# 0.68*EPIBS + 0.11*ETOP + 0.21*CETOP
# with pd.option_context("display.max_rows", None, "display.max_columns", None):
#     display(data_.sort_index(level=1))
# Bare expression: displays the combined factor as the cell output.
EARNYILD

date        code  
2017-01-03  000001         NaN
            000008         NaN
            000729         NaN
            600600         NaN
2017-01-04  000001         NaN
                        ...   
2021-12-30  600600   -0.003551
2021-12-31  000001   -0.030796
            000008    0.050465
            000729    0.012572
            600600   -0.011817
Length: 4812, dtype: float64

In [29]:
# Index entries that are in data_ but not in mv.
# NOTE(review): this cell is numbered In [29], i.e. it ran before the In [34] cell shown
# above, so the output below may come from an earlier definition of data_ -- confirm.
data_.index.difference(mv.index)

MultiIndex([('2017-09-30', '000001'),
            ('2017-09-30', '000008'),
            ('2017-09-30', '000729'),
            ('2017-09-30', '600600'),
            ('2017-12-31', '000001'),
            ('2017-12-31', '000008'),
            ('2017-12-31', '000729'),
            ('2017-12-31', '600600'),
            ('2018-03-31', '000001'),
            ('2018-03-31', '000008'),
            ('2018-03-31', '000729'),
            ('2018-03-31', '600600'),
            ('2018-06-30', '000001'),
            ('2018-06-30', '000008'),
            ('2018-06-30', '000729'),
            ('2018-06-30', '600600'),
            ('2018-09-30', '000001'),
            ('2018-09-30', '000008'),
            ('2018-09-30', '000729'),
            ('2018-09-30', '600600'),
            ('2018-12-31', '000001'),
            ('2018-12-31', '000008'),
            ('2018-12-31', '000729'),
            ('2018-12-31', '600600'),
            ('2019-03-31', '000001'),
            ('2019-03-31', '000008'),
            