In [3]:
# %%writefile alpha_base10.py

import sys
import os 
module_path = os.path.abspath(os.path.join('..')) 
if module_path not in sys.path: 
    sys.path.append(module_path)
    
import numpy as np
import pandas as pd
from scipy import stats
from sklearn import preprocessing
from sklearn import linear_model
import tools.Pretreat_Tools as pretreat


def STD(data, windows):
    """Rolling standard deviation over the trailing `windows` observations.

    A full window is required (min_periods == window), so the first
    `windows - 1` positions of the result are NaN.
    """
    roller = data.rolling(window=windows, min_periods=windows)
    return roller.std()
def MEAN(data, windows):
    """Rolling arithmetic mean over the trailing `windows` observations.

    A full window is required (min_periods == window), so the first
    `windows - 1` positions of the result are NaN.
    """
    roller = data.rolling(window=windows, min_periods=windows)
    return roller.mean()
def DELTA(data, windows):
    """Difference between each value and the value `windows` rows earlier.

    Parameters
    ----------
    data : pandas Series or DataFrame
    windows : int
        Lag (number of rows) used for the difference.

    Returns
    -------
    Same type as `data`; the first `windows` rows are NaN.
    """
    # The lag was previously hard-coded to 4, silently ignoring `windows`.
    return data.diff(windows)
def SEQUENCE(n):
    """Return the Series 1, 2, ..., n (default RangeIndex)."""
    values = np.arange(start=1, stop=n + 1)
    return pd.Series(values)

def SMA(data,windows,alpha):
    """Recursive exponential smoothing with smoothing factor alpha / windows.

    Uses the non-adjusted (recursive) form; the first `windows - 1` results
    are NaN because `min_periods` equals `windows`.
    """
    smoothing = float(alpha) / windows
    smoother = data.ewm(
        alpha=smoothing,
        min_periods=windows,
        adjust=False,
        ignore_na=False,
    )
    return smoother.mean()

def REGBETA(xs, y, n):
    """Rolling no-intercept regression slope of `y` on `xs`.

    For every trailing window of `n` rows of `xs`, fits `y ~ coef * xs`
    (no intercept) and returns the fitted coefficient.

    Parameters
    ----------
    xs : pandas Series
        Regressor; the rolling window runs over this series.
    y : pandas Series
        Regression target. When longer than the window it is aligned to the
        window by index label; otherwise it is used as-is.
    n : int
        Window length (a full window is required).

    Returns
    -------
    pandas Series of coefficients; NaN where the window is incomplete, where
    the aligned target contains NaN, or where the fit raised.

    Raises
    ------
    AssertionError
        If `y` has fewer than `n` rows.
    """
    assert len(y)>=n,  'len(y)!>=n !!!'+ str(y.index[0])
    regress = linear_model.LinearRegression(fit_intercept=False)

    def reg(X,Y):
        try:
            if len(Y)>len(X):
                # Pick the target rows that share the window's index labels.
                target = Y[X.index]
                if target.isnull().any():
                    return np.nan
            else:
                target = Y
            fitted = regress.fit(X.values.reshape(-1, 1), target.values.reshape(-1, 1))
            # coef_ has shape (1, 1) for a 2-D target; rolling.apply needs a
            # plain scalar, so extract the single element instead of
            # returning a length-1 array.
            return float(fitted.coef_[0, 0])
        except Exception as e:
            # Best effort: report the problem and leave this window as NaN.
            print(e)
            return np.nan

    # raw=False so the callback receives a Series whose index can be used to
    # align `y` with the current window.
    return xs.rolling(window=n, min_periods=n).apply(lambda x:reg(x,y), raw=False)


def COVIANCE(A,B,d):
    """Rolling covariance between `A` and `B` over the trailing `d` rows.

    Parameters
    ----------
    A, B : pandas Series
        Rows are paired by position; `B` must be at least as long as `A`.
    d : int
        Window length.

    Returns
    -------
    pandas Series indexed like `A`; the first `d - 1` entries are NaN.
    """
    # Roll over row positions so each window can slice both inputs.
    positions = pd.Series(np.arange(len(A.index)),index=A.index)

    def window_cov(window_pos):
        # rolling.apply hands the positions over as floats; .iloc needs ints.
        rows = window_pos.astype(int)
        return A.iloc[rows].cov(B.iloc[rows])

    # The window was previously hard-coded to 5, ignoring `d`.
    return positions.rolling(d).apply(window_cov, raw=True)

def CORR(A,B,d):
    """Rolling Pearson correlation between `A` and `B` over the trailing `d` rows.

    Parameters
    ----------
    A, B : pandas Series
        Rows are paired by position; `B` must be at least as long as `A`.
    d : int
        Window length.

    Returns
    -------
    pandas Series indexed like `A`; the first `d - 1` entries are NaN.
    """
    # Roll over row positions so each window can slice both inputs.
    positions = pd.Series(np.arange(len(A.index)),index=A.index)

    def window_corr(window_pos):
        # rolling.apply hands the positions over as floats; .iloc needs ints.
        rows = window_pos.astype(int)
        return A.iloc[rows].corr(B.iloc[rows])

    # The window was previously hard-coded to 5, ignoring `d`.
    return positions.rolling(d).apply(window_corr, raw=True)

def excute_for_multidates(data, func, level=0, **pramas):
    """Apply `func` to every group of `data` formed on index level `level`.

    Extra keyword arguments are forwarded to `func`. Group keys are not
    prepended to the result index.
    """
    grouped = data.groupby(level=level, group_keys=False)
    return grouped.apply(func, **pramas)



In [4]:
from tools.Cacher import (CACHE_TYPE, load_cache,load_caches_adv)
import tools.Sample_Tools as smpl
import tools.Pretreat_Tools as pretreat
import QUANTAXIS as QA


%load_ext autoreload
%autoreload 2
%aimport tools.Cacher

# Load the cached stock frame named 'all_train_qfq' via the project cache helper
# and sort it by index.
stock_df = load_cache('all_train_qfq',cache_type=CACHE_TYPE.STOCK).sort_index()
# stock_df = pd.concat(list(map(lambda file:load_cache(file,cache_type=CACHE_TYPE.STOCK),['all_train_qfq','all_tail_qfq','all_older_qfq']))).sort_index()
# Read the 10-year bond CSV (path is relative to `module_path`, which is set in
# the first cell) and index it by its 'date' column.
ret_fs = pd.read_csv(module_path+'/data/static/china10yearbond.csv').set_index('date').sort_index()
# Midpoint of 'high' and 'low', scaled by 0.01
# (presumably percent -> decimal yield; TODO confirm the CSV units).
ret_fs = (ret_fs['high']+ret_fs['low'])/2 * 0.01
# Divide by 252 (presumably trading days per year) to get a per-day rate.
ret_fs_daily = ret_fs/252
ret_fs_daily

date
2002-07    0.000114
2002-08    0.000113
2002-09    0.000122
2002-10    0.000122
2002-11    0.000132
             ...   
2022-09    0.000108
2022-10    0.000109
2022-11    0.000111
2022-12    0.000116
2023-01    0.000116
Length: 247, dtype: float64

In [133]:
# codes = smpl.get_codes_by_market(sse='all',only_main=True,filter_st=True)
# 

# Return series derived from the 'close' column by the project helper
# (assumed to be indexed by (date, code) -- TODO confirm against smpl).
ret_t = smpl.get_current_return(stock_df,'close')

# Excess return: subtract the daily risk-free rate of each calendar month.
# - group_keys=False keeps the original (date, code) index; without it newer
#   pandas prepends the month-end group key as an extra index level, which
#   would break the `ret.index[0][0]` / `level=0` logic used further down.
# - The fallback for months missing from the table uses .iloc[-1]; plain [-1]
#   on a string-labelled Series relies on deprecated positional fallback.
# NOTE(review): recent pandas versions spell the month-end alias 'ME'.
ret_t_excess = ret_t.groupby(pd.Grouper(level='date', freq='1M'), group_keys=False).apply(
        lambda x:x-ret_fs_daily.get(x.index[0][0].strftime('%Y-%m'),default=ret_fs_daily.iloc[-1]))

# Compute the market (average) return
def cal_ret_market(stock_data,ret_excess_data):
    """Weighted sum of excess returns, weighted by log market value.

    The market value is `totalCapital * close`; each weight is the log market
    value divided by the sum of all log market values.
    """
    # Without the log there are sometimes precision problems and the weights
    # do not sum to exactly 1.
    log_cap = np.log(stock_data['totalCapital']*stock_data['close'])
    weight = log_cap / log_cap.sum()
    weighted_ret = ret_excess_data * weight
    return weighted_ret.sum()

# Market return: weighted average of the excess returns of the whole market.
# For every value of index level 0 of `ret_t_excess`, take the rows of
# `stock_df` for that same key and combine the group's excess returns with
# `cal_ret_market`.
ret_market_t = ret_t_excess.groupby(level=0, group_keys=False).apply(
        lambda ret: cal_ret_market(stock_df.loc[ret.index[0][0]],ret))


def camp_beta_alpha(ret_excess,ret_market,window=252,half_life_window=63):
    """Rolling, exponentially weighted CAPM regression per stock.

    For every stock and every trailing window of `window` observations, fits
        stock_excess_return = alpha + beta * market_return
    with sample weights that halve every `half_life_window` observations
    (the most recent observation carries the largest weight).

    Parameters
    ----------
    ret_excess : pandas Series
        Stock excess returns indexed by (date, code). NaN rows are dropped
        before the windows are formed.
    ret_market : pandas Series
        Market return indexed by date; must contain every date that appears
        in `ret_excess`.
    window : int, default 252
        Number of observations per regression.
    half_life_window : int, default 63
        Half-life of the sample weights, in observations.

    Returns
    -------
    pandas DataFrame indexed by (date, code) with columns 'beta' and 'alpha',
    one row per completed window, stamped with the window's last row.
    Empty (same columns) when no stock has `window` observations.

    Notes
    -----
    Per the original author: about 35 minutes on a single core for 5 years of
    daily data.
    """
    # Weights ordered oldest -> newest. (The original referenced an undefined
    # name `half_life` here, so the function raised NameError.)
    decay = 0.5 ** (np.arange(window, 0, -1) / half_life_window)
    half_life_weight = decay / decay.sum()

    model = linear_model.LinearRegression(fit_intercept=True)
    records = []

    def reg(ret_t_ex):
        window_dates = ret_t_ex.index.get_level_values(0)
        # Market return is the regressor and the stock excess return is the
        # target; the original had the two swapped, which yields the slope of
        # market-on-stock rather than the CAPM beta.
        res = model.fit(ret_market.loc[window_dates].values.reshape(-1, 1),
                        ret_t_ex.values.reshape(-1, 1),
                        sample_weight=half_life_weight)

        last_date, last_code = ret_t_ex.index[-1][0], ret_t_ex.index[-1][1]
        records.append({'date':last_date,
                        'code':last_code,
                        # .item() extracts the single element; float() on an
                        # ndarray with ndim > 0 is deprecated in NumPy.
                        'beta':res.coef_.item(),
                        'alpha':res.intercept_.item()})
        # rolling.apply needs a numeric return value; the real results are
        # collected in `records`.
        return 0

    # raw=False so each window arrives as a Series that still carries its
    # (date, code) index.
    ret_excess.dropna().groupby(level=1,group_keys=False).apply(
            lambda x:x.rolling(window).apply(reg, raw=False))

    # Explicit columns keep set_index working when no window was completed.
    res_final = pd.DataFrame(records, columns=['date', 'code', 'beta', 'alpha'])
    res_final = res_final.set_index(['date', 'code']).sort_index()
    return res_final

def generat_tmpxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx():
    """Dump the 'beta' and 'alpha' columns of `res_final` to feather files.

    Reads `res_final` from the enclosing (module) scope; it is not a parameter.
    Each column is written, together with the reset index, to its own file in
    the current working directory.
    """
    targets = (
        ('beta', './beta_camp.feather'),
        ('alpha', './alpha_camp.feather'),
    )
    for column, path in targets:
        frame = pd.DataFrame(res_final[column])
        frame.reset_index().to_feather(path)

def momentum(ret,ret_fs):
    """Momentum factor from exponentially weighted log excess returns.

    Steps:
      1. log excess return = log(1 + ret) - log(1 + rf), where rf is looked up
         in `ret_fs` by the row's calendar month ('YYYY-MM'); months missing
         from `ret_fs` fall back to its last entry.
      2. Per stock: over each trailing 252-row window take the final value of
         a recursive EWM mean (half-life 126), then average 11 consecutive
         values and lag the result by 11 rows.

    Parameters
    ----------
    ret : pandas Series
        Returns indexed by (date, code); the level named 'date' must hold
        datetimes.
    ret_fs : pandas Series
        Risk-free rate keyed by 'YYYY-MM' strings.

    Returns
    -------
    pandas Series indexed by (date, code). Stocks with fewer than 253 valid
    rows contribute no rows.
    """
    # Resolve the risk-free rate once per distinct month, then broadcast it to
    # the rows. This reproduces the per-month `.get(..., default=last)` lookup
    # without a monthly groupby, whose result index depends on the pandas
    # version (group_keys default, 'M' vs 'ME' frequency alias).
    months = ret.index.get_level_values('date').strftime('%Y-%m')
    month_pos, month_keys = pd.factorize(months)
    # .iloc[-1]: plain [-1] on a string-labelled Series relies on deprecated
    # positional fallback.
    fallback = ret_fs.iloc[-1]
    rf_by_month = np.array([ret_fs.get(key, default=fallback) for key in month_keys],
                           dtype=float)
    rf = pd.Series(rf_by_month[month_pos], index=ret.index)
    ret_excess = np.log(1+ret).sub(np.log(1+rf), axis=0)

    def calc_(data,window=252,half_life_window=126):
        # One more row than the window is required (253 with the defaults).
        if len(data) < window + 1:
            # An empty slice (instead of None) keeps the groupby result a
            # well-formed Series while still contributing no rows.
            return data.iloc[:0]
        # Final value of the recursive EWM inside each trailing window; the
        # window and half-life parameters are now honoured rather than
        # shadowed by hard-coded 253 / 126.
        ewma = data.rolling(window).apply(
                        lambda xx:xx.ewm(adjust=False,halflife=half_life_window).mean().iloc[-1],
                        raw=False)
        return ewma.rolling(11).mean().shift(11)

    mom = ret_excess.dropna().groupby(level=1,group_keys=False).apply(
                    lambda x:calc_(x))

    return mom

def size(stock_data):
    """Size factor: natural log of market value (`totalCapital * close`)."""
    market_cap = stock_data['totalCapital'] * stock_data['close']
    return np.log(market_cap)
    


In [94]:

# t = stock_df.loc[(slice(None),['000001','000008','601868']),:]
# codes = t.index.get_level_values(1).unique().tolist()
# date_ = t.index.get_level_values(0)
# date_start = str(int(date_.min().strftime("%Y"))-1)
# date_end = date_.max().strftime("%Y")

# Total profit and net cash flow from operating activities.
# NOTE(review): `codes`, `date_start` and `date_end` are only assigned in the
# commented-out lines above, so this cell depends on values left in the kernel
# and will fail on a fresh Restart & Run All.
report_df = QA.QA_fetch_financial_report_adv(codes, date_start, date_end,ltype='EN').data[['totalProfit','netCashFlowsFromOperatingActivities','totalCapital']]
  
# Per code and per calendar year of 'report_date': replace each value with its
# difference from the previous report of that year; the first report of a year
# keeps its original value (fillna(x)).
# Presumably this converts year-to-date cumulative figures into single-period
# figures -- TODO confirm the reports are cumulative.
report_df[['totalProfit','netCashFlowsFromOperatingActivities']] = report_df[['totalProfit','netCashFlowsFromOperatingActivities']].groupby(level='code',group_keys=False).apply(
        lambda stock:stock.groupby(pd.Grouper(level='report_date', freq='1Y')).apply(
            lambda x:x.diff(1).fillna(x)))


Unnamed: 0_level_0,Unnamed: 1_level_0,totalProfit,netCashFlowsFromOperatingActivities,totalCapital
report_date,code,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2016-03-31,1,8029000000.0,59268000000.0,14308680000.0
2016-06-30,1,8125000000.0,-92466000000.0,17170410000.0
2016-09-30,1,8406001000.0,-119744000000.0,17170410000.0
2016-12-31,1,5374999000.0,163931000000.0,17170410000.0
2017-03-31,1,8230000000.0,-115008000000.0,17170410000.0
2017-06-30,1,8202000000.0,-13172010000.0,17170410000.0
2017-09-30,1,8507000000.0,-29806990000.0,17170410000.0
2017-12-31,1,5218001000.0,39206990000.0,17170410000.0
2018-03-31,1,8567000000.0,41442000000.0,17170410000.0
2018-06-30,1,8799999000.0,-33987000000.0,17170410000.0


In [95]:
# Per code: sum of the latest 4 report rows for each of the two columns; the
# first 3 rows of every code are NaN because the window is incomplete.
report_df[['totalProfit','netCashFlowsFromOperatingActivities']].groupby(level='code',group_keys=False).apply(lambda x:x.rolling(4).sum())

Unnamed: 0_level_0,Unnamed: 1_level_0,totalProfit,netCashFlowsFromOperatingActivities
report_date,code,Unnamed: 2_level_1,Unnamed: 3_level_1
2016-03-31,1,,
2016-06-30,1,,
2016-09-30,1,,
2016-12-31,1,29935000000.0,10989000000.0
2017-03-31,1,30136000000.0,-163287000000.0
2017-06-30,1,30213000000.0,-83993000000.0
2017-09-30,1,30314000000.0,5944006000.0
2017-12-31,1,30157000000.0,-118780000000.0
2018-03-31,1,30494000000.0,37669990000.0
2018-06-30,1,31092000000.0,16855000000.0


0.0039938178695502445