In [1]:
import numpy as np
import pandas as pd
import tushare as ts
from sklearn import datasets,linear_model
import matplotlib.pyplot as plt

ts.set_token('d3fdbde82434cd6d7897550852136449f9fcba912e3eacb47b004600')
pro = ts.pro_api()


class gtja_191:

    def __init__(self, start_date, end_date, index):

        self.price = ts.pro_bar(ts_code=index, adj='qfq', start_date=start_date, end_date=end_date)
        
        if self.price is None:
            return
        
        ## 设置索引
        self.price.set_index(['trade_date'],inplace=True)

        ## 升序排序
        self.price = self.price.sort_index()

        ###分别取开盘价，收盘价，最高价，最低价，最低价，均价，成交量#######
        self.open_price = self.price['open']
        self.close = self.price['close']
        self.low = self.price['low']
        self.high = self.price['high']
        self.pct_chg = self.price['pct_chg']
        self.volume = self.price['vol']

        self.ts_code = index


    def get_price(self):
        return self.price

    def alpha_002(self):
        ##### -1 * delta((((close-low)-(high-close))/((high-low)),1))####
        #### ((收盘价-最低价) - (最高价-收盘价))/(最高价-最低价)前一天差值

        result = ((self.close - self.low) - (self.high - self.close)) / ((self.high - self.low)).diff()
        m = result.iloc[1:].dropna()
        alpha = m[(m < np.inf) & (m > -np.inf)]  # 去除值为inf
        return alpha.dropna()

    ##################################################################
    def alpha_031(self):
        # result = (self.close - pd.rolling_mean(self.close, 12)) * 100 / pd.rolling_mean(self.close, 12)
        alpha = (self.close - self.close.rolling(12).mean()) * 100 / self.close.rolling(12).mean()

        # alpha = result.iloc[-1, :]
        return alpha.dropna()


def train(ts_code):
    f = gtja_191(start_date='20190125', end_date='20190525', index=ts_code)

    ap_002 =  f.alpha_002()
    ap_002_df = pd.DataFrame(ap_002.values, columns=['alpha_002'], index=ap_002.index)

    ap_031 =  f.alpha_031()
    ap_031_df = pd.DataFrame(ap_031.values, columns=['alpha_031'], index=ap_031.index)


    ret = pd.merge(ap_002_df, ap_031_df, on=['trade_date'])
    dt = (pd.merge(f.get_price()['pct_chg'], ret, on=['trade_date'])).values
    
    X = dt[:,1:]
    Y = dt[:,0].reshape(-1,1)
    
    # # 建立模型
    regr = linear_model.LinearRegression()

    # # # 训练数据
    regr.fit(X,Y)

    # # # 拿到相关系数
    print(ts_code, ' coefficients(b1,b2...):',regr.coef_)
    print(ts_code, ' intercept(b0):',regr.intercept_)
    
    print(ts_code, ' score:', regr.score(X,Y))
    
    return regr.coef_, regr.intercept_, regr.score(X,Y)
    
if __name__ == '__main__':
    df = pro.stock_company(exchange='SZSE', fields='ts_code')
    
    ret = []
    for stock_code in df.values[:100]:
        try:
            coef_,intercept_,score = train(stock_code[0])
            ret.append([stock_code[0], coef_, intercept_, score])
        except Exception as e:
            print(e)
    

    result_df = pd.DataFrame(ret, columns=['code', 'coef', 'intercept', 'score'])
    
    print(result_df[['code','score']])
    
    print(result_df.sort_values(by = 'score', ascending=False))


002927.SZ  coefficients(b1,b2...): [[0.00164497 0.27828503]]
002927.SZ  intercept(b0): [-0.02590751]
002927.SZ  score: 0.21441719302876305
300696.SZ  coefficients(b1,b2...): [[0.0518693  0.25183847]]
300696.SZ  intercept(b0): [-0.23814159]
300696.SZ  score: 0.24618602938063183
300742.SZ  coefficients(b1,b2...): [[-4.86655060e-14  2.16655726e-01]]
300742.SZ  intercept(b0): [-0.11004337]
300742.SZ  score: 0.25137129086954046
300597.SZ  coefficients(b1,b2...): [[0.17035501 0.29036763]]
300597.SZ  intercept(b0): [0.03103207]
300597.SZ  score: 0.2960649735620139
300700.SZ  coefficients(b1,b2...): [[9.22712783e-15 2.53243501e-01]]
300700.SZ  intercept(b0): [-0.01824888]
300700.SZ  score: 0.24497856237957583
300685.SZ  coefficients(b1,b2...): [[0.03208326 0.34374096]]
300685.SZ  intercept(b0): [-0.16666583]
300685.SZ  score: 0.2637583305966147
300703.SZ  coefficients(b1,b2...): [[0.08971746 0.30366884]]
300703.SZ  intercept(b0): [-0.21512047]
300703.SZ  score: 0.2678094887754092
002887.SZ  co

002377.SZ  coefficients(b1,b2...): [[-2.36143343e-14  2.29259156e-01]]
002377.SZ  intercept(b0): [-0.15673504]
002377.SZ  score: 0.2942460221242911
300114.SZ  coefficients(b1,b2...): [[-5.13046451e-15  2.74724617e-01]]
300114.SZ  intercept(b0): [-0.14447773]
300114.SZ  score: 0.25056392162470464
002900.SZ  coefficients(b1,b2...): [[0.00415259 0.30038857]]
002900.SZ  intercept(b0): [-0.06373994]
002900.SZ  score: 0.26542220830516905
002948.SZ  coefficients(b1,b2...): [[0.0209057  0.26816967]]
002948.SZ  intercept(b0): [-0.00582894]
002948.SZ  score: 0.2770590253465165
002338.SZ  coefficients(b1,b2...): [[0.07736375 0.26029799]]
002338.SZ  intercept(b0): [0.05733018]
002338.SZ  score: 0.3186880778680109
300026.SZ  coefficients(b1,b2...): [[3.15737692e-16 3.02771302e-01]]
300026.SZ  intercept(b0): [-0.100933]
300026.SZ  score: 0.2571222886353669
002116.SZ  coefficients(b1,b2...): [[0.07540449 0.30326468]]
002116.SZ  intercept(b0): [-0.37025409]
002116.SZ  score: 0.28614240529239166
002331