In [77]:
import tushare as ts
import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels import regression
import ffn
from sklearn.svm import SVR
import matplotlib.pyplot as plt
from collections import defaultdict
import os

In [57]:
class Model:
    """Fit one SVR on several stocks and predict their N-period returns.

    For every stock code the model downloads a training window and a test
    window of daily bars through tushare, lets ``Feature`` add the ``mean``
    and ``n_simpleret`` columns, pools the rows of all stocks together and
    fits a single RBF-kernel ``SVR`` on the pooled training rows.
    """

    # Raw daily-bar columns kept from tushare; these are the SVR inputs.
    FEATURE_COLUMNS = ['open', 'high', 'close', 'low',
                       'price_change', 'p_change', 'turnover']

    def __init__(self, data, train_start, train_end, test_start, test_end, n):
        """Store the stock universe and the date windows.

        Args:
            data: object indexable by ``'code'`` that yields an iterable of
                stock codes (the notebook passes ``ts.get_sz50s()``).
            train_start, train_end: bounds of the training window, forwarded
                unchanged to ``ts.get_hist_data``.
            test_start, test_end: bounds of the test window, forwarded
                unchanged to ``ts.get_hist_data``.
            n: number of periods used by ``Feature`` for the return label.
        """
        self.code = data['code']
        self.train_start = train_start
        self.train_end = train_end
        self.test_start = test_start
        self.test_end = test_end
        self.n = n

    def getInitData(self, code):
        """Download the raw training and test frames for a single stock.

        Args:
            code: stock code understood by ``ts.get_hist_data``.

        Returns:
            ``(train_data, test_data)`` restricted to ``FEATURE_COLUMNS``.
            An element is ``None`` when tushare returned ``None`` for that
            window instead of a frame (NOTE(review): the notebook remarks
            that some stocks have no retrievable history; confirm the exact
            tushare behaviour).
        """
        columns = self.FEATURE_COLUMNS
        train_data = ts.get_hist_data(code, start=self.train_start, end=self.train_end)
        test_data = ts.get_hist_data(code, start=self.test_start, end=self.test_end)

        if train_data is not None:
            train_data = train_data.loc[:, columns]
        if test_data is not None:
            test_data = test_data.loc[:, columns]

        return train_data, test_data

    def train(self):
        """Fit the SVR on all stocks' training rows and predict the test rows.

        Stocks for which either window could not be downloaded are skipped.

        Returns:
            The array returned by ``SVR.predict`` for the pooled test rows,
            in the order the codes were iterated.

        Raises:
            ValueError: if no training rows or no test rows were collected
                (for example, the code list is empty).
        """
        columns = self.FEATURE_COLUMNS
        x_train = []
        y_label = []
        x_test = []

        for code in self.code:
            train_data, test_data = self.getInitData(code)
            if train_data is None or test_data is None:
                # Nothing to learn from or predict for this stock.
                continue

            # Feature engineering lives in the Feature class; add new
            # features there rather than here.
            feature = Feature(train_data, test_data, self.n)
            train_data, test_data = feature.cal_mean()
            train_data = feature.cal_nReturn(train_data)

            # Select by column name instead of positional slices so the
            # inputs do not silently shift when Feature adds more columns.
            x_train.extend(train_data.loc[:, columns].values.tolist())
            # Flat list of labels: SVR expects a 1-D target.
            y_label.extend(train_data['n_simpleret'].tolist())
            x_test.extend(test_data.loc[:, columns].values.tolist())

        if not x_train or not x_test:
            raise ValueError('no training/test rows collected; '
                             'check the stock codes and date windows')

        # One estimator for the pooled data; built only once there is data.
        svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1)
        return svr_rbf.fit(x_train, y_label).predict(x_test)

    #得到预期代码

#     def getCode():
        
#         return 
        

In [58]:
class Feature:
    """Derive model features and the return label from daily-bar frames.

    The frames handed to the constructor are modified in place: new columns
    are added to them by the methods below.
    """

    def __init__(self, train_data, test_data, n):
        """Keep references to the two frames and the return horizon.

        Args:
            train_data: frame with at least ``open`` and ``close`` columns.
            test_data: frame with at least ``open`` and ``close`` columns.
            n: number of rows the mean price is shifted by when computing
                the return in ``cal_nReturn``.
        """
        self.train_data = train_data
        self.test_data = test_data
        self.n = n

    def cal_mean(self):
        """Add a ``mean`` column, the average of ``open`` and ``close``.

        Returns:
            ``(train_data, test_data)``: the same two frame objects held by
            this instance, each with the new ``mean`` column.
        """
        train_data = self.train_data
        test_data = self.test_data

        train_data['mean'] = (train_data['open'] + train_data['close']) / 2
        test_data['mean'] = (test_data['open'] + test_data['close']) / 2

        return train_data, test_data

    def cal_nReturn(self, train_data):
        """Add the ``n_simpleret`` label column and back-fill missing values.

        The label of a row is ``(mean - mean_shifted) / mean_shifted`` where
        ``mean_shifted`` is the ``mean`` value found ``n`` rows earlier in the
        frame. NOTE(review): whether "n rows earlier" means earlier or later
        in calendar time depends on the sort order of the frame; confirm it
        matches the intended return definition.

        Args:
            train_data: frame that already has a ``mean`` column; the
                ``n_simpleret`` column is added to it in place.

        Returns:
            A new frame in which every missing value (including the first
            ``n`` labels) has been filled from the next valid row below it.
        """
        mean_price = train_data['mean']
        shifted_price = mean_price.shift(self.n)
        train_data['n_simpleret'] = (mean_price - shifted_price) / shifted_price

        # DataFrame.bfill() is the non-deprecated spelling of
        # fillna(method='bfill').
        return train_data.bfill()

In [109]:
class Season_Feature:
    """Fetch and combine quarterly fundamentals for one (year, season)."""

    def __init__(self, year, season):
        """Remember which quarter to query.

        Args:
            year: year forwarded to the tushare quarterly endpoints.
            season: quarter number forwarded to the same endpoints.
        """
        self.year = year
        self.season = season

    # ---- processing of fundamentals data --------------------------------

    @staticmethod
    def name_to_code(name, data):
        """Replace each value by an integer id assigned in first-seen order.

        Args:
            name: iterable of hashable values (e.g. a column of names).
            data: unused; kept so existing callers keep working.

        Returns:
            A list with one integer per input value; equal values share the
            same id and ids start at 0.
        """
        ids = {}
        real_name = []
        for value in name:
            if value not in ids:
                ids[value] = len(ids)
            real_name.append(ids[value])
        return real_name

    @staticmethod
    def need_change(data, change_name=('name', 'industry', 'area')):
        """Integer-encode the given columns of ``data`` in place.

        Each listed column is deleted and re-added with its values replaced
        by the ids from ``name_to_code``, so the encoded columns end up as
        the last columns of the frame.

        Args:
            data: frame containing every column listed in ``change_name``.
            change_name: column names to encode; the default covers the
                textual columns of the fundamentals table and may be
                overridden.

        Returns:
            The same ``data`` object, modified.
        """
        for column in change_name:
            real_name = Season_Feature.name_to_code(data[column], data)
            del data[column]
            data[column] = real_name
        return data

    def get_basic(self):
        """Download the basics table and the six quarterly tables.

        Returns:
            ``(base, table1, ..., table6)``: the stock basics followed by
            the report, profit, operation, growth, debt-paying and cash-flow
            tables for ``self.year`` / ``self.season``.
        """
        year = self.year
        season = self.season

        base = ts.get_stock_basics()
        # Earnings report data for the quarter.
        table1 = ts.get_report_data(year, season)
        # Profitability data for the quarter.
        table2 = ts.get_profit_data(year, season)
        # Operating-capability data for the quarter.
        table3 = ts.get_operation_data(year, season)
        # Growth-capability data for the quarter.
        table4 = ts.get_growth_data(year, season)
        # Debt-paying-capability data for the quarter.
        table5 = ts.get_debtpaying_data(year, season)
        # Cash-flow data for the quarter.
        table6 = ts.get_cashflow_data(year, season)

        return base, table1, table2, table3, table4, table5, table6

    def merge_basic(self):
        """Merge the six quarterly tables on ``code`` and ``name``.

        The stock basics table returned by ``get_basic`` is not part of the
        merge.

        Returns:
            The result of successively merging the six quarterly tables
            with ``pd.merge(..., on=['code', 'name'])``.
        """
        tables = self.get_basic()[1:]

        result = tables[0]
        for table in tables[1:]:
            result = pd.merge(result, table, on=['code', 'name'])

        return result


In [60]:
# Main entry point: train on Mar-Jun 2017 and predict Jul-Aug 2017 for the
# stocks returned by ts.get_sz50s(), using a 15-period return label.
model = Model(
    data=ts.get_sz50s(),
    train_start='2017-03-01',
    train_end='2017-06-30',
    test_start='2017-07-01',
    test_end='2017-08-31',
    n=15,
)

print(model.train())

[-0.01226435  0.00128179  0.00536256 ...,  0.05089279  0.04711263
  0.04966329]


In [110]:
# Build the merged fundamentals table for 2017 Q2, drop its 'name' column
# and write it out as a tab-separated file.
season2 = Season_Feature(2017, 2).merge_basic()
# `axis` must be passed by keyword: newer pandas rejects drop('name', 1).
season2 = season2.drop('name', axis=1)
season2.to_csv('2017_season2.csv', sep='\t', na_rep='NaN', encoding='UTF-8')


[Getting data:]########################################################[Getting data:]########################################################[Getting data:]########################################################[Getting data:]########################################################[Getting data:]########################################################[Getting data:]########################################################

In [107]:
# Build the merged fundamentals table for 2017 Q1, drop its 'name' column
# and write it out as a tab-separated file.
season1 = Season_Feature(2017, 1).merge_basic()
# `axis` must be passed by keyword: newer pandas rejects drop('name', 1).
season1 = season1.drop('name', axis=1)
#season1 = season1.set_index('code')
season1.to_csv('2017_season1.csv', sep='\t', na_rep='NaN', encoding='UTF-8')

[Getting data:]#######################################################[Getting data:]#######################################################[Getting data:]#######################################################[Getting data:]#######################################################[Getting data:]#######################################################[Getting data:]#######################################################

In [116]:
def get_season_codes(season1,season2):
    """Return the ``'code'`` entries of the two season tables as a pair.

    Args:
        season1: table for the first season, indexable by ``'code'``.
        season2: table for the second season, indexable by ``'code'``.

    Returns:
        ``(season1['code'], season2['code'])``.
    """
    return season1['code'], season2['code']

In [117]:
# Pull the 'code' column out of each season table.
season1_codes,season2_codes = get_season_codes(season1,season2)

In [145]:
# Scratch reference to the 'code' column of season1 (same data as season1_codes).
test = season1.code

In [138]:
# Some stocks have no retrievable history for the first quarter; this one
# does, so use it as a sample and keep only the open/close prices.
data = ts.get_hist_data(
    '600829', start='2017-01-01', end='2017-03-31'
).loc[:, ['open', 'close']]
data.head()

Unnamed: 0_level_0,open,close
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-03-31,14.41,14.42
2017-03-30,14.78,14.4
2017-03-29,14.87,14.79
2017-03-28,14.88,14.9
2017-03-27,15.02,14.9


In [144]:
# Get the date sequence: the index labels of `data` as a list, then report
# how many entries there are in the quarter.
season_date = data.index.tolist()
print(len(season_date))

59


In [None]:
# Fetch the in-quarter price history of each stock.
def get_season_hist_data(season_codes,season_start,season_end):
    """Walk the quarter's price history of every stock to build labels.

    Work in progress: the history is downloaded and every (start, end)
    pair of rows is visited, but the return computation and the labelling
    rule (return >= 15% or return <= -5%) are not implemented yet, so the
    returned list is currently always empty.

    Args:
        season_codes: iterable of stock codes.
        season_start: start of the quarter, passed to ``ts.get_hist_data``.
        season_end: end of the quarter, passed to ``ts.get_hist_data``.

    Returns:
        The list of labels collected so far (``y_label``).
    """
    y_label = []
    for code in season_codes:
        data = ts.get_hist_data(code, season_start, season_end)

        # A stock may have no history in this quarter; nothing can be
        # computed for it, so it is simply discarded.
        if data is None:
            continue

        # Keep the two columns needed to compute returns.
        data = data.loc[:, ['open', 'close']]

        # Number of rows (trading days) available in the quarter.
        days = len(data.index)

        # Find stocks whose return is >= 15% or <= -5% and give them the
        # corresponding y_label.
        for start in range(1, days):
            for end in range(start + 1, days):
                # TODO: compute the return between rows `start` and `end`,
                # decide on which day to buy and on which day to sell so
                # that the thresholds above are met, and append the label.
                pass

    return y_label
                
            
        
        