## 实现方式
- 不做模型预测，直接做数据分析
- 基于原始特征属性产生隐特征来进行分析

In [1]:
import os
import warnings
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
from sklearn import svm
from sklearn import tree
from sklearn import metrics
from sklearn import preprocessing
import talib as ta
from hmmlearn import hmm

In [2]:
np.random.seed(28)

In [3]:
warnings.filterwarnings('ignore')

In [4]:
%matplotlib inline

In [5]:
plt.rcParams['font.sans-serif'] = 'SimHei'
plt.rcParams['axes.unicode_minus'] = False

In [6]:
stockcode = '000001.SZ'
stockname = '平安银行'
file_path = './data/000001.SZ_day_qfq.csv'
stockfile = pd.read_csv(file_path, index_col='trade_date', parse_dates=True)

#### 加入隐特征

- 构建涨跌幅

In [7]:
stock_data = stockfile.drop(['pre_close', 'change', 'pct_change'], axis=1)
stock_data['amplitude_price'] = stockfile['high'] - stockfile['low']
print("样本数目:%d, 每个样本的特征数目:%d" % stock_data.shape)

样本数目:2551, 每个样本的特征数目:7


In [8]:
stock_data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2551 entries, 2008-01-02 to 2018-10-31
Data columns (total 7 columns):
open               2551 non-null float64
high               2551 non-null float64
low                2551 non-null float64
close              2551 non-null float64
vol                2551 non-null float64
amount             2551 non-null float64
amplitude_price    2551 non-null float64
dtypes: float64(7)
memory usage: 159.4 KB


In [9]:
# 对于空值做一个均值填充
imputer = preprocessing.Imputer()
stock_data = imputer.fit_transform(stock_data)

In [10]:
split_idx = int(4.0 * stock_data.shape[0] / 5)
stock_train_data, stock_test_data = stock_data[:split_idx,:], stock_data[split_idx:, :]
print("训练数据格式:{}, 测试数据格式:{}".format(stock_train_data.shape, stock_test_data.shape))

# 获取收盘价
buy_price = stockfile['close']
train_data_buy_price = buy_price[:split_idx]
test_data_buy_price = buy_price[split_idx:]

训练数据格式:(2040, 7), 测试数据格式:(511, 7)


In [11]:
# 模型构建
## 给定隐特征的数目
n = 30
model = hmm.GaussianHMM(n_components=n, random_state=28)
model.fit(stock_train_data)

GaussianHMM(algorithm='viterbi', covariance_type='diag', covars_prior=0.01,
      covars_weight=1, init_params='stmc', means_prior=0, means_weight=0,
      min_covar=0.001, n_components=30, n_iter=10, params='stmc',
      random_state=28, startprob_prior=1.0, tol=0.01, transmat_prior=1.0,
      verbose=False)

In [12]:
all_states = model.predict(stock_data)
train_states = model.predict(stock_train_data)
test_states = model.predict(stock_test_data)

In [13]:
def calc_rate_of_return_by_state(buy_price, buy_infos, initial_capital=100000.0):
    """
    buy_infos: 购买信息，True表示购买，False表示卖出
    :param buy_price: 价格信息
    :param buy_infos:
    :param initial_capital: 初始资本
    :return:
    """
    # 资本
    capital = initial_capital
    # 持有的股本
    hold_equity = 0.0

    flag = False
    for idx, buy_flag in enumerate(buy_infos):
        if buy_flag:
            # 当需要进行购买操作的时候，两种情况：如果之前就是购买，那就继续持有；否则全部买入
            if not flag:
                # 买入
                hold_equity = 1.0 * capital / buy_price[idx]
                flag = True
                capital = 0.0
        else:
            # 当需要进行卖出操作的时候，两种情况：如果之前就是卖出, 那么继续不买入；否则卖出
            if flag:
                # 卖出
                flag = False
                capital = hold_equity * buy_price[idx]
                hold_equity = 0.0

    # 计算最终的金额以及收益率
    total_capital = capital + hold_equity * buy_price[-1]
    return total_capital, (total_capital - initial_capital) / initial_capital


In [14]:
# 2. 对所有隐状态进行分析（计算各个状态的收益以及收益率）
x = []
y = []
# 分别存储购买状态、卖出状态、持有状态
buy_states = set()
sell_states = set()
hold_states = set()

unique_states = np.unique(all_states)
print("总的状态类别:{}".format(unique_states))
for state in unique_states:
    total_capital, rate_of_return = calc_rate_of_return_by_state(buy_price, all_states == state)
    print("状态{}--最终本金+收益:{}, 收益率:{}".format(state, total_capital, rate_of_return))
    x.append(state)
    y.append(rate_of_return)
    if rate_of_return > 0:
        buy_states.add(state)
    elif rate_of_return < 0:
        sell_states.add(state)
    else:
        hold_states.add(state)

buy_states = list(buy_states)
sell_states = list(sell_states)
hold_states = list(hold_states)
print("推荐购买信息为：{}".format(buy_states))
print("推荐卖出信息为：{}".format(sell_states))
print("推荐波动持有信号为：{}".format(hold_states))

总的状态类别:[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29]
状态0--最终本金+收益:131808.68995541465, 收益率:0.3180868995541465
状态1--最终本金+收益:107652.54683524289, 收益率:0.07652546835242888
状态2--最终本金+收益:106167.81032404304, 收益率:0.061678103240430354
状态3--最终本金+收益:106375.01112657557, 收益率:0.0637501112657557
状态4--最终本金+收益:77756.91543864389, 收益率:-0.2224308456135611
状态5--最终本金+收益:101344.96961857956, 收益率:0.013449696185795619
状态6--最终本金+收益:86623.89213871295, 收益率:-0.13376107861287048
状态7--最终本金+收益:109841.63249915195, 收益率:0.0984163249915195
状态8--最终本金+收益:96472.46071053168, 收益率:-0.03527539289468317
状态9--最终本金+收益:87914.28878678873, 收益率:-0.12085711213211267
状态10--最终本金+收益:64687.98280503746, 收益率:-0.35312017194962536
状态11--最终本金+收益:100590.47075035502, 收益率:0.005904707503550162
状态12--最终本金+收益:133624.17186551238, 收益率:0.33624171865512387
状态13--最终本金+收益:110191.30483197159, 收益率:0.10191304831971588
状态14--最终本金+收益:98542.02401372213, 收益率:-0.014579759862778737
状态15--最终本金+收益:94394.11257524384, 收益率:-0.

In [15]:
# 计算最终收益率
def calc_rate_of_return_by_state3(buy_price, states, buy_states, sell_states, initial_capital=100000.0):
    # 资本
    capital = initial_capital
    # 持有的股本
    hold_equity = 0.0

    # flag为True表示持有股票，False表示未持有股票
    flag = False
    for idx, state in enumerate(states):
        if state in buy_states:
            # 当需要进行购买操作的时候，两种情况：如果之前就是购买，那就继续持有；否则全部买入
            if not flag:
                # 买入
                hold_equity = 1.0 * capital / buy_price[idx]
                flag = True
                capital = 0.0
        elif state in sell_states:
            # 当需要进行卖出操作的时候，两种情况：如果之前就是卖出, 那么继续不买入；否则卖出
            if flag:
                # 卖出
                flag = False
                capital = hold_equity * buy_price[idx]
                hold_equity = 0.0

    # 计算最终的金额以及收益率
    total_capital = capital + hold_equity * buy_price[-1]
    return total_capital, (total_capital - initial_capital) / initial_capital

In [16]:
print("所有数据上")
print("时间长度:{}".format(buy_price.shape))
print("最终收益:{}-{}".format(*calc_rate_of_return_by_state3(buy_price, all_states, buy_states, sell_states)))

所有数据上
时间长度:(2551,)
最终收益:995488.0445697262-8.954880445697261


In [17]:
print("训练数据上")
print("时间长度:{}".format(train_data_buy_price.shape))
print("最终收益:{}-{}".format(*calc_rate_of_return_by_state3(train_data_buy_price, train_states, buy_states, sell_states)))

训练数据上
时间长度:(2040,)
最终收益:794554.5765293054-6.945545765293054


In [18]:
print("测试数据上")
print("时间长度:{}".format(test_data_buy_price.shape))
print("最终收益:{}-{}".format(*calc_rate_of_return_by_state3(test_data_buy_price, test_states, buy_states, sell_states)))

测试数据上
时间长度:(511,)
最终收益:125288.8189151359-0.25288818915135897
