In [46]:
import numpy as np
from hmmlearn import hmm
import matplotlib.pyplot as plt
import matplotlib as mpl
from sklearn.metrics.pairwise import pairwise_distances_argmin
import warnings

In [47]:
def expand(a, b):
    """Widen the interval ``[a, b]`` by 5% of its length on each side.

    Parameters
    ----------
    a, b : numbers
        Lower and upper bound of the interval.

    Returns
    -------
    tuple
        ``(a - d, b + d)`` where ``d = (b - a) * 0.05``.
    """
    margin = (b - a) * 0.05
    lower = a - margin
    upper = b + margin
    return lower, upper

In [48]:
# Silence every warning for the rest of the session.
# Original author's note: hmmlearn (0.2.0) lags behind sklearn (0.18).
# NOTE(review): this is a blanket filter, so it hides all other warnings too.
warnings.simplefilter("ignore")

In [49]:
# Load the data.
# File columns: 0 date, 1 open, 2 high, 3 low, 4 close, 5 volume, 6 turnover.
#   delimiter: field separator
#   skiprows:  number of leading lines that are skipped
#   usecols:   read only columns 4, 5, 6, 2, 3 (0-based)
x = np.loadtxt('SH600000.txt', delimiter='\t', skiprows=2, usecols=(4, 5, 6, 2, 3))
x = x[:-1, :]  # the last day's record is not used

# After loading, the array columns are used as:
# 0 close, 1 volume, 2 turnover, 3 high, 4 low.
close_price = x[:, 0]                    # closing price
diff_price = np.diff(close_price)        # day-over-day change of the closing price

# np.diff yields one value fewer than its input, so the remaining features
# drop their first day to stay aligned with diff_price.
volumn = x[1:, 1]                        # volume (today's)
amount = x[1:, 2]                        # turnover (today's)
amplitude_price = x[1:, 3] - x[1:, 4]    # daily range: high minus low (today's)

# The whole data set is treated as one sequence; every sample has four features.
sample = np.column_stack((volumn, amount, amplitude_price, diff_price))    # observations
print("样本数目:%d, 每个样本的特征数目:%d" % sample.shape)
sample

样本数目:1509, 每个样本的特征数目:4


array([[  1.15147943e+08,   2.43689088e+09,   4.70000000e-01,
          9.00000000e-02],
       [  9.67825750e+07,   2.03417408e+09,   2.40000000e-01,
         -2.40000000e-01],
       [  8.52360720e+07,   1.76180096e+09,   4.40000000e-01,
         -2.80000000e-01],
       ..., 
       [  1.75381840e+07,   3.13324800e+08,   2.90000000e-01,
          2.10000000e-01],
       [  3.56315260e+07,   6.50177344e+08,   4.80000000e-01,
          3.40000000e-01],
       [  1.83124240e+07,   3.33790688e+08,   2.20000000e-01,
         -1.00000000e-01]])

In [50]:
# Build the model.
## Number of hidden states
n = 30
model = hmm.GaussianHMM(random_state=28, n_components=n)
model.fit(sample)

# Per-sample state probabilities and the predicted state sequence
# for the same observations the model was fitted on.
y = model.predict_proba(sample)
hidden_states = model.predict(sample)
print(y)
print(hidden_states)

[[  4.03112907e-026   6.31885681e-267   6.93815594e-104 ...,
    2.23727514e-096   4.98183050e-193   0.00000000e+000]
 [  2.95966363e-008   2.74822748e-030   2.72959623e-020 ...,
    3.41640596e-008   2.59298817e-052   1.06228993e-115]
 [  3.30635002e-013   2.37037585e-023   8.20456770e-023 ...,
    3.09923216e-006   2.96914079e-050   1.13773561e-083]
 ..., 
 [  4.45606520e-045   4.31505411e-005   1.71354189e-060 ...,
    3.53700049e-009   1.49435744e-062   1.54323075e-002]
 [  1.06641252e-037   1.67192244e-006   1.10255153e-047 ...,
    5.91800259e-006   4.15070540e-051   3.26812735e-018]
 [  7.08496983e-036   7.88661754e-005   1.39178271e-036 ...,
    2.32456592e-010   7.48533142e-046   6.78180901e-001]]
[16  4 14 ..., 10 11 29]


In [51]:
# Run the fitted model on a small slice of the data (the first two samples).
n_features = sample.shape[1]
test_sample = sample[:2].reshape((-1, n_features))
print("部分数据的样本值:\n{}".format(test_sample))

# decode() prints as a (log-probability, state sequence) pair,
# predict() prints the state sequence only.
decoded = model.decode(test_sample)
predicted = model.predict(test_sample)
print(decoded)
print(predicted)

部分数据的样本值:
[[  1.15147943e+08   2.43689088e+09   4.70000000e-01   9.00000000e-02]
 [  9.67825750e+07   2.03417408e+09   2.40000000e-01  -2.40000000e-01]]
(-82.41191676770723, array([16,  4]))
[16  4]


In [52]:
# Plotting: price change, volume, all state probabilities overlaid, and then
# one panel per hidden state, laid out on a grid with three columns.
row_num = int(np.ceil((n + 3) / 3.0))   # 3 summary panels + n per-state panels
t = np.arange(len(diff_price))
# Presumably SimHei is selected so the Chinese titles below render - it must be
# installed on the machine. unicode_minus is disabled alongside it.
mpl.rcParams['font.sans-serif'] = [u'SimHei']
mpl.rcParams['axes.unicode_minus'] = False

# figsize is (width, height) in inches. The grid has 3 columns and row_num
# rows, so the width is fixed (3 columns x 3 in) and the height scales with
# the number of rows (3 in per row). The previous (row_num * 3, 9) had the two
# swapped, which squeezed every panel into a very wide, very flat strip.
fig = plt.figure(figsize=(9, row_num * 3), facecolor='w')

ax = fig.add_subplot(row_num, 3, 1)
ax.plot(t, diff_price, 'r-')
ax.grid(True)
ax.set_title(u'涨跌幅')

ax = fig.add_subplot(row_num, 3, 2)
ax.plot(t, volumn, 'g-')
ax.grid(True)
ax.set_title(u'交易量')

# One colour per hidden state.
clrs = plt.cm.terrain(np.linspace(0, 0.8, n))

# All state probabilities drawn into a single panel.
ax = fig.add_subplot(row_num, 3, 3)
for i, clr in enumerate(clrs):
    ax.plot(t, y[:, i], '-', color=clr, alpha=0.7)
ax.set_title(u'所有组分')
ax.grid(True)

# Each state probability in its own panel (grid slots 4 .. n+3).
for i, clr in enumerate(clrs):
    ax = fig.add_subplot(row_num, 3, i + 4)
    ax.plot(t, y[:, i], '-', color=clr)
    ax.set_title(u'组分%d' % (i + 1))
    ax.grid(True)

fig.suptitle(u'SH600000股票：GaussianHMM分解隐变量', fontsize=18)
fig.tight_layout()
fig.subplots_adjust(top=0.9)   # leave room above the panels for the suptitle
fig.savefig('hmm.png')

In [53]:
# Model persistence
# Option 1: save the fitted model object itself.
# sklearn.externals.joblib was deprecated in scikit-learn 0.21 and removed in
# 0.23, so prefer the standalone joblib package and only fall back to the
# vendored copy on old scikit-learn installations.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib
joblib.dump(model, './hmm_{}.m'.format(n))

['./hmm_30.m']

In [54]:
# Model persistence
# Option 2: save the predicted hidden states rather than the model.
import pickle
states = model.predict(sample)
# Use a context manager so the file is flushed and closed even if
# pickling raises; the bare open() used before leaked the handle.
with open('./hmm_states_{}.pkl'.format(n), 'wb') as states_file:
    pickle.dump(states, states_file)