In [13]:
# %load gundong_class_tensor.py
'''
    进行动态预测的代码，用张量优化的版本
'''
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
import matplotlib.dates as mdate
from numpy import *
from matplotlib.pylab import rcParams
plt.rcParams['axes.unicode_minus']=False
rcParams['font.sans-serif'] = 'kaiti'

from statsmodels.tsa.stattools import adfuller
import statsmodels.api as sm
import statsmodels.stats.diagnostic
from statsmodels.tsa.api import VAR


class gundong_tensor():
    def __init__(self, data, gundong_time, k_lag):
        self.row = data.shape[0] # 行长度
        self.column = data.shape[1] # 列长度
        self.data = data
        self.gundong_time = gundong_time # 滚动选择的时间
        self.k_lag = k_lag
        self.save_data_coef = np.zeros((self.row-self.gundong_time+1, self.column, self.k_lag*self.column))
        self.save_data_cov = np.zeros((self.row-self.gundong_time+1, self.column, self.column))
        self.save_data_result = np.zeros((self.row-self.gundong_time+1, self.column, self.column))

    def VAR(self):
        '''
        实现滚动计算 k-lag 的 VAR 模型
        并且保存矩阵的系数以及相关系数矩阵
        实现了 k-lag>1 时的向量值回归模型

        '''
        for i in range(self.gundong_time, self.row+1,1):
            datai = self.data.iloc[i-self.gundong_time:i,:]
            model = VAR(datai)
            # 滞后 k_lag 个单位计算
            results = model.fit(self.k_lag)
            coef = results.params
            self.save_data_coef[i-self.gundong_time,:,:]= coef.iloc[1:1+self.k_lag*self.column,:].T
            self.save_data_cov[i-self.gundong_time,:,:] = results.sigma_u


    def calculate_multiply(self):
        # 初始的 A_0,...A_{1-p}

        # 第一个分块矩阵是单位阵
        matrix_identity = np.zeros((self.k_lag*self.column, self.column))
        matrix_identity[0:self.column,:] = np.identity(self.column)
        matrix_identity = np.expand_dims(matrix_identity,0).repeat(self.row-self.gundong_time+1, axis=0)
        matrix_left = np.matmul(matrix_identity, self.save_data_coef)
        matrix_right = np.zeros((self.k_lag*self.column, self.k_lag*self.column))
        for j in range(1,self.k_lag):
            matrix_right[j*self.column:(j+1)*self.column, (j-1)*self.column:j*self.column] = np.identity(self.column)
        matrix_right = np.expand_dims(matrix_right,0).repeat(self.row-self.gundong_time+1, axis=0)
        matrix_multiply = matrix_left+matrix_right
        return matrix_multiply


    def cal_overflow(self, predict_time):
        '''
            适用于不同 k_lag 的向量自回归模型
            
            张量乘法运算 a*b*c 维张量 matmul a*c*d 维张量结果是 a*b*d 维张量

            Args:
                predict_time: 预测天数
        '''
        self.predict_time = predict_time
        # 初始的 A_h 矩阵 
        self.A_h = np.zeros((self.row-self.gundong_time+1, self.k_lag*self.column, self.column))
        self.A_h[:, 0:self.column,:] = np.identity(self.column)
        # 得到
        matrix_multiply = self.calculate_multiply()
        temp = np.matmul(self.A_h[:,0:self.column,:],self.save_data_cov)
        sum_top = temp*temp
        # 得到一个对角阵
        sigma_jj = self.save_data_cov.diagonal(axis1=1, axis2=2)
        sigma_jj = np.apply_along_axis(np.diag, 1, sigma_jj)
        # A_h * cov * A_h'
        temp_bottom = np.matmul(temp, self.A_h[:,0:self.column,:].transpose(0,2,1))
        # 每行元素都是对角线元素
        temp_bottom = temp_bottom.diagonal(axis1=1, axis2=2)[:,np.newaxis].transpose(0,2,1).repeat(self.column,2)
        # * sigma_jj
        sum_bottom = np.matmul(temp_bottom, sigma_jj)
        for h in range(self.predict_time-1):
            self.A_h = np.matmul(matrix_multiply, self.A_h)
            temp = np.matmul(self.A_h[:,0:self.column,:], self.save_data_cov)
            sum_top = sum_top + temp*temp
            temp_bottom = np.matmul(temp, self.A_h[:,0:self.column,:].transpose(0,2,1))
            temp_bottom = temp_bottom.diagonal(axis1=1, axis2=2)[:,np.newaxis].transpose(0,2,1).repeat(self.column,2)
            sum_bottom = sum_bottom + np.matmul(temp_bottom, sigma_jj)

        self.save_data_result = sum_top/sum_bottom

        def standard_overflow(self):
            '''
            计算溢出指数的比重


            '''
            pass


    def save_data(self, path):
        np.save(path+'save_data_coef',self.save_data_coef)
        np.save(path+'save_data_cov',self.save_data_cov)
        np.save(path+'save_data_result',self.save_data_result)



In [60]:
data = pd.read_csv('liehe.csv', index_col=0)

In [61]:
var_lag = var_lag_order(data)

In [62]:
model = VAR(data)



In [63]:
result = model.select_order()
result.summary()

0,1,2,3,4
,AIC,BIC,FPE,HQIC
0.0,-131.8,-131.6*,5.939e-58*,-131.7*
1.0,-131.4,-128.7,8.491e-58,-130.3
2.0,-130.8,-125.5,1.592e-57,-128.7
3.0,-130.2,-122.4,2.916e-57,-127.1
4.0,-129.9,-119.6,4.129e-57,-125.8
5.0,-129.5,-116.6,7.112e-57,-124.3
6.0,-129.4,-114.0,9.249e-57,-123.2
7.0,-129.2,-111.3,1.449e-56,-122.0
8.0,-129.0,-108.6,2.371e-56,-120.8


In [64]:
gundong_data = gundong_tensor(data, gundong_time=244, k_lag=8)

In [65]:
gundong_data.VAR()



In [66]:
data

Unnamed: 0_level_0,yield_rate_000001.SZ,yield_rate_600000.SH,yield_rate_600015.SH,yield_rate_600016.SH,yield_rate_600036.SH,yield_rate_601166.SH,yield_rate_601288.SH,yield_rate_601328.SH,yield_rate_601398.SH,yield_rate_601818.SH,yield_rate_601939.SH,yield_rate_601988.SH,yield_rate_601998.SH
trade_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
20191231,-0.007242,0.002431,0.002614,0.000000,-0.006609,0.002024,-0.008065,-0.001773,-0.001698,0.004556,0.000000,-0.005391,0.000000
20191230,-0.003608,0.001623,0.002621,0.006380,-0.000792,-0.010020,-0.002681,0.003559,-0.001695,-0.011261,-0.002759,0.005420,0.001623
20191227,0.009715,0.002441,0.005270,0.003200,-0.006299,0.000000,0.005391,0.003571,0.006826,0.002257,0.002766,0.002717,0.003257
20191226,0.010429,0.004085,0.005298,0.003210,0.008203,0.010121,0.010899,0.005386,0.005146,0.013730,0.008368,0.005464,0.006557
20191225,-0.006098,-0.003257,-0.005270,-0.004792,-0.005788,-0.007035,-0.002717,-0.001792,-0.005119,-0.002283,-0.002782,-0.002725,-0.004894
...,...,...,...,...,...,...,...,...,...,...,...,...,...
20190108,-0.008214,-0.002004,-0.005369,-0.001736,-0.011755,0.003955,-0.011204,0.003431,-0.007634,-0.005208,0.000000,-0.008403,0.001845
20190107,-0.001026,0.002008,-0.005340,-0.003460,0.000392,-0.001974,-0.005571,-0.005119,-0.005693,0.000000,-0.003145,-0.002793,-0.003676
20190104,0.050647,0.015291,0.019048,0.019400,0.025322,0.023569,0.014124,0.013841,0.013462,0.024000,0.015974,0.008451,0.018727
20190103,0.009793,0.011340,0.005472,0.003540,0.012617,0.007463,0.005682,0.019400,0.000000,0.027397,0.001600,0.002825,0.001876


In [67]:
gundong_data.cal_overflow(predict_time=1)

In [68]:
gundong_data.save_data_result.shape

(1, 13, 13)

In [69]:
result = gundong_data.save_data_result
sum_result = result.sum(axis=0)

In [70]:
sum_line = np.sum(result, axis=1)
result_divide = np.divide(result, sum_line)
result_divide

array([[[0.14566739, 0.06377802, 0.0637879 , 0.06124612, 0.08214986,
         0.06676311, 0.04897168, 0.05567716, 0.05514476, 0.05799665,
         0.05766518, 0.05477753, 0.05668133],
        [0.07306224, 0.12715704, 0.06171911, 0.07852262, 0.0745936 ,
         0.07697073, 0.06438097, 0.08156225, 0.06832879, 0.07976059,
         0.06641114, 0.06218226, 0.06621246],
        [0.07721193, 0.06521444, 0.12034173, 0.0852364 , 0.06588512,
         0.06860081, 0.07708043, 0.07236293, 0.07326954, 0.0773083 ,
         0.07531322, 0.08228545, 0.08986159],
        [0.08118552, 0.09086002, 0.0933424 , 0.10989106, 0.07126584,
         0.08050554, 0.08658413, 0.08928933, 0.08023278, 0.0865201 ,
         0.08131362, 0.08841138, 0.08725703],
        [0.08266477, 0.06552292, 0.05477153, 0.05409972, 0.14476005,
         0.0769425 , 0.05476393, 0.0600001 , 0.05931886, 0.05809478,
         0.05883019, 0.05243231, 0.05356966],
        [0.07921941, 0.07972577, 0.06724781, 0.07206441, 0.09072933,
         0.

In [71]:
result_divide.shape

(1, 13, 13)

In [72]:
df = pd.DataFrame(result_divide[0])

In [73]:
df.to_csv('bizhong2.csv')

In [74]:
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,0.145667,0.063778,0.063788,0.061246,0.08215,0.066763,0.048972,0.055677,0.055145,0.057997,0.057665,0.054778,0.056681
1,0.073062,0.127157,0.061719,0.078523,0.074594,0.076971,0.064381,0.081562,0.068329,0.079761,0.066411,0.062182,0.066212
2,0.077212,0.065214,0.120342,0.085236,0.065885,0.068601,0.07708,0.072363,0.07327,0.077308,0.075313,0.082285,0.089862
3,0.081186,0.09086,0.093342,0.109891,0.071266,0.080506,0.086584,0.089289,0.080233,0.08652,0.081314,0.088411,0.087257
4,0.082665,0.065523,0.054772,0.0541,0.14476,0.076942,0.054764,0.06,0.059319,0.058095,0.05883,0.052432,0.05357
5,0.079219,0.079726,0.067248,0.072064,0.090729,0.122763,0.067345,0.07562,0.070478,0.074767,0.073207,0.066951,0.076811
6,0.058748,0.067419,0.076391,0.078358,0.065287,0.068085,0.121428,0.071736,0.087176,0.069651,0.08556,0.096074,0.07143
7,0.069352,0.088685,0.074465,0.083904,0.074271,0.079382,0.074486,0.116944,0.077778,0.085145,0.077521,0.074624,0.083478
8,0.068442,0.074028,0.075127,0.075122,0.073164,0.073718,0.090192,0.077498,0.117367,0.069081,0.094648,0.088021,0.077241
9,0.059958,0.07198,0.066028,0.067478,0.059686,0.065142,0.060025,0.070668,0.057542,0.140902,0.054344,0.048204,0.069137


In [75]:
result_divide_temp = result_divide[0]
for x in range(13):
    result_divide_temp[x,x] =0

In [76]:
result_divide_temp

array([[0.        , 0.06377802, 0.0637879 , 0.06124612, 0.08214986,
        0.06676311, 0.04897168, 0.05567716, 0.05514476, 0.05799665,
        0.05766518, 0.05477753, 0.05668133],
       [0.07306224, 0.        , 0.06171911, 0.07852262, 0.0745936 ,
        0.07697073, 0.06438097, 0.08156225, 0.06832879, 0.07976059,
        0.06641114, 0.06218226, 0.06621246],
       [0.07721193, 0.06521444, 0.        , 0.0852364 , 0.06588512,
        0.06860081, 0.07708043, 0.07236293, 0.07326954, 0.0773083 ,
        0.07531322, 0.08228545, 0.08986159],
       [0.08118552, 0.09086002, 0.0933424 , 0.        , 0.07126584,
        0.08050554, 0.08658413, 0.08928933, 0.08023278, 0.0865201 ,
        0.08131362, 0.08841138, 0.08725703],
       [0.08266477, 0.06552292, 0.05477153, 0.05409972, 0.        ,
        0.0769425 , 0.05476393, 0.0600001 , 0.05931886, 0.05809478,
        0.05883019, 0.05243231, 0.05356966],
       [0.07921941, 0.07972577, 0.06724781, 0.07206441, 0.09072933,
        0.        , 0.06734

In [81]:
liehe = result_divide_temp.sum(axis = 0)

In [82]:
liehe.shape

(13,)

In [83]:
hanghe = result_divide_temp.sum(axis =1)

In [84]:
hanghe

array([0.72463931, 0.85370676, 0.90963016, 1.0167677 , 0.73101128,
       0.89416471, 0.89591437, 0.94309243, 0.93628095, 0.75019092,
       0.94349329, 0.87235295, 0.89969991])

In [86]:
hanghe

array([0.72463931, 0.85370676, 0.90963016, 1.0167677 , 0.73101128,
       0.89416471, 0.89591437, 0.94309243, 0.93628095, 0.75019092,
       0.94349329, 0.87235295, 0.89969991])

In [87]:
liehe

array([0.85433261, 0.87284296, 0.87965827, 0.89010894, 0.85523995,
       0.87723705, 0.8785723 , 0.88305558, 0.88263288, 0.85909832,
       0.88338579, 0.8761091 , 0.87867096])

In [88]:
print(liehe)

[0.85433261 0.87284296 0.87965827 0.89010894 0.85523995 0.87723705
 0.8785723  0.88305558 0.88263288 0.85909832 0.88338579 0.8761091
 0.87867096]


In [89]:
df_1 = pd.DataFrame(liehe)
df_1.to_csv('liehe2.csv')

In [90]:
df_2 = pd.DataFrame(hanghe)
df_2.to_csv('hanghe2.csv')