In [24]:
# %load gundong_class.py
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
import matplotlib.dates as mdate
from numpy import *
from matplotlib.pylab import rcParams
plt.rcParams['axes.unicode_minus']=False
rcParams['font.sans-serif'] = 'kaiti'

from statsmodels.tsa.stattools import adfuller
import statsmodels.api as sm
import statsmodels.stats.diagnostic
from statsmodels.tsa.api import VAR


class gundong():
    def __init__(self, data, gundong_time, k_lag):
        self.row = data.shape[0] # 行长度
        self.column = data.shape[1] # 列长度
        self.data = data
        self.gundong_time = gundong_time # 滚动选择的时间
        self.k_lag = k_lag
        self.save_data_coef = np.zeros((self.column, self.k_lag*self.column, self.row-self.gundong_time+1))
        self.save_data_cov = np.zeros((self.column, self.column, self.row-self.gundong_time+1))
        self.save_data_result = np.zeros((self.column, self.column, self.row-self.gundong_time+1))

        
    
    def VAR(self):
        '''
        实现滚动计算 k-lag 的 VAR 模型
        并且保存矩阵的系数以及相关系数矩阵
        实现了 k-lag>1 时的向量值回归模型
        '''
        for i in range(self.gundong_time, self.row+1,1):
            datai = self.data.iloc[i-self.gundong_time:i,:]
            model = VAR(datai)
            # 滞后 k_lag 个单位计算
            results = model.fit(self.k_lag)
            coef = results.params
            self.save_data_coef[:,:,i-self.gundong_time]= coef.iloc[1:1+self.k_lag*self.column,:].T
            self.save_data_cov[:,:,i-self.gundong_time] = results.sigma_u


    def calculate_A(self, h, coef):
        '''
        从小到大矩阵迭代
        '''
        A_h = mat(np.zeros((self.k_lag*self.column, self.column)))
        A_h[0:self.column, :] = mat(np.identity(self.column))
        matrix_identity = mat(np.zeros((self.k_lag*self.column, self.column)))
        matrix_identity[0:self.column, :] = mat(np.identity(self.column))
        matrix_left = matrix_identity*coef
        matrix_right = mat(np.zeros((self.k_lag*self.column, self.k_lag*self.column)))
        for j in range(1,self.k_lag):
            matrix_right[j*self.column:(j+1)*self.column, (j-1)*self.column:j*self.column] = mat(np.identity(self.column))
        matrix_multiple = matrix_left+matrix_right
        for i in range(h):
            A_h = matrix_multiple*A_h
        return A_h[0:self.column, :]


    def cal_overflow(self, predict_time):
        '''
        适用于不同 k_lag 的向量自回归模型

        predict_time 代表动态预测的天数

        ??? 循环可做矩阵运算优化
        '''
        self.predict_time = predict_time
        for n in range(self.row-self.gundong_time+1):
            coef_data = self.save_data_coef[:,:,n]
            Covariance_mat = self.save_data_cov[:,:,n]
            for i in range(self.column):
                ei = mat(eye(self.column, self.column, dtype=int))[:,i]
                for j in range(self.column):
                    sum_top = 0
                    sum_bottom = 0
                    sigma_jj = Covariance_mat[j,j]
                    ej = mat(eye(self.column, self.column, dtype=int))[:,j]
                    for h in range(self.predict_time):
                        A_h = self.calculate_A(h, coef=coef_data)
                        sum_bottom += ei.T*A_h*Covariance_mat*A_h.T*ei
                        W = ei.T * A_h * Covariance_mat*ej
                        sum_top += 1/sigma_jj * W * W
                    result = sum_top/sum_bottom
                    self.save_data_result[i,j,n] = result
        



    def standard_overflow(self):
        '''
        计算溢出指数的比重

        ??? 循环可做矩阵运算优化
        '''
        for i in range(self.row-self.gundong_time+1):
            sum_line = np.sum(self.save_data_result[:,:,i], axis=1)
            self.save_data_result[:,:,i] = np.divide(self.save_data_result[:,:,i], sum_line)

                
    def save_data(self, path):
        np.save(path+'save_data_coef',self.save_data_coef)
        np.save(path+'save_data_cov',self.save_data_cov)
        np.save(path+'save_data_result',self.save_data_result)
    

    def overflow_matrix(self):
        '''
        对角为 0 的滚动矩阵
        '''
        self.gundongdata = np.zeros(self.row-self.gundong_time+1)
        for i in range(self.row-self.gundong_time+1):
            xishu_i = self.ave_data_result[:,:,i]
            for x in range(self.column):#对角变为0，求非对角元素
                xishu_i[x,x] = 0
            liehe = xishu_i.sum(axis = 0)#列和
            ave = liehe.mean()
            self.gundongdata[i] = ave
        np.save("save_data_result1",self.gundongdata)
    
    def final_data(self):
        '''
        '''
        self.gundongdata1 = pd.DataFrame(columns = ['values'])
        self.gundongdata1['values'] = self.gundongdata[0:self.row-self.gundong_time+1-self.predict_time]
        self.gundongdata1.index = pd.to_datetime(self.data.index[self.gundong_time+self.predict_time-1:])

    def plot_industry(self):
        '''
        '''
        pass


## 标准处理过程

In [3]:
file_path='./now/zong.csv'
data = pd.read_csv(file_path, index_col = 0)

In [25]:
gundong_data = gundong(data, gundong_time=360, k_lag=3)

### 滚动计算VAR模型

In [26]:
gundong_data.VAR()











































In [None]:
# 处理报错信息
# No frequency information was provided, so inferred frequency B will be used.
# 查找文档

In [27]:
gundong_data.cal_overflow(predict_time=10)

In [30]:
gundong_data.save_data(path='./result_data/')

In [10]:
coef = gundong_data.save_data_coef[:,:,1]
column = 11
k_lag = 3


A_h = mat(np.zeros((k_lag*column, column)))
A_h[0:column,:] = mat(np.identity(column))
matrix_identity = mat(np.zeros((k_lag*column, column)))
matrix_identity[0:column, :] = mat(np.identity(column))
matrix_left = matrix_identity*coef
matrix_right = mat(np.zeros((k_lag*column, k_lag*column)))
for j in range(1,k_lag):
    matrix_right[j*column:(j+1)*column, (j-1)*column:j*column] = mat(np.identity(column))
matrix_multiple = matrix_left+matrix_right
for i in range(h):
    A_h = matrix_multiple*A_h

ValueError: shapes (33,11) and (33,11) not aligned: 11 (dim 1) != 33 (dim 0)

In [11]:
A_h = mat(np.zeros((k_lag*column, column)))
A_h[0:column,:] = mat(np.identity(column))
A_h.shape

(33, 11)

In [21]:
def calculate_A(h, coef, column, k_lag):
    '''
    从小到大矩阵迭代
    '''
    A_h = mat(np.zeros((k_lag*column, column)))
    A_h[0:column,0:column] = mat(np.identity(column))
    matrix_identity = mat(np.zeros((k_lag*column, column)))
    matrix_identity[0:column,0:column] = mat(np.identity(column))
    matrix_left = matrix_identity*coef
    matrix_right = mat(np.zeros((k_lag*column, k_lag*column)))
    for j in range(1,k_lag):
        matrix_right[j*column:(j+1)*column, (j-1)*column:j*column] = mat(np.identity(column))
    matrix_multiple = matrix_left+matrix_right
    for i in range(h):
        A_h = matrix_multiple*A_h
    return A_h[0:column,0:column]

calculate_A(10, data_1, 11, 3)

ValueError: shape too large to be a matrix.

In [20]:
column = 4
k_lag = 3
matrix_right = mat(np.zeros((k_lag*column, k_lag*column)))
for j in range(1,k_lag):
    matrix_right[j*column:(j+1)*column, (j-1)*column:j*column] = mat(np.identity(column))
print(matrix_right)

[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]]


In [None]:
for i in range

In [7]:
data_1[:,:,0]

array([[ 2.43942863e-01,  1.03080796e-01,  1.67031703e-02,
         5.94420937e-02,  1.24038916e-01, -9.14926696e-02,
         1.62302062e-01,  3.62636116e-01,  1.07294894e-02,
         1.09550023e-02, -2.18468114e-03],
       [ 7.43991885e-02,  5.43425353e-01,  4.47058268e-03,
         2.00040928e-02,  9.57530372e-02,  3.37531467e-01,
         5.85417643e-02,  4.75838580e-01, -3.91606320e-02,
        -2.11674689e-02, -9.92692912e-03],
       [-1.64131083e-02,  9.76760020e-04,  4.24858920e-01,
        -3.41482310e-02,  1.92444547e-02,  7.93198630e-02,
         1.35198899e-01, -2.51824264e-02, -1.26374867e-02,
        -8.71378282e-03, -5.40945545e-03],
       [ 3.16933869e-02,  8.33926611e-02,  9.68249515e-02,
         1.60207152e-01,  1.08333977e-01,  4.02689289e-01,
         1.92722689e-01,  1.96521530e-01,  3.22120201e-02,
        -1.45463644e-02,  9.48990328e-03],
       [ 1.05096144e-01,  6.29262881e-02,  3.17984748e-02,
         2.30942898e-02,  2.63542826e-01,  3.73679789e-01,
  

### 计算动态预测天数

In [84]:
for i in range(1):
    print(i)

0


In [None]:
help(VAR)

In [43]:
import pandas as pd
mdata = sm.datasets.macrodata.load_pandas().data
dates = mdata[['year', 'quarter']].astype(int).astype(str)
quarterly = dates["year"] + "Q" + dates["quarter"]
from statsmodels.tsa.base.datetools import dates_from_str
quarterly = dates_from_str(quarterly)
mdata = mdata[['realgdp','realcons','realinv']]
mdata.index = pd.DatetimeIndex(quarterly)
data = np.log(mdata).diff().dropna()
model = VAR(data)



In [44]:
result = model.fit(2)

In [52]:
print(result.coefs)
print(result.coefs[0])
print(result.coefs[1])

[[[-0.27943474  0.67501575  0.03321945]
  [-0.10046798  0.26863955  0.02573873]
  [-1.97097367  4.41416233  0.22547895]]

 [[ 0.00822108  0.29045763 -0.00732091]
  [-0.12317393  0.23249944  0.02350376]
  [ 0.38078585  0.80028092 -0.12407906]]]
[[-0.27943474  0.67501575  0.03321945]
 [-0.10046798  0.26863955  0.02573873]
 [-1.97097367  4.41416233  0.22547895]]
[[ 0.00822108  0.29045763 -0.00732091]
 [-0.12317393  0.23249944  0.02350376]
 [ 0.38078585  0.80028092 -0.12407906]]


In [59]:
np.vstack((result.coefs[0], result.coefs[1]))

array([[-0.27943474,  0.67501575,  0.03321945],
       [-0.10046798,  0.26863955,  0.02573873],
       [-1.97097367,  4.41416233,  0.22547895],
       [ 0.00822108,  0.29045763, -0.00732091],
       [-0.12317393,  0.23249944,  0.02350376],
       [ 0.38078585,  0.80028092, -0.12407906]])

In [58]:
np.reshape(result.coefs,(-1,3))

array([[-0.27943474,  0.67501575,  0.03321945],
       [-0.10046798,  0.26863955,  0.02573873],
       [-1.97097367,  4.41416233,  0.22547895],
       [ 0.00822108,  0.29045763, -0.00732091],
       [-0.12317393,  0.23249944,  0.02350376],
       [ 0.38078585,  0.80028092, -0.12407906]])

In [47]:
result.params

Unnamed: 0,realgdp,realcons,realinv
const,0.001527,0.00546,-0.023903
L1.realgdp,-0.279435,-0.100468,-1.970974
L1.realcons,0.675016,0.26864,4.414162
L1.realinv,0.033219,0.025739,0.225479
L2.realgdp,0.008221,-0.123174,0.380786
L2.realcons,0.290458,0.232499,0.800281
L2.realinv,-0.007321,0.023504,-0.124079


In [48]:
result.acorr()

array([[[1.        , 0.6593946 , 0.81503441],
        [0.6593946 , 1.        , 0.27693827],
        [0.81503441, 0.27693827, 1.        ]],

       [[0.32126369, 0.46421875, 0.21091837],
        [0.28242651, 0.29600049, 0.24063122],
        [0.30193966, 0.50553871, 0.16805264]],

       [[0.24469142, 0.34502873, 0.13211172],
        [0.21382485, 0.28185737, 0.16217926],
        [0.18543602, 0.2705785 , 0.08642157]]])