In [49]:
import os
import math
import pandas as pd
import numpy as np
from scipy.optimize import minimize 

In [50]:
## initial value
def GUESS(N,p,q):
    #载荷因子参数
    beta = np.ones([N-1,1],dtype = int) #因为GDP的载荷因子被设为1，所以beta估计值少1
    theta = beta
    if p>0:
        # common factor的参数
        phi_f = np.zeros([1,p])
        vphi_f = phi_f.reshape(-1,1)
        #vphi_f = np.zeros([p,1])
        theta = np.concatenate((theta,vphi_f),axis=0)
    if q >0:
        # 异质因子的参数
        phi_u = np.zeros([N,q])
        vphi_u = phi_u.reshape(-1,1)
        theta = np.concatenate((theta,vphi_u),axis=0)
    # 共同因子的方差估计
    dsig_11 = 0.5* np.ones([1,1])
    # 异质因子的方差估计
    vsig_22 = 0.5* np.ones([N,1])
    theta = np.row_stack((theta,dsig_11,vsig_22)) #所有需要估计的参数组成一个列向量
    # print(theta.shape, theta.ndim)  # theta属性为 matrix
    return theta

In [51]:
# Dynamic Factor Model
def DFM(theta,N,p,q):
    cbeta = N-1
    cphi_f = p
    cphi_u = N*q 
    cphi = cphi_f + cphi_u 
    ctheta = len(theta)        #row
        
    beta = np.row_stack((1,theta[0:cbeta])) # 切片tehta的第1行到cbeta+1-1行
    vphi_f = [ ]
    if p>0:
        vphi_f = theta[cbeta:cbeta+cphi_f].T
    if q>0:
        element = np.array(theta[cbeta+cphi_f:cbeta+cphi_f+N])
        n = len(element)
        mphi_u = np.zeros([n,n])
        for r in range(n):
            mphi_u[r,r] = element[r]
    if q>1:
        for i in range(2,q+1):
            start_index = cbeta+cphi_f+(i-1)*N
            end_index = cbeta+cphi_f+i*N
            element = np.array(theta[start_index:end_index])
            n=len(element)
            for r in range(n):
                amphi_u_emp = np.zeros([n,n])
                amphi_u_emp[r,r] = element[r]
            mphi_u = np.hstack((mphi_u,amphi_u_emp))
    #print(mphi_u.shape)
    element = np.array(theta[cbeta+cphi])
    dsig_11 = np.fabs(np.diag(element))
    element = np.array(theta[cbeta+cphi+1:ctheta])
    msig_22 = np.fabs(np.diag(element))
    return beta,vphi_f,mphi_u,dsig_11, msig_22

In [52]:
# state-space representation
def SSR(N,N_1,p,q,beta,vphi_f,mphi_u,dsig_11,msig_22):
    N_2 = N -N_1
    cbeta = N
    cbeta_1 = N_1
    csta = 5+5*N
        
    F = np.zeros([csta,csta])
    df = pd.DataFrame(F)
    df.loc[1:4,0:3] = np.eye(4) # row1-4 column0-3 (including the ending points)
    df.loc[5+N:5+5*N-1,5:5+4*N-1] = np.identity(4*N)
    if p>0:
        df.loc[0,0:p-1]=vphi_f
    if q>0:
        F[5:5+N,5:5+(q*N)]=mphi_u 
    #print(F)
        
    G = np.zeros([csta,1+N])  
    G[0][0]=1
    df = pd.DataFrame(G)
    df.loc[5:5+N-1,1:1+N-1]=np.identity(N)
    #print(G)
        
    H_1 = [0]
    if N_1>0:
        beta1=beta[0:cbeta_1]   # 切片包含起止节点不包含终止节点
        H_1 = np.zeros([N_1,csta])
        H_1[:,0:1]=(1/3)*beta1
        H_1[:,1:2]=(2/3)*beta1
        H_1[:,2:3]=beta1
        H_1[:,3:4]=(2/3)*beta1
        H_1[:,4:5]=(1/3)*beta1
        H_1[:,5:5+N_1]=(1/3)*np.identity(N_1)
        H_1[:,5+N:5+N+N_1]=(2/3)*np.identity(N_1)
        H_1[:,5+2*N:5+2*N+N_1]=np.identity(N_1)
        H_1[:,5+3*N:5+3*N+N_1]=(2/3)*np.identity(N_1)
        H_1[:,5+4*N:5+4*N+N_1]=(1/3)*np.identity(N_1)
        
    beta2=beta[cbeta_1:cbeta]  # cebta_1 = N_1
    H_2 = np.zeros([N_2,csta])
    H_2[:,0:1]=beta2
    H_2[:,5+N-N_2:5+N]= np.identity(N_2)
    
    msig_vv = np.identity(1+N)
    msig_vv[0][0]= dsig_11[0][0]
    msig_vv[1:1+N,1:1+N]=msig_22
    return F,G,H_1,H_2,msig_vv

In [53]:
#Kalman Filter 
def KF(Y,F,G,H_1,H_2,msig_vv):
    N = Y.shape[0]
    N_1 = np.sum(np.isnan(Y[:,0]))
    cobs = Y.shape[1]
    csta = 5 + 5*N
    
    lnl = np.zeros([cobs,1])
    index_u = np.zeros([cobs,1])
    s_u = np.zeros([csta,1])
    p_u = np.zeros([csta,csta])
    
    for t in range(cobs+1):
        #prediction 
        s_p = F @ s_u
        p_p = F @ p_u @F.T + G @sig_vv @G.T
        
        #log-likelihood
        y0 = np.array(Y[:,t])
        y = y0.reshape(-1,1)
        y = np.matrix(y)
        H = H_2
        sig_ww = np.zeros([N,N])
        if N_1 >0:
            H = np.vstack((H_1,H_2))
            if np.any(np.isnan(y)):
                y[0:N_1-1+1] = np.zeros([N_1,1])
                rows, cols = H_1.shape
                H_zero = np.zeros([rows,cols])
                H = np.vstack((H_zero,H_2))
                sig_ww[0:N_1-1+1,0:N_1-1+1] = np.identity(N_1)
        e=y-H @ s_p
        sig_ee = H @ p_p @ H.T +sig_ww
        pi_value = math.pi
        det_sig_ee_value = np.linalg.det(sig_ee)
        inv_sig_ee = np.linalg.inv(sig_ee)
        lnl[t]=-(N/2)* math.log(2*pi_value)-(1/2)* math.log(det_sig_ee_value)-(1/2)* (e.T) @ inv_sig_ee @ e
    
        # updating
        gain = p_p @ H.T @ np.linalg.inv(H @ p_p @ H.T+sig_ww) 
        s_u = s_p +gain @ (y-H @ s_p)
        p_u = p_p - gain @ H @ p_p
        index_u[t,:]=s_u[0,:]
    return lnl, index_u

In [54]:
def loglikelihood(Y,theta,p,q):
    try:
        N = Y.shape[0]
        N_1 = np.sum(np.isnan(Y[:,0]))
        
        beta,vphi_f,mphi_u,dsig_11, msig_22 = DFM(theta,N,p,q)
        F,G,H_1,H_2,msig_vv = SSR(N,N_1,p,q,beta,vphi_f,mphi_u,dsig_11,msig_22)
        lnl, index_u = KF(Y,F,G,H_1,H_2,msig_vv)
        return lnl
    except Exceeption as e:
        print(f"An error occured:{e}")
        return False

In [76]:
def estimate(Y,p,q):
    N = Y.shape[0]
    cobs = Y.shape[1]

    beta,vphi_f,mphi_u,dsig_11, msig_22 = DFM(theta,N,p,q)
    F,G,H_1,H_2,msig_vv = SSR(N,N_1,p,q,beta,vphi_f,mphi_u,dsig_11,msig_22)
    lnl, index_u = KF(Y,F,G,H_1,H_2,msig_vv)
    vtheta = GUESS(N,p,q)
    result = minimize(-lnl,vtheta,method='BFGS')
    vtheta = result.x
    optimal_lnl_value = -result.fun
    

    dlnl = cobs * optimal_lnl_value 
    cpar = N+p+N*q+N
    dAIC = (dlnl - cpar)/cobs
    dBIC = (dlnl-cpar*log(cobs)/2)/cobs
    print(p,q,dlnl,dAIC,dBIC)
    
    return vtheta

In [77]:
def index_u(Y,p,q,vtheta):
    N = Y.shape[0]
    N_1 = np.sum(np.isnan(Y[:,0]))
        
    beta,vphi_f,mphi_u,dsig_11, msig_22 = DFM(theta,N,p,q)
    F,G,H_1,H_2,msig_vv = SSR(N,N_1,p,q,beta,vphi_f,mphi_u,dsig_11,msig_22)
    lnl, index_u = KF(Y,F,G,H_1,H_2,msig_vv)

    return index_u

In [78]:
def smoothing(Y,F,G,H_1,H_2,msig_vv):
    # matrix operation function 
    def vech(matrix):
        """
        提取矩阵的下三角部分（包括对角线），并将其转换为列向量。
    
        参数:
        matrix (numpy.ndarray): 输入的矩阵。
    
        返回:
        numpy.ndarray: 下三角部分的列向量表示。
        """
        n = matrix.shape[0]
        k = n * (n + 1) // 2  # 计算下三角部分的元素个数
        vech_vector = np.zeros(k)  # 初始化列向量
        count = 0  # 用于记录当前填充到列向量的位置
    
        # 遍历矩阵的下三角部分
        for i in range(n):
            for j in range(i + 1):
                vech_vector[count] = matrix[i, j]  # 将元素赋值到列向量中
                count += 1  # 更新列向量的填充位置
    
        return vech_vector
    
    def unvech(vector,n):
        mat = np.zeros([n,n])
        k = 0
        for i in range(n):
            for j in range(i+1):
                mat[i,j] = vector[k]
                k+=1
        return mat
  
    lnl = np.zeros([cobs,1])
    index_u = np.zeros([cobs,1])
    s_u = np.zeros([csta,1])
    p_u = np.zeros([csta,csta])
    
    vs_u = np.zeros([csta,1])
    mp_u = np.zeros([csta,csta])
    ms_p = np.zeros([csta,cobs])
    mvechP_p = np.zeros([csta*(csta+1)//2,cobs])
    
    for t in range(cobs):
        #prediction 
        vs_p = F @ vs_u
        mp_p = F @ mp_u @F.T + G @sig_vv @G.T
        ms_p[:,t]= vs_p.flatten()
        
        mvechP_p[:,t]= vech(mp_p)
        
        #updating 
        y = np.matrix(Y[:,t])
        y = y.reshape(-1,1)
        H = H_2
        sig_ww = np.zeros([N,N])
        if N_1 >0:
            H = np.vstack((H_1,H_2))
            if np.any(np.isnan(y)):
                y[0:N_1] = np.zeros([N_1,1])
                rows, cols = H_1.shape
                H_zero = np.zeros([rows,cols])
                H = np.vstack((H_zero,H_2))
                sig_ww[0:N_1,0:N_1] = np.identity(N_1)
    
        gain = mp_p @ H.T @ np.linalg.inv(H @ mp_p @ H.T+sig_ww) 
        vs_u = vs_p +gain @ (y-H @ vs_p)
        mp_u = mp_p - gain @ H @ mp_p
    
    #smoothing
    ms_s = np.zeros([csta,cobs])
    vr = np.zeros([csta,1])
    for t in range(cobs-1,-1,-1):
    
        vs_p = ms_p[:,t]
        n=mp_p.shape[0]
        mp_p = unvech(mvechP_p[:,t],n) ###### operation function definition 
    
        y=np.matrix(Y[:,t])
        y = y.reshape(-1,1)
        H = H_2
        sig_ww = np.zeros([N,N])
        if N_1>0:
            H = np.vstack((H_1,H_2))
            if np.any(np.isnan(y)):
                y[0:N_1] = np.zeros([N_1,1])
                rows, cols = H_1.shape
                H_zero = np.zeros([rows,cols])
                H = np.vstack((H_zero,H_2))
                sig_ww[0:N_1,0:N_1] = np.identity(N_1)
        gain = mp_p @ H.T @ np.linalg.inv(H @ mp_p @ H.T+sig_ww) 
        vr= H.T * np.linalg.inv(H @ mp_p @ H.T+sig_ww) *(y-H@s_p)*(y-H@vs_p)+(np.identity(csta)-H.T@gain)@F.T@vr
        ms_s[:,t]=vs_p + mp_p@vr
    return ms_s

In [79]:
def index_s(Y,p,q,vtheta):
    N = Y.shape[0]
    N_1 = np.sum(np.isnan(Y[:,0]))
        
    beta,vphi_f,mphi_u,dsig_11, msig_22 = DFM(vtheta,N,p,q)
    F,G,H_1,H_2,msig_vv = SSR(N,N_1,p,q,beta,vphi_f,mphi_u,dsig_11,msig_22)
    ms_s = smoothing(Y,F,G,H_1,H_2,msig_vv)
    ms_s0 = ms_s[0,:].T

    return ms_s0

In [80]:
class MFLA:
    def __init__(self):
        pass

    def MFLA_GUESS(self,N,p,q):
        return theta

    def MFLA_DFM(self,theta,N,p,q):
        return beta,vphi_f,mphi_u,dsig_11, msig_22
    
    def MFLA_SSR(self,N,N_1,p,q,beta,vphi_f,mphi_u,dsig_11,msig_22):
        return F,G,H_1,H_2,msig_vv
    
    def MFLA_KF(self,Y,F,G,H_1,H_2,msig_vv):
         return lnl, index_u
    
    def MFLA_loglikelihood(self,Y,theta,p,q):
        return lnl
    
    def MFLA_estimate(self,Y,p,q):
        return vtheta
    
    def MFLA_index_u(self,Y,p,q,vtheta):
        return index_u
    
    def MFLA_smoothing(self,Y,F,G,H_1,H_2,msig_vv):
        return ms_s
    
    def MFLA_index_s(self,Y,p,q,vtheta):
        return ms_s0

In [81]:
data= pd.read_excel('F:\MixedData\MM03\mm-data\BCIQ1M4.xlsx',index_col=False) 
df=pd.DataFrame(data)
data= df.drop(labels={'Unnamed: 0'},axis=1)
df=pd.DataFrame(data)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 503 entries, 0 to 502
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Var1    167 non-null    float64
 1   Var2    503 non-null    float64
 2   Var3    503 non-null    float64
 3   Var4    503 non-null    float64
 4   Var5    503 non-null    float64
dtypes: float64(5)
memory usage: 19.8 KB


  data= pd.read_excel('F:\MixedData\MM03\mm-data\BCIQ1M4.xlsx',index_col=False)


In [82]:
df.head

<bound method NDFrame.head of          Var1      Var2      Var3      Var4      Var5
0         NaN  0.120633  0.250161  1.652600  1.121523
1         NaN  0.413237  0.584695  1.145076  0.965076
2         NaN  0.285547  0.430699  1.822943  0.918743
3         NaN  0.360128  0.507179  1.206764  1.067494
4    1.735812  0.118588  0.061138 -0.169616  0.038774
..        ...       ...       ...       ...       ...
498       NaN -0.231215  0.086230 -0.171440  0.036163
499 -0.511831 -0.025795 -0.109271 -0.200173 -0.588950
500       NaN -0.109253  0.213255 -0.668158 -0.398875
501       NaN -0.082846  0.151029 -0.612676 -0.690411
502 -0.372030 -0.117695  0.118659 -0.735628  0.394413

[503 rows x 5 columns]>

In [83]:
Y = df.values 
N = Y.shape[0]
N_1 = np.sum(np.isnan(Y[:,0]))
p=1
q=2

In [84]:
vtheta = MFLA_estimate(Y,p,q)
vindex_u = MFLA_index_u(Y,p,q,vtheta)
vindex_s = MFLA_index_s(Y,p,q,vtheta)

print (vtheta,vindex_u,vindex_s)

NameError: name 'MFLA_estimate' is not defined