In [1]:
def dm_test(actual_lst, pred1_lst, pred2_lst, h = 1, crit="MSE", power = 2):
    # Routine for checking errors
    def error_check():
        rt = 0
        msg = ""
        # Check if h is an integer
        if (not isinstance(h, int)):
            rt = -1
            msg = "The type of the number of steps ahead (h) is not an integer."
            return (rt,msg)
        # Check the range of h
        if (h < 1):
            rt = -1
            msg = "The number of steps ahead (h) is not large enough."
            return (rt,msg)
        len_act = len(actual_lst)
        len_p1  = len(pred1_lst)
        len_p2  = len(pred2_lst)
        # Check if lengths of actual values and predicted values are equal
        if (len_act != len_p1 or len_p1 != len_p2 or len_act != len_p2):
            rt = -1
            msg = "Lengths of actual_lst, pred1_lst and pred2_lst do not match."
            return (rt,msg)
        # Check range of h
        if (h >= len_act):
            rt = -1
            msg = "The number of steps ahead is too large."
            return (rt,msg)
        # Check if criterion supported
        if (crit != "MSE" and crit != "MAPE" and crit != "MAD" and crit != "poly"):
            rt = -1
            msg = "The criterion is not supported."
            return (rt,msg)  
        # Check if every value of the input lists are numerical values
        from re import compile as re_compile
        comp = re_compile("^\d+?\.\d+?$")  
        def compiled_regex(s):
            """ Returns True is string is a number. """
            if comp.match(s) is None:
                return s.isdigit()
            return True
        for actual, pred1, pred2 in zip(actual_lst, pred1_lst, pred2_lst):
            is_actual_ok = compiled_regex(str(abs(actual)))
            is_pred1_ok = compiled_regex(str(abs(pred1)))
            is_pred2_ok = compiled_regex(str(abs(pred2)))
            if (not (is_actual_ok and is_pred1_ok and is_pred2_ok)):  
                msg = "An element in the actual_lst, pred1_lst or pred2_lst is not numeric."
                rt = -1
                return (rt,msg)
        return (rt,msg)
    
    # Error check
    error_code = error_check()
    # Raise error if cannot pass error check
    if (error_code[0] == -1):
        raise SyntaxError(error_code[1])
        return
    # Import libraries
    from scipy.stats import t
    import collections
    import pandas as pd
    import numpy as np
    
    # Initialise lists
    e1_lst = []
    e2_lst = []
    d_lst  = []
    
    # convert every value of the lists into real values
    actual_lst = pd.Series(actual_lst).apply(lambda x: float(x)).tolist()
    pred1_lst = pd.Series(pred1_lst).apply(lambda x: float(x)).tolist()
    pred2_lst = pd.Series(pred2_lst).apply(lambda x: float(x)).tolist()
    
    # Length of lists (as real numbers)
    T = float(len(actual_lst))
    
    # construct d according to crit
    if (crit == "MSE"):
        for actual,p1,p2 in zip(actual_lst,pred1_lst,pred2_lst):
            e1_lst.append((actual - p1)**2)
            e2_lst.append((actual - p2)**2)
        for e1, e2 in zip(e1_lst, e2_lst):
            d_lst.append(e1 - e2)
    elif (crit == "MAD"):
        for actual,p1,p2 in zip(actual_lst,pred1_lst,pred2_lst):
            e1_lst.append(abs(actual - p1))
            e2_lst.append(abs(actual - p2))
        for e1, e2 in zip(e1_lst, e2_lst):
            d_lst.append(e1 - e2)
    elif (crit == "MAPE"):
        for actual,p1,p2 in zip(actual_lst,pred1_lst,pred2_lst):
            e1_lst.append(abs((actual - p1)/actual))
            e2_lst.append(abs((actual - p2)/actual))
        for e1, e2 in zip(e1_lst, e2_lst):
            d_lst.append(e1 - e2)
    elif (crit == "poly"):
        for actual,p1,p2 in zip(actual_lst,pred1_lst,pred2_lst):
            e1_lst.append(((actual - p1))**(power))
            e2_lst.append(((actual - p2))**(power))
        for e1, e2 in zip(e1_lst, e2_lst):
            d_lst.append(e1 - e2)    
    
    # Mean of d        
    mean_d = pd.Series(d_lst).mean()
    
    # Find autocovariance and construct DM test statistics
    def autocovariance(Xi, N, k, Xs):
        autoCov = 0
        T = float(N)
        for i in np.arange(0, N-k):
              autoCov += ((Xi[i+k])-Xs)*(Xi[i]-Xs)
        return (1/(T))*autoCov
    gamma = []
    for lag in range(0,h):
        gamma.append(autocovariance(d_lst,len(d_lst),lag,mean_d)) # 0, 1, 2
    V_d = (gamma[0] + 2*sum(gamma[1:]))/T
    DM_stat=V_d**(-0.5)*mean_d
    harvey_adj=((T+1-2*h+h*(h-1)/T)/T)**(0.5)
    DM_stat = harvey_adj*DM_stat
    # Find p-value
    p_value = 2*t.cdf(-abs(DM_stat), df = T - 1)
    
    # Construct named tuple for return
    dm_return = collections.namedtuple('dm_return', 'DM p_value')
    
    rt = dm_return(DM = DM_stat, p_value = p_value)
    
    return rt

In [32]:
import pandas as pd
import numpy as np
import array

In [10]:
actual=pd.read_csv('C:/Users/sas/Desktop/big data analysis/actual.csv',encoding='UTF-8')
ma = pd.read_csv('C:/Users/sas/Desktop/big data analysis/method/movingaverage.csv',encoding='UTF-8')
prophet = pd.read_csv('C:/Users/sas/Desktop/big data analysis/method/prophet.csv',encoding='UTF-8')
rbf = pd.read_csv('C:/Users/sas/Desktop/big data analysis/method/supportvectorregression_RBF_Kernel.csv',encoding='UTF-8')

lstm = pd.read_csv('C:/Users/sas/Desktop/big data analysis/method/lstm.csv',encoding='cp949')

In [79]:
stocks = ['삼성전자', 'SK하이닉스', 'NAVER', '씨젠', '우리들휴브레인', '현대차', 'DGB금융지주', '미스터블루', '셀트리온', '데일리블록체인', '소리바다', '한화솔루션', '아모레퍼시픽', 'CJ대한통운', 'GS건설']

**MA vs LSTM**

In [83]:
test_statistics1 = []
pvalue1 = []
result1 = []

for i in range(0,15):
    actual_lst = actual.iloc[:,(i+1)]
    ma_lst = ma.iloc[:,i]
    lstm_lst = lstm.iloc[:,i]

    rt = dm_test(actual_lst,ma_lst,lstm_lst,h = 1, crit="MAPE")
    if rt[1] < 0.05:
        result = "귀무가설 기각"
    else:
        result = "귀무가설 채택"
        
    test_statistics1.append(rt[0])
    pvalue1.append(rt[1])
    result1.append(result)

my_dict = {"통계량값" : test_statistics1 , "P-value" : pvalue1 , "검정결과" : result1  }
pd.DataFrame(my_dict, index = stocks)

Unnamed: 0,통계량값,P-value,검정결과
삼성전자,-3.11114,0.002583372,귀무가설 기각
SK하이닉스,19.516014,3.7120420000000003e-32,귀무가설 기각
NAVER,41.56433,6.283349e-56,귀무가설 기각
씨젠,18.469187,1.3845129999999999e-30,귀무가설 기각
우리들휴브레인,12.656597,8.626462e-21,귀무가설 기각
현대차,15.02023,5.371991e-25,귀무가설 기각
DGB금융지주,7.536632,6.498088e-11,귀무가설 기각
미스터블루,9.235813,3.023595e-14,귀무가설 기각
셀트리온,7.615419,4.564167e-11,귀무가설 기각
데일리블록체인,9.652084,4.619869e-15,귀무가설 기각


**Prophet VS LSTM**

In [81]:
test_statistics2 = []
pvalue2 = []
result2 = []

for i in range(0,15):
    actual_lst = actual.iloc[:,(i+1)]
    prophet_lst = ma.iloc[:,i]
    lstm_lst = lstm.iloc[:,i]

    rt = dm_test(actual_lst,ma_lst,lstm_lst,h = 1, crit="MAPE")
    if rt[1] < 0.05:
        result = "귀무가설 기각"
    else:
        result = "귀무가설 채택"
        
    test_statistics2.append(rt[0])
    pvalue2.append(rt[1])
    result2.append(result)
    
my_dict = {"통계량값" : test_statistics2 , "P-value" : pvalue2 , "검정결과" : result2  }
pd.DataFrame(my_dict, index = stocks)

Unnamed: 0,통계량값,P-value,검정결과
삼성전자,22.518898,2.2652119999999997e-36,귀무가설 기각
SK하이닉스,163.604757,8.274696e-103,귀무가설 기각
NAVER,275.841604,6.336119000000001e-121,귀무가설 기각
씨젠,20.515778,1.31668e-33,귀무가설 기각
우리들휴브레인,70.602379,7.832037e-74,귀무가설 기각
현대차,507.345821,4.395192e-142,귀무가설 기각
DGB금융지주,242.360634,1.960269e-116,귀무가설 기각
미스터블루,89.884148,4.0635570000000003e-82,귀무가설 기각
셀트리온,351.784078,2.287987e-129,귀무가설 기각
데일리블록체인,37.261641,2.572581e-52,귀무가설 기각


**rbf vs LSTM**

In [82]:
test_statistics3 = []
pvalue3 = []
result3 = []

for i in range(0,15):
    actual_lst = actual.iloc[:,(i+1)]
    rbf_lst = ma.iloc[:,i]
    lstm_lst = lstm.iloc[:,i]

    rt = dm_test(actual_lst,ma_lst,lstm_lst,h = 1, crit="MAPE")
    if rt[1] < 0.05:
        result = "귀무가설 기각"
    else:
        result = "귀무가설 채택"
        
    test_statistics3.append(rt[0])
    pvalue3.append(rt[1])
    result3.append(result)
    
my_dict = {"통계량값" : test_statistics3 , "P-value" : pvalue3 , "검정결과" : result3  }
pd.DataFrame(my_dict, index = stocks)

Unnamed: 0,통계량값,P-value,검정결과
삼성전자,22.518898,2.2652119999999997e-36,귀무가설 기각
SK하이닉스,163.604757,8.274696e-103,귀무가설 기각
NAVER,275.841604,6.336119000000001e-121,귀무가설 기각
씨젠,20.515778,1.31668e-33,귀무가설 기각
우리들휴브레인,70.602379,7.832037e-74,귀무가설 기각
현대차,507.345821,4.395192e-142,귀무가설 기각
DGB금융지주,242.360634,1.960269e-116,귀무가설 기각
미스터블루,89.884148,4.0635570000000003e-82,귀무가설 기각
셀트리온,351.784078,2.287987e-129,귀무가설 기각
데일리블록체인,37.261641,2.572581e-52,귀무가설 기각
