In [1]:
import collections
import random
import pandas as pd
import math
import torch
from torch import nn
import numpy as np
import matplotlib.pyplot as plt
import os
import datetime
from sklearn.linear_model import LinearRegression
import cvxpy as cp
from joblib import Parallel, delayed

In [2]:
def MAE(pred, true):
    """Mean absolute error between predictions and targets."""
    absolute_error = np.abs(pred - true)
    return np.mean(absolute_error)

def MSE(pred, true):
    """Mean squared error between predictions and targets."""
    residual = pred - true
    return np.mean(residual ** 2)

def RMSE(pred, true):
    """Root mean squared error: the square root of ``MSE(pred, true)``."""
    mean_squared = MSE(pred, true)
    return np.sqrt(mean_squared)

def MAPE(pred, true):
    """Mean absolute percentage error, returned as a fraction (not multiplied by 100)."""
    relative_error = (pred - true) / true
    return np.mean(np.abs(relative_error))

def MSPE(pred, true):
    """Mean squared percentage error, returned as a fraction (not multiplied by 100)."""
    relative_error = (pred - true) / true
    return np.mean(np.square(relative_error))

def DA(pred, true):
    """Directional accuracy of a one-step-ahead forecast.

    For every step t >= 1 the predicted move ``pred[t] - true[t-1]`` is compared
    with the realised move ``true[t] - true[t-1]``; a step counts as a hit when
    both moves have the same strict sign (their product is positive).

    Args:
        pred: 1-D array of predictions, aligned with ``true``.
        true: 1-D array of observed values.

    Returns:
        Fraction of steps (out of ``len(true) - 1``) whose direction was predicted
        correctly.
    """
    predicted_move = pred[1:] - true[:-1]
    # The realised move must be measured from the previous *observation*; the
    # earlier code subtracted the previous prediction here, which mixed forecast
    # error into the reference direction.
    actual_move = true[1:] - true[:-1]
    hits = predicted_move * actual_move > 0
    return hits.mean()

def metric(pred, true):
    """Evaluate every metric defined above on one prediction/target pair.

    Returns:
        Tuple ``(mae, mse, rmse, mape, mspe, da)``.
    """
    scorers = (MAE, MSE, RMSE, MAPE, MSPE, DA)
    return tuple(score(pred, true) for score in scorers)

In [3]:
def _add_weather_features(frame):
    """Return a copy of ``frame`` with relative humidity and polynomial weather terms.

    Expects ``drybulb`` and ``dewpnt`` columns. Both are converted with
    ``(x - 32) / 1.8`` (Fahrenheit to Celsius) before being fed to the
    exponential ratio with constants 17.27 and 237.3 that defines ``RH``.
    The ``dewpnt`` column is dropped; ``RH``, ``drybulb^2``, ``drybulb^3``,
    ``RH^2`` and ``RH^3`` are appended in that order.
    """
    drybulb_c = (frame['drybulb'] - 32) / 1.8
    dewpnt_c = (frame['dewpnt'] - 32) / 1.8

    out = frame.drop(columns=['dewpnt'])
    out['RH'] = 100 * (np.exp((17.27 * dewpnt_c) / (dewpnt_c + 237.3))
                       / np.exp((17.27 * drybulb_c) / (drybulb_c + 237.3)))

    out['drybulb^2'] = out['drybulb'] ** 2
    out['drybulb^3'] = out['drybulb'] ** 3

    out['RH^2'] = out['RH'] ** 2
    out['RH^3'] = out['RH'] ** 3
    return out


# ---- First dataset: gefcom2017.csv, restricted to [2008-01-01, 2017-01-01) ----
root_path = r'datasets/'
data_path = r'gefcom2017.csv'
df = pd.read_csv(os.path.join(root_path, data_path), encoding='gb18030')
df['trend'] = range(1, len(df) + 1)
# Copy the column subset so the assignments below work on an independent frame.
df = df[['ts', 'trend', 'zone', 'demand', 'drybulb', 'dewpnt']].copy()
df['ts'] = pd.to_datetime(df['ts'])

df = _add_weather_features(df)

# Calendar features: month is zero-based, week is the weekday (Monday = 0).
df['month'] = df['ts'].dt.month - 1
df['week'] = df['ts'].dt.weekday
df['hour'] = df['ts'].dt.hour

start = pd.Timestamp(2008, 1, 1)
end = pd.Timestamp(2017, 1, 1)
df = df.loc[(df['ts'] >= start) & (df['ts'] < end)]

# ---- Second dataset: ISONE2017-2024.csv ----
data_path = r'datasets/ISONE2017-2024.csv'

df2 = pd.read_csv(data_path, encoding='gb18030')
# Parsed element by element (rather than with pd.to_datetime) as in the original cell.
df2['ts'] = df2['ts'].apply(lambda x: pd.Timestamp(x))

df2 = _add_weather_features(df2)

# NOTE(review): 'month', 'week' and 'hour' are not derived here, so they are
# presumably already columns of the CSV -- confirm they use the same encoding
# as df (zero-based month, Monday = 0).
df2 = df2[['ts', 'zone', 'demand', 'drybulb', 'RH', 'drybulb^2', 'drybulb^3', 'RH^2', 'RH^3', 'month', 'week', 'hour']]

In [4]:

def _lag_features(frame, base, num_hour, num_day):
    """Build the lagged columns for one weather variable.

    Args:
        frame: DataFrame holding ``base``, ``base^2`` and ``base^3`` columns, one row per time step.
        base: Column name of the variable (``'drybulb'`` or ``'RH'``).
        num_hour: Number of single-step lags (1..num_hour) of the three polynomial columns.
        num_day: Number of 24-step blocks; block ``k`` is the mean of the 24 values
            shifted by ``24*k + 1`` .. ``24*k + 24`` rows, plus its square and cube.

    Returns:
        Dict mapping new column name -> Series, in the order the columns must be appended.
    """
    features = {}
    for lag in range(1, num_hour + 1):
        for name in (base, base + '^2', base + '^3'):
            features['{}_lag{}'.format(name, lag)] = frame[name].shift(axis=0, periods=lag)

    for k in range(num_day):
        total = 0
        for j in range(24):
            total = total + frame[base].shift(axis=0, periods=24 * k + j + 1)
        block_mean = total / 24

        features['{}^a_lag{}'.format(base, k)] = block_mean
        features['{}^a^2_lag{}'.format(base, k)] = block_mean ** 2
        features['{}^a^3_lag{}'.format(base, k)] = block_mean ** 3
    return features


def get_zone_train(df1, df2, zone, train_start, train_end, valid_start, valid_end, test_start, test_end, log=False, num_hour = 3, num_day = 1, num_hour_RH = 3, num_day_RH = 1, RH = False):
    """Assemble train/validation/test design matrices for one load zone.

    Rows of ``zone`` are taken from ``df1`` and, unless ``zone == 'MASS'``, also
    from ``df2`` (appended after ``df1``). The data is cut to
    ``[train_start - 240 days, test_end)`` so that lag features have history,
    a fresh 1-based ``trend`` counter is assigned, ``month``/``hour``/``week``
    are one-hot encoded and lagged temperature (optionally humidity) features
    are appended. Lags are row shifts, so rows are assumed to be consecutive,
    time-ordered observations.

    Args:
        df1, df2: Frames with columns ``ts``, ``zone``, ``demand``, ``drybulb``, ``RH``,
            ``drybulb^2``, ``drybulb^3``, ``RH^2``, ``RH^3``, ``month``, ``week``, ``hour``.
        zone: Zone label to select.
        train_start, train_end, valid_start, valid_end, test_start, test_end:
            Half-open ``[start, end)`` timestamp bounds of the three splits.
        log: Print shapes and split boundaries when true.
        num_hour, num_day: Number of hourly lags / 24-row mean blocks for ``drybulb``.
        num_hour_RH, num_day_RH: Same for ``RH`` (only used when ``RH`` is true).
        RH: Keep the humidity columns and add their lags when true.

    Returns:
        ``X_train, y_train, X_valid, y_valid, X_test, y_test`` as numpy arrays;
        the ``X`` arrays hold every column except ``ts`` and ``demand`` (first
        column is ``trend``), the ``y`` arrays hold ``demand``.
    """
    if zone != 'MASS':
        df_ct = pd.concat([df1.groupby('zone').get_group(zone),
                           df2.groupby('zone').get_group(zone)])
    else:
        df_ct = df1.groupby('zone').get_group(zone)

    df_ct = df_ct.drop(columns=['zone'])
    warmup_start = train_start - datetime.timedelta(days=240)
    df_ct = df_ct.loc[(df_ct['ts'] >= warmup_start) & (df_ct['ts'] < test_end)]
    # The concatenation above can leave duplicate index labels; a clean positional
    # index keeps the column-wise concat below unambiguous. Only .values leave
    # this function, so the index itself is never observed by callers.
    df_ct = df_ct.reset_index(drop=True)

    df_ct['trend'] = range(1, len(df_ct) + 1)
    df_ct = df_ct[['trend', 'ts' , 'demand', 'drybulb', 'RH', 'drybulb^2', 'drybulb^3', 'RH^2', 'RH^3', 'month', 'week', 'hour']]

    if not RH:
        df_ct = df_ct.drop(columns=['RH', 'RH^2', 'RH^3'])

    df_ct = pd.get_dummies(df_ct, columns = ['month', 'hour', 'week'])

    # Collect every lagged column first and attach them in one concat: inserting
    # them one by one fragments the frame (pandas PerformanceWarning).
    lagged = _lag_features(df_ct, 'drybulb', num_hour, num_day)
    if RH:
        lagged.update(_lag_features(df_ct, 'RH', num_hour_RH, num_day_RH))
    if lagged:
        df_ct = pd.concat([df_ct, pd.DataFrame(lagged, index=df_ct.index)], axis=1)

    ts = df_ct['ts']
    train_data = df_ct.loc[(ts >= train_start) & (ts < train_end)]
    valid_data = df_ct.loc[(ts >= valid_start) & (ts < valid_end)]
    test_data = df_ct.loc[(ts >= test_start) & (ts < test_end)]

    if log:
        print("Total Set, (Nums, Variables):", df_ct.shape)

    def split_xy(part):
        # Features are everything but the timestamp and the target.
        return part.drop(columns=['ts', 'demand']).values.copy(), part['demand'].values.copy()

    X_train, y_train = split_xy(train_data)
    X_valid, y_valid = split_xy(valid_data)
    X_test, y_test = split_xy(test_data)

    if log:
        print("zone: {}, \n\t Train start: {}, \n\t Train end: {}".format(zone, train_start, train_end))
        print("zone: {}, \n\t Valid start: {}, \n\t Valid end: {}".format(zone, valid_start, valid_end))
        print("zone: {}, \n\t Test start: {}, \n\t Test end: {}".format(zone, test_start, test_end))
        print("Train set:", X_train.shape)
        print("Valid set:", X_valid.shape)
        print("Test set:", X_test.shape)

    return X_train, y_train, X_valid, y_valid, X_test, y_test

In [5]:
class AltPoly():
    """Polynomial regressor (orders 1 to 4) fitted by alternating conjugate-gradient sweeps.

    For a design matrix ``X`` the prediction is (see ``net``)::

        f(X) = X @ w11 + w00
             + s(w21) * s(w22)
             + s(w31) * s(w32) * s(w33)
             + s(w41) * s(w42) * s(w43) * s(w44)

    with ``s(w) = (X[:, 1:] @ w).sum(axis=1)``: the first column of ``X`` only
    enters the linear term, and when a factor has several columns they are
    summed before the product is taken.

    The prediction is linear in all weights that occupy the same factor
    position ("slot"), so ``optimize`` cycles over four blocks
    (slot 1: w11, w00, w21, w31, w41; slot 2: w22, w32, w42; slot 3: w33, w43;
    slot 4: w44) and minimises the training MSE over each block with
    conjugate-gradient steps while the other blocks stay fixed.

    Args:
        n4, n3, n2: Number of columns of every factor of the order-4/3/2 term.
        n1: Stored but not used by the code in this class (w11 always has one column).
        device: ``'cuda'`` or ``'cpu'`` (an index suffix such as ``'cuda:0'`` is accepted).

    Raises:
        ValueError: If ``device`` is neither a cuda nor a cpu device.
    """

    # Orders of the product terms, in the order their blocks are processed.
    _ORDERS = (2, 3, 4)

    def __init__(self, n4=1, n3=1, n2=1 ,n1=1, device = 'cuda'):
        # Model sizes.
        self.n1 = n1
        self.n2 = n2
        self.n3 = n3
        self.n4 = n4
        # Validate the device. The earlier test `device == ('cuda') or ('cpu')`
        # was always truthy, so an invalid name was never rejected.
        device_type = str(device).split(':')[0]
        if device_type in ('cuda', 'cpu'):
            print('you are using:',device)
        else:
            raise ValueError('please select the correct device name, "cuda" or "cpu"')
        self.device = device

    def initial_input(self, X_train, y_train, X_test, y_test):
        """Store the data: features as tensors on the device, targets also as given.

        ``X_test``/``y_test`` are the held-out set used for reporting and, when
        ``optimize(valid=True)``, for early stopping.
        """
        self.n = X_train.shape[0] # number of training samples
        self.m = X_train.shape[1] # number of features
        self.n_test = X_test.shape[0] # number of held-out samples
        self.X = torch.tensor(X_train).to(torch.device(self.device))
        self.y = torch.tensor(y_train).to(torch.device(self.device))
        self.X_test = torch.tensor(X_test).to(torch.device(self.device))

        # Targets kept in their original (numpy) form for the metric functions.
        self.y_test = y_test
        self.y_train = y_train

    def initial_param(self):
        """Draw all weights from N(0, 1) (float64); the bias ``w00`` starts at zero.

        The draw order (w41..w44, w31..w33, w21, w22, w11) is fixed so a given
        torch seed always yields the same initialisation.
        """
        std = 1
        dev = torch.device(self.device)

        def draw(rows, cols):
            return torch.normal(0, std, (rows, cols), device = dev, dtype = torch.float64)

        self.w41 = draw(self.m - 1, self.n4)
        self.w42 = draw(self.m - 1, self.n4)
        self.w43 = draw(self.m - 1, self.n4)
        self.w44 = draw(self.m - 1, self.n4)

        self.w31 = draw(self.m - 1, self.n3)
        self.w32 = draw(self.m - 1, self.n3)
        self.w33 = draw(self.m - 1, self.n3)

        self.w21 = draw(self.m - 1, self.n2)
        self.w22 = draw(self.m - 1, self.n2)

        self.w11 = draw(self.m, 1)
        self.w00 = torch.zeros(1, device = dev, dtype=torch.float64)

    def net4(self, w1, w2, w3, w4, X):
        """Order-4 term: returns the per-row product and its four (n, 1) factors."""
        tem1 = ( X[:,1:] @ w1 ).sum(axis = 1).reshape(-1, 1)
        tem2 = ( X[:,1:] @ w2 ).sum(axis = 1).reshape(-1, 1)
        tem3 = ( X[:,1:] @ w3 ).sum(axis = 1).reshape(-1, 1)
        tem4 = ( X[:,1:] @ w4 ).sum(axis = 1).reshape(-1, 1)
        tem = ( tem1 * tem2 * tem3 * tem4 ).sum(axis = 1).reshape(-1)
        return tem, tem1, tem2, tem3, tem4

    def net3(self, w1, w2, w3, X):
        """Order-3 term: returns the per-row product and its three (n, 1) factors."""
        tem1 = ( X[:,1:] @ w1 ).sum(axis = 1).reshape(-1, 1)
        tem2 = ( X[:,1:] @ w2 ).sum(axis = 1).reshape(-1, 1)
        tem3 = ( X[:,1:] @ w3 ).sum(axis = 1).reshape(-1, 1)
        tem = ( tem1 * tem2 * tem3).reshape(-1)
        return tem, tem1, tem2, tem3

    def net2(self, w1, w2, X):
        """Order-2 term: returns the per-row product and its two (n, 1) factors."""
        tem1 = ( X[:,1:] @ w1 ).sum(axis = 1).reshape(-1, 1)
        tem2 = ( X[:,1:] @ w2 ).sum(axis = 1).reshape(-1, 1)
        tem = ( tem1 * tem2).reshape(-1)
        return tem, tem1, tem2

    def net1(self, w11, w00, X):
        """Linear term ``X @ w11 + w00`` (uses every column of ``X``)."""
        X11 = ( X @ w11 ).reshape(-1)
        return X11 + w00

    def net(self, w41, w42, w43, w44, w31, w32, w33, w21, w22, w11, w00, X):
        """Full prediction: sum of the order-4, order-3, order-2 and linear terms."""
        return self.net4(w41, w42, w43, w44, X)[0] + self.net3(w31, w32, w33, X)[0] + self.net2(w21, w22, X)[0] + self.net1(w11, w00, X)

    # ------------------------------------------------------------------
    # Helpers for the alternating optimisation
    # ------------------------------------------------------------------
    def _weight(self, order, slot):
        """Weight tensor of factor ``slot`` in the product term of the given order."""
        return getattr(self, 'w{}{}'.format(order, slot))

    def _block_params(self, slot):
        """Tensors updated together in the block for ``slot`` (slot 1 also owns w11 and w00)."""
        params = [self.w11, self.w00] if slot == 1 else []
        params.extend(self._weight(order, slot) for order in self._ORDERS if order >= slot)
        return params

    def _forward_train(self):
        """Training prediction plus the factor columns of every product term."""
        f4, f41, f42, f43, f44 = self.net4(self.w41, self.w42, self.w43, self.w44, self.X)
        f3, f31, f32, f33 = self.net3(self.w31, self.w32, self.w33, self.X)
        f2, f21, f22 = self.net2(self.w21, self.w22, self.X)
        f1 = self.net1(self.w11, self.w00, self.X)
        f = (f1 + f2 + f3 + f4)
        return f, {2: (f21, f22), 3: (f31, f32, f33), 4: (f41, f42, f43, f44)}

    def _block_gradients(self, slot):
        """Gradients of the training MSE w.r.t. the block for ``slot``, ordered like ``_block_params``."""
        f, factors = self._forward_train()
        scaled_residual = 2 * (f - self.y)
        column = scaled_residual.reshape(-1, 1)

        grads = []
        if slot == 1:
            grads.append(( column * self.X ).sum(dim = 0).reshape(-1, 1) / ( self.n ))
            grads.append(scaled_residual.sum() / ( self.n ))
        for order in self._ORDERS:
            if order < slot:
                continue
            # d(term)/d(w_slot) is the product of the *other* factors times X[:, 1:].
            weighted = column
            for position, factor in enumerate(factors[order], start = 1):
                if position != slot:
                    weighted = weighted * factor
            grads.append(( weighted * self.X[:,1:] ).sum(dim = 0).reshape(-1, 1) / ( self.n ))
        return grads

    def _direction_output(self, slot, directions):
        """Change of the training prediction per unit step along ``directions``.

        Valid because the prediction is linear in the block: the direction simply
        replaces the block's own factor in every term it belongs to.
        """
        directions = list(directions)
        total = None
        if slot == 1:
            total = self.net1(directions[0], directions[1], self.X)
            directions = directions[2:]
        nets = {2: self.net2, 3: self.net3, 4: self.net4}
        orders = [order for order in self._ORDERS if order >= slot]
        for order, direction in zip(orders, directions):
            others = [self._weight(order, j) for j in range(1, order + 1) if j != slot]
            term = nets[order](direction, *others, self.X)[0]
            total = term if total is None else total + term
        return total

    @staticmethod
    def _squared_norm(grads):
        """Sum of squared norms of a list of gradient tensors."""
        total = torch.norm(grads[0]) ** 2
        for grad in grads[1:]:
            total = total + torch.norm(grad) ** 2
        return total

    def _cg_round(self, slot, num_subepoch):
        """Run ``num_subepoch`` conjugate-gradient steps on the block for ``slot``.

        The step length ``|g|^2 / (2 * mean(D^2))`` is the exact minimiser of the
        MSE along the direction (``D`` = ``_direction_output``), and the
        directions are updated with ``beta = |g_new|^2 / |g_old|^2``. The weights
        are updated in place.
        """
        params = self._block_params(slot)
        grads = self._block_gradients(slot)
        directions = [-grad for grad in grads]
        gk = self._squared_norm(grads)

        for _ in range(num_subepoch):
            step_output = self._direction_output(slot, directions)
            lr = gk / ( 2 * ( step_output ** 2 ).sum() / ( self.n ) )
            for param, direction in zip(params, directions):
                # Augmented assignment on a tensor is in place, so the model's
                # own weight tensor is the one being modified.
                param += lr * direction

            grads = self._block_gradients(slot)
            gk_new = self._squared_norm(grads)
            beta = gk_new / gk
            directions = [-grad + beta * direction for grad, direction in zip(grads, directions)]
            gk = gk_new

    def _evaluate(self):
        """Predict on both sets, cache predictions and metrics on ``self`` and return the metrics."""
        weights = (self.w41, self.w42, self.w43, self.w44, self.w31, self.w32,
                   self.w33, self.w21, self.w22, self.w11, self.w00)
        predict_y_test = self.net(*weights, self.X_test).to(torch.device('cpu')).numpy()
        predict_y_train = self.net(*weights, self.X).to(torch.device('cpu')).numpy()

        test_RMSE = RMSE(predict_y_test.reshape(self.y_test.shape), self.y_test)
        train_MSE = MSE(predict_y_train.reshape(self.y_train.shape), self.y_train)
        test_MAPE = MAPE(predict_y_test.reshape(self.y_test.shape), self.y_test)

        self.test_MAPE = test_MAPE
        self.test_RMSE = test_RMSE
        # Legacy alias: some reporting points used to store the RMSE under this
        # (misleading) name instead of test_RMSE. Kept so existing readers work.
        self.test_MSE = test_RMSE
        self.predict_y_test = predict_y_test
        self.predict_y_train = predict_y_train
        return test_RMSE, train_MSE, test_MAPE

    def _report(self, sub_epoch, outer_epoch, tagged):
        """Evaluate and print one progress record; ``tagged`` selects the in-sweep header."""
        test_RMSE, train_MSE, test_MAPE = self._evaluate()
        if tagged:
            header = ('epoch', sub_epoch, 'w11, w00', 'EPOCH', outer_epoch)
        else:
            header = ('epoch', sub_epoch, 'EPOCH', outer_epoch)
        print(*header,
              '\n TEST RMSE', test_RMSE,
              '\n TRAIN MSE',train_MSE,
              '\n TEST MAPE',100*test_MAPE)
        print(" ")

    def optimize(self, num_subepoch = 60, num_epoch = 60, printstride = 10, steplag = False, valid = False):
        """Fit the model by alternating conjugate-gradient sweeps.

        Args:
            num_subepoch: CG steps per block in every sweep.
            num_epoch: Maximum number of sweeps over the four blocks.
            printstride: Print metrics every ``printstride`` sweeps.
            steplag: Also print after the 2nd, 3rd and 4th block on those sweeps.
            valid: Treat the held-out set as a validation set and stop once its
                squared error has increased three sweeps in a row; the number of
                sweeps run is then stored in ``self.epoch``.

        After the call ``predict_y_test``, ``predict_y_train``, ``test_RMSE`` and
        ``test_MAPE`` hold the final predictions and metrics.
        """
        # Padded with +inf so the three-in-a-row test can index back from the start.
        losslist = [float('inf') for i in range(10)]
        epochs_run = 0

        for EPOCH in range(num_epoch):
            epochs_run = EPOCH + 1
            at_stride = ( EPOCH + 1 ) % printstride == 0

            for slot in (1, 2, 3, 4):
                self._cg_round(slot, num_subepoch)
                if steplag and slot > 1 and at_stride:
                    self._report(num_subepoch, EPOCH + 1, tagged = True)

            if valid:
                predict_y_test = self.net(self.w41, self.w42, self.w43, self.w44, self.w31, self.w32,
                 self.w33, self.w21, self.w22, self.w11, self.w00, self.X_test)
                # Divided by the training size; only the ordering of successive
                # values matters for the stopping rule.
                losslist.append( ( ((predict_y_test.to(torch.device('cpu')).numpy()
                                    ).reshape(self.y_test.shape) - self.y_test)**2 ).sum()/self.n )
                if (losslist[-1] > losslist[-2]) and (losslist[-2] > losslist[-3]) and (losslist[-3] > losslist[-4]):
                    # NOTE(review): the earlier code also set self.epoch = EPOCH - 3
                    # here, but that value was always overwritten after the loop.
                    # Confirm which epoch count callers expect after an early stop.
                    break

            if at_stride:
                self._report(num_subepoch, EPOCH + 1, tagged = False)

        # Final evaluation with the weights as they are now.
        self._report(num_subepoch, epochs_run, tagged = False)

        if valid:
            self.epoch = epochs_run

In [None]:
import pickle
import sys
import warnings
from pathlib import Path

# Silence pandas' fragmentation warning raised while feature columns are inserted.
warnings.filterwarnings('ignore', category=pd.errors.PerformanceWarning)

# Make modules in the parent directory importable.
sys.path.append("..")

# Directory that receives the pickled results.
Path(f"./results").mkdir(parents=True, exist_ok=True)

def AltPolyRepeat(seed, train_features, train_labels, valid_features, valid_labels, num_subepoch, num_epoch, printstride, device):
    """Train one seeded AltPoly model and return its predictions on the held-out set.

    The seed is applied to ``random``, numpy and torch before the weights are
    drawn. ``valid_features``/``valid_labels`` are passed to the model as its
    held-out set, so the returned array is the prediction for ``valid_features``.
    """
    seed = int(seed)
    print('seed', seed)
    for apply_seed in (random.seed, np.random.seed, torch.manual_seed):
        apply_seed(seed)

    model = AltPoly(1, 1, 1, 1, device=device)
    model.initial_input(train_features, train_labels, valid_features, valid_labels)
    model.initial_param()
    model.optimize(
        num_subepoch=num_subepoch,
        num_epoch=num_epoch,
        printstride=printstride,
        steplag=False,
        valid=False,
    )
    return model.predict_y_test


import time

# Data split: the first training year (the loop below derives the other bounds).
start_year = 2015

# Optimiser settings passed to AltPoly.optimize.
num_subepoch = 20
num_epoch = 100
printstride = 200

# Repetition layout: `run_times` seed groups of `ensemble_times` seeds each.
ensemble_times = 30
seed_group0 = 0
run_times = 10

df_list_baseline_results = []

# Row g holds the consecutive integer seeds of seed group g.
seed_list = np.arange(run_times * ensemble_times).astype(int).reshape(run_times, ensemble_times)


# Repeat the whole experiment once per seed group. Within a group, every zone
# gets an ensemble of `ensemble_times` AltPoly models (one per seed in the
# group's row of `seed_list`); their test predictions are averaged, scored and
# checkpointed to disk.
for seed_group in range(seed_group0, seed_group0 + run_times):

    # One pickle per seed group; it is rewritten after every finished zone.
    path = f"./results/Repeated-{seed_group}.pkl"

    saved_dict = {}

    for zone in ['CT', 'ME', 'NEMASSBOST', 'NH', 'RI', 'SEMASS', 'TOTAL','VT', 'WCMASS']:

        # Windows: train = start_year .. start_year + 2, valid = the following
        # year, test = the year after that. The remaining keyword arguments
        # are feature-construction options handed to get_zone_train unchanged.
        X_train, y_train, X_valid, y_valid, X_test, y_test = get_zone_train(
            df, df2, zone=zone,
            train_start = pd.Timestamp(start_year, 1, 1),
            train_end = pd.Timestamp(start_year + 2, 1, 1),
            valid_start = pd.Timestamp(start_year + 2, 1, 1),
            valid_end = pd.Timestamp(start_year + 3, 1, 1),
            test_start = pd.Timestamp(start_year + 3, 1, 1),
            test_end = pd.Timestamp(start_year + 4, 1, 1),
            log=True, num_hour = 24, num_day = 7, 
            num_hour_RH = 3, num_day_RH = 1, RH = True
        )

        train_features = X_train.astype(np.float64)
        valid_features = X_valid.astype(np.float64)
        test_features = X_test.astype(np.float64)

        train_labels = y_train.astype(np.float64)
        valid_labels = y_valid.astype(np.float64)
        test_labels = y_test.astype(np.float64)

        m_train, n_train = train_features.shape
        m_valid, n_valid = valid_features.shape
        m_test, n_test = test_features.shape

        # Min-max normalization. The column minima/maxima come from the
        # train + valid rows only, so no test statistics enter the scaling.
        X_all = np.concatenate((train_features, valid_features, test_features), axis = 0)
        m = X_all.shape[0]
        mins = np.min(X_all[0:(m_train + m_valid), :], axis = 0)
        maxs = np.max(X_all[0:(m_train + m_valid), :], axis = 0)

        # A column that is constant over train + valid has zero range; dividing
        # by it would yield NaN/inf features. Use a unit range for such columns
        # so they map to (x - min) instead.
        ranges = maxs - mins
        ranges[ranges == 0] = 1.0

        # Broadcasting scales all rows at once (replaces a per-row Python loop).
        X_all = (X_all - mins) / ranges

        train_features = X_all[:m_train, :]
        valid_features = X_all[m_train:(m_train + m_valid), :]
        test_features = X_all[(m_train + m_valid) : , : ]

        # Since the validation set is unnecessary, it is merged with the training set.

        X_train_valid = np.concatenate((train_features, valid_features), axis = 0)
        y_train_valid = np.concatenate((train_labels, valid_labels), axis = 0)      

        time_start = time.time()

        # Train one model per seed of this group. The test split is passed in
        # the valid_features/valid_labels slots of AltPolyRepeat, and each call
        # returns that model's test predictions.
        finalresult = Parallel(n_jobs=1)(
            delayed(AltPolyRepeat)(seed, X_train_valid, y_train_valid, test_features, test_labels, num_subepoch, num_epoch, printstride, device = 'cuda') for seed in seed_list[seed_group])

        time_end = time.time()

        # Ensemble prediction = plain average over the individual models.
        pre_test_ensemble = np.array(finalresult).reshape(ensemble_times, -1).mean(axis = 0)

        test_RMSE = RMSE(pre_test_ensemble.reshape(test_labels.shape), test_labels)
        test_MAPE = MAPE(pre_test_ensemble.reshape(test_labels.shape), test_labels)*100

        # Daily-peak errors: rows are grouped into blocks of 24 and the maximum
        # of each block is compared. This requires the number of test rows to
        # be a multiple of 24 (presumably one row per hour; verify against
        # get_zone_train).
        Daily_peak_test_RMSE = RMSE( np.max(pre_test_ensemble.reshape(-1, 24), axis = 1), np.max(test_labels.reshape(-1, 24), axis = 1) )
        Daily_peak_test_MAPE = MAPE( np.max(pre_test_ensemble.reshape(-1, 24), axis = 1), np.max(test_labels.reshape(-1, 24), axis = 1) )*100

        df_list_baseline_results.append({
            'zone': zone,
            'test_RMSE': test_RMSE,
            'test_MAPE': test_MAPE,
            'Daily_peak_test_RMSE': Daily_peak_test_RMSE,
            'Daily_peak_test_MAPE': Daily_peak_test_MAPE,
            'seed_group': seed_group
        })

        pre_metric = metric(pre_test_ensemble.reshape(test_labels.shape), test_labels)
        print(pre_metric)

        sub_saved_dict = {}
        sub_saved_dict["seed_group"] = seed_group
        # NOTE: the key spelling "emsemble_times" is kept as-is so that
        # previously written pickles and their readers stay compatible.
        sub_saved_dict["emsemble_times"] = ensemble_times
        sub_saved_dict["test_RMSE"] = test_RMSE
        sub_saved_dict["test_MAPE"] = test_MAPE
        sub_saved_dict["Daily_peak_test_RMSE"] = Daily_peak_test_RMSE
        sub_saved_dict["Daily_peak_test_MAPE"] = Daily_peak_test_MAPE
        sub_saved_dict["compute time"] = time_end - time_start
        sub_saved_dict["pre_test_load"] = pre_test_ensemble.reshape(test_labels.shape)
        sub_saved_dict["pre_metric"] = pre_metric

        # Checkpoint: rewrite the group's pickle with all zones finished so far.
        saved_dict[f"zone_{zone}"] = sub_saved_dict
        with open(path, "wb") as f:
            pickle.dump(saved_dict,
                        f,
                        protocol=pickle.HIGHEST_PROTOCOL)

        print('model rersult', 
            '\n test RMSE', test_RMSE,
            '\n test MAPE', test_MAPE,
            '\n Daily_peak_test_RMSE', Daily_peak_test_RMSE,
            '\n Daily_peak_test_MAPE', Daily_peak_test_MAPE )
        print(" ")

        # Training for this zone is finished: release cached GPU memory.
        torch.cuda.empty_cache()

# Summarise the experiment: per zone, mean and standard deviation of every
# metric across the seed groups.
df_results = pd.DataFrame(df_list_baseline_results)

# Aggregate once; each metric's (mean, std) table is a slice of this frame.
zone_stats = df_results.groupby(['zone']).agg(["mean", "std"])

test_rmse_df = zone_stats["test_RMSE"]
test_mape_df = zone_stats["test_MAPE"]
Daily_peak_test_rmse_df = zone_stats["Daily_peak_test_RMSE"]
Daily_peak_test_mape_df = zone_stats["Daily_peak_test_MAPE"]

for caption, table in (
    ("Test RMSE of experiment", test_rmse_df),
    ("Test MAPE of experiment", test_mape_df),
    ("Test Daily peak RMSE of experiment", Daily_peak_test_rmse_df),
    ("Test Daily peak MAPE of experiment", Daily_peak_test_mape_df),
):
    print(caption)
    print(table)

Total Set, (Nums, Variables): (40819, 157)
zone: CT, 
	 Train start: 2015-01-01 00:00:00, 
	 Train end: 2017-01-01 00:00:00
zone: CT, 
	 Valid start: 2017-01-01 00:00:00, 
	 Valid end: 2018-01-01 00:00:00
zone: CT, 
	 Test start: 2018-01-01 00:00:00, 
	 Test end: 2019-01-01 00:00:00
Train set: (17540, 155)
Valid set: (8760, 155)
Test set: (8760, 155)
seed 30
you are using: cuda
epoch 20 EPOCH 100 
 TEST RMSE 165.23225133209738 
 TRAIN MSE 10913.570967577078 
 TEST MAPE 3.75153181839364
 
seed 31
you are using: cuda
epoch 20 EPOCH 100 
 TEST RMSE 149.16305539413665 
 TRAIN MSE 9997.083341916026 
 TEST MAPE 3.5981394441274177
 
seed 32
you are using: cuda
epoch 20 EPOCH 100 
 TEST RMSE 157.79549974714035 
 TRAIN MSE 9847.874562836689 
 TEST MAPE 3.6972016360286784
 
seed 33
you are using: cuda
epoch 20 EPOCH 100 
 TEST RMSE 148.7859151257312 
 TRAIN MSE 11097.7558160339 
 TEST MAPE 3.517687023994192
 
seed 34
you are using: cuda
epoch 20 EPOCH 100 
 TEST RMSE 155.587714238159 
 TRAIN MSE

In [81]:
# Show the collected metrics table: one row per (zone, seed_group).
df_results

Unnamed: 0,zone,test_RMSE,test_MAPE,Daily_peak_test_RMSE,Daily_peak_test_MAPE,seed_group
0,CT,138.292549,3.207360,151.639844,2.678079,0
1,ME,62.677603,3.717380,70.758929,3.675196,0
2,NEMASSBOST,113.932112,3.058870,134.707306,2.951097,0
3,NH,41.310698,2.328099,44.172534,2.146251,0
4,RI,30.818148,2.372297,36.731889,2.347978,0
...,...,...,...,...,...,...
85,RI,30.835606,2.378872,36.608726,2.324690,9
86,SEMASS,75.386980,3.475959,78.971432,2.723443,9
87,TOTAL,500.260843,2.750693,543.485142,2.277680,9
88,VT,44.608818,6.119910,31.885541,3.455638,9


In [None]:
import time

# ---- Configuration of the ensemble-size study (single zone) ----
start_year = 2015        # the training window starts on Jan 1 of this year
num_subepoch = 20        # forwarded to AltPoly.optimize through AltPolyRepeat
num_epoch = 100          # forwarded to AltPoly.optimize through AltPolyRepeat
printstride = 200        # forwarded to AltPoly.optimize through AltPolyRepeat
ensemble_times = 100     # number of models trained per seed group

# NOTE(review): this list is not used by the ensemble-size study; re-binding it
# here discards whatever an earlier cell collected in it. Confirm that this is
# intended.
df_list_baseline_results = []
seed_group0 = 0          # index of the first seed group to run
run_times = 100          # number of consecutive seed groups to run

# Row g holds the seeds of seed group g:
#   g * ensemble_times, ..., (g + 1) * ensemble_times - 1.
# The study indexes this table with the absolute group number
# (seed_group0 ... seed_group0 + run_times - 1), so the table needs
# seed_group0 + run_times rows. Sizing it with run_times alone raised an
# IndexError as soon as seed_group0 > 0; for seed_group0 == 0 the table is
# unchanged.
num_seed_groups = seed_group0 + run_times
seed_list = (np.arange(num_seed_groups * ensemble_times).reshape(num_seed_groups, ensemble_times)).astype(int)

# Single checkpoint file, rewritten after every finished seed group.
path = "./results/EnsembleGraph.pkl"

saved_dict_ens = {}

zone = 'CT'

# Build the arrays for the selected zone. Windows: train = start_year ..
# start_year + 2, valid = the following year, test = the year after that. The
# remaining keyword arguments are feature-construction options handed to
# get_zone_train unchanged.
X_train, y_train, X_valid, y_valid, X_test, y_test = get_zone_train(
    df, df2, zone=zone,
    train_start = pd.Timestamp(start_year, 1, 1),
    train_end = pd.Timestamp(start_year + 2, 1, 1),
    valid_start = pd.Timestamp(start_year + 2, 1, 1),
    valid_end = pd.Timestamp(start_year + 3, 1, 1),
    test_start = pd.Timestamp(start_year + 3, 1, 1),
    test_end = pd.Timestamp(start_year + 4, 1, 1),
    log=True, num_hour = 24, num_day = 7, 
    num_hour_RH = 3, num_day_RH = 1, RH = True
)

train_features = X_train.astype(np.float64)
valid_features = X_valid.astype(np.float64)
test_features = X_test.astype(np.float64)

train_labels = y_train.astype(np.float64)
valid_labels = y_valid.astype(np.float64)
test_labels = y_test.astype(np.float64)

m_train, n_train = train_features.shape
m_valid, n_valid = valid_features.shape
m_test, n_test = test_features.shape

# Min-max normalization. The column minima/maxima come from the train + valid
# rows only, so no test statistics enter the scaling.
X_all = np.concatenate((train_features, valid_features, test_features), axis = 0)
m = X_all.shape[0]
mins = np.min(X_all[0:(m_train + m_valid), :], axis = 0)
maxs = np.max(X_all[0:(m_train + m_valid), :], axis = 0)

# A column that is constant over train + valid has zero range; dividing by it
# would yield NaN/inf features. Use a unit range for such columns so they map
# to (x - min) instead.
ranges = maxs - mins
ranges[ranges == 0] = 1.0

# Broadcasting scales all rows at once (replaces a per-row Python loop).
X_all = (X_all - mins) / ranges

train_features = X_all[:m_train, :]
valid_features = X_all[m_train:(m_train + m_valid), :]
test_features = X_all[(m_train + m_valid) : , : ]

# The validation rows are appended to the training rows; the models are fitted
# on the combined set.
X_train_valid = np.concatenate((train_features, valid_features), axis = 0)
y_train_valid = np.concatenate((train_labels, valid_labels), axis = 0)      

# Daily peaks of the observed test load (blocks of 24 rows); identical for
# every model scored below.
observed_daily_peak = np.max(test_labels.reshape(-1, 24), axis = 1)


def _hourly_and_daily_peak_errors(prediction):
    """Score `prediction` against `test_labels`.

    Returns the tuple (RMSE, MAPE in percent, daily-peak RMSE, daily-peak MAPE
    in percent). Daily peaks are the maxima over consecutive blocks of 24 rows.
    """
    hourly = prediction.reshape(test_labels.shape)
    daily_peak = np.max(prediction.reshape(-1, 24), axis = 1)
    return (
        RMSE(hourly, test_labels),
        MAPE(hourly, test_labels)*100,
        RMSE(daily_peak, observed_daily_peak),
        MAPE(daily_peak, observed_daily_peak)*100,
    )


# For every seed group, train `ensemble_times` models one after another and,
# after each new model, record both its own errors and the errors of the
# running ensemble (the average of all models trained so far in the group).
for seed_group in range(seed_group0, seed_group0 + run_times):

    pre_test_ensemble_list = []
    saved_dict_sub = {}

    for i in range(ensemble_times):
        seed = seed_list[seed_group, i]

        time_start = time.time()
        predict_y_test = AltPolyRepeat(
            seed, X_train_valid, y_train_valid, test_features, test_labels,
            num_subepoch, num_epoch, printstride, device = 'cuda')
        time_end = time.time()
        elapsed = time_end - time_start

        # Running ensemble: mean over the first i + 1 models of this group.
        pre_test_ensemble_list.append(predict_y_test)
        pre_tem = np.mean(np.array(pre_test_ensemble_list), axis = 0)

        (ens_test_RMSE,
         ens_test_MAPE,
         ens_Daily_peak_test_RMSE,
         ens_Daily_peak_test_MAPE) = _hourly_and_daily_peak_errors(pre_tem)

        (single_test_RMSE,
         single_test_MAPE,
         single_Daily_peak_test_RMSE,
         single_Daily_peak_test_MAPE) = _hourly_and_daily_peak_errors(predict_y_test)

        # 'ensemble times' stores the zero-based index of the model just added.
        saved_dict_single = {
            'seed_group': seed_group,
            'ensemble times': i,
            'times': elapsed,
            'ens_test_RMSE': ens_test_RMSE,
            'ens_test_MAPE': ens_test_MAPE,
            'ens_Daily_peak_test_RMSE': ens_Daily_peak_test_RMSE,
            'ens_Daily_peak_test_MAPE': ens_Daily_peak_test_MAPE,
            'single_test_RMSE': single_test_RMSE,
            'single_test_MAPE': single_test_MAPE,
            'single_Daily_peak_test_RMSE': single_Daily_peak_test_RMSE,
            'single_Daily_peak_test_MAPE': single_Daily_peak_test_MAPE,
        }
        saved_dict_sub[f"ensemble time_{i}"] = saved_dict_single

        for label, value in (
            ('seed_group', seed_group),
            ('ensemble times', i),
            ('conduct time', elapsed),
            ('Single RMSE', single_test_RMSE),
            ('Single Daily_peak_test_MAPE', single_Daily_peak_test_MAPE),
            ('Ensemble RMSE', ens_test_RMSE),
            ('Ensemble Daily_peak_test_MAPE', ens_Daily_peak_test_MAPE),
        ):
            print(label, value)

    # All models of this seed group are trained: release cached GPU memory.
    torch.cuda.empty_cache()

    # Checkpoint: rewrite the pickle with every seed group finished so far.
    saved_dict_ens[f"seed_group_{seed_group}"] = saved_dict_sub
    with open(path, "wb") as f:
        pickle.dump(saved_dict_ens,
                    f,
                    protocol=pickle.HIGHEST_PROTOCOL)