In [None]:
import torch
from torch.utils.data import DataLoader

from config import Config

from utils.dataTools import *
# 你可以在model里修改,决定使用LSTM还是GRU
from utils.models import CNNBiLSTM
from utils.trainer import CNNBiLstm_evaluate,CNNBiLstmtrain

if __name__ == '__main__':
    # End-to-end CNN-BiLSTM run: load the series, build windowed splits,
    # train with validation, then evaluate on the held-out test split.
    config = Config()

    print("Data loading...")
    # Sequence data
    dataset = mydataReader("./dataProcessed/testData.csv")

    # Build X/Y and split: 70% train, 10% validation; per the original note
    # the remaining 20% is the test set.
    (train_X, train_Y), (val_X, val_Y), (test_X, test_Y) = dataset.split(
        lookback=config.lookback, trainSet_ratio=0.7, valSet_ratio=0.1)

    # Wrap the arrays in PyTorch datasets
    trainSet = custom_dataset(train_X, train_Y)
    valSet = custom_dataset(val_X, val_Y)
    testSet = custom_dataset(test_X, test_Y)

    # Options shared by all three loaders.
    # NOTE(review): shuffle=False presumably keeps the temporal order of the
    # windows — confirm this is intended for the training loader as well.
    loader_kwargs = dict(batch_size=config.batch_size, shuffle=False,
                         pin_memory=True, num_workers=4)

    # Only the training loader drops the last incomplete batch; validation and
    # test keep every sample.
    train_loader = DataLoader(trainSet, drop_last=True, **loader_kwargs)
    val_loader = DataLoader(valSet, drop_last=False, **loader_kwargs)
    test_loader = DataLoader(testSet, drop_last=False, **loader_kwargs)


    print("Model loading...")
    model = CNNBiLSTM(hidden_size=12, num_layers=1).to(config.device)
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.AdamW(model.parameters(),
                                  lr=config.learning_rate,
                                  weight_decay=config.weight_decay)

    print("Training...")
    model = CNNBiLstmtrain(model,
                           trainloader=train_loader,
                           valloader=val_loader,
                           criterion=criterion,
                           optimizer=optimizer,
                           config=config)

    # Report the real number of test batches. With drop_last=False this is the
    # ceiling of len(testSet) / batch_size, which round() could under-report.
    print("Testing...", len(test_loader))
    y_pre, y = CNNBiLstm_evaluate(model, test_loader, config)


In [None]:
# Save the CNN-BiLSTM test predictions alongside the ground truth.
import os

# Imported explicitly: otherwise `pd` is only in scope if the star-import from
# utils.dataTools happens to re-export it.
import pandas as pd

# to_csv does not create missing parent directories.
os.makedirs("./result", exist_ok=True)
pd.DataFrame({'y_true': y, 'y_pre': y_pre}).to_csv("./result/CNN-BiLSTMpre-best.csv", index=False)


CNN-BiLSTM-middle
r2Score:  0.9732221421565376
meanSquaredError:  0.0013827450061916385
RMSE:  0.037185279428715315
meanAbsoluteError:  0.013599438024481905

## LightGBM

In [None]:
from config import Config
from utils.dataTools import *

if __name__ == '__main__':
    # Reload the configuration and the windowed splits for the LightGBM baseline.
    config = Config()

    print("Data loading...")
    # Sequence data
    dataset = mydataReader("./dataProcessed/testData.csv")

    # Build X/Y and split: 70% train, 10% validation; per the original note
    # the remaining 20% is the test set.
    (train_X, train_Y), (val_X, val_Y), (test_X, test_Y) = dataset.split(
        lookback=config.lookback,
        trainSet_ratio=0.7,
        valSet_ratio=0.1,
    )

In [None]:
# LightGBM baseline: fit on the training split, report test metrics, save the
# predictions and plot the first (up to) 1000 test points.
import os

import lightgbm as lgb
import matplotlib.pyplot as plt
import numpy as np
# Imported explicitly: otherwise `pd` is only in scope if the star-import from
# utils.dataTools happens to re-export it.
import pandas as pd
from sklearn.metrics import mean_squared_error,mean_absolute_error
from sklearn.metrics import r2_score

# Define the model
model = lgb.LGBMRegressor(objective='regression', num_leaves=32, learning_rate=0.05, n_estimators=100,random_state=42)

# Drop singleton dimensions once instead of on every use.
# NOTE(review): assumes squeeze() leaves X as (samples, features) and Y as a
# 1-D vector — confirm against mydataReader.split.
X_train = train_X.squeeze()
X_test = test_X.squeeze()
y = test_Y.squeeze()

# Train
model.fit(X_train, train_Y.squeeze())
test_predict = model.predict(X_test)
# Not used by the metrics below; kept so the name stays available to later cells.
train_predict = model.predict(X_train)

# MSE / MAE on the test split
msetest = mean_squared_error(y, test_predict)
maetest = mean_absolute_error(y, test_predict)

print("测试集MSELoss: ",msetest)
print("测试集MAELoss: ",maetest)
print("测试集RMSELoss: ",np.sqrt(msetest))

# R2-score
r2Test = r2_score(y, test_predict)
print("测试集r2-score: ",r2Test)

# Save ground truth and predictions; to_csv does not create missing directories.
y_pre = test_predict
os.makedirs("./result", exist_ok=True)
pd.DataFrame({'y_true':y,'y_pre':y_pre}).to_csv("./result/LightGBMpre-best.csv",index=False)

# Plot the real and predicted values for the first (up to) 1000 test points
n_shown = len(y[:1000])
plt.plot(range(n_shown), y_pre[:1000], color='red', linewidth=1.5, linestyle='-.', label='prediction')
plt.plot(range(n_shown), y[:1000], color='blue', linewidth=1.5, linestyle='-', label='real')
plt.xlabel('test sample index')
plt.ylabel('target value')
plt.title('LightGBM: prediction vs. real')
plt.legend(loc='best')
plt.show()

树-80
测试集MSELoss:  0.0005582242
测试集MAELoss:  0.008747728
测试集RMSELoss:  0.023626769
测试集r2-score:  0.9891895839209156