In [1]:
import pandas as pd
import numpy as np
import os
import joblib
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from data_loader import StockDataset
from model.GHATModel import GAT
from config import Config

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def build_adj():
    # connection = [
    # (1, 0),
    # (9, 0), (12, 0), 
    # (8, 9), (8, 12), (5, 9), (11, 12), 
    # (4, 5), (4, 8), (7, 8), (7, 11), (10, 11),
    # (3, 4), (3, 7), (6, 7), (6, 10), (2, 3), (2, 6)]
    
    # 无向图
    connection = [
        (1, 0), (0, 1),
        (9, 0), (12, 0), (0, 9), (0, 12),
        (8, 9), (8, 12), (5, 9), (11, 12), (9, 8), (12, 8), (9, 5), (12, 11),
        (4, 5), (4, 8), (7, 8), (7, 11), (10, 11), (5, 4), (8, 4), (8, 7), (11, 7), (11, 10),
        (3, 4), (3, 7), (6, 7), (6, 10), (2, 3), (2, 6), (4, 3), (7, 3), (7, 6), (10, 6), (3, 2), (6, 2)
        ]
    adj_matrix = torch.zeros(13, 13).float()
    for source, target in connection:
        adj_matrix[source][target] = 1
    return adj_matrix

In [19]:
input_path, output_path = './data/volume/0308/Input/000046_3_3_inputs.npy', './data/volume/0308/Output/000046_3_3_output.npy'
conf = Config(input_path= input_path)

input_data = np.load(input_path, allow_pickle= True)
output_data = np.load(output_path, allow_pickle= True)
train_val_size = int(input_data.shape[0]* 0.8)
input_train, input_test = input_data[:train_val_size], input_data[train_val_size:]
output_train, output_test = output_data[:train_val_size], output_data[train_val_size:]

train_dataset = StockDataset(input_data= input_train, output_data= output_train, train_length= conf.train_length, pred_length= conf.pred_length,
                                train_features= conf.train_features, pred_features= conf.pred_features)
test_dataset = StockDataset(input_data= input_test, output_data= output_test, train_length= conf.train_length, pred_length= conf.pred_length, 
                            train_features= conf.train_features, pred_features= conf.pred_features)

train_dataloader = DataLoader(train_dataset, batch_size= conf.batch_size, shuffle= False, drop_last=True)
test_dataloader = DataLoader(test_dataset, conf.batch_size, shuffle= False, drop_last= True)

# laod model
model = GAT(n_feat= len(conf.train_features), n_hid= conf.n_hid, out_features= len(conf.pred_features), 
            pred_length= conf.pred_length, n_heads= conf.n_head)
model = model.to(device= device)
criterion = nn.L1Loss().to(device)

state_dict = torch.load('./saved_models/000046/000046_model_train_best.pt')
model.load_state_dict(state_dict)
model.eval()

batches = len(train_dataloader)
loss = 0.0
for batch_i, (input_data, output_data) in enumerate(train_dataloader):
    input_data, output_data = input_data.to(device, non_blocking= True), output_data.to(device, non_blocking= True).float()
    pred = model(input_data, build_adj())
    loss += criterion(pred, output_data).item()

# test

In [53]:
pred_dir = './pred/'
scaler_dir = './data/volume/0308/Scaler/'

for path in os.listdir(pred_dir):
    if path.endswith('.csv'):
        date_suffix = path[-10:-4]
        scaler_path = os.path.join(scaler_dir, f'{date_suffix}.m')
        
        stand = joblib.load(scaler_path)
        data = pd.read_csv(os.path.join(pred_dir, path))
        
        data.iloc[:, 0] = stand.transform(data.iloc[:, 0].values.reshape(-1, 1)).flatten().astype(float)
        data.iloc[:, 1] = stand.transform(data.iloc[:, 1].values.reshape(-1, 1)).flatten().astype(float)
        
        aps_value = np.abs(data.iloc[:, 0] - data.iloc[:, 1])
        mape = np.mean(aps_value/data.iloc[:, 1])
        # mean_difference = np.mean(np.abs(data.iloc[:, 0] - data.iloc[:, 1])/data.iloc[:, 1])
        print(f"{path} com is {mape}")

model_pred_300263.csv com is 0.935416413980514
model_pred_002882.csv com is 1.550863929888626
model_pred_002841.csv com is -1.0267474556341747
model_pred_002282.csv com is 3.756918435396213
model_pred_300174.csv com is -0.4021384301654137
model_pred_000998.csv com is -0.12983024485678749
model_pred_000951.csv com is -1.3434334328673543
model_pred_000046.csv com is -0.46476788085150367
model_pred_300133.csv com is 0.8758025108458504
model_pred_000753.csv com is 1.9249466546347918


In [3]:
output = pd.read_csv('./pred/model_pred_000046.csv')
output.describe()

Unnamed: 0,000046-pred,000046-true
count,3840.0,3840.0
mean,-0.51162,-0.11466
std,0.175292,0.73602
min,-2.684278,-0.7951
25%,-0.515311,-0.530425
50%,-0.460229,-0.32155
75%,-0.429461,0.019775
max,-0.372771,10.5247
