In [70]:
import sys
import os
sys.path.append('../')
from utils.tools import *
from src.timellm import TimeLLM
from src.baselines.PatchTST import Model as PatchTST
from exp.exp_long_term_forecasting import *
import torch
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
from sklearn.preprocessing import StandardScaler
import time

In [71]:
# Load data and set model parameters

data_name = 'electricity/electricity.csv'

with open('../dataset/prompt_bank/ECL.txt', 'r', encoding='utf-8') as f:
    description = f.read().strip()

# 加载数据
data = pd.read_csv(f'../dataset/{data_name}', parse_dates=['date'], index_col='date')
print("数据集描述:", description)
print(data.head())

# 设置目标列、输入序列长度和预测长度
target_col = 'OT'
seq_len = 96
horizon = 96
batch_size = 64
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 标准化数据
scaler = StandardScaler()
feature_cols = ['OT']
scaled_data = scaler.fit_transform(data[feature_cols].values)
time_series_data = scaled_data[-seq_len:]
time_series_data = torch.tensor(time_series_data, dtype=torch.float32)

数据集描述: Measurements of electric power consumption in one household with a one-minute sampling rate over a period of almost 4 years. Different electrical quantities and some sub-metering values are available.This archive contains 2075259 measurements gathered in a house located in Sceaux (7km of Paris, France) between December 2006 and November 2010 (47 months).
                        0     1      2      3      4       5     6       7  \
date                                                                         
2016-07-01 02:00:00  14.0  69.0  234.0  415.0  215.0  1056.0  29.0   840.0   
2016-07-01 03:00:00  18.0  92.0  312.0  556.0  292.0  1363.0  29.0  1102.0   
2016-07-01 04:00:00  21.0  96.0  312.0  560.0  272.0  1240.0  29.0  1025.0   
2016-07-01 05:00:00  20.0  92.0  312.0  443.0  213.0   845.0  24.0   833.0   
2016-07-01 06:00:00  22.0  91.0  312.0  346.0  190.0   647.0  16.0   733.0   

                         8      9  ...    311    312      313     314     315  \
date    

In [72]:
# 可视化时间序列数据
plt.figure(figsize=(12, 6))
plt.plot(time_series_data.numpy(), label=target_col)
plt.title("Electricity demand time series data.")
plt.xlabel("Time Step")
plt.ylabel("Standardized value")
plt.legend()
plt.show()


In [73]:
# 创建滑动窗口数据集
def create_windows(data, target_col, seq_len, horizon):
    X, y = [], []
    for i in range(len(data) - seq_len - horizon + 1):
        X.append(data[i:i + seq_len].values)
        y.append(data[i + seq_len:i + seq_len + horizon][target_col].values)
    return np.array(X), np.array(y)

X, y = create_windows(data, 'OT', seq_len, horizon)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)
print("训练集形状:", X_train.shape, y_train.shape)
print("测试集形状:", X_test.shape, y_test.shape)


训练集形状: (20890, 96, 321) (20890, 96)
测试集形状: (5223, 96, 321) (5223, 96)


In [74]:
# 创建数据集
class TimeSeriesDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32)
    
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

train_dataset = TimeSeriesDataset(X_train, y_train)
test_dataset = TimeSeriesDataset(X_test, y_test)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [75]:
# 定义训练和测试函数
def train_model(model, train_loader, criterion, optimizer, num_epochs):
    model.train()
    for epoch in range(num_epochs):
        epoch_loss = 0
        for batch_X, batch_y in train_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            optimizer.zero_grad()
            predictions = model(batch_X, description, horizon, seq_len)
            predicted_values = predictions[:, -horizon:, 0]
            loss = criterion(predicted_values, batch_y)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {epoch_loss / len(train_loader):.4f}")

def evaluate_model(model, test_loader, criterion):
    model.eval()
    test_loss = 0
    with torch.no_grad():
        for batch_X, batch_y in test_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            predictions = model(batch_X, description, horizon, seq_len)
            predicted_values = predictions[:, -horizon:, 0]
            loss = criterion(predicted_values, batch_y)
            test_loss += loss.item()
    avg_loss = test_loss / len(test_loader)
    print(f"测试集平均损失: {avg_loss:.4f}")
    return avg_loss

criterion = nn.MSELoss()
d_model = 128
#d_model = X_train.shape[2]  # 特征维度
print(X)

[[[  14.   69.  234. ... 1558.  182. 2162.]
  [  18.   92.  312. ... 2177.  253. 2835.]
  [  21.   96.  312. ... 2193.  218. 2764.]
  ...
  [  63.  120.  121. ... 2300.  838. 3436.]
  [  55.  109.  332. ... 2527.  790. 3417.]
  [  46.   98.  327. ... 2561.  714. 3514.]]

 [[  18.   92.  312. ... 2177.  253. 2835.]
  [  21.   96.  312. ... 2193.  218. 2764.]
  [  20.   92.  312. ... 1315.  195. 2735.]
  ...
  [  55.  109.  332. ... 2527.  790. 3417.]
  [  46.   98.  327. ... 2561.  714. 3514.]
  [  32.   92.  326. ... 2355.  558. 3369.]]

 [[  21.   96.  312. ... 2193.  218. 2764.]
  [  20.   92.  312. ... 1315.  195. 2735.]
  [  22.   91.  312. ... 1378.  191. 2721.]
  ...
  [  46.   98.  327. ... 2561.  714. 3514.]
  [  32.   92.  326. ... 2355.  558. 3369.]
  [  58.   88.  328. ... 2486.  520. 3223.]]

 ...

 [[  12.   92.    8. ... 2861. 1064. 3018.]
  [  12.   89.    8. ... 3395.  941. 2888.]
  [  10.   83.    8. ... 3251. 1148. 2886.]
  ...
  [  11.   98.    8. ... 2390.  201. 306

In [None]:
# # PatchTST 模型训练与测试
# class PatchTSTWrapper(nn.Module):
#     def __init__(self, configs):
#         super(PatchTSTWrapper, self).__init__()
#         self.model = PatchTST(configs).to(device)
    
#     def forward(self, x_enc):
#         # PatchTST 模型不需要 description 和其他无关参数
#         return self.model.forecast(x_enc, None, None, None)

# PatchTST 配置
class Configs:
    def __init__(self, task_name='long_term_forecast', 
                 is_training=True, model_id='test',model='PatchTST', 
                 data='custom',root_path='../dataset/electricity/',
                 data_path='electricity.csv',features='M',target='OT', 
                 freq='h',checkpoints='./checkpoints/', seq_len=96, 
                 label_len=48, pred_len=96, seasonal_patterns='Monthly',
                 inverse=True, mask_rate = 0.25, nomaly_ratio=0.25, expand=2,
                 d_conv=4, top_k=5, num_kernels=6, enc_in=7, dec_in=7, c_out=7,
                 d_model=512, n_heads=8, e_layers=2, d_layers=1, d_ff=2048, moving_avg=25, factor=1,
                 distil=False, dropout=0.1, embed='timeF', activation='gelu', channel_independence=1,
                 decomp_method='moving_avg', use_norm=1, down_sampling_layers=0, down_sampling_window=1,
                 down_sampling_method=None, seg_len=48, num_workers=10, itr=1, batch_size=32, train_epochs=10, 
                 patience=3, learning_rate=0.0001, des='test', loss='MSE', lradj='type1',use_amp=True,
                 use_gpu=True, gpu=0, use_multi_gpu=True, devices='0,1,2,3', p_hidden_dims=[128,128], p_hidden_layers=2,
                 use_dtw=False, augmentation_ratio=0, seed=2, jitter=False, scaling=False, permutation=False, randompermutation=False,
                 magwarp=False, timewarp=False, windowwarp=False, windowslice=False, rotation=False, spawner=False, dtwwarp=False,
                 shapedtwwarp=False,wdba=False,discdtw=False,discsdtw=False,extra_tag="", **kwargs):

        self.task_name = task_name
        self.is_training = is_training
        self.model_id = model_id
        self.model = model
        self.data = data
        self.root_path = root_path
        self.data_path = data_path
        self.features = features
        self.target = target
        self.freq = freq
        self.checkpoints = checkpoints
        self.seq_len = seq_len
        self.label_len = label_len
        self.pred_len = pred_len
        self.seasonal_patterns = seasonal_patterns
        self.inverse = inverse
        self.mask_rate = mask_rate
        self.nomaly_ratio = nomaly_ratio
        self.expand = expand
        self.d_conv = d_conv
        self.top_k = top_k
        self.num_kernels = num_kernels
        self.enc_in = enc_in
        self.dec_in = dec_in
        self.c_out = c_out
        self.d_model = d_model
        self.n_heads = n_heads
        self.e_layers = e_layers
        self.d_layers = d_layers
        self.d_ff = d_ff
        self.moving_avg = moving_avg
        self.factor = factor
        self.distil = distil
        self.dropout = dropout
        self.embed = embed
        self.activation = activation
        self.channel_independence = channel_independence
        self.decomp_method = decomp_method
        self.use_norm = use_norm
        self.down_sampling_layers = down_sampling_layers
        self.down_sampling_window = down_sampling_window
        self.down_sampling_method = down_sampling_method
        self.seg_len = seg_len
        self.num_workers = num_workers
        self.itr = itr
        self.batch_size = batch_size
        self.train_epochs = train_epochs
        self.patience = patience
        self.learning_rate = learning_rate
        self.des = des
        self.loss = loss
        self.lradj = lradj
        self.use_amp = use_amp
        self.use_gpu = use_gpu
        self.gpu = gpu
        self.use_multi_gpu = use_multi_gpu
        self.devices = devices
        self.p_hidden_dims = p_hidden_dims
        self.p_hidden_layers = p_hidden_layers
        self.use_dtw = use_dtw
        self.augmentation_ratio = augmentation_ratio
        self.seed = seed
        self.jitter = jitter
        self.scaling = scaling
        self.permutation = permutation
        self.randompermutation = randompermutation
        self.magwarp = magwarp
        self.timewarp = timewarp
        self.windowwarp = windowwarp
        self.windowslice = windowslice
        self.rotation = rotation
        self.spawner = spawner
        self.dtwwarp = dtwwarp
        self.shapedtwwarp = shapedtwwarp
        self.wdba = wdba
        self.discdtw = discdtw
        self.discsdtw = discsdtw
        self.extra_tag = extra_tag

# 初始化 PatchTST 配置
args = Configs()

if args.use_gpu and args.use_multi_gpu:
    args.devices = args.devices.replace(' ', '')
    device_ids = args.devices.split(',')
    args.device_ids = [int(id_) for id_ in device_ids]
    args.gpu = args.device_ids[0]

for ii in range(args.itr):
    exp = Exp_Long_Term_Forecast(args)  # set experiments
    setting = '{}_{}_{}_{}_ft{}_sl{}_ll{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_expand{}_dc{}_fc{}_eb{}_dt{}_{}_{}'.format(
                    args.task_name,
                    args.model_id,
                    args.model,
                    args.data,
                    args.features,
                    args.seq_len,
                    args.label_len,
                    args.pred_len,
                    args.d_model,
                    args.n_heads,
                    args.e_layers,
                    args.d_layers,
                    args.d_ff,
                    args.expand,
                    args.d_conv,
                    args.factor,
                    args.embed,
                    args.distil,
                    args.des, ii)

    print('>>>>>>>start training : {}>>>>>>>>>>>>>>>>>>>>>>>>>>'.format(setting))
    exp.train(setting)

    print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting))
    exp.test(setting)
    torch.cuda.empty_cache()

Use GPU: cuda:0
>>>>>>>start training : long_term_forecast_test_PatchTST_custom_ftM_sl96_ll48_pl96_dm512_nh8_el2_dl1_df2048_expand2_dc4_fc1_ebtimeF_dtFalse_test_0>>>>>>>>>>>>>>>>>>>>>>>>>>
train 18221
val 2537
test 5165


In [None]:
# Time-LLM 模型训练与测试
time_llm = TimeLLM(d_model=d_model, n_heads=3, attention_dropout=0.1).to(device)
prediction_layer = nn.Linear(d_model, 1).to(device)

class TimeSeriesPredictor(nn.Module):
    def __init__(self, time_llm, prediction_layer):
        super(TimeSeriesPredictor, self).__init__()
        self.time_llm = time_llm
        self.prediction_layer = prediction_layer
    
    def forward(self, time_series_data, description, pred_len, seq_len):
        multi_modal_embedding, _ = self.time_llm(time_series_data, description, pred_len, seq_len)
        predictions = self.prediction_layer(multi_modal_embedding)
        return predictions

model_llm = TimeSeriesPredictor(time_llm, prediction_layer).to(device)
optimizer = torch.optim.Adam(model_llm.parameters(), lr=1e-4)

# 训练 Time-LLM
train_model(model_llm, train_loader, criterion, optimizer, num_epochs=10)
# 测试 Time-LLM
evaluate_model(model_llm, test_loader, criterion)

  state_dict = torch.load(resolved_archive_file, map_location="cpu")


Generated Text Prompt:
<|start_prompt|>Dataset description: Measurements of electric power consumption in one household with a one-minute sampling rate over a period of almost 4 years. Different electrical quantities and some sub-metering values are available.This archive contains 2075259 measurements gathered in a house located in Sceaux (7km of Paris, France) between December 2006 and November 2010 (47 months).Task description: forecast the next 96 steps given the previous 96 steps information; Input statistics: min value 8.0, max value 62.0, median value 10.0, the trend of input is downward, top 5 lags are : [0, 72, 24, 48, 1]<|<end_prompt>|>
<|start_prompt|>Dataset description: Measurements of electric power consumption in one household with a one-minute sampling rate over a period of almost 4 years. Different electrical quantities and some sub-metering values are available.This archive contains 2075259 measurements gathered in a house located in Sceaux (7km of Paris, France) betwe

RuntimeError: mat1 and mat2 shapes cannot be multiplied (6144x321 and 128x126)

In [None]:
# 预测结果可视化
def plot_predictions(model, dataset, title):
    sample_X, sample_y = dataset[0]
    sample_X = sample_X.unsqueeze(0).to(device)
    sample_y = sample_y.to(device)
    
    model.eval()
    with torch.no_grad():
        prediction = model(sample_X, description, horizon, seq_len)
        predicted_values = prediction[:, -horizon:, 0].cpu().numpy()
        true_values = sample_y.cpu().numpy()
    
    plt.figure(figsize=(12, 6))
    plt.plot(range(horizon), true_values, label="真实值", marker='o')
    plt.plot(range(horizon), predicted_values, label="预测值", marker='x')
    plt.title(title)
    plt.xlabel("预测步数")
    plt.ylabel("OT 值")
    plt.legend()
    plt.show()

# 可视化 Time-LLM 和 PatchTST 的预测结果
plot_predictions(model_llm, test_dataset, "Time-LLM 预测结果")
plot_predictions(model_patchtst, test_dataset, "PatchTST 预测结果")

In [None]:
# 保存模型
torch.save(model_llm.state_dict(), 'time_llm_model.pth')
torch.save(model_patchtst.state_dict(), 'patchtst_model.pth')
print("模型已保存。")

In [None]:
exit(0) #下面是原本的代码

In [None]:
# # 定义预测层
# prediction_layer = torch.nn.Linear(d_model, 1).to(device)  # 单变量预测

# # 定义完整模型
# class TimeSeriesPredictor(nn.Module):
#     def __init__(self, time_llm, prediction_layer):
#         super(TimeSeriesPredictor, self).__init__()
#         self.time_llm = time_llm
#         self.prediction_layer = prediction_layer
    
#     def forward(self, time_series_data, description, pred_len, seq_len):
#         multi_modal_embedding, _ = self.time_llm(time_series_data, description, pred_len, seq_len)
#         predictions = self.prediction_layer(multi_modal_embedding)
#         return predictions
    
# # 将模型和预测层移动到设备
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# time_llm.to(device)
# prediction_layer.to(device)

# # 实例化完整模型
# model = TimeSeriesPredictor(time_llm, prediction_layer).to(device)
# model.train()

In [None]:
from torch.utils.data import Dataset, DataLoader

class TimeSeriesDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32)
    
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# 创建数据集
train_dataset = TimeSeriesDataset(X_train, y_train)
test_dataset = TimeSeriesDataset(X_test, y_test)

# 创建数据加载器
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# 训练代码
num_epochs = 10  # 根据需要调整轮数

# 定义损失函数和优化器
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0
    for batch_X, batch_y in train_loader:
        batch_X = batch_X.to(device)  # [B, L, D]
        batch_y = batch_y.to(device)  # [B, horizon]
        
        optimizer.zero_grad()
        
        # 前向传播
        predictions = model(batch_X, description, horizon, seq_len)  # [B, L, 1]
        
        # 提取预测的最后 pred_len 步
        predicted_values = predictions[:, -horizon:, 0]  # [B, horizon]
        
        # 计算损失
        loss = criterion(predicted_values, batch_y)
        
        # 反向传播和优化
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
    
    avg_loss = epoch_loss / len(train_loader)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}")

In [None]:
# 模型评估
model.eval()
test_loss = 0
with torch.no_grad():
    for batch_X, batch_y in test_loader:
        batch_X = batch_X.to(device)
        batch_y = batch_y.to(device)
        
        predictions = model(batch_X, description, horizon, seq_len)  # [B, L, 1]
        predicted_values = predictions[:, -horizon:, 0]  # [B, horizon]
        
        loss = criterion(predicted_values, batch_y)
        test_loss += loss.item()

avg_test_loss = test_loss / len(test_loader)
print(f"测试集平均损失: {avg_test_loss:.4f}")

In [None]:
# 预测结果可视化

# 选择一个测试样本
sample_X, sample_y = test_dataset[0]
sample_X = sample_X.unsqueeze(0).to(device)  # [1, 300, 7]
sample_y = sample_y.to(device)  # [horizon]

# 进行预测
model.eval()
with torch.no_grad():
    prediction = model(sample_X, description, horizon, seq_len)  # [1, 300, 1]
    predicted_values = prediction[:, -horizon:, 0].squeeze(0).cpu().numpy()  # [horizon]
    true_values = sample_y.cpu().numpy()  # [horizon]

# 绘制预测结果与真实值对比
plt.figure(figsize=(12, 6))
plt.plot(range(horizon), true_values, label="真实值", marker='o')
plt.plot(range(horizon), predicted_values, label="预测值", marker='x')
plt.title("预测结果与真实值对比")
plt.xlabel("预测步数")
plt.ylabel("OT 值")
plt.legend()
plt.show()

In [None]:
# 保存模型
torch.save(model.state_dict(), 'time_series_predictor.pth')
print("模型已保存。")

# 加载模型（示例）
# model.load_state_dict(torch.load('time_series_predictor.pth'))
# model.eval()