# Informer Autoformer DLinear Scripts

The aim of this code is to run the experiments for Informer, Autoformer, and DLinear. Each experiment is executed by calling run.py with the necessary arguments. Each model is run 2 times to ensure the reliability of the results. The data is scaled and split automatically within the process. The results are then processed by the code below as follows.
1. Capture the output of the scripts.
2. Filter the output down to the paths where the results for each model are saved.
3. Open the metrics.npy under this path, extract the MAE and MSE for each of the two model repetitions, and calculate the overall mean. The final mean MAE and MSE results are written to the file transformer_results_averaged_over_iterations.txt.
4. Autoformer
5. Informer
6. DLinear

In [5]:
import numpy as np
import os
import pandas as pd
import re
from statistics import mean
from statistics import stdev
import subprocess

In [6]:
# Locate run.py one directory above the current working directory.
# The joined path is already absolute (it starts from os.getcwd()), so
# normalising it is enough to collapse the trailing "..".
parent_directory = os.path.normpath(os.path.join(os.getcwd(), os.pardir))
script_path = os.path.join(parent_directory, "run.py")

## 1. Capture the output of the script

In [7]:
def run_and_capture_script_output(script_path, script_arguments):
    """Run a Python script in a child process and return what it printed.

    The script is started unbuffered (``-u``) with the interpreter that is
    running this code, so the child uses the same environment and packages.
    Standard error is merged into the captured text, so a traceback from a
    failing script is part of the returned output instead of being lost.

    Args:
        script_path: Path of the script to execute.
        script_arguments: Command-line arguments passed to the script.

    Returns:
        The combined stdout/stderr text of the child process. If the script
        exits with a non-zero status, the text produced up to that point is
        returned and no exception is raised.
    """
    # Local import keeps this function self-contained; the module-level
    # import block does not provide ``sys``.
    import sys

    # Fall back to "python" only if the running interpreter path is unknown.
    interpreter = sys.executable or "python"
    command = [interpreter, "-u", script_path, *script_arguments]
    try:
        return subprocess.check_output(
            command,
            stderr=subprocess.STDOUT,  # keep error messages with the output
            universal_newlines=True,
        )
    except subprocess.CalledProcessError as error:
        # A non-zero exit status still carries the captured text.
        return error.output

In [8]:
def extract_settings_prints(output_lines):
    """Collect the output lines that contain the test-phase settings marker.

    Args:
        output_lines: Iterable of text lines from the captured script output.

    Returns:
        A list of the lines containing ``"testing : long_term_forecast_"``,
        each with leading and trailing whitespace removed, in input order.
    """
    marker = "testing : long_term_forecast_"  # fixed marker searched for in each line
    selected = []
    for raw_line in output_lines:
        if marker in raw_line:
            selected.append(raw_line.strip())
    return selected

## 2. Extract MAE and MSE

In [9]:
def pattern_matching(setting_prints):
    """Reduce each "testing : ..." line to the bare experiment setting name.

    Every entry is searched for ``testing : long_term_forecast_...`` followed
    by the run of ``<`` characters. On a match the entry is replaced by the
    setting name alone; entries without a match are reported and left
    unchanged.

    Args:
        setting_prints: List of captured output lines. The list is updated in
            place, so callers holding a reference see the extracted names.

    Returns:
        The same list object that was passed in, after the replacements.
    """
    # Compile once instead of rebuilding the pattern on every iteration.
    pattern = re.compile(
        r"testing : (long_term_forecast_[^<]+)<<<<<<<<<<<<<<<<<<<<<<<<<<<<<"
    )

    for index, setting_print in enumerate(setting_prints):
        match = pattern.search(setting_print)
        if match:
            # Keep only the captured setting name.
            setting_prints[index] = match.group(1)
            print("Extracted Part:", setting_prints[index])
        else:
            print("No match found.")
            print("Setting Print:", setting_print)
    return setting_prints

## 3. Average MAE and MSE

In [10]:
def summarize_results(matched_patterns):
    """Average the saved metrics of repeated runs and append them to a report.

    For every setting name, ``./results/<setting>/metrics.npy`` is loaded
    relative to the current working directory. Element 0 is treated as MAE
    and element 1 as MSE (assumes run.py saves the metrics in that order --
    TODO confirm against the experiment code). The mean and the population
    standard deviation (``np.std`` default) across all settings are appended
    to ``transformer_results_averaged_over_iterations.txt``.

    Args:
        matched_patterns: Non-empty sequence of setting names, one per
            repetition. The first name labels the report entry.

    Raises:
        ValueError: If ``matched_patterns`` is empty, i.e. there is nothing
            to average.
    """
    if not matched_patterns:
        raise ValueError(
            "No result settings were found; cannot summarize an empty run."
        )

    mse = []
    mae = []
    for setting in matched_patterns:
        metrics = np.load('./results/' + setting + '/metrics.npy')
        mae.append(metrics[0])
        mse.append(metrics[1])

    std_mse = np.std(np.array(mse))
    std_mae = np.std(np.array(mae))

    overall_mse = mean(mse)
    overall_mae = mean(mae)

    # Append mode keeps the entries of earlier grid-search runs in the file.
    file_path = "transformer_results_averaged_over_iterations.txt"
    with open(file_path, "a") as file:
        # Label the entry with the argument, not a notebook-global list.
        file.write("Settings Prints: {}\n".format(matched_patterns[0]))
        file.write("Overall MSE: {:.6f}\n".format(overall_mse))
        file.write("Overall MAE: {:.6f}\n".format(overall_mae))
        file.write("Std. dev. MSE: {:.6f}\n".format(std_mse))
        file.write("Std. dev. MAE: {:.6f}\n".format(std_mae))
    print("Results saved to:", file_path)

## 4. Autoformer

In [None]:
# This calls the run.py with the relevant parameters for the multivariate setting.
# Grid search over every (dataset, prediction length) combination for Autoformer.
data_paths = ["df_all_columns.csv", "df_most_important_columns.csv", "df_only_generation_columns.csv"]
prediction_lengths = ["24", "48", "96", "192"]
for data_path in data_paths:
    full_data_path = "../../../01_datasets/" + data_path
    # Only the column count is needed, so read just the header row instead
    # of parsing the complete dataset.
    df = pd.read_csv(full_data_path, nrows=0)
    num_columns = len(df.columns)
    # One column is not a model channel (presumably the timestamp column --
    # TODO confirm against the dataset layout).
    feature_count = str(num_columns - 1)
    for pred_len in prediction_lengths:
        # Define the script arguments as a list
        model_id = f"_{pred_len}_{data_path.replace('.csv', '')}"  # Create the model_id
        script_arguments = [
            "--task_name", "long_term_forecast",
            "--is_training", "1", #True
            "--root_path", "../../../01_datasets/",
            "--data_path", data_path,
            "--model_id", model_id,
            "--model", "Autoformer",
            "--data", "custom", # This ensures a 70%,10%,20% train,val,test split see data_provider/data_loader.py
            "--features", "M", # Multivariate
            "--seq_len", "96",
            "--label_len", "48",
            "--pred_len", pred_len,
            "--e_layers", "2", # Hyperparameters as in original model
            "--d_layers", "5",
            "--factor", "5",
            "--enc_in", feature_count,
            "--dec_in", feature_count,
            "--c_out", feature_count,
            "--des", "Exp",
            "--itr", "2"
        ]

        script_output = run_and_capture_script_output(script_path, script_arguments)

        print("Captured Output:")
        print(script_output)

        # Extract and save the prints related to the settings variable
        settings_prints = extract_settings_prints(script_output.splitlines())

        # Reduce the captured lines to the result folder names
        matched_patterns = pattern_matching(settings_prints)

        # Average the metrics of the repetitions and append them to the report
        summarize_results(matched_patterns)


In [None]:
# Univariate Autoformer grid search: run.py is called once for every
# combination of target column and prediction length.
columns = [
    "DE_load_actual_entsoe_transparency",
    "DE_solar_generation_actual",
    "DE_wind_generation_actual",
]
prediction_lengths = ["24", "48", "96", "192"]
for column in columns:
    for pred_len in prediction_lengths:
        # Identifier that encodes the horizon and the target column
        model_id = "_" + pred_len + "_" + column
        # Command-line arguments, grouped by purpose and joined into one flat list
        script_arguments = (
            ["--task_name", "long_term_forecast", "--is_training", "1"]
            + ["--root_path", "../../../01_datasets/"]
            + ["--data_path", "df_most_important_columns.csv"]
            + ["--model_id", model_id, "--model", "Autoformer"]
            + ["--data", "custom", "--features", "S", "--target", column]
            + ["--seq_len", "96", "--label_len", "48", "--pred_len", pred_len]
            + ["--e_layers", "2", "--d_layers", "5", "--factor", "5"]
            + ["--enc_in", "1", "--dec_in", "1", "--c_out", "1"]
            + ["--des", "Exp", "--itr", "2"]
        )

        script_output = run_and_capture_script_output(script_path, script_arguments)
        print("Captured Output:")
        print(script_output)

        # Keep only the lines that name the result settings, then strip them
        # down to the setting names and average the stored metrics.
        settings_prints = extract_settings_prints(script_output.splitlines())
        matched_patterns = pattern_matching(settings_prints)
        summarize_results(matched_patterns)


## 5. Informer

In [7]:
# Multivariate Informer grid search over every (dataset, prediction length) combination.
# List of data paths and prediction lengths for the grid search
data_paths = ["df_all_columns.csv", "df_most_important_columns.csv", "df_only_generation_columns.csv"]
prediction_lengths = ["24", "48", "96", "192"]
for data_path in data_paths:
    full_data_path = "../../../01_datasets/" + data_path
    # Only the column count is needed, so read just the header row instead
    # of parsing the complete dataset.
    df = pd.read_csv(full_data_path, nrows=0)
    num_columns = len(df.columns)
    # One column is not a model channel (presumably the timestamp column --
    # TODO confirm against the dataset layout).
    feature_count = str(num_columns - 1)
    for pred_len in prediction_lengths:
        # Define the script arguments as a list
        model_id = f"_{pred_len}_{data_path.replace('.csv', '')}"  # Create the model_id
        script_arguments = [
            "--task_name", "long_term_forecast",
            "--is_training", "1",
            "--root_path", "../../../01_datasets/",
            "--data_path", data_path,
            "--model_id", model_id,
            "--model", "Informer",
            "--data", "custom",
            "--features", "M",
            "--seq_len", "96",
            "--label_len", "48",
            "--pred_len", pred_len,
            "--e_layers", "2",
            "--d_layers", "5",
            "--factor", "5",
            "--enc_in", feature_count,
            "--dec_in", feature_count,
            "--c_out", feature_count,
            "--des", "Exp",
            "--itr", "2"
        ]

        script_output = run_and_capture_script_output(script_path, script_arguments)

        print("Captured Output:")
        print(script_output)

        # Extract and save the prints related to the settings variable
        settings_prints = extract_settings_prints(script_output.splitlines())

        # Reduce the captured lines to the result folder names
        matched_patterns = pattern_matching(settings_prints)

        # Average the metrics of the repetitions and append them to the report
        summarize_results(matched_patterns)


Captured Output:
Args in experiment:
Namespace(activation='gelu', anomaly_ratio=0.25, batch_size=32, c_out=29, checkpoints='./checkpoints/', d_ff=2048, d_layers=5, d_model=512, data='custom', data_path='df_all_columns.csv', dec_in=29, des='Exp', devices='0,1,2,3', distil=True, dropout=0.1, e_layers=2, embed='timeF', enc_in=29, factor=5, features='M', freq='h', gpu=0, inverse=False, is_training=1, itr=2, label_len=48, learning_rate=0.0001, loss='MSE', lradj='type1', mask_rate=0.25, model='Informer', model_id='_24_df_all_columns', moving_avg=25, n_heads=8, num_kernels=6, num_workers=10, output_attention=False, p_hidden_dims=[128, 128], p_hidden_layers=2, patience=3, pred_len=24, root_path='../dataset/Open-Power-System/', seasonal_patterns='Monthly', seq_len=96, target='OT', task_name='long_term_forecast', top_k=5, train_epochs=10, use_amp=False, use_gpu=True, use_multi_gpu=False)
Use GPU: cuda:0
>>>>>>>start training : long_term_forecast__24_df_all_columns_Informer_custom_ftM_sl96_ll48_p

In [12]:
# Univariate Informer grid search: run.py is called once for every
# combination of target column and prediction length.
columns = [
    "DE_load_actual_entsoe_transparency",
    "DE_solar_generation_actual",
    "DE_wind_generation_actual",
]
prediction_lengths = ["24", "48", "96", "192"]
for column in columns:
    for pred_len in prediction_lengths:
        # Identifier that encodes the horizon and the target column
        model_id = "_" + pred_len + "_" + column
        # Command-line arguments, grouped by purpose and joined into one flat list
        script_arguments = (
            ["--task_name", "long_term_forecast", "--is_training", "1"]
            + ["--root_path", "../../../01_datasets/"]
            + ["--data_path", "df_most_important_columns.csv"]
            + ["--model_id", model_id, "--model", "Informer"]
            + ["--data", "custom", "--features", "S", "--target", column]
            + ["--seq_len", "96", "--label_len", "48", "--pred_len", pred_len]
            + ["--e_layers", "2", "--d_layers", "5", "--factor", "5"]
            + ["--enc_in", "1", "--dec_in", "1", "--c_out", "1"]
            + ["--des", "Exp", "--itr", "2"]
        )

        script_output = run_and_capture_script_output(script_path, script_arguments)
        print("Captured Output:")
        print(script_output)

        # Keep only the lines that name the result settings, then strip them
        # down to the setting names and average the stored metrics.
        settings_prints = extract_settings_prints(script_output.splitlines())
        matched_patterns = pattern_matching(settings_prints)
        summarize_results(matched_patterns)


Captured Output:
Args in experiment:
Namespace(activation='gelu', anomaly_ratio=0.25, batch_size=32, c_out=1, checkpoints='./checkpoints/', d_ff=2048, d_layers=5, d_model=512, data='custom', data_path='df_most_important_columns.csv', dec_in=1, des='Exp', devices='0,1,2,3', distil=True, dropout=0.1, e_layers=2, embed='timeF', enc_in=1, factor=5, features='S', freq='h', gpu=0, inverse=False, is_training=1, itr=2, label_len=48, learning_rate=0.0001, loss='MSE', lradj='type1', mask_rate=0.25, model='Informer', model_id='_24_DE_solar_generation_actual', moving_avg=25, n_heads=8, num_kernels=6, num_workers=10, output_attention=False, p_hidden_dims=[128, 128], p_hidden_layers=2, patience=3, pred_len=24, root_path='../dataset/Open-Power-System/', seasonal_patterns='Monthly', seq_len=96, target='DE_solar_generation_actual', task_name='long_term_forecast', top_k=5, train_epochs=10, use_amp=False, use_gpu=True, use_multi_gpu=False)
Use GPU: cuda:0
>>>>>>>start training : long_term_forecast__24_DE

## 6. DLinear

In [19]:
# Multivariate DLinear grid search over every (dataset, prediction length) combination.
# List of data paths and prediction lengths for the grid search
data_paths = ["df_all_columns.csv", "df_most_important_columns.csv", "df_only_generation_columns.csv"]
prediction_lengths = ["24", "48", "96", "192"]
for data_path in data_paths:
    full_data_path = "../../../01_datasets/" + data_path
    # Only the column count is needed, so read just the header row instead
    # of parsing the complete dataset.
    df = pd.read_csv(full_data_path, nrows=0)
    num_columns = len(df.columns)
    # One column is not a model channel (presumably the timestamp column --
    # TODO confirm against the dataset layout).
    feature_count = str(num_columns - 1)
    for pred_len in prediction_lengths:
        # Define the script arguments as a list
        model_id = f"_{pred_len}_{data_path.replace('.csv', '')}"  # Create the model_id
        script_arguments = [
            "--task_name", "long_term_forecast",
            "--is_training", "1",
            "--root_path", "../../../01_datasets/",
            "--data_path", data_path,
            "--model_id", model_id,
            "--model", "DLinear",
            "--data", "custom",
            "--features", "M",
            "--seq_len", "96",
            "--label_len", "48",
            "--pred_len", pred_len,
            "--e_layers", "2",
            "--d_layers", "5",
            "--factor", "5",
            "--enc_in", feature_count,
            "--dec_in", feature_count,
            "--c_out", feature_count,
            "--des", "Exp",
            "--itr", "2"
        ]

        script_output = run_and_capture_script_output(script_path, script_arguments)

        # Print the captured output
        print("Captured Output:")
        print(script_output)

        # Extract and save the prints related to the settings variable
        settings_prints = extract_settings_prints(script_output.splitlines())

        # Reduce the captured lines to the result folder names
        matched_patterns = pattern_matching(settings_prints)

        # Average the metrics of the repetitions and append them to the report
        summarize_results(matched_patterns)


Captured Output:
Args in experiment:
Namespace(activation='gelu', anomaly_ratio=0.25, batch_size=32, c_out=29, checkpoints='./checkpoints/', d_ff=2048, d_layers=5, d_model=512, data='custom', data_path='df_all_columns.csv', dec_in=29, des='Exp', devices='0,1,2,3', distil=True, dropout=0.1, e_layers=2, embed='timeF', enc_in=29, factor=5, features='M', freq='h', gpu=0, inverse=False, is_training=1, itr=2, label_len=48, learning_rate=0.0001, loss='MSE', lradj='type1', mask_rate=0.25, model='DLinear', model_id='_24_df_all_columns', moving_avg=25, n_heads=8, num_kernels=6, num_workers=10, output_attention=False, p_hidden_dims=[128, 128], p_hidden_layers=2, patience=3, pred_len=24, root_path='../dataset/Open-Power-System/', seasonal_patterns='Monthly', seq_len=96, target='OT', task_name='long_term_forecast', top_k=5, train_epochs=10, use_amp=False, use_gpu=True, use_multi_gpu=False)
Use GPU: cuda:0
>>>>>>>start training : long_term_forecast__24_df_all_columns_DLinear_custom_ftM_sl96_ll48_pl2

In [None]:
# Univariate DLinear grid search: run.py is called once for every
# combination of target column and prediction length.
columns = [
    "DE_load_actual_entsoe_transparency",
    "DE_solar_generation_actual",
    "DE_wind_generation_actual",
]
prediction_lengths = ["24", "48", "96", "192"]
for column in columns:
    for pred_len in prediction_lengths:
        # Identifier that encodes the horizon and the target column
        model_id = "_" + pred_len + "_" + column
        # Command-line arguments, grouped by purpose and joined into one flat list
        script_arguments = (
            ["--task_name", "long_term_forecast", "--is_training", "1"]
            + ["--root_path", "../../../01_datasets/"]
            + ["--data_path", "df_most_important_columns.csv"]
            + ["--model_id", model_id, "--model", "DLinear"]
            + ["--data", "custom", "--features", "S", "--target", column]
            + ["--seq_len", "96", "--label_len", "48", "--pred_len", pred_len]
            + ["--e_layers", "2", "--d_layers", "5", "--factor", "5"]
            + ["--enc_in", "1", "--dec_in", "1", "--c_out", "1"]
            + ["--des", "Exp", "--itr", "2"]
        )

        script_output = run_and_capture_script_output(script_path, script_arguments)
        print("Captured Output:")
        print(script_output)

        # Keep only the lines that name the result settings, then strip them
        # down to the setting names and average the stored metrics.
        settings_prints = extract_settings_prints(script_output.splitlines())
        matched_patterns = pattern_matching(settings_prints)
        summarize_results(matched_patterns)


Captured Output:
Args in experiment:
Namespace(activation='gelu', anomaly_ratio=0.25, batch_size=32, c_out=1, checkpoints='./checkpoints/', d_ff=2048, d_layers=5, d_model=512, data='custom', data_path='df_most_important_columns.csv', dec_in=1, des='Exp', devices='0,1,2,3', distil=True, dropout=0.1, e_layers=2, embed='timeF', enc_in=1, factor=5, features='S', freq='h', gpu=0, inverse=False, is_training=1, itr=2, label_len=48, learning_rate=0.0001, loss='MSE', lradj='type1', mask_rate=0.25, model='DLinear', model_id='_24_DE_load_actual_entsoe_transparency', moving_avg=25, n_heads=8, num_kernels=6, num_workers=10, output_attention=False, p_hidden_dims=[128, 128], p_hidden_layers=2, patience=3, pred_len=24, root_path='../dataset/Open-Power-System/', seasonal_patterns='Monthly', seq_len=96, target='DE_load_actual_entsoe_transparency', task_name='long_term_forecast', top_k=5, train_epochs=10, use_amp=False, use_gpu=True, use_multi_gpu=False)
Use GPU: cuda:0
>>>>>>>start training : long_term_