In [73]:
import csv
import os

def csv2dict(file):
    dicts = []
    with open(file, mode='r') as f:
        csv_reader = csv.DictReader(f)
        for row in csv_reader:
            new_dict = {}
            for key, value in row.items():
                try:
                    new_dict[key] = float(value)
                except ValueError:
                    new_dict[key] = value
            dicts.append(new_dict)
    return dicts

def load_training_data(features_location, performance_location):
    feature_dicts = csv2dict(features_location)
    performance_matrix = csv2dict(performance_location)
    algorithms = [list(algorithm.keys()) for algorithm in performance_matrix]
    return feature_dicts, performance_matrix, algorithms

def file_name(file_dir):
    for root, dirs, files in os.walk(file_dir):
        return files

features_locations = file_name("data/feature_extraction")
performance_locations = file_name("data/performance")

features_locations.sort()
performance_locations.sort()

X = []
y = []

for i in range (len(features_locations)):
    feature_dicts, performance_matrix, algorithms = load_training_data("data/feature_extraction/"+features_locations[i], "data/performance/"+performance_locations[i])
    
    temp_X = list(feature_dicts[0].values())
    temp_X_clean = [0.0 if val == '' else val for val in temp_X]
    X.append(temp_X_clean)
    
    temp_y = list(performance_matrix[0].values())
    y.append(temp_y)

In [74]:
from keras.models import Model
from keras.layers import Input, Dense
from keras import regularizers
import numpy as np

In [75]:
X = np.array(X)
y = np.array(y)

from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

input_dim = X.shape[1]
encoding_dim = 100

input_layer = Input(shape=(input_dim,))
# encoded = Dense(encoding_dim, activation='relu', activity_regularizer=regularizers.l1(1e-5))(input_layer)
encoded = Dense(encoding_dim, activation='relu')(input_layer)
decoded = Dense(input_dim, activation='sigmoid')(encoded)

autoencoder = Model(inputs=input_layer, outputs=decoded)

encoder = Model(inputs=input_layer, outputs=encoded)

autoencoder.compile(optimizer='adam', loss='mean_squared_error')

autoencoder.fit(X, X, epochs=100, batch_size=256, shuffle=True, validation_split=0.2)

X_train_encoded = encoder.predict(X)

Epoch 1/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 308ms/step - loss: 951928949462045556736.0000 - val_loss: 2880785485492740161536.0000
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - loss: 951928949462045556736.0000 - val_loss: 2880785485492740161536.0000
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 951929019830789734400.0000 - val_loss: 2880785485492740161536.0000
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 951929019830789734400.0000 - val_loss: 2880785485492740161536.0000
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 951928949462045556736.0000 - val_loss: 2880785485492740161536.0000
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step - loss: 951928879093301379072.0000 - val_loss: 2880785485492740161536.0000
Epoch 7/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━

In [77]:
np.savetxt('X_selected_autoencoder-deep-learning.csv', X_train_encoded, delimiter=',', fmt='%f')

In [78]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train_encoded, y)

In [79]:
def prediction(rf, new_features_location, algorithms):
    new_feature_dicts = csv2dict(new_features_location)
    new_X = [list(new_feature_dict.values()) for new_feature_dict in new_feature_dicts]
    new_X_clean = [[0.0 if val == '' else val for val in row] for row in new_X]
    new_X_clean = np.array(new_X_clean)
    X_test_encoded = encoder.predict(new_X_clean)
    
    predicted_performance = rf.predict(X_test_encoded)
    even_items = predicted_performance[:, ::2]
    odd_items = predicted_performance[:, 1::2]
    
    # Find the best algorithm for MSE and MAE
    best_algorithm_mse = np.argmin(even_items)
    best_algorithm_mae = np.argmin(odd_items)
    
    # Find the second best algorithm for MSE and MAE
    temp_mse = np.copy(even_items)
    temp_mae = np.copy(odd_items)
    temp_mse[0][best_algorithm_mse] = np.inf
    temp_mae[0][best_algorithm_mae] = np.inf
    second_best_algorithm_mse = np.argmin(temp_mse)
    second_best_algorithm_mae = np.argmin(temp_mae)
    
    print('Predicted performance:', predicted_performance)
    algorithm = [[], []]
    algorithm[0] = algorithms[0][::2]
    algorithm[1] = algorithms[0][1::2]

    print("Predicted MSEs:", even_items)
    print("Best algorithm for MSE:", algorithm[0][best_algorithm_mse])
    print("Second best algorithm for MSE:", algorithm[0][second_best_algorithm_mse])
    print("Predicted MAEs:", odd_items)
    print("Best algorithm for MAE:", algorithm[1][best_algorithm_mae])
    print("Second best algorithm for MAE:", algorithm[1][second_best_algorithm_mae])
    
    return

### For the `etth1` dataset:

In [80]:
print(prediction(rf, "data/feature_extraction/etth1_96_features.csv", algorithms))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
Predicted performance: [[0.4153  0.41535 0.39156 0.40628 0.42137 0.43005 0.42341 0.43166 0.40896
  0.43083 0.49788 0.50026 0.42003 0.43185 0.46686 0.46465 0.47325 0.47291
  0.43035 0.44391]]
Predicted MSEs: [[0.4153  0.39156 0.42137 0.42341 0.40896 0.49788 0.42003 0.46686 0.47325
  0.43035]]
Best algorithm for MSE:  GPHT_MSE
Second best algorithm for MSE:  SimMTM_MSE
Predicted MAEs: [[0.41535 0.40628 0.43005 0.43166 0.43083 0.50026 0.43185 0.46465 0.47291
  0.44391]]
Best algorithm for MAE:  GPHT_MAE
Second best algorithm for MAE:  GPHT'_MAE
None


In [81]:
print(prediction(rf, "data/feature_extraction/etth1_192_features.csv", algorithms))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
Predicted performance: [[0.4153  0.41535 0.39156 0.40628 0.42137 0.43005 0.42341 0.43166 0.40896
  0.43083 0.49788 0.50026 0.42003 0.43185 0.46686 0.46465 0.47325 0.47291
  0.43035 0.44391]]
Predicted MSEs: [[0.4153  0.39156 0.42137 0.42341 0.40896 0.49788 0.42003 0.46686 0.47325
  0.43035]]
Best algorithm for MSE:  GPHT_MSE
Second best algorithm for MSE:  SimMTM_MSE
Predicted MAEs: [[0.41535 0.40628 0.43005 0.43166 0.43083 0.50026 0.43185 0.46465 0.47291
  0.44391]]
Best algorithm for MAE:  GPHT_MAE
Second best algorithm for MAE:  GPHT'_MAE
None


In [82]:
print(prediction(rf, "data/feature_extraction/etth1_336_features.csv", algorithms))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
Predicted performance: [[0.4153  0.41535 0.39156 0.40628 0.42137 0.43005 0.42341 0.43166 0.40896
  0.43083 0.49788 0.50026 0.42003 0.43185 0.46686 0.46465 0.47325 0.47291
  0.43035 0.44391]]
Predicted MSEs: [[0.4153  0.39156 0.42137 0.42341 0.40896 0.49788 0.42003 0.46686 0.47325
  0.43035]]
Best algorithm for MSE:  GPHT_MSE
Second best algorithm for MSE:  SimMTM_MSE
Predicted MAEs: [[0.41535 0.40628 0.43005 0.43166 0.43083 0.50026 0.43185 0.46465 0.47291
  0.44391]]
Best algorithm for MAE:  GPHT_MAE
Second best algorithm for MAE:  GPHT'_MAE
None


In [83]:
print(prediction(rf, "data/feature_extraction/etth1_720_features.csv", algorithms))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
Predicted performance: [[0.43004 0.42488 0.4055  0.4151  0.4363  0.44034 0.43508 0.43816 0.41976
  0.43772 0.51644 0.51302 0.43202 0.43922 0.48258 0.4751  0.4943  0.48682
  0.44802 0.45758]]
Predicted MSEs: [[0.43004 0.4055  0.4363  0.43508 0.41976 0.51644 0.43202 0.48258 0.4943
  0.44802]]
Best algorithm for MSE:  GPHT_MSE
Second best algorithm for MSE:  SimMTM_MSE
Predicted MAEs: [[0.42488 0.4151  0.44034 0.43816 0.43772 0.51302 0.43922 0.4751  0.48682
  0.45758]]
Best algorithm for MAE:  GPHT_MAE
Second best algorithm for MAE:  GPHT'_MAE
None


### For the `etth2` dataset:

In [84]:
print(prediction(rf, "data/feature_extraction/etth2_96_features.csv", algorithms))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
Predicted performance: [[0.39222661 0.40903446 0.38477961 0.40526989 0.39465989 0.41881282
  0.38392536 0.41730564 0.38461571 0.40990132 0.42875561 0.44470425
  0.37782704 0.41123368 0.41226532 0.43159757 0.41430054 0.44420796
  0.51142639 0.4906035 ]]
Predicted MSEs: [[0.39222661 0.38477961 0.39465989 0.38392536 0.38461571 0.42875561
  0.37782704 0.41226532 0.41430054 0.51142639]]
Best algorithm for MSE:  Supervised_PatchTST_MSE
Second best algorithm for MSE:  FPT_MSE
Predicted MAEs: [[0.40903446 0.40526989 0.41881282 0.41730564 0.40990132 0.44470425
  0.41123368 0.43159757 0.44420796 0.4906035 ]]
Best algorithm for MAE:  GPHT_MAE
Second best algorithm for MAE:  GPHT'_MAE
None


In [85]:
print(prediction(rf, "data/feature_extraction/etth2_192_features.csv", algorithms))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
Predicted performance: [[0.39222661 0.40903446 0.38477961 0.40526989 0.39465989 0.41881282
  0.38392536 0.41730564 0.38461571 0.40990132 0.42875561 0.44470425
  0.37782704 0.41123368 0.41226532 0.43159757 0.41430054 0.44420796
  0.51142639 0.4906035 ]]
Predicted MSEs: [[0.39222661 0.38477961 0.39465989 0.38392536 0.38461571 0.42875561
  0.37782704 0.41226532 0.41430054 0.51142639]]
Best algorithm for MSE:  Supervised_PatchTST_MSE
Second best algorithm for MSE:  FPT_MSE
Predicted MAEs: [[0.40903446 0.40526989 0.41881282 0.41730564 0.40990132 0.44470425
  0.41123368 0.43159757 0.44420796 0.4906035 ]]
Best algorithm for MAE:  GPHT_MAE
Second best algorithm for MAE:  GPHT'_MAE
None


In [86]:
print(prediction(rf, "data/feature_extraction/etth2_336_features.csv", algorithms))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
Predicted performance: [[0.39222661 0.40903446 0.38477961 0.40526989 0.39465989 0.41881282
  0.38392536 0.41730564 0.38461571 0.40990132 0.42875561 0.44470425
  0.37782704 0.41123368 0.41226532 0.43159757 0.41430054 0.44420796
  0.51142639 0.4906035 ]]
Predicted MSEs: [[0.39222661 0.38477961 0.39465989 0.38392536 0.38461571 0.42875561
  0.37782704 0.41226532 0.41430054 0.51142639]]
Best algorithm for MSE:  Supervised_PatchTST_MSE
Second best algorithm for MSE:  FPT_MSE
Predicted MAEs: [[0.40903446 0.40526989 0.41881282 0.41730564 0.40990132 0.44470425
  0.41123368 0.43159757 0.44420796 0.4906035 ]]
Best algorithm for MAE:  GPHT_MAE
Second best algorithm for MAE:  GPHT'_MAE
None


In [87]:
print(prediction(rf, "data/feature_extraction/etth2_720_features.csv", algorithms))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
Predicted performance: [[0.39222661 0.40903446 0.38477961 0.40526989 0.39465989 0.41881282
  0.38392536 0.41730564 0.38461571 0.40990132 0.42875561 0.44470425
  0.37782704 0.41123368 0.41226532 0.43159757 0.41430054 0.44420796
  0.51142639 0.4906035 ]]
Predicted MSEs: [[0.39222661 0.38477961 0.39465989 0.38392536 0.38461571 0.42875561
  0.37782704 0.41226532 0.41430054 0.51142639]]
Best algorithm for MSE:  Supervised_PatchTST_MSE
Second best algorithm for MSE:  FPT_MSE
Predicted MAEs: [[0.40903446 0.40526989 0.41881282 0.41730564 0.40990132 0.44470425
  0.41123368 0.43159757 0.44420796 0.4906035 ]]
Best algorithm for MAE:  GPHT_MAE
Second best algorithm for MAE:  GPHT'_MAE
None


### For the `ettm1` dataset:

In [88]:
print(prediction(rf, "data/feature_extraction/ettm1_96_features.csv", algorithms))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
Predicted performance: [[0.37144994 0.38785805 0.36053342 0.38092152 0.33598815 0.37707414
  0.34736517 0.38076255 0.35157873 0.3841633  0.37124673 0.39408269
  0.35815725 0.38558174 0.36456244 0.39355849 0.43453439 0.42760983
  0.35713304 0.38037338]]
Predicted MSEs: [[0.37144994 0.36053342 0.33598815 0.34736517 0.35157873 0.37124673
  0.35815725 0.36456244 0.43453439 0.35713304]]
Best algorithm for MSE:  Self-supervised_PatchTST_MSE
Second best algorithm for MSE:  FPT_MSE
Predicted MAEs: [[0.38785805 0.38092152 0.37707414 0.38076255 0.3841633  0.39408269
  0.38558174 0.39355849 0.42760983 0.38037338]]
Best algorithm for MAE:  Self-supervised_PatchTST_MAE
Second best algorithm for MAE:  DLinear_MAE
None


In [89]:
print(prediction(rf, "data/feature_extraction/ettm1_192_features.csv", algorithms))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
Predicted performance: [[0.37144994 0.38785805 0.36053342 0.38092152 0.33598815 0.37707414
  0.34736517 0.38076255 0.35157873 0.3841633  0.37124673 0.39408269
  0.35815725 0.38558174 0.36456244 0.39355849 0.43453439 0.42760983
  0.35713304 0.38037338]]
Predicted MSEs: [[0.37144994 0.36053342 0.33598815 0.34736517 0.35157873 0.37124673
  0.35815725 0.36456244 0.43453439 0.35713304]]
Best algorithm for MSE:  Self-supervised_PatchTST_MSE
Second best algorithm for MSE:  FPT_MSE
Predicted MAEs: [[0.38785805 0.38092152 0.37707414 0.38076255 0.3841633  0.39408269
  0.38558174 0.39355849 0.42760983 0.38037338]]
Best algorithm for MAE:  Self-supervised_PatchTST_MAE
Second best algorithm for MAE:  DLinear_MAE
None


In [90]:
print(prediction(rf, "data/feature_extraction/ettm1_336_features.csv", algorithms))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
Predicted performance: [[0.37144994 0.38785805 0.36053342 0.38092152 0.33598815 0.37707414
  0.34736517 0.38076255 0.35157873 0.3841633  0.37124673 0.39408269
  0.35815725 0.38558174 0.36456244 0.39355849 0.43453439 0.42760983
  0.35713304 0.38037338]]
Predicted MSEs: [[0.37144994 0.36053342 0.33598815 0.34736517 0.35157873 0.37124673
  0.35815725 0.36456244 0.43453439 0.35713304]]
Best algorithm for MSE:  Self-supervised_PatchTST_MSE
Second best algorithm for MSE:  FPT_MSE
Predicted MAEs: [[0.38785805 0.38092152 0.37707414 0.38076255 0.3841633  0.39408269
  0.38558174 0.39355849 0.42760983 0.38037338]]
Best algorithm for MAE:  Self-supervised_PatchTST_MAE
Second best algorithm for MAE:  DLinear_MAE
None


In [91]:
print(prediction(rf, "data/feature_extraction/ettm1_720_features.csv", algorithms))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
Predicted performance: [[0.37144994 0.38785805 0.36053342 0.38092152 0.33598815 0.37707414
  0.34736517 0.38076255 0.35157873 0.3841633  0.37124673 0.39408269
  0.35815725 0.38558174 0.36456244 0.39355849 0.43453439 0.42760983
  0.35713304 0.38037338]]
Predicted MSEs: [[0.37144994 0.36053342 0.33598815 0.34736517 0.35157873 0.37124673
  0.35815725 0.36456244 0.43453439 0.35713304]]
Best algorithm for MSE:  Self-supervised_PatchTST_MSE
Second best algorithm for MSE:  FPT_MSE
Predicted MAEs: [[0.38785805 0.38092152 0.37707414 0.38076255 0.3841633  0.39408269
  0.38558174 0.39355849 0.42760983 0.38037338]]
Best algorithm for MAE:  Self-supervised_PatchTST_MAE
Second best algorithm for MAE:  DLinear_MAE
None


### For the `ettm2` dataset:

In [92]:
print(prediction(rf, "data/feature_extraction/ettm2_96_features.csv", algorithms))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
Predicted performance: [[0.24511167 0.29934107 0.23287613 0.29234107 0.23742077 0.30500831
  0.23464996 0.30516565 0.2326606  0.30103075 0.24848008 0.3145892
  0.23954585 0.30789646 0.24235104 0.31105517 0.24980731 0.31480533
  0.23886008 0.31469673]]
Predicted MSEs: [[0.24511167 0.23287613 0.23742077 0.23464996 0.2326606  0.24848008
  0.23954585 0.24235104 0.24980731 0.23886008]]
Best algorithm for MSE:  SimMTM_MSE
Second best algorithm for MSE:  GPHT_MSE
Predicted MAEs: [[0.29934107 0.29234107 0.30500831 0.30516565 0.30103075 0.3145892
  0.30789646 0.31105517 0.31480533 0.31469673]]
Best algorithm for MAE:  GPHT_MAE
Second best algorithm for MAE:  GPHT'_MAE
None


In [93]:
print(prediction(rf, "data/feature_extraction/ettm2_192_features.csv", algorithms))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
Predicted performance: [[0.24511167 0.29934107 0.23287613 0.29234107 0.23742077 0.30500831
  0.23464996 0.30516565 0.2326606  0.30103075 0.24848008 0.3145892
  0.23954585 0.30789646 0.24235104 0.31105517 0.24980731 0.31480533
  0.23886008 0.31469673]]
Predicted MSEs: [[0.24511167 0.23287613 0.23742077 0.23464996 0.2326606  0.24848008
  0.23954585 0.24235104 0.24980731 0.23886008]]
Best algorithm for MSE:  SimMTM_MSE
Second best algorithm for MSE:  GPHT_MSE
Predicted MAEs: [[0.29934107 0.29234107 0.30500831 0.30516565 0.30103075 0.3145892
  0.30789646 0.31105517 0.31480533 0.31469673]]
Best algorithm for MAE:  GPHT_MAE
Second best algorithm for MAE:  GPHT'_MAE
None


In [94]:
print(prediction(rf, "data/feature_extraction/ettm2_336_features.csv", algorithms))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
Predicted performance: [[0.24511167 0.29934107 0.23287613 0.29234107 0.23742077 0.30500831
  0.23464996 0.30516565 0.2326606  0.30103075 0.24848008 0.3145892
  0.23954585 0.30789646 0.24235104 0.31105517 0.24980731 0.31480533
  0.23886008 0.31469673]]
Predicted MSEs: [[0.24511167 0.23287613 0.23742077 0.23464996 0.2326606  0.24848008
  0.23954585 0.24235104 0.24980731 0.23886008]]
Best algorithm for MSE:  SimMTM_MSE
Second best algorithm for MSE:  GPHT_MSE
Predicted MAEs: [[0.29934107 0.29234107 0.30500831 0.30516565 0.30103075 0.3145892
  0.30789646 0.31105517 0.31480533 0.31469673]]
Best algorithm for MAE:  GPHT_MAE
Second best algorithm for MAE:  GPHT'_MAE
None


In [95]:
print(prediction(rf, "data/feature_extraction/ettm2_720_features.csv", algorithms))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
Predicted performance: [[0.24511167 0.29934107 0.23287613 0.29234107 0.23742077 0.30500831
  0.23464996 0.30516565 0.2326606  0.30103075 0.24848008 0.3145892
  0.23954585 0.30789646 0.24235104 0.31105517 0.24980731 0.31480533
  0.23886008 0.31469673]]
Predicted MSEs: [[0.24511167 0.23287613 0.23742077 0.23464996 0.2326606  0.24848008
  0.23954585 0.24235104 0.24980731 0.23886008]]
Best algorithm for MSE:  SimMTM_MSE
Second best algorithm for MSE:  GPHT_MSE
Predicted MAEs: [[0.29934107 0.29234107 0.30500831 0.30516565 0.30103075 0.3145892
  0.30789646 0.31105517 0.31480533 0.31469673]]
Best algorithm for MAE:  GPHT_MAE
Second best algorithm for MAE:  GPHT'_MAE
None
