In [9]:
import numpy as np
from tpot import TPOTRegressor
from sklearn.model_selection import train_test_split
from datasets.get_dataset import get_dataset

In [10]:
from math import sqrt
from sklearn.metrics import make_scorer, mean_squared_error

from tpot_evaluation import create_dataset


# Load the "electricity" dataset via the project loader, passing size=1000.
dataset = get_dataset(
    "electricity",
    size=1000,
)


def rmse(y_true, y_pred):
    """Return the root mean squared error between targets and predictions.

    Computes scikit-learn's ``mean_squared_error`` for ``y_true`` and
    ``y_pred`` and takes its square root.
    """
    mse = mean_squared_error(y_true, y_pred)
    return sqrt(mse)

# Seed shared by the train/test split and the TPOT search, so that a fresh
# "Restart & Run All" reproduces the same pipeline and scores.
RANDOM_SEED = 0

# scikit-learn scorers are maximised; greater_is_better=False makes the scorer
# report the negated RMSE, which is why the CV scores printed by TPOT are
# negative.
rmse_scorer = make_scorer(rmse, greater_is_better=False)

X, y = create_dataset(dataset, n_historical=100, n_steps_ahead=2)
# NOTE(review): train_test_split shuffles rows by default. If create_dataset
# builds overlapping sliding windows from a time series, a shuffled split can
# leak information between train and test -- confirm, and consider
# shuffle=False for a chronological hold-out.
train_X, test_X, train_y, test_y = train_test_split(
    X, y, test_size=0.1, random_state=RANDOM_SEED
)


# Initialize TPOT regressor
tpot = TPOTRegressor(
    scoring=rmse_scorer,
    generations=20,
    population_size=10,
    verbosity=2,
    n_jobs=20,
    random_state=RANDOM_SEED,  # the evolutionary search is stochastic; pin it
)

# WARNING: to make tpot work for multivariate targets, you need to apply a fix
# to tpot.base package yourself, described here:
# https://github.com/EpistasisLab/tpot/pull/903
# Fit TPOT to the data
tpot.fit(train_X, train_y)

# Export the best model
tpot.export("best_tpot_model.py")

                                                                             
Generation 1 - Current best internal CV score: -1.4726008522003422
                                                                             
Generation 2 - Current best internal CV score: -1.4518251167840244
                                                                             
Generation 3 - Current best internal CV score: -1.4518251167840244
                                                                             
Generation 4 - Current best internal CV score: -1.4450470591117015
                                                                             
Generation 5 - Current best internal CV score: -1.4450470591117015
                                                                             
Generation 6 - Current best internal CV score: -1.4450470591117015
                                                                             
Generation 7 - Current best internal CV score: -1.43

In [11]:
# Score the fitted TPOT pipeline on the held-out split.
predictions = tpot.predict(test_X)
# Stored as `test_rmse` rather than `rmse` so the `rmse` helper function is
# not rebound to a float, which would break any later or re-run call to it.
test_rmse = rmse(test_y, predictions)
print(f"Test RMSE: {test_rmse}")

Test RMSE: 1.3495065618222601


In [12]:
# TODO: planned comparison workflow
# - 1/2 runs per dataset
# - run tsot and TPOT
# - extract the best algorithm setups
# - evaluate both in the tsot evaluation setup