In [1]:
import numpy as np
import pandas as pd
from tpot import TPOTRegressor
from ucimlrepo import fetch_ucirepo 
from sklearn.model_selection import train_test_split

electrical_grid_stability_simulated_data = fetch_ucirepo(id=471).data

features = electrical_grid_stability_simulated_data.features 
targets = electrical_grid_stability_simulated_data.targets

data = pd.merge(features, targets, left_index=True, right_index=True, how='outer')
data.drop(columns=['stabf', 'p1', 'p2', 'p3', 'p4'], inplace=True)

In [2]:
data.head()

Unnamed: 0,tau1,tau2,tau3,tau4,g1,g2,g3,g4,stab
0,2.95906,3.079885,8.381025,9.780754,0.650456,0.859578,0.887445,0.958034,0.055347
1,9.304097,4.902524,3.047541,1.369357,0.413441,0.862414,0.562139,0.78176,-0.005957
2,8.971707,8.848428,3.046479,1.214518,0.163041,0.766689,0.839444,0.109853,0.003471
3,0.716415,7.6696,4.486641,2.340563,0.446209,0.976744,0.929381,0.362718,0.028871
4,3.134112,7.608772,4.943759,9.857573,0.79711,0.45545,0.656947,0.820923,0.04986


In [3]:
import os

# Get the number of CPUs
num_cpus = os.cpu_count()

print(f"Number of CPUs: {num_cpus}")

Number of CPUs: 12


In [4]:
auto_ml = TPOTRegressor(
        # generations=100,           # Allows for decent exploration with reasonable runtime
        # population_size=10,       # Balances diversity with computational efficiency
        offspring_size=None,      # Defaults to population_size if not set
        mutation_rate=0.9,        # High mutation rate encourages exploration
        crossover_rate=0.1,       # Low crossover rate emphasizes exploration over exploitation
        cv=5,                     # 5-fold cross-validation balances performance with runtime
        use_dask=True,
        n_jobs=num_cpus,
        early_stop=10,
        periodic_checkpoint_folder='./checkpoint',
        verbosity=2               # Provides updates on the progress
)

In [5]:
STAB_COLUMN_INDEX = list(data.columns).index('stab')

train, test = train_test_split(data, test_size=0.2)

y_train = train['stab'].values
x_train = train.drop(columns=['stab']).values

y_test = test['stab'].values
x_test = test.drop(columns=['stab']).values

auto_ml.fit(x_train, y_train)
    
pred = auto_ml.predict(x_test)

                                                                                  
Generation 1 - Current best internal CV score: -7.234223320199358e-05
                                                                                  
Generation 2 - Current best internal CV score: -7.234223320199358e-05
                                                                                  
Generation 3 - Current best internal CV score: -7.234223320199358e-05
                                                                                  
Generation 4 - Current best internal CV score: -7.062653055136152e-05
                                                                                  
Generation 5 - Current best internal CV score: -6.637736165970412e-05
                                                                                   
Generation 6 - Current best internal CV score: -6.637736165970412e-05
                                                                                 

In [6]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

print(r2_score(y_test, pred))
print(mean_squared_error(y_test, pred))
print(mean_absolute_error(y_test, pred))
print(np.mean(np.abs((y_test - pred) / y_test)) * 100)

0.9725315267793702
3.812375098790591e-05
0.004421909925339709
48.702336690973866
