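Use the hyperparameters found in the model-tuning notebook to train the final model. **Note:** as written, the model is fit on the training split only (`X_train`, `y_train`) and evaluated on the held-out test split; it is not refit on the entire dataset before being saved.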

In [2]:
# Model: ExtraTreeRegressor
#   RandomizedSearchCV results (from the model-tuning notebook):
#     Best params: {'min_samples_split': 2, 'min_samples_leaf': 2, 'max_features': None, 'max_depth': 20}
#     Best CV score: 0.966285
#     Test R-squared score: 0.96

In [1]:
import joblib
from config import ELECTRIC_MODEL_FILE, TRAIN_TEST_SPLIT_ELECTRIC_FILE
from sklearn.metrics import mean_squared_error, r2_score, root_mean_squared_error
# Import the estimator from the public `sklearn.tree` package; the
# underscore-prefixed `sklearn.tree._classes` module is an internal detail.
from sklearn.tree import ExtraTreeRegressor

In [2]:
# Restore the persisted train/test split written by an earlier step.
# NOTE(review): joblib.load unpickles the file, so it must come from a trusted source.
split_parts = joblib.load(TRAIN_TEST_SPLIT_ELECTRIC_FILE)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Fixed seed: ExtraTreeRegressor draws its candidate splits at random, so an
# unseeded fit produces a different tree (and different metrics / saved model)
# on every run of the notebook.
RANDOM_STATE = 42

# Hyperparameters match the "Best Params" noted at the top of this notebook.
model = ExtraTreeRegressor(
    min_samples_split=2,
    min_samples_leaf=2,
    max_features=None,
    max_depth=20,
    random_state=RANDOM_STATE,
)

# Fit on the training split; the test split is kept for evaluation below.
model.fit(X_train, y_train)

In [4]:
# Generate predictions for the held-out test features
y_pred = model.predict(X=X_test)

In [5]:
# Evaluate the test-set predictions: compute every metric first, then report.

# Coefficient of determination
r2 = r2_score(y_test, y_pred)

# Adjusted R-Squared: R-Squared corrected for the number of predictors
n = len(y_test)      # sample count
p = X_test.shape[1]  # predictor count
adjusted_r2 = 1 - (1 - r2) * (n - 1) / (n - p - 1)

# Squared-error metrics
mean_squared_error_reg = mean_squared_error(y_test, y_pred)
root_mean_squared_error_reg = root_mean_squared_error(y_test, y_pred)

# Print in a fixed order, one metric per line
for label, value in (
    ("R-Squared: ", r2),
    ("Adjusted R-Squared: ", adjusted_r2),
    ("MSE: ", mean_squared_error_reg),
    ("RMSE: ", root_mean_squared_error_reg),
):
    print(label, value)


R-Squared:  0.9706514944335698
Adjusted R-Squared:  0.9706438238558385
MSE:  17.6157250532277
RMSE:  4.197109130488234


In [6]:
# Persist the fitted estimator; the cell output lists the file(s) written
joblib.dump(value=model, filename=ELECTRIC_MODEL_FILE)

['files/output/models/electric-model.pkl']