In [17]:
import time
import numpy as np
from src.data_loader import load_data
from src.feature_engineering import average_daily
from src.model.train_model import get_X_y, train_and_evaluate_models, get_best_fold_data_from_kf
import os
from objective_function import objective_et, objective_lasso
from src.Optimiser.KOA.koa_optimizer import koa_optimizer
from src.Optimiser.COA.coa_optimizer import coa_optimizer
from sklearn.linear_model import Lasso
from sklearn.model_selection import KFold
from sklearn.metrics import mean_absolute_error
import matplotlib.pyplot as plt
from analysis.wilcoxon_test import perform_wilcoxon_test

# Path of the source Excel workbook, built relative to the current working directory.
DATA_PATH = os.path.join("data", "Data.xlsx")


In [12]:
# Read the raw dataset from DATA_PATH and report its dimensions.
print("🔹 Loading raw data...")
df = load_data(DATA_PATH)
print(f"✅ Raw data shape: {df.shape}")

🔹 Loading raw data...
✅ Raw data shape: (50000, 12)


In [13]:
# Reduce the raw frame with average_daily and preview the result.
# NOTE(review): samples_per_day=96 presumably means 96 consecutive raw rows form
# one day — confirm against average_daily.
print("🔹 Aggregating daily averages (96 samples per day)...")
df_avg = average_daily(df, samples_per_day=96)
print(f"✅ Aggregated data shape: {df_avg.shape}")
print("✅ Sample preview:")
print(df_avg.head())

🔹 Aggregating daily averages (96 samples per day)...
✅ Aggregated data shape: (520, 12)
✅ Sample preview:
   Voltage (V)  Current (A)  Power Consumption (kW)  Temperature (Â°C)  \
0   229.442065    23.274581                5.338170          24.458245   
1   230.252406    27.590016                6.355897          25.281132   
2   230.139098    26.616651                6.113400          25.286834   
3   230.450687    28.109668                6.483036          26.025139   
4   230.131332    26.532084                6.102769          24.729869   

   Humidity (%)  Reactive Power (kVAR)  Power Factor  Solar Power (kW)  \
0     49.931362               1.335934      0.904576         24.128362   
1     47.592609               1.547024      0.902831         25.205873   
2     46.656764               1.560915      0.898813         22.725012   
3     51.258872               1.580661      0.905492         24.628754   
4     49.909403               1.526028      0.902028         23.925562   

   W

In [14]:
# Split the aggregated frame into the feature matrix X and the target y.
print("🔹 Preparing features and target...")
X, y = get_X_y(df_avg)
print(f"✅ X shape: {X.shape}, y shape: {y.shape}")


🔹 Preparing features and target...
✅ X shape: (520, 11), y shape: (520,)


In [22]:
from src.Utils.K_Fold import K_Fold


# Produce the train/test split shared by every later cell, plus a combined frame.
# NOTE(review): the recorded output of this cell is a ValueError ("Setting a
# random_state has no effect since shuffle is False"); it is presumably raised
# by a KFold built inside K_Fold — fix it there (shuffle=True or drop random_state).
print("🔹 Performing K-Fold...")
X_train_shared, X_test_shared, y_train_shared, y_test_shared, combined_df = K_Fold(X, y, n_splits=5)


🔹 Performing K-Fold...


ValueError: Setting a random_state has no effect since shuffle is False. You should leave random_state to its default (None), or set shuffle=True.

# Write the K-Fold output to the Excel file

In [None]:
import pandas as pd
from openpyxl import load_workbook

# Store the K-Fold output in its own sheet of the source workbook.
# Append mode keeps every other sheet, and if_sheet_exists='replace' lets pandas
# drop a stale 'DATA after K-Fold' sheet itself. The workbook is therefore
# loaded and saved once, instead of once for a manual delete and again for the
# write. Requires pandas >= 1.3.
with pd.ExcelWriter(
    DATA_PATH, engine='openpyxl', mode='a', if_sheet_exists='replace'
) as writer:
    combined_df.to_excel(writer, sheet_name='DATA after K-Fold', index=False)


KeyboardInterrupt: 

In [None]:
def summarize_metrics(metrics_list):
    """Average every metric over a list of per-run metric dictionaries.

    Parameters
    ----------
    metrics_list : sequence of dict
        One ``{metric_name: value}`` mapping per run. The keys of the first
        entry decide which metrics are reported; every other entry must
        contain those keys.

    Returns
    -------
    dict
        ``{metric_name: mean value}``. An empty input yields an empty dict
        instead of failing on ``metrics_list[0]``.

    Raises
    ------
    KeyError
        If a later entry lacks one of the first entry's keys.
    """
    # len() rather than truthiness so array-like inputs are handled too.
    if len(metrics_list) == 0:
        return {}
    return {key: np.mean([m[key] for m in metrics_list]) for key in metrics_list[0]}

# Average the per-fold baseline scores and print one section per model.
# NOTE(review): etr_scores and lasso_scores are not created by any cell shown
# in this notebook; they must already exist in the kernel.
avg_etr = summarize_metrics(etr_scores)
avg_lasso = summarize_metrics(lasso_scores)

print("\n🔹 Average Baseline Model Scores:")
for heading, averages in (
    ("🔹 Extra Trees Regressor:", avg_etr),
    ("\n🔹 Lasso Regression:", avg_lasso),
):
    print(heading)
    for metric, value in averages.items():
        print(f"  {metric}: {value:.4f}")



🔹 Average Baseline Model Scores:
🔹 Extra Trees Regressor:
  R2: 0.9681
  RMSE: 0.0032
  MAE: 0.0410
  MAPE: 0.6522
  MARD: 0.3257

🔹 Lasso Regression:
  R2: 0.9359
  RMSE: 0.0063
  MAE: 0.0622
  MAPE: 0.9866
  MARD: 0.4928


# Define the parameter bounds for the optimizers


In [None]:

# Extra Trees Regressor search space, one entry per tuned hyperparameter in the
# order: n_estimators, max_depth, min_samples_split, min_samples_leaf.
lb_et = [10, 5, 2, 1]
ub_et = [300, 50, 10, 10]
dim_et = len(lb_et)

# Lasso Regression search space: alpha, max_iter.
lb_lasso = [0.0001, 100]
ub_lasso = [1.0, 1000]
dim_lasso = len(lb_lasso)

# Population size and iteration budget handed to the optimisers.
n_agents, max_iter = 3, 5

# @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

In [None]:
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_absolute_error
import numpy as np
params = []
def train_raw_model(model_name, X_train, y_train, X_test, y_test):
    """Fit one baseline regressor with fixed hyperparameters and score it.

    Parameters
    ----------
    model_name : str
        ``"ETR"`` for ExtraTreesRegressor or ``"LR"`` for Lasso.
    X_train, y_train
        Data the model is fitted on.
    X_test, y_test
        Data the model is evaluated on.

    Returns
    -------
    dict
        ``model_name``, the test ``predictions``, their ``mae`` against
        ``y_test`` and the ``params`` dict the model was built with.

    Raises
    ------
    ValueError
        If ``model_name`` is neither ``"ETR"`` nor ``"LR"``.
    """
    # The settings below are hand-picked constants, not the library defaults.
    if model_name == "ETR":
        estimator_cls = ExtraTreesRegressor
        params = dict(random_state=42, n_estimators=99, max_depth=3, min_samples_split=5)
    elif model_name == "LR":
        estimator_cls = Lasso
        params = dict(random_state=42, alpha=0.1, max_iter=1000, tol=0.01)
    else:
        raise ValueError("Unsupported model name. Use 'ETR' or 'LR'.")

    model = estimator_cls(**params)
    model.fit(X_train, y_train)

    predictions = model.predict(X_test)
    return {
        "model_name": model_name,
        "predictions": predictions,
        "mae": mean_absolute_error(y_test, predictions),
        "params": params,
    }






In [None]:
# Fit both fixed-parameter baselines on the shared split (ETR first, then LR).
etr_result, lr_result = (
    train_raw_model(name, X_train_shared, y_train_shared, X_test_shared, y_test_shared)
    for name in ("ETR", "LR")
)

# Report the test MAE and the parameters each baseline was built with.
# The "Best Params" label refers to the fixed settings; nothing was tuned here.
print("🔹 Extra Trees MAE:", etr_result["mae"])
print("🔹 Lasso Regression MAE:", lr_result["mae"])
print("🔧 Best Params (ETR):", etr_result["params"])
print("🔧 Best Params (LR):", lr_result["params"])



🔹 Extra Trees MAE: 0.0342600534746883
🔹 Lasso Regression MAE: 0.04867650404816282
🔧 Best Params (ETR): {'random_state': 42, 'n_estimators': 99, 'max_depth': 3, 'min_samples_split': 5}
🔧 Best Params (LR): {'random_state': 42, 'alpha': 0.1, 'max_iter': 1000, 'tol': 0.01}


In [None]:
import numpy as np

def koa_optimizer(objective_function, lb, ub, dim, n_agents, max_iter, X, y, seed=None):
    """Minimise ``objective_function`` with a greedy random-peer population search.

    Each agent repeatedly takes a random fraction of the step towards a randomly
    chosen agent. The move is clipped to ``[lb, ub]`` and kept only when it
    lowers that agent's own score.

    Parameters
    ----------
    objective_function : callable
        Called as ``objective_function(position, X, y)``; must return a number
        to minimise.
    lb, ub : array-like of length ``dim``
        Lower and upper bound of every search dimension.
    dim : int
        Number of search dimensions.
    n_agents : int
        Population size (at least 1).
    max_iter : int
        Number of sweeps over the population.
    X, y
        Passed through unchanged to ``objective_function``.
    seed : int, optional
        When given, a private ``numpy.random.RandomState(seed)`` drives the
        search so the run is reproducible. The default ``None`` keeps using
        NumPy's global random state, as before.

    Returns
    -------
    (numpy.ndarray, float)
        Best position found and its objective value.

    Raises
    ------
    ValueError
        If ``n_agents`` is smaller than 1.
    TypeError
        If ``objective_function`` returns ``None`` (e.g. a missing ``return``).
    """
    if n_agents < 1:
        raise ValueError("n_agents must be at least 1")

    rng = np.random if seed is None else np.random.RandomState(seed)

    def evaluate(candidate):
        # Fail with a clear message instead of the opaque
        # "'<' not supported between 'NoneType' and 'NoneType'" raised later.
        score = objective_function(candidate, X, y)
        if score is None:
            raise TypeError(
                "objective_function returned None; it must return a numeric score to minimise"
            )
        return float(score)

    pos = rng.uniform(low=lb, high=ub, size=(n_agents, dim))
    fit = np.array([evaluate(ind) for ind in pos])

    best_idx = np.argmin(fit)
    best_pos = pos[best_idx].copy()
    best_fit = fit[best_idx]

    for t in range(max_iter):
        for i in range(n_agents):
            # Move a random fraction (per dimension) of the way towards a random agent.
            rand_idx = rng.randint(n_agents)
            r = rng.rand(dim)
            new_pos = np.clip(pos[i] + r * (pos[rand_idx] - pos[i]), lb, ub)
            new_fit = evaluate(new_pos)

            # Greedy acceptance: an agent never moves to a worse position.
            if new_fit < fit[i]:
                pos[i] = new_pos
                fit[i] = new_fit

                if new_fit < best_fit:
                    best_fit = new_fit
                    # Copy so the returned best never aliases a working array.
                    best_pos = new_pos.copy()

        print(f"🔁 Iter {t+1}/{max_iter} - Best MAE: {best_fit:.5f}")

    return best_pos, best_fit


# Example for Extra Trees Regressor (ETR)

In [None]:
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.metrics import mean_absolute_error

# KOA objective for the Extra Trees Regressor
def fobj_etr(params, X, y):
    """Return the MAE of an ExtraTreesRegressor built from ``params``.

    ``params`` holds, in order, n_estimators, max_depth and min_samples_split;
    each value is truncated to an integer. The model is fitted on ``(X, y)``
    and scored on the same data, so the result is an in-sample error.
    """
    names = ("n_estimators", "max_depth", "min_samples_split")
    settings = {name: int(params[idx]) for idx, name in enumerate(names)}

    model = ExtraTreesRegressor(random_state=42, **settings)
    model.fit(X, y)
    return mean_absolute_error(y, model.predict(X))

# Search space for this run: (n_estimators, max_depth, min_samples_split).
# These names are plain globals and replace any earlier values.
lb = np.array([50, 2, 2])
ub = np.array([150, 10, 10])
dim = 3
n_agents, max_iter = 10, 20

# KOA search over the training split.
best_pos_etr, best_mae_etr = koa_optimizer(
    fobj_etr, lb, ub, dim, n_agents, max_iter, X_train_shared, y_train_shared
)

# Refit with the winning position, truncated to integers as fobj_etr does.
best_params_etr = dict(
    random_state=42,
    n_estimators=int(best_pos_etr[0]),
    max_depth=int(best_pos_etr[1]),
    min_samples_split=int(best_pos_etr[2]),
)
etr_model = ExtraTreesRegressor(**best_params_etr)
etr_model.fit(X_train_shared, y_train_shared)

# Train predictions followed by test predictions, in one array.
train_preds_etr = etr_model.predict(X_train_shared)
test_preds_etr = etr_model.predict(X_test_shared)
etr_final_preds = np.concatenate((train_preds_etr, test_preds_etr))


🔁 Iter 1/20 - Best MAE: 0.00940
🔁 Iter 2/20 - Best MAE: 0.00940
🔁 Iter 3/20 - Best MAE: 0.00940
🔁 Iter 4/20 - Best MAE: 0.00930
🔁 Iter 5/20 - Best MAE: 0.00930
🔁 Iter 6/20 - Best MAE: 0.00930
🔁 Iter 7/20 - Best MAE: 0.00930
🔁 Iter 8/20 - Best MAE: 0.00930
🔁 Iter 9/20 - Best MAE: 0.00929
🔁 Iter 10/20 - Best MAE: 0.00929
🔁 Iter 11/20 - Best MAE: 0.00929
🔁 Iter 12/20 - Best MAE: 0.00929
🔁 Iter 13/20 - Best MAE: 0.00929
🔁 Iter 14/20 - Best MAE: 0.00929
🔁 Iter 15/20 - Best MAE: 0.00929
🔁 Iter 16/20 - Best MAE: 0.00929
🔁 Iter 17/20 - Best MAE: 0.00929
🔁 Iter 18/20 - Best MAE: 0.00929
🔁 Iter 19/20 - Best MAE: 0.00929
🔁 Iter 20/20 - Best MAE: 0.00929


# Example for Lasso Regression (LR)

In [None]:
from sklearn.linear_model import Lasso

# KOA objective for Lasso regression
def fobj_lasso(params, X, y):
    """Return the MAE of a Lasso model built from ``params``.

    ``params`` holds, in order, alpha, max_iter (truncated to an integer) and
    tol. The model is fitted on ``(X, y)`` and scored on the same data, so the
    result is an in-sample error.
    """
    alpha, iteration_cap, tolerance = params[0], int(params[1]), params[2]

    model = Lasso(random_state=42, alpha=alpha, max_iter=iteration_cap, tol=tolerance)
    model.fit(X, y)
    return mean_absolute_error(y, model.predict(X))

# Search space for this run: (alpha, max_iter, tol).
# These names are plain globals and replace any earlier values.
lb_lasso = np.array([0.001, 100, 0.0001])
ub_lasso = np.array([1.0, 5000, 0.1])
dim_lasso = 3
n_agents, max_iter = 15, 20

# KOA search over the training split.
best_pos_lasso, best_mae_lasso = koa_optimizer(
    fobj_lasso, lb_lasso, ub_lasso, dim_lasso, n_agents, max_iter,
    X_train_shared, y_train_shared,
)

# Refit with the winning position; only max_iter is truncated to an integer.
best_params_lasso = dict(
    random_state=42,
    alpha=best_pos_lasso[0],
    max_iter=int(best_pos_lasso[1]),
    tol=best_pos_lasso[2],
)
lasso_model = Lasso(**best_params_lasso)
lasso_model.fit(X_train_shared, y_train_shared)

# Train predictions followed by test predictions, in one array.
train_preds_lasso = lasso_model.predict(X_train_shared)
test_preds_lasso = lasso_model.predict(X_test_shared)
lasso_final_preds = np.concatenate((train_preds_lasso, test_preds_lasso))


🔁 Iter 1/20 - Best MAE: 0.02816
🔁 Iter 2/20 - Best MAE: 0.02816
🔁 Iter 3/20 - Best MAE: 0.02816
🔁 Iter 4/20 - Best MAE: 0.02816
🔁 Iter 5/20 - Best MAE: 0.02816
🔁 Iter 6/20 - Best MAE: 0.02816
🔁 Iter 7/20 - Best MAE: 0.02816
🔁 Iter 8/20 - Best MAE: 0.02816
🔁 Iter 9/20 - Best MAE: 0.02816
🔁 Iter 10/20 - Best MAE: 0.02816
🔁 Iter 11/20 - Best MAE: 0.02816
🔁 Iter 12/20 - Best MAE: 0.02816
🔁 Iter 13/20 - Best MAE: 0.02816
🔁 Iter 14/20 - Best MAE: 0.02816
🔁 Iter 15/20 - Best MAE: 0.02816
🔁 Iter 16/20 - Best MAE: 0.02816
🔁 Iter 17/20 - Best MAE: 0.02816
🔁 Iter 18/20 - Best MAE: 0.02816
🔁 Iter 19/20 - Best MAE: 0.02816
🔁 Iter 20/20 - Best MAE: 0.02816
Lasso R2 Score: 0.9837


# COA

In [None]:
import numpy as np

def p_obj(x, t, T):
    """Scaled Gaussian weight of ``x`` around 25.

    Evaluates ``0.2 * N(x; mean=25, std=3)``. ``t`` and ``T`` are accepted but
    not used by the computation.
    """
    scale = 0.2
    centre = 25
    spread = 3
    normaliser = 1 / (np.sqrt(2 * np.pi) * spread)
    bell = np.exp(-((x - centre)**2) / (2 * spread**2))
    return scale * normaliser * bell

def coa_optimizer(objective_function, lb, ub, dim, n_agents, max_iter, X, y, seed=None):
    """Minimise ``objective_function`` with a temperature-driven population search.

    Every iteration draws a "temperature" in [20, 35). Above 30, each agent is
    either pulled towards the reference point ``xf`` or displaced relative to a
    random agent. Otherwise agents move relative to a "food" point that starts
    at the best position and shrinks whenever the ratio ``P`` exceeds 2.
    Candidates are clipped to ``[lb, ub]`` and kept only when they improve the
    agent's own score.
    NOTE(review): this looks like the Crayfish Optimization Algorithm — confirm
    against the intended reference.

    Parameters
    ----------
    objective_function : callable
        Called as ``objective_function(position, X, y)``; must return a number
        to minimise. Scores are cached, so it is assumed to be deterministic.
    lb, ub : array-like of length ``dim``
        Lower and upper bound of every search dimension.
    dim : int
        Number of search dimensions.
    n_agents : int
        Population size (at least 1).
    max_iter : int
        Number of iterations.
    X, y
        Passed through unchanged to ``objective_function``.
    seed : int, optional
        When given, a private ``numpy.random.RandomState(seed)`` drives the
        search so the run is reproducible. The default ``None`` keeps using
        NumPy's global random state, as before.

    Returns
    -------
    (numpy.ndarray, float)
        Best position found and its objective value.

    Raises
    ------
    ValueError
        If ``n_agents`` is smaller than 1.
    TypeError
        If ``objective_function`` returns ``None`` (e.g. a missing ``return``).
    """
    if n_agents < 1:
        raise ValueError("n_agents must be at least 1")

    rng = np.random if seed is None else np.random.RandomState(seed)

    def evaluate(candidate):
        # Fail with a clear message instead of an opaque comparison error later.
        score = objective_function(candidate, X, y)
        if score is None:
            raise TypeError(
                "objective_function returned None; it must return a numeric score to minimise"
            )
        return float(score)

    pos = rng.uniform(low=lb, high=ub, size=(n_agents, dim))
    fit = np.array([evaluate(ind) for ind in pos])

    # NOTE(review): best_idx is set once and never refreshed, so pos[best_idx]
    # below is "the agent that was best at initialisation" — confirm intended.
    best_idx = np.argmin(fit)
    best_pos = pos[best_idx].copy()
    best_fit = fit[best_idx]

    for t in range(1, max_iter + 1):
        C = 2 - (t / max_iter)
        temp = rng.rand() * 15 + 20
        # p_obj depends only on per-iteration values, so compute it once.
        p = p_obj(temp, t, max_iter)
        xf = (best_pos + pos[best_idx]) / 2
        Xfood = best_pos.copy()
        # The food starts at best_pos, whose score is already known; it is only
        # re-evaluated after it has actually been shrunk (food_fit is None).
        food_fit = best_fit
        new_pos = np.zeros_like(pos)

        for i in range(n_agents):
            if temp > 30:
                if rng.rand() < 0.5:
                    new_pos[i] = pos[i] + C * rng.rand(dim) * (xf - pos[i])
                else:
                    z = rng.randint(n_agents)
                    new_pos[i] = pos[i] - pos[z] + xf
            else:
                if food_fit is None:
                    # Shrinking can push the food below lb; score the clipped
                    # point so the objective never sees out-of-range values.
                    food_fit = evaluate(np.clip(Xfood, lb, ub))
                P = 3 * rng.rand() * fit[i] / food_fit
                if P > 2:
                    Xfood = np.exp(-1 / P) * Xfood
                    food_fit = None
                    new_pos[i] = (pos[i] +
                                  np.cos(2 * np.pi * rng.rand(dim)) * Xfood * p -
                                  np.sin(2 * np.pi * rng.rand(dim)) * Xfood * p)
                else:
                    new_pos[i] = (pos[i] - Xfood) * p + p * rng.rand(dim) * pos[i]

            new_pos[i] = np.clip(new_pos[i], lb, ub)

        # Greedy acceptance: an agent never moves to a worse position.
        for i in range(n_agents):
            new_fit = evaluate(new_pos[i])
            if new_fit < fit[i]:
                fit[i] = new_fit
                pos[i] = new_pos[i]
                if new_fit < best_fit:
                    best_fit = new_fit
                    # Copy so the returned best is not a view into new_pos.
                    best_pos = new_pos[i].copy()

        print(f"🌀 Iter {t}/{max_iter} - Best MAE: {best_fit:.5f}")

    return best_pos, best_fit


In [None]:
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.metrics import mean_absolute_error

def fobj_etr(params, X, y):
    """Return the MAE of an ExtraTreesRegressor built from ``params``.

    ``params`` holds, in order, n_estimators and max_depth (both truncated to
    integers) and min_samples_split, which is passed on as a float. The model
    is fitted on ``(X, y)`` and scored on the same data, so the result is an
    in-sample error.
    """
    tree_count, depth_limit = int(params[0]), int(params[1])
    split_fraction = float(params[2])

    model = ExtraTreesRegressor(
        random_state=42,
        n_estimators=tree_count,
        max_depth=depth_limit,
        min_samples_split=split_fraction,
    )
    model.fit(X, y)
    return mean_absolute_error(y, model.predict(X))

# Search space: n_estimators and max_depth (truncated to integers by fobj_etr)
# plus min_samples_split as a float in [0.01, 1.0]. scikit-learn reads a float
# min_samples_split as a fraction of the samples.
lb_etr = np.array([50, 2, 0.01])
ub_etr = np.array([150, 10, 1.0])
dim_etr = 3
n_agents = 10
max_iter = 20

best_pos_etr, best_mae_etr = coa_optimizer(
    fobj_etr, lb_etr, ub_etr, dim_etr, n_agents, max_iter,
    X_train_shared, y_train_shared
)

best_params_etr = {
    'random_state': 42,
    'n_estimators': int(best_pos_etr[0]),
    'max_depth': int(best_pos_etr[1]),
    # Keep the float that fobj_etr actually scored. int() would turn every value
    # in this range into 0 or 1, which is not a valid integer min_samples_split
    # (the integer form must be at least 2), so the final fit would fail.
    'min_samples_split': float(best_pos_etr[2])
}
etr_model = ExtraTreesRegressor(**best_params_etr)
etr_model.fit(X_train_shared, y_train_shared)

# Train predictions followed by test predictions, in one array.
train_preds_etr = etr_model.predict(X_train_shared)
test_preds_etr = etr_model.predict(X_test_shared)
etr_final_preds = np.concatenate([train_preds_etr, test_preds_etr])


🌀 Iter 1/20 - Best MAE: 0.02356
🌀 Iter 2/20 - Best MAE: 0.01048
🌀 Iter 3/20 - Best MAE: 0.01048
🌀 Iter 4/20 - Best MAE: 0.01048
🌀 Iter 5/20 - Best MAE: 0.00680
🌀 Iter 6/20 - Best MAE: 0.00680
🌀 Iter 7/20 - Best MAE: 0.00680
🌀 Iter 8/20 - Best MAE: 0.00680


KeyboardInterrupt: 

In [None]:
from sklearn.linear_model import Lasso

def fobj_lasso(params, X, y):
    """Return the MAE of a Lasso model built from ``params``.

    ``params`` holds, in order, alpha, max_iter (truncated to an integer) and
    tol. The model is fitted on ``(X, y)`` and scored on the same data, so the
    result is an in-sample error.
    """
    settings = {
        "alpha": params[0],
        "max_iter": int(params[1]),
        "tol": params[2],
    }
    model = Lasso(random_state=42, **settings)
    model.fit(X, y)
    in_sample_preds = model.predict(X)
    return mean_absolute_error(y, in_sample_preds)

# Search space for this run: (alpha, max_iter, tol).
# These names are plain globals and replace any earlier values.
lb_lasso = np.array([0.001, 100, 0.0001])
ub_lasso = np.array([1.0, 5000, 0.1])
dim_lasso = 3

# COA search over the training split with 15 agents and 20 iterations.
best_pos_lasso, best_mae_lasso = coa_optimizer(
    fobj_lasso, lb_lasso, ub_lasso, dim_lasso, 15, 20, X_train_shared, y_train_shared
)

# Refit with the winning position; only max_iter is truncated to an integer.
best_params_lasso = dict(
    random_state=42,
    alpha=best_pos_lasso[0],
    max_iter=int(best_pos_lasso[1]),
    tol=best_pos_lasso[2],
)
lasso_model = Lasso(**best_params_lasso)
lasso_model.fit(X_train_shared, y_train_shared)

# Train predictions followed by test predictions, in one array.
train_preds_lasso = lasso_model.predict(X_train_shared)
test_preds_lasso = lasso_model.predict(X_test_shared)
lasso_final_preds = np.concatenate((train_preds_lasso, test_preds_lasso))


🌀 Iter 1/20 - Best MAE: 0.01739
🌀 Iter 2/20 - Best MAE: 0.01739
🌀 Iter 3/20 - Best MAE: 0.01739
🌀 Iter 4/20 - Best MAE: 0.01739
🌀 Iter 5/20 - Best MAE: 0.01739
🌀 Iter 6/20 - Best MAE: 0.01739
🌀 Iter 7/20 - Best MAE: 0.01739
🌀 Iter 8/20 - Best MAE: 0.01739
🌀 Iter 9/20 - Best MAE: 0.01739
🌀 Iter 10/20 - Best MAE: 0.01739
🌀 Iter 11/20 - Best MAE: 0.01739
🌀 Iter 12/20 - Best MAE: 0.01739
🌀 Iter 13/20 - Best MAE: 0.01739
🌀 Iter 14/20 - Best MAE: 0.01739
🌀 Iter 15/20 - Best MAE: 0.01739
🌀 Iter 16/20 - Best MAE: 0.01739
🌀 Iter 17/20 - Best MAE: 0.01739
🌀 Iter 18/20 - Best MAE: 0.01739
🌀 Iter 19/20 - Best MAE: 0.01739
🌀 Iter 20/20 - Best MAE: 0.01739


# @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

In [None]:
# Tune the Extra Trees Regressor with KOA on the full (X, y) using the imported
# objective_et and the *_et bounds.
# koa_optimizer is both imported at the top of the notebook and redefined in a
# later cell, and n_agents / max_iter are reassigned by several cells, so what
# runs here depends on which cells were executed last.
# NOTE(review): the recorded TypeError ('<' between NoneType and NoneType) is
# presumably caused by objective_et returning None — confirm in objective_function.
print("\n🔹 Optimizing Extra Trees Regressor with KOA...")
best_params_koa_et, best_mae_koa_et = koa_optimizer(objective_et, lb_et, ub_et, dim_et, n_agents, max_iter, X, y)
print(f"ETR + KOA Best params: {best_params_koa_et}")
print(f"ETR + KOA Best MAE: {best_mae_koa_et:.5f}")



🔹 Optimizing Extra Trees Regressor with KOA...


TypeError: '<' not supported between instances of 'NoneType' and 'NoneType'

In [None]:

    # Tune the Extra Trees Regressor with COA on the full (X, y) using the
    # imported objective_et and the *_et bounds.
    # n_agents and max_iter are globals reassigned by several cells, so the
    # values used here depend on which cells were executed last.
    # NOTE(review): every line of this cell is indented as if lifted from a
    # function body — confirm it runs as a stand-alone cell.
    print("\n🔹 Optimizing Extra Trees Regressor with COA...")
    best_params_coa_et, best_mae_coa_et = coa_optimizer(objective_et, lb_et, ub_et, dim_et, n_agents, max_iter, X, y)
    print(f"ETR + COA Best params: {best_params_coa_et}")
    print(f"ETR + COA Best MAE: {best_mae_coa_et:.5f}")

In [None]:
    # Tune Lasso with KOA on the full (X, y) using the imported objective_lasso.
    # lb_lasso / ub_lasso / dim_lasso are globals that other cells rebind to a
    # three-entry (alpha, max_iter, tol) space, and n_agents / max_iter are
    # reassigned too, so the values used here depend on execution order.
    # NOTE(review): confirm objective_lasso expects the same number of
    # parameters as dim_lasso.
    print("\n🔹 Optimizing Lasso Regression with KOA...")
    best_params_koa_lasso, best_mae_koa_lasso = koa_optimizer(objective_lasso, lb_lasso, ub_lasso, dim_lasso, n_agents, max_iter, X, y)
    print(f"Lasso + KOA Best params: {best_params_koa_lasso}")
    print(f"Lasso + KOA Best MAE: {best_mae_koa_lasso:.5f}")


In [None]:
   # Optimize Lasso with COA
# Tunes Lasso with COA on the full (X, y) using the imported objective_lasso.
# The statements are flush-left so the cell parses however the comment line
# above is indented; previously they were indented one space deeper than it.
# lb_lasso / ub_lasso / dim_lasso and n_agents / max_iter are globals that other
# cells reassign, so the values used here depend on execution order.
print("\n🔹 Optimizing Lasso Regression with COA...")
best_params_coa_lasso, best_mae_coa_lasso = coa_optimizer(objective_lasso, lb_lasso, ub_lasso, dim_lasso, n_agents, max_iter, X, y)
print(f"Lasso + COA Best params: {best_params_coa_lasso}")
print(f"Lasso + COA Best MAE: {best_mae_coa_lasso:.5f}")


In [None]:
    # Compare the two baselines with a Wilcoxon test on their 'MAE' entries,
    # one value per element of etr_scores / lasso_scores.
    # NOTE(review): etr_scores and lasso_scores are not created by any cell
    # shown in this notebook; they must already exist in the kernel.
    mae_etr = [score['MAE'] for score in etr_scores]
    mae_lr = [score['MAE'] for score in lasso_scores]
    perform_wilcoxon_test(mae_etr, mae_lr, model_name_1="ETR", model_name_2="LR")

🔹 Extra Trees Regressor:
  R2: 0.9681
  RMSE: 0.0032
  MAE: 0.0410
  MAPE: 0.6522
  MARD: 0.3257

🔹 Lasso Regression:
  R2: 0.9359
  RMSE: 0.0063
  MAE: 0.0622
  MAPE: 0.9866
  MARD: 0.4928

🔹 Optimizing Extra Trees Regressor with KOA...


TypeError: '<' not supported between instances of 'NoneType' and 'NoneType'