<a href="https://colab.research.google.com/github/mhriyad99/smart-grid/blob/main/training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install xlsxwriter

Collecting xlsxwriter
  Downloading xlsxwriter-3.2.9-py3-none-any.whl.metadata (2.7 kB)
Downloading xlsxwriter-3.2.9-py3-none-any.whl (175 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m175.3/175.3 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: xlsxwriter
Successfully installed xlsxwriter-3.2.9


In [None]:
import os
import sys
import json
import time
import warnings
import numpy as np
import pandas as pd
from dataclasses import dataclass
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.validation import check_is_fitted, check_array
from typing import Dict, Any, List, Tuple, Optional
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score, cross_validate, cross_validate
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.metrics import (
    accuracy_score, f1_score, roc_auc_score,
    precision_score, recall_score, confusion_matrix,
    classification_report, roc_curve
)
from sklearn.model_selection import train_test_split

from google.colab import drive
# Mount Google Drive so the CSV inputs and Excel outputs under /content/drive are reachable.
drive.mount('/content/drive')

# Experiment-wide constants.
RANDOM_STATE = 42   # seed handed to the estimators and to nested_cv_tuning
OUTER_SPLITS = 5    # passed as outer_splits to nested_cv_tuning
INNER_SPLITS = 2    # passed as inner_splits to nested_cv_tuning (grid-search folds)
# NOTE(review): this silences every warning for the whole session, including
# convergence warnings from the estimators.
warnings.filterwarnings('ignore')

Mounted at /content/drive


In [None]:
# Every dataset variant is read from the same folder on the mounted Drive.
data_dir = '/content/drive/MyDrive/Colab Notebooks/Smart Grid/Data'

# Raw table: 'stabf' is the target; 'stab' and 'stabf' are removed from the features.
raw_data = pd.read_csv(f'{data_dir}/raw_data.csv')
y = raw_data['stabf']
X = raw_data.drop(columns=['stab', 'stabf'])

# Pre-transformed variants, used directly as feature matrices with the same `y`.
# NOTE(review): presumably these files hold the same rows as raw_data and no
# target columns - confirm against the preprocessing notebook.
squared_data = pd.read_csv(f'{data_dir}/squared_data.csv')
min_max_data = pd.read_csv(f'{data_dir}/min_max_data.csv')
box_cox_data = pd.read_csv(f'{data_dir}/box_cox_data.csv')
standard_data = pd.read_csv(f'{data_dir}/standard_data.csv')

In [None]:
# Candidate classifiers keyed by the short name used in the result tables.
# Every estimator that accepts a seed receives RANDOM_STATE.
models = dict(
    DT=DecisionTreeClassifier(random_state=RANDOM_STATE),
    GB=GradientBoostingClassifier(random_state=RANDOM_STATE),
    KNN=KNeighborsClassifier(),
    LR=LogisticRegression(random_state=RANDOM_STATE),
    MLP=MLPClassifier(max_iter=500, random_state=RANDOM_STATE),
    RF=RandomForestClassifier(random_state=RANDOM_STATE),
    # probability=True gives SVC a predict_proba, which the ROC/threshold code relies on.
    SVM=SVC(random_state=RANDOM_STATE, probability=True),
    XGB=XGBClassifier(random_state=RANDOM_STATE),
)

# Column subset used by the "selected features" experiments: tau1..tau4 then g1..g4.
selected_features = (
    [f'tau{node}' for node in range(1, 5)]
    + [f'g{node}' for node in range(1, 5)]
)

In [None]:
# Hyper-parameter search spaces handed to GridSearchCV, keyed by model short name.
# 'SVM' is a list of two grids so that `gamma` is only searched for the RBF kernel.
param_grids = dict(
    LR=dict(
        C=[0.01, 0.1, 1.0, 10.0, 100.0],
        penalty=['l1', 'l2'],
        solver=['liblinear', 'saga'],
        class_weight=[None, 'balanced'],
    ),
    DT=dict(
        criterion=['gini', 'entropy'],
        max_depth=[None, 10, 15, 20, 25],
        min_samples_split=[2, 5, 10],
        min_samples_leaf=[1, 2, 4],
        class_weight=[None, 'balanced'],
    ),
    RF=dict(
        n_estimators=[100, 200],
        max_depth=[None, 10, 20],
        criterion=['gini', 'entropy'],
        max_features=['sqrt', 'log2'],
        class_weight=[None, 'balanced'],
    ),
    SVM=[
        dict(kernel=['linear'], C=[0.1, 1.0], class_weight=[None, 'balanced']),
        dict(kernel=['rbf'], C=[0.1, 1.0], gamma=['scale', 0.1], class_weight=[None, 'balanced']),
    ],
    XGB=dict(
        n_estimators=[100, 200],
        max_depth=[3, 5, 7],
        learning_rate=[0.01, 0.1, 0.2],
    ),
    KNN=dict(
        n_neighbors=[3, 5, 7],
        weights=['uniform', 'distance'],
        p=[1, 2],
    ),
    GB=dict(
        n_estimators=[100, 200],
        learning_rate=[0.01, 0.1, 0.2],
        max_depth=[3, 5],
        max_features=['sqrt', 'log2'],
    ),
    MLP=dict(
        hidden_layer_sizes=[(50,), (100,), (100, 50)],
        activation=['relu'],
        alpha=[1e-4, 1e-3],
        learning_rate_init=[0.001, 0.005],
    ),
)

In [None]:
def calibrate_dss_threshold(best_estimator, X: pd.DataFrame, y: np.ndarray) -> Dict[str, Any]:
    """
    Choose the probability cut-off that maximises Youden's J (TPR - FPR).

    Parameters
    ----------
    best_estimator : fitted classifier exposing ``predict_proba``; column 1 of
        its output is used as the positive-class score.
    X : features to score.
    y : true labels aligned with ``X``.

    Returns
    -------
    dict with keys ``threshold`` (float cut-off at the maximal J) and the full
    ROC curve as plain lists under ``fpr``, ``tpr`` and ``thresholds``.
    """
    positive_scores = best_estimator.predict_proba(X)[:, 1]
    false_pos_rate, true_pos_rate, cutoffs = roc_curve(y, positive_scores)

    # np.argmax returns the first maximum, so ties resolve to the earliest ROC point.
    best_point = int(np.argmax(true_pos_rate - false_pos_rate))

    return dict(
        threshold=float(cutoffs[best_point]),
        fpr=false_pos_rate.tolist(),
        tpr=true_pos_rate.tolist(),
        thresholds=cutoffs.tolist(),
    )

In [None]:
def evaluate_models(models, X_train, y_train, cv_folds=5):
    """
    Cross-validate each model as configured (no tuning) and collect its metrics.

    Parameters
    ----------
    models : dict
        Maps a display name to an unfitted classifier. Entries whose value is
        ``None`` are skipped.
    X_train : pd.DataFrame
        Feature matrix; fold rows are selected positionally with ``.iloc``.
    y_train : pd.Series
        Binary target. The class-wise report is built with ``labels=[0, 1]``,
        so the labels are expected to be encoded as 0/1.
    cv_folds : int, default 5
        Number of folds of the shuffled ``StratifiedKFold`` (seeded with 42).

    Returns
    -------
    evaluation_df : pd.DataFrame
        One row per model: mean/std of each scorer, plus mean/std of the
        class-wise precision/recall and of the DSS threshold. The class-wise
        columns are absent when no fold could be evaluated for any model.
    fold_results_df : pd.DataFrame
        One row per (model, fold) with the per-fold metrics.
    dss_results_df : pd.DataFrame
        One row per (model, fold) holding the ROC curve and the Youden
        threshold, for estimators that expose ``predict_proba``.

    Notes
    -----
    A model that raises during evaluation is reported on stdout and gets NaN
    for every aggregated scorer; the remaining models are still evaluated.
    """
    metrics = ['accuracy', 'precision_macro', 'recall_macro', 'f1_macro', 'roc_auc']
    evaluation_results = []
    fold_records = []
    dss_records = []

    cv = StratifiedKFold(n_splits=cv_folds, shuffle=True, random_state=42)

    for model_name, model in models.items():
        if model is None:
            continue

        model_results = {'Model': model_name}

        try:
            cv_results = cross_validate(
                model, X_train, y_train,
                cv=cv, scoring=metrics,
                return_estimator=True, return_indices=True
            )

            # Aggregate (mean, std) of every scorer across the folds
            for metric in metrics:
                scores = cv_results[f'test_{metric}']
                model_results[f'{metric.capitalize()}_Mean'] = scores.mean()
                model_results[f'{metric.capitalize()}_Std'] = scores.std()

            # Per-fold results: use the test indices cross_validate actually
            # scored on instead of re-running the splitter.
            fold_iter = zip(cv_results['estimator'], cv_results['indices']['test'])
            for fold_idx, (est, test_idx) in enumerate(fold_iter, start=1):
                X_fold = X_train.iloc[test_idx]
                y_true = y_train.iloc[test_idx]
                y_pred = est.predict(X_fold)
                has_proba = hasattr(est, "predict_proba")

                # Metrics
                acc = accuracy_score(y_true, y_pred)
                f1 = f1_score(y_true, y_pred, zero_division=0)
                try:
                    auc = roc_auc_score(y_true, est.predict_proba(X_fold)[:, 1]) if has_proba else np.nan
                except ValueError:
                    # e.g. only one class present in the fold
                    auc = np.nan
                prec = precision_score(y_true, y_pred, zero_division=0)
                rec = recall_score(y_true, y_pred, zero_division=0)

                # Class-wise
                report = classification_report(y_true, y_pred, labels=[0, 1], output_dict=True, zero_division=0)

                # DSS calibration. The threshold defaults to NaN so an estimator
                # without predict_proba neither crashes nor inherits the value
                # computed for a previous fold or model.
                dss_threshold = np.nan
                if has_proba:
                    dss = calibrate_dss_threshold(est, X_fold, y_true)
                    dss_threshold = dss["threshold"]
                    dss_records.append({
                        "Model": model_name,
                        "Fold": fold_idx,
                        "DSS_Threshold": dss["threshold"],
                        "DSS_FPR": dss["fpr"],
                        "DSS_TPR": dss["tpr"],
                        "DSS_Thresholds": dss["thresholds"]
                    })

                fold_records.append({
                    "Model": model_name,
                    "Fold": fold_idx,
                    "Accuracy": acc,
                    "F1": f1,
                    "ROC_AUC": auc,
                    "Precision": prec,
                    "Recall": rec,
                    "Precision_Class0": report["0"]["precision"],
                    "Recall_Class0": report["0"]["recall"],
                    "Precision_Class1": report["1"]["precision"],
                    "Recall_Class1": report["1"]["recall"],
                    "DSS_Threshold": dss_threshold
                })

        except Exception as e:
            # Best effort: keep evaluating the remaining models.
            print(f"Error evaluating {model_name}: {str(e)}")
            for metric in metrics:
                model_results[f'{metric.capitalize()}_Mean'] = np.nan
                model_results[f'{metric.capitalize()}_Std'] = np.nan

        evaluation_results.append(model_results)

    evaluation_df = pd.DataFrame(evaluation_results)
    fold_results_df = pd.DataFrame(fold_records)
    dss_results_df = pd.DataFrame(dss_records)

    # Nothing to summarise (no models, or every model failed before its first
    # fold); grouping an empty frame by "Model" would raise KeyError.
    if fold_results_df.empty:
        return evaluation_df, fold_results_df, dss_results_df

    metrics_cols = [
        'Precision_Class0', 'Recall_Class0',
        'Precision_Class1', 'Recall_Class1',
        'DSS_Threshold'
    ]
    summary = (
        fold_results_df
        .groupby("Model")[metrics_cols]
        .agg(['mean', 'std'])
        .reset_index()
    )
    # Flatten the (column, statistic) MultiIndex into "column_statistic" names
    summary.columns = [
        f"{col[0]}_{col[1]}" if col[1] else col[0]
        for col in summary.columns
    ]
    evaluation_df = pd.merge(evaluation_df, summary, on='Model', how='left')

    return evaluation_df, fold_results_df, dss_results_df


In [None]:
def nested_cv_tuning(
    X, y, models, param_grids,
    outer_splits=5, inner_splits=2, random_state=42,
    scoring='accuracy', n_jobs=-1
):
    """
    Nested cross-validation: grid-search on each outer training fold, score on its test fold.

    Parameters
    ----------
    X : pd.DataFrame
        Feature matrix; rows are selected positionally with ``.iloc``.
    y : pd.Series or np.ndarray
        Binary target encoded as 0/1 (predictions are derived as
        ``proba >= 0.5`` and reports use ``labels=[0, 1]``).
    models : dict
        Maps a display name to an unfitted classifier.
    param_grids : dict
        Maps the same names to a ``GridSearchCV`` parameter grid.
    outer_splits : int, default 5
        Folds of the shuffled, seeded outer ``StratifiedKFold``.
    inner_splits : int, default 2
        Passed as ``cv`` to ``GridSearchCV``.
    random_state : int, default 42
        Seed of the outer splitter.
    scoring : str, default 'accuracy'
        Metric optimised by the inner grid search.
    n_jobs : int, default -1
        Parallelism of the inner grid search.

    Returns
    -------
    summary : pd.DataFrame
        Per model, mean/std of the class-wise precision/recall and DSS threshold.
    fold_results_df : pd.DataFrame
        One row per (model, outer fold) with best parameters and metrics.
    dss_results_df : pd.DataFrame
        One row per (model, outer fold) with the ROC curve and Youden
        threshold, for estimators that expose ``predict_proba``.
    """
    outer_cv = StratifiedKFold(
        n_splits=outer_splits, shuffle=True, random_state=random_state
    )

    fold_records = []   # detailed per fold
    dss_records = []    # DSS threshold results

    # StratifiedKFold yields positional indices, so a pandas target must be
    # sliced with .iloc; plain [] would be label-based and only coincides
    # with positions for a default RangeIndex.
    y_is_pandas = hasattr(y, 'iloc')

    for model_name, model in models.items():
        print(f"\nNested CV for {model_name}...")

        for outer_fold, (tr_idx, te_idx) in enumerate(outer_cv.split(X, y), start=1):
            print(f"  Outer fold {outer_fold}/{outer_splits}")

            X_tr, X_te = X.iloc[tr_idx], X.iloc[te_idx]
            if y_is_pandas:
                y_tr, y_te = y.iloc[tr_idx], y.iloc[te_idx]
            else:
                y_tr, y_te = y[tr_idx], y[te_idx]

            gs = GridSearchCV(
                estimator=model,
                param_grid=param_grids[model_name],
                cv=inner_splits,
                scoring=scoring,
                n_jobs=n_jobs,
                verbose=0
            )

            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                gs.fit(X_tr, y_tr)

            best_model = gs.best_estimator_

            # Predict
            y_prob = None
            if hasattr(best_model, "predict_proba"):
                y_prob = best_model.predict_proba(X_te)[:, 1]
                y_pred = (y_prob >= 0.5).astype(int)
            else:
                y_pred = best_model.predict(X_te)

            # Metrics
            acc = accuracy_score(y_te, y_pred)
            f1 = f1_score(y_te, y_pred, zero_division=0)
            try:
                auc = roc_auc_score(y_te, y_prob) if y_prob is not None else np.nan
            except ValueError:
                # e.g. only one class present in the fold
                auc = np.nan
            precision = precision_score(y_te, y_pred, zero_division=0)
            recall = recall_score(y_te, y_pred, zero_division=0)

            cm = confusion_matrix(y_te, y_pred, labels=[0, 1])
            tn, fp, fn, tp = cm.ravel()
            specificity = tn / (tn + fp) if (tn + fp) > 0 else 0

            report = classification_report(
                y_te, y_pred, labels=[0, 1], output_dict=True, zero_division=0
            )

            # DSS threshold calibration. The threshold is stored on the fold
            # record itself, so it can never be attached to the wrong model
            # when some estimators lack predict_proba.
            dss_threshold = np.nan
            if y_prob is not None:
                dss = calibrate_dss_threshold(best_model, X_te, y_te)
                dss_threshold = dss['threshold']
                dss_records.append({
                    'Model': model_name,
                    'Outer_Fold': outer_fold,
                    'DSS_Threshold': dss['threshold'],
                    'DSS_FPR': dss['fpr'],
                    'DSS_TPR': dss['tpr'],
                    'DSS_Thresholds': dss['thresholds']
                })

            fold_records.append({
                'Model': model_name,
                'Outer_Fold': outer_fold,
                'Best_Params': gs.best_params_,
                'Accuracy': acc,
                'F1': f1,
                'ROC_AUC': auc,
                'Precision': precision,
                'Recall': recall,
                'Specificity': specificity,
                'Precision_Class0': report['0']['precision'],
                'Recall_Class0': report['0']['recall'],
                'Precision_Class1': report['1']['precision'],
                'Recall_Class1': report['1']['recall'],
                'DSS_Threshold': dss_threshold
            })

            print(f"    Fold {outer_fold} results: Acc={acc:.3f}, F1={f1:.3f}, AUC={auc:.3f}")

    fold_results_df = pd.DataFrame(fold_records)
    dss_results_df = pd.DataFrame(dss_records)

    # Aggregate mean and std for each model
    metrics_cols = [
        'Precision_Class0', 'Recall_Class0',
        'Precision_Class1', 'Recall_Class1', 'DSS_Threshold'
    ]

    summary = (
        fold_results_df
        .groupby("Model")[metrics_cols]
        .agg(['mean', 'std'])
        .reset_index()
    )

    # Flatten the (column, statistic) MultiIndex into "column_statistic" names
    summary.columns = [
        f"{col[0]}_{col[1]}" if col[1] else col[0]
        for col in summary.columns
    ]

    return summary, fold_results_df, dss_results_df


# Training on full data (no feature selection, no grid search)

In [None]:
# Baseline on the raw feature matrix: all columns, models as configured.
raw_result_df, raw_fold_result_df, raw_dss_df = evaluate_models(
    models=models, X_train=X, y_train=y, cv_folds=5
)

In [None]:
# Same baseline on the min-max dataset variant.
min_max_result_df, min_max_fold_result_df, min_max_dss_df = evaluate_models(
    models=models, X_train=min_max_data, y_train=y, cv_folds=5
)

In [None]:
# Same baseline on the Box-Cox dataset variant.
box_cox_result_df, box_cox_fold_result_df, box_cox_dss_df = evaluate_models(
    models=models, X_train=box_cox_data, y_train=y, cv_folds=5
)

In [None]:
# Same baseline on the standard-scaled dataset variant.
standard_result_df, standard_fold_result_df, standard_dss_df = evaluate_models(
    models=models, X_train=standard_data, y_train=y, cv_folds=5
)

In [None]:
# Same baseline on the squared dataset variant.
squared_result_df, squared_fold_result_df, squared_dss_df = evaluate_models(
    models=models, X_train=squared_data, y_train=y, cv_folds=5
)

In [None]:
# Show which per-fold metric columns evaluate_models produced.
raw_fold_result_df.columns

Index(['Model', 'Fold', 'Accuracy', 'F1', 'ROC_AUC', 'Precision', 'Recall',
       'Precision_Class0', 'Recall_Class0', 'Precision_Class1',
       'Recall_Class1', 'DSS_Threshold'],
      dtype='object')

In [None]:
# Export the baseline results (all features, no grid search) to one workbook.
results_path = '/content/drive/MyDrive/Colab Notebooks/Smart Grid/Data/raw_results.xlsx'
summary_sheet = 'no_fs_no_gs'
title_stride = 16    # rows between the titles of consecutive summary tables
dss_start_row = 45   # the ROC/threshold table starts here, below the per-fold table

# (title written above the table, summary table) in top-to-bottom order
summary_tables = [
    ('Raw Results', raw_result_df),
    ('Min-Max Results', min_max_result_df),
    ('Box-Cox Results', box_cox_result_df),
    ('Standard Scaled Results', standard_result_df),
    ('Squared Results', squared_result_df),
]

# (sheet name, per-fold metrics, per-fold ROC/threshold table)
fold_tables = [
    ('raw_fold_results', raw_fold_result_df, raw_dss_df),
    ('min_max_fold_results', min_max_fold_result_df, min_max_dss_df),
    ('box_cox_fold_results', box_cox_fold_result_df, box_cox_dss_df),
    ('standard_fold_results', standard_fold_result_df, standard_dss_df),
    ('squared_fold_results', squared_fold_result_df, squared_dss_df),
]

# The context manager closes the workbook even if one of the writes raises.
with pd.ExcelWriter(results_path, engine='xlsxwriter') as writer:
    for table_no, (title, summary_df) in enumerate(summary_tables):
        title_row = table_no * title_stride
        summary_df.to_excel(writer, sheet_name=summary_sheet, startrow=title_row + 1, index=False)
        worksheet = writer.sheets[summary_sheet]
        worksheet.write(title_row, 0, title)

    for fold_sheet, fold_df, dss_df in fold_tables:
        fold_df.to_excel(writer, sheet_name=fold_sheet, index=False)
        dss_df.to_excel(writer, sheet_name=fold_sheet, startrow=dss_start_row, index=False)

# Training on selected features (Without Grid Search)

In [None]:
# Baseline on the raw data restricted to the selected feature columns.
fs_raw_result_df, fs_raw_fold_result_df, fs_raw_dss_df = evaluate_models(
    models=models, X_train=X[selected_features], y_train=y, cv_folds=5
)

In [None]:
# Baseline on the min-max variant restricted to the selected feature columns.
fs_min_max_result_df, fs_min_max_fold_result_df, fs_min_max_dss_df = evaluate_models(
    models=models, X_train=min_max_data[selected_features], y_train=y, cv_folds=5
)

In [None]:
# Baseline on the Box-Cox variant restricted to the selected feature columns.
# These results are exported under the 'Box-Cox Results' title, so they must be
# computed from box_cox_data (not squared_data, which has its own cell).
fs_box_cox_result_df, fs_box_cox_fold_result_df, fs_box_cox_dss_df = evaluate_models(models, box_cox_data[selected_features], y, cv_folds=5)

In [None]:
# Baseline on the standard-scaled variant restricted to the selected feature columns.
fs_standard_result_df, fs_standard_fold_result_df, fs_standard_dss_df = evaluate_models(
    models=models, X_train=standard_data[selected_features], y_train=y, cv_folds=5
)

In [None]:
# Baseline on the squared variant restricted to the selected feature columns.
fs_squared_result_df, fs_squared_fold_result_df, fs_squared_dss_df = evaluate_models(
    models=models, X_train=squared_data[selected_features], y_train=y, cv_folds=5
)

In [None]:
# Export the selected-feature results (no grid search) to one workbook.
results_path = '/content/drive/MyDrive/Colab Notebooks/Smart Grid/Data/fs_results.xlsx'
summary_sheet = 'fs_no_gs'
title_stride = 16    # rows between the titles of consecutive summary tables
dss_start_row = 45   # the ROC/threshold table starts here, below the per-fold table

# (title written above the table, summary table) in top-to-bottom order
summary_tables = [
    ('Raw Results', fs_raw_result_df),
    ('Min-Max Results', fs_min_max_result_df),
    ('Box-Cox Results', fs_box_cox_result_df),
    ('Standard Scaled Results', fs_standard_result_df),
    ('Squared Results', fs_squared_result_df),
]

# (sheet name, per-fold metrics, per-fold ROC/threshold table)
fold_tables = [
    ('raw_fold_results', fs_raw_fold_result_df, fs_raw_dss_df),
    ('min_max_fold_results', fs_min_max_fold_result_df, fs_min_max_dss_df),
    ('box_cox_fold_results', fs_box_cox_fold_result_df, fs_box_cox_dss_df),
    ('standard_fold_results', fs_standard_fold_result_df, fs_standard_dss_df),
    ('squared_fold_results', fs_squared_fold_result_df, fs_squared_dss_df),
]

# The context manager closes the workbook even if one of the writes raises.
with pd.ExcelWriter(results_path, engine='xlsxwriter') as writer:
    for table_no, (title, summary_df) in enumerate(summary_tables):
        title_row = table_no * title_stride
        summary_df.to_excel(writer, sheet_name=summary_sheet, startrow=title_row + 1, index=False)
        worksheet = writer.sheets[summary_sheet]
        worksheet.write(title_row, 0, title)

    for fold_sheet, fold_df, dss_df in fold_tables:
        fold_df.to_excel(writer, sheet_name=fold_sheet, index=False)
        dss_df.to_excel(writer, sheet_name=fold_sheet, startrow=dss_start_row, index=False)

# Training on full data with Grid search

In [None]:
# Nested-CV grid search on the raw feature matrix, timed with the wall clock.
start_time = time.time()

(
    gs_raw_df,
    gs_raw_fold_results_df,
    gs_raw_dss_df,
) = nested_cv_tuning(
    X=X, y=y, models=models, param_grids=param_grids,
    outer_splits=OUTER_SPLITS,
    inner_splits=INNER_SPLITS,
    random_state=RANDOM_STATE,
)

end_time = time.time()

print(f"Time taken: {end_time - start_time:.2f} seconds")


Nested CV for RF...
  Outer fold 1/5
    Fold 1 results: Acc=0.875, F1=0.806, AUC=0.949
  Outer fold 2/5
    Fold 2 results: Acc=0.880, F1=0.831, AUC=0.954
  Outer fold 3/5
    Fold 3 results: Acc=0.850, F1=0.783, AUC=0.935
  Outer fold 4/5
    Fold 4 results: Acc=0.860, F1=0.797, AUC=0.948
  Outer fold 5/5
    Fold 5 results: Acc=0.840, F1=0.775, AUC=0.924
Time taken: 192.19 seconds


In [None]:
# Nested-CV grid search on the Box-Cox dataset variant.
(
    gs_box_cox_df,
    gs_box_cox_fold_results_df,
    gs_box_cox_dss_df,
) = nested_cv_tuning(
    X=box_cox_data, y=y, models=models, param_grids=param_grids,
    outer_splits=OUTER_SPLITS,
    inner_splits=INNER_SPLITS,
    random_state=RANDOM_STATE,
)

In [None]:
# Nested-CV grid search on the min-max dataset variant.
(
    gs_min_max_df,
    gs_min_max_fold_results_df,
    gs_min_max_dss_df,
) = nested_cv_tuning(
    X=min_max_data, y=y, models=models, param_grids=param_grids,
    outer_splits=OUTER_SPLITS,
    inner_splits=INNER_SPLITS,
    random_state=RANDOM_STATE,
)

In [None]:
# Nested-CV grid search on the squared dataset variant.
(
    gs_squared_df,
    gs_squared_fold_results_df,
    gs_squared_dss_df,
) = nested_cv_tuning(
    X=squared_data, y=y, models=models, param_grids=param_grids,
    outer_splits=OUTER_SPLITS,
    inner_splits=INNER_SPLITS,
    random_state=RANDOM_STATE,
)

In [None]:
# Nested-CV grid search on the standard-scaled dataset variant.
(
    gs_standard_df,
    gs_standard_fold_results_df,
    gs_standard_dss_df,
) = nested_cv_tuning(
    X=standard_data, y=y, models=models, param_grids=param_grids,
    outer_splits=OUTER_SPLITS,
    inner_splits=INNER_SPLITS,
    random_state=RANDOM_STATE,
)

In [None]:
# Export the grid-search results on the full feature set to one workbook.
results_path = '/content/drive/MyDrive/Colab Notebooks/Smart Grid/Data/gs_raw_results.xlsx'
summary_sheet = 'summary'
title_stride = 16    # rows between the titles of consecutive summary tables
dss_start_row = 45   # the ROC/threshold table starts here, below the per-fold table

# (title written above the table, summary table) in top-to-bottom order
summary_tables = [
    ('Raw Results', gs_raw_df),
    ('Min-Max Results', gs_min_max_df),
    ('Box-Cox Results', gs_box_cox_df),
    ('Standard Scaled Results', gs_standard_df),
    ('Squared Results', gs_squared_df),
]

# (sheet name, per-fold metrics, per-fold ROC/threshold table)
fold_tables = [
    ('raw_fold_results', gs_raw_fold_results_df, gs_raw_dss_df),
    ('min_max_fold_results', gs_min_max_fold_results_df, gs_min_max_dss_df),
    ('box_cox_fold_results', gs_box_cox_fold_results_df, gs_box_cox_dss_df),
    ('standard_fold_results', gs_standard_fold_results_df, gs_standard_dss_df),
    ('squared_fold_results', gs_squared_fold_results_df, gs_squared_dss_df),
]

# The context manager closes the workbook even if one of the writes raises.
with pd.ExcelWriter(results_path, engine='xlsxwriter') as writer:
    for table_no, (title, summary_df) in enumerate(summary_tables):
        title_row = table_no * title_stride
        summary_df.to_excel(writer, sheet_name=summary_sheet, startrow=title_row + 1, index=False)
        worksheet = writer.sheets[summary_sheet]
        worksheet.write(title_row, 0, title)

    for fold_sheet, fold_df, dss_df in fold_tables:
        fold_df.to_excel(writer, sheet_name=fold_sheet, index=False)
        dss_df.to_excel(writer, sheet_name=fold_sheet, startrow=dss_start_row, index=False)

# Grid Search with selected features

In [None]:
# Nested-CV grid search on the raw data restricted to the selected feature columns.
(
    gs_fs_raw_df,
    gs_fs_raw_fold_results_df,
    gs_fs_raw_dss_df,
) = nested_cv_tuning(
    X=X[selected_features], y=y, models=models, param_grids=param_grids,
    outer_splits=OUTER_SPLITS,
    inner_splits=INNER_SPLITS,
    random_state=RANDOM_STATE,
)


Nested CV for DT...
  Outer fold 1/5
    Fold 1 results: Acc=0.861, F1=0.812, AUC=0.892
  Outer fold 2/5
    Fold 2 results: Acc=0.860, F1=0.809, AUC=0.867
  Outer fold 3/5
    Fold 3 results: Acc=0.869, F1=0.817, AUC=0.856
  Outer fold 4/5
    Fold 4 results: Acc=0.864, F1=0.817, AUC=0.892
  Outer fold 5/5
    Fold 5 results: Acc=0.862, F1=0.814, AUC=0.885

Nested CV for GB...
  Outer fold 1/5
    Fold 1 results: Acc=0.940, F1=0.916, AUC=0.989
  Outer fold 2/5
    Fold 2 results: Acc=0.949, F1=0.929, AUC=0.990
  Outer fold 3/5
    Fold 3 results: Acc=0.948, F1=0.926, AUC=0.990
  Outer fold 4/5
    Fold 4 results: Acc=0.943, F1=0.920, AUC=0.989
  Outer fold 5/5
    Fold 5 results: Acc=0.947, F1=0.924, AUC=0.990

Nested CV for KNN...
  Outer fold 1/5
    Fold 1 results: Acc=0.830, F1=0.751, AUC=0.898
  Outer fold 2/5
    Fold 2 results: Acc=0.830, F1=0.754, AUC=0.899
  Outer fold 3/5
    Fold 3 results: Acc=0.847, F1=0.778, AUC=0.916
  Outer fold 4/5
    Fold 4 results: Acc=0.835, F1=0

In [None]:
# Nested-CV grid search on the Box-Cox variant restricted to the selected feature columns.
(
    gs_fs_box_cox_df,
    gs_fs_box_cox_fold_results_df,
    gs_fs_box_cox_dss_df,
) = nested_cv_tuning(
    X=box_cox_data[selected_features], y=y, models=models, param_grids=param_grids,
    outer_splits=OUTER_SPLITS,
    inner_splits=INNER_SPLITS,
    random_state=RANDOM_STATE,
)


Nested CV for DT...
  Outer fold 1/5
    Fold 1 results: Acc=0.861, F1=0.812, AUC=0.892
  Outer fold 2/5
    Fold 2 results: Acc=0.860, F1=0.809, AUC=0.867
  Outer fold 3/5
    Fold 3 results: Acc=0.869, F1=0.817, AUC=0.856
  Outer fold 4/5
    Fold 4 results: Acc=0.864, F1=0.817, AUC=0.892
  Outer fold 5/5
    Fold 5 results: Acc=0.862, F1=0.814, AUC=0.885

Nested CV for GB...
  Outer fold 1/5
    Fold 1 results: Acc=0.940, F1=0.916, AUC=0.989
  Outer fold 2/5
    Fold 2 results: Acc=0.949, F1=0.929, AUC=0.990
  Outer fold 3/5
    Fold 3 results: Acc=0.948, F1=0.926, AUC=0.990
  Outer fold 4/5
    Fold 4 results: Acc=0.943, F1=0.920, AUC=0.989
  Outer fold 5/5
    Fold 5 results: Acc=0.947, F1=0.924, AUC=0.990

Nested CV for KNN...
  Outer fold 1/5
    Fold 1 results: Acc=0.830, F1=0.751, AUC=0.898
  Outer fold 2/5
    Fold 2 results: Acc=0.830, F1=0.754, AUC=0.899
  Outer fold 3/5
    Fold 3 results: Acc=0.847, F1=0.778, AUC=0.916
  Outer fold 4/5
    Fold 4 results: Acc=0.835, F1=0

In [None]:
# Nested-CV grid search on the min-max variant restricted to the selected feature columns.
(
    gs_fs_min_max_df,
    gs_fs_min_max_fold_results_df,
    gs_fs_min_max_dss_df,
) = nested_cv_tuning(
    X=min_max_data[selected_features], y=y, models=models, param_grids=param_grids,
    outer_splits=OUTER_SPLITS,
    inner_splits=INNER_SPLITS,
    random_state=RANDOM_STATE,
)


Nested CV for DT...
  Outer fold 1/5
    Fold 1 results: Acc=0.861, F1=0.812, AUC=0.892
  Outer fold 2/5
    Fold 2 results: Acc=0.860, F1=0.809, AUC=0.867
  Outer fold 3/5
    Fold 3 results: Acc=0.869, F1=0.817, AUC=0.856
  Outer fold 4/5
    Fold 4 results: Acc=0.864, F1=0.817, AUC=0.892
  Outer fold 5/5
    Fold 5 results: Acc=0.862, F1=0.814, AUC=0.885

Nested CV for GB...
  Outer fold 1/5
    Fold 1 results: Acc=0.940, F1=0.916, AUC=0.989
  Outer fold 2/5
    Fold 2 results: Acc=0.949, F1=0.929, AUC=0.990
  Outer fold 3/5
    Fold 3 results: Acc=0.948, F1=0.926, AUC=0.990
  Outer fold 4/5
    Fold 4 results: Acc=0.943, F1=0.920, AUC=0.989
  Outer fold 5/5
    Fold 5 results: Acc=0.947, F1=0.924, AUC=0.990

Nested CV for KNN...
  Outer fold 1/5
    Fold 1 results: Acc=0.902, F1=0.858, AUC=0.971
  Outer fold 2/5
    Fold 2 results: Acc=0.896, F1=0.849, AUC=0.965
  Outer fold 3/5
    Fold 3 results: Acc=0.904, F1=0.862, AUC=0.972
  Outer fold 4/5
    Fold 4 results: Acc=0.906, F1=0

In [None]:
# Nested-CV grid search on the squared variant restricted to the selected feature columns.
(
    gs_fs_squared_df,
    gs_fs_squared_fold_results_df,
    gs_fs_squared_dss_df,
) = nested_cv_tuning(
    X=squared_data[selected_features], y=y, models=models, param_grids=param_grids,
    outer_splits=OUTER_SPLITS,
    inner_splits=INNER_SPLITS,
    random_state=RANDOM_STATE,
)


Nested CV for DT...
  Outer fold 1/5
    Fold 1 results: Acc=0.862, F1=0.812, AUC=0.889
  Outer fold 2/5
    Fold 2 results: Acc=0.859, F1=0.809, AUC=0.866
  Outer fold 3/5
    Fold 3 results: Acc=0.859, F1=0.807, AUC=0.888
  Outer fold 4/5
    Fold 4 results: Acc=0.872, F1=0.822, AUC=0.891
  Outer fold 5/5
    Fold 5 results: Acc=0.862, F1=0.814, AUC=0.885

Nested CV for GB...
  Outer fold 1/5
    Fold 1 results: Acc=0.940, F1=0.916, AUC=0.989
  Outer fold 2/5
    Fold 2 results: Acc=0.949, F1=0.929, AUC=0.990
  Outer fold 3/5
    Fold 3 results: Acc=0.948, F1=0.926, AUC=0.990
  Outer fold 4/5
    Fold 4 results: Acc=0.943, F1=0.920, AUC=0.989
  Outer fold 5/5
    Fold 5 results: Acc=0.947, F1=0.924, AUC=0.990

Nested CV for KNN...
  Outer fold 1/5
    Fold 1 results: Acc=0.756, F1=0.654, AUC=0.802
  Outer fold 2/5
    Fold 2 results: Acc=0.756, F1=0.648, AUC=0.809
  Outer fold 3/5
    Fold 3 results: Acc=0.776, F1=0.681, AUC=0.825
  Outer fold 4/5
    Fold 4 results: Acc=0.774, F1=0

In [None]:
# Nested-CV grid search on the standard-scaled variant restricted to the selected feature columns.
(
    gs_fs_standard_df,
    gs_fs_standard_fold_results_df,
    gs_fs_standard_dss_df,
) = nested_cv_tuning(
    X=standard_data[selected_features], y=y, models=models, param_grids=param_grids,
    outer_splits=OUTER_SPLITS,
    inner_splits=INNER_SPLITS,
    random_state=RANDOM_STATE,
)


Nested CV for DT...
  Outer fold 1/5
    Fold 1 results: Acc=0.861, F1=0.812, AUC=0.892
  Outer fold 2/5
    Fold 2 results: Acc=0.860, F1=0.809, AUC=0.867
  Outer fold 3/5
    Fold 3 results: Acc=0.869, F1=0.817, AUC=0.856
  Outer fold 4/5
    Fold 4 results: Acc=0.864, F1=0.817, AUC=0.892
  Outer fold 5/5
    Fold 5 results: Acc=0.862, F1=0.814, AUC=0.885

Nested CV for GB...
  Outer fold 1/5
    Fold 1 results: Acc=0.940, F1=0.916, AUC=0.989
  Outer fold 2/5
    Fold 2 results: Acc=0.949, F1=0.929, AUC=0.990
  Outer fold 3/5
    Fold 3 results: Acc=0.948, F1=0.926, AUC=0.990
  Outer fold 4/5
    Fold 4 results: Acc=0.943, F1=0.920, AUC=0.989
  Outer fold 5/5
    Fold 5 results: Acc=0.947, F1=0.924, AUC=0.990

Nested CV for KNN...
  Outer fold 1/5
    Fold 1 results: Acc=0.902, F1=0.858, AUC=0.971
  Outer fold 2/5
    Fold 2 results: Acc=0.896, F1=0.849, AUC=0.965
  Outer fold 3/5
    Fold 3 results: Acc=0.904, F1=0.862, AUC=0.972
  Outer fold 4/5
    Fold 4 results: Acc=0.906, F1=0

In [None]:
# Export the grid-search results on the selected features to one workbook.
results_path = '/content/drive/MyDrive/Colab Notebooks/Smart Grid/Data/gs_fs_results.xlsx'
summary_sheet = 'summary'
title_stride = 16    # rows between the titles of consecutive summary tables
dss_start_row = 45   # the ROC/threshold table starts here, below the per-fold table

# (title written above the table, summary table) in top-to-bottom order
summary_tables = [
    ('Raw Results', gs_fs_raw_df),
    ('Min-Max Results', gs_fs_min_max_df),
    ('Box-Cox Results', gs_fs_box_cox_df),
    ('Standard Scaled Results', gs_fs_standard_df),
    ('Squared Results', gs_fs_squared_df),
]

# (sheet name, per-fold metrics, per-fold ROC/threshold table)
fold_tables = [
    ('raw_fold_results', gs_fs_raw_fold_results_df, gs_fs_raw_dss_df),
    ('min_max_fold_results', gs_fs_min_max_fold_results_df, gs_fs_min_max_dss_df),
    ('box_cox_fold_results', gs_fs_box_cox_fold_results_df, gs_fs_box_cox_dss_df),
    ('standard_fold_results', gs_fs_standard_fold_results_df, gs_fs_standard_dss_df),
    ('squared_fold_results', gs_fs_squared_fold_results_df, gs_fs_squared_dss_df),
]

# The context manager closes the workbook even if one of the writes raises.
with pd.ExcelWriter(results_path, engine='xlsxwriter') as writer:
    for table_no, (title, summary_df) in enumerate(summary_tables):
        title_row = table_no * title_stride
        summary_df.to_excel(writer, sheet_name=summary_sheet, startrow=title_row + 1, index=False)
        worksheet = writer.sheets[summary_sheet]
        worksheet.write(title_row, 0, title)

    for fold_sheet, fold_df, dss_df in fold_tables:
        fold_df.to_excel(writer, sheet_name=fold_sheet, index=False)
        dss_df.to_excel(writer, sheet_name=fold_sheet, startrow=dss_start_row, index=False)