In [6]:
from src.data_loader import load_data
from src.preprocess import split_and_scale
from src.evaluate import evaluate_model
import pandas as pd
from openpyxl import load_workbook
from openpyxl.styles import Alignment, PatternFill
from src.model_zoo import model_module 


# ==== Load & Prepare Data ====

In [7]:

# Workbook that holds the dataset; the path is relative to the working directory.
excel_path = "Data/Fraud Detection Dataset.xlsx"

# load_data receives the workbook path and the string "Fraudulent" and returns
# two objects, bound here to X and y.
# NOTE(review): "Fraudulent" is presumably the target column name — confirm in
# src/data_loader.py.
X, y = load_data(
    excel_path,
    "Fraudulent",
)

# Half of the rows are requested for the test part (test_size=0.5).
# NOTE(review): the helper's name suggests it also scales the features — confirm
# in src/preprocess.py.
x_train, x_test, y_train, y_test = split_and_scale(
    X,
    y,
    test_size=0.5,
)

# ==== Train Model ====

In [None]:
# `model_module` is already imported in the notebook's opening import cell, so it
# is not imported a second time here.
#
# FIXME(review): this cell used to contain `from src. import ModelEvaluator`.
# That line is a SyntaxError (the module name after `src.` is missing) and stopped
# the whole cell from running. Nothing in this cell uses `ModelEvaluator`; if it
# is needed, re-add the import with the correct module path in the import cell.

# Model configurations keyed by a short label. Every entry is built by
# `model_module` from a library name, an estimator class name and a dict of
# hyper-parameters.
# NOTE(review): presumably `library`/`function` resolve to
# `sklearn.<library>.<function>` and `attributes` are forwarded as constructor
# keyword arguments — confirm in src/model_zoo.py.
models = {
    # Random forest: 200 trees, depth capped at 10, seeded.
    "RF": model_module(
        name="RF",
        library="ensemble",
        function="RandomForestClassifier",
        attributes={
            "n_estimators": 200,
            "max_depth": 10,
            "min_samples_split": 10,
            "min_samples_leaf": 5,
            "max_features": "sqrt",
            "bootstrap": True,
            "random_state": 42,
        },
    ),
    # Multi-layer perceptron: one hidden layer of 100 units, seeded.
    "ANN": model_module(
        name="ANN",
        library="neural_network",
        function="MLPClassifier",
        attributes={
            "hidden_layer_sizes": (100,),
            "max_iter": 300,
            "random_state": 42,
        },
    ),
    # k-nearest neighbours with k=8 using a KD-tree index (no seed parameter).
    "KNN": model_module(
        name="KNN",
        library="neighbors",
        function="KNeighborsClassifier",
        attributes={
            "n_neighbors": 8,
            "algorithm": "kd_tree",
            "leaf_size": 28,
        },
    ),
    # Single decision tree, depth capped at 20.
    "DT": model_module(
        name="DT",
        library="tree",
        function="DecisionTreeClassifier",
        attributes={
            "max_depth": 20,
            "min_samples_split": 5,
            "criterion": "gini",
            # Seeded like RF and ANN so repeated runs build the same tree.
            "random_state": 42,
        },
    ),
}



RF Evaluation:
[Train]  Accuracy: 0.9498 - Precision: 0.9527 - Recall: 0.9468 - F1: 0.9497
[Test]   Accuracy: 0.5105 - Precision: 0.4995 - Recall: 0.5000 - F1: 0.4997
[All]    Accuracy: 0.7301 - Precision: 0.7281 - Recall: 0.7262 - F1: 0.7271

ANN Evaluation:




[Train]  Accuracy: 0.6674 - Precision: 0.6663 - Recall: 0.6738 - F1: 0.6701
[Test]   Accuracy: 0.4982 - Precision: 0.4877 - Recall: 0.5210 - F1: 0.5038
[All]    Accuracy: 0.5828 - Precision: 0.5757 - Recall: 0.5983 - F1: 0.5868

KNN Evaluation:
[Train]  Accuracy: 0.6392 - Precision: 0.6861 - Recall: 0.5164 - F1: 0.5893
[Test]   Accuracy: 0.4962 - Precision: 0.4801 - Recall: 0.3658 - F1: 0.4152
[All]    Accuracy: 0.5677 - Precision: 0.5837 - Recall: 0.4420 - F1: 0.5031

DT Evaluation:
[Train]  Accuracy: 0.9339 - Precision: 0.9436 - Recall: 0.9233 - F1: 0.9333
[Test]   Accuracy: 0.4936 - Precision: 0.4825 - Recall: 0.4906 - F1: 0.4865
[All]    Accuracy: 0.7137 - Precision: 0.7115 - Recall: 0.7096 - F1: 0.7105



# ==== Create DataFrames for Predictions ====

In [1]:

# NOTE(review): `results` is not defined in any cell visible in this notebook;
# it has to exist in the kernel (with "y_pred_train" and "y_pred_test" entries)
# before this cell runs. `pd` comes from the import cell, so run that first.

# Training split: true labels next to the predictions, re-indexed from 0.
train_columns = {
    "y_train": y_train,
    "y_pred_train": results["y_pred_train"],
}
df_train = pd.DataFrame(train_columns).reset_index(drop=True)

# Test split: same layout as the training frame.
test_columns = {
    "y_test": y_test,
    "y_pred_test": results["y_pred_test"],
}
df_test = pd.DataFrame(test_columns).reset_index(drop=True)

# Place the two frames side by side (column-wise concatenation).
df_combined = pd.concat([df_train, df_test], axis=1)


NameError: name 'pd' is not defined

# ==== Save Predictions to Excel ====

In [None]:

# Open the existing workbook in append mode with the openpyxl engine; a sheet
# that already carries the target name is replaced instead of raising.
writer_options = {
    "engine": "openpyxl",
    "mode": "a",
    "if_sheet_exists": "replace",
}

# Write the combined predictions to the "RF" sheet without the index column.
with pd.ExcelWriter(excel_path, **writer_options) as writer:
    df_combined.to_excel(
        writer,
        sheet_name="RF",
        index=False,
    )


# ==== Write Metrics Next to Predictions ====

In [None]:

# Re-open the workbook with openpyxl and pick the "RF" sheet for cell-level edits.
# This cell only modifies the in-memory workbook; it does not call book.save().
book = load_workbook(excel_path)
sheet = book["RF"]

# Copy the four summary metrics out of `results`, keeping this column order.
metric_names = ("train_accuracy", "test_accuracy", "precision", "recall")
metrics = {metric_name: results[metric_name] for metric_name in metric_names}

# One centred alignment shared by every metric cell.
centered = Alignment(horizontal="center", vertical="center")

# max_column + 2 skips one empty column after the last used column.
start_col = sheet.max_column + 2
for column, (label, score) in enumerate(metrics.items(), start=start_col):
    # Row 1 holds the metric name, row 2 its value.
    sheet.cell(row=1, column=column, value=label).alignment = centered
    sheet.cell(row=2, column=column, value=score).alignment = centered


# ==== Style the Sheet ====

In [None]:

# Solid fills: green for the header row, yellow for prediction columns.
green_fill = PatternFill(fill_type="solid", start_color="00FF00", end_color="00FF00")
yellow_fill = PatternFill(fill_type="solid", start_color="FFFF00", end_color="FFFF00")

# Values of the first row, used to recognise prediction columns by their title.
header = [header_cell.value for header_cell in sheet[1]]

# A column counts as a prediction column when its header is non-empty and
# contains "pred" (case-insensitive).
is_prediction_column = [
    bool(title) and "pred" in str(title).lower()
    for title in header
]

centered = Alignment(horizontal="center", vertical="center")

for row_cells in sheet.iter_rows():
    for position, cell in enumerate(row_cells):
        # Every cell is centred, whatever its fill.
        cell.alignment = centered
        if cell.row == 1:
            # The header row is always green, including prediction columns.
            cell.fill = green_fill
            continue
        if is_prediction_column[position]:
            cell.fill = yellow_fill

# Persist all in-memory changes made to the workbook back to the same file.
book.save(excel_path)