In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from scipy.special import gamma
import random

# Path of the Excel workbook that the main script passes to load_data()
file_path = "data/FraudDetectionDataset.xlsx"

def load_data(file_path):
    """Read an Excel sheet and split it into a feature table and a target column.

    Parameters
    ----------
    file_path
        Location of the workbook, forwarded unchanged to ``pandas.read_excel``.

    Returns
    -------
    tuple
        ``(X, y)`` where ``X`` holds every column except ``'Fraudulent'`` and
        ``y`` is the ``'Fraudulent'`` column itself.
    """
    target_column = 'Fraudulent'
    frame = pd.read_excel(file_path)

    # Features first, then labels: everything but the target goes into X.
    features = frame.drop(columns=target_column)
    labels = frame[target_column]

    return features, labels

# Levy-flight step generator used by the optimizer
def levy(n, m, beta=1.5):
    """Return an ``(n, m)`` array of random Levy-flight steps.

    The steps follow Mantegna's construction: a zero-mean normal sample with
    a beta-dependent standard deviation is divided by the absolute value of
    a standard normal sample raised to ``1 / beta``.

    Parameters
    ----------
    n, m
        Number of rows and columns of the returned array.
    beta
        Stability index of the Levy distribution (default 1.5).
    """
    shape = (n, m)
    inv_beta = 1 / beta

    # Standard deviation of the numerator sample.
    numerator = gamma(1 + beta) * np.sin(np.pi * beta / 2)
    denominator = gamma((1 + beta) / 2) * beta * 2 ** ((beta - 1) / 2)
    sigma_u = (numerator / denominator) ** inv_beta

    # Draw order matters for reproducibility under a fixed seed:
    # the scaled sample first, then the unit-variance one.
    scaled_draw = np.random.normal(0, sigma_u, size=shape)
    unit_draw = np.random.normal(0, 1, size=shape)

    return scaled_draw / (np.abs(unit_draw) ** inv_beta)

# Hippopotamus Optimizer (HPO) for model hyperparameter tuning
def HPO(SearchAgents, Max_iterations, lowerbound, upperbound, dimension, model, X_train, y_train, X_test, y_test):
    """Pick the candidate hyperparameter vector with the lowest ``1 - accuracy``.

    A population of ``SearchAgents`` random positions is drawn uniformly
    between the bounds and each one is scored by fitting ``model``. The
    position-update phase of the optimizer is still a placeholder, so the
    population does not move between iterations; the loop only tracks the
    incumbent and records the convergence history.

    Parameters
    ----------
    SearchAgents : int
        Number of candidate positions; must be at least 1.
    Max_iterations : int
        Number of loop iterations and length of the returned history.
    lowerbound, upperbound : scalar or array-like
        Search limits. A scalar is expanded to ``dimension`` entries.
    dimension : int
        Number of entries in each candidate position.
    model
        Estimator exposing ``set_params(n_estimators=...)``. It is modified
        and refitted in place on every fitness evaluation.
    X_train, y_train, X_test, y_test
        Data forwarded to ``evaluate_model`` for fitting and scoring.

    Returns
    -------
    tuple
        ``(fbest, Xbest, best_so_far)``: the lowest fitness found, a copy of
        the position that produced it, and the best fitness known at the end
        of each iteration.

    Raises
    ------
    ValueError
        If ``SearchAgents`` is smaller than 1.
    """
    # Fitness function defined inside HPO
    def fitness(params):
        # Only the first coordinate is used; it is truncated to an integer
        # because n_estimators must be a whole number.
        model.set_params(n_estimators=int(params[0]))  # Example for RandomForestClassifier
        # Train and evaluate the model
        accuracy = evaluate_model(model, X_train, y_train, X_test, y_test)
        # Return 1 - accuracy to minimize
        return 1 - accuracy

    if SearchAgents < 1:
        raise ValueError("SearchAgents must be at least 1")

    # Normalise the bounds to float arrays so list/tuple bounds work too.
    if np.isscalar(lowerbound):
        lowerbound = np.ones(dimension) * lowerbound
    else:
        lowerbound = np.asarray(lowerbound, dtype=float)
    if np.isscalar(upperbound):
        upperbound = np.ones(dimension) * upperbound
    else:
        upperbound = np.asarray(upperbound, dtype=float)

    # Initialization
    X = lowerbound + np.random.rand(SearchAgents, dimension) * (upperbound - lowerbound)
    fit = np.array([fitness(ind) for ind in X])  # Use the fitness function here

    # Seed the incumbent from the initial population so the return values
    # are defined even when Max_iterations is 0.
    best_idx = np.argmin(fit)
    fbest = fit[best_idx]
    Xbest = X[best_idx].copy()

    best_so_far = np.zeros(Max_iterations)

    for t in range(Max_iterations):
        best_idx = np.argmin(fit)
        if fit[best_idx] < fbest:
            fbest = fit[best_idx]
            Xbest = X[best_idx].copy()

        # HPO algorithm continues here...
        # (position updates are not implemented, so X and fit stay unchanged)

        # Record the convergence history; previously this array was returned
        # as all zeros because nothing ever wrote to it.
        best_so_far[t] = fbest

    return fbest, Xbest, best_so_far

# Train a model and score it on held-out data
def evaluate_model(model, X_train, y_train, X_test, y_test):
    """Fit ``model`` on the training split and return its test-split accuracy.

    ``model`` is fitted in place via ``model.fit``; the score is
    ``accuracy_score`` of ``y_test`` against ``model.predict(X_test)``.
    """
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    return accuracy_score(y_test, predictions)

# Main Script
if __name__ == "__main__":
    # Read the workbook and separate the feature table from the labels
    X, y = load_data(file_path)

    # Hold out 20% of the rows for testing (random_state pins the split)
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=0.2,
        random_state=42,
    )

    # Example search intervals per model family (modify as needed).
    # Only rf_param_range is consumed below; the others are kept for
    # switching to a different model.
    rf_param_range = [10, 100]  # for n_estimators
    ann_param_range = [5, 200]  # for hidden_layer_sizes
    nb_param_range = [0.1, 2.0]  # for var_smoothing
    dt_param_range = [1, 30]  # for max_depth

    # Estimator to tune; swap in another classifier such as MLPClassifier here
    model = RandomForestClassifier()

    # Run the optimizer over a one-dimensional search space.
    # HPO returns the minimised fitness (1 - accuracy) first, so
    # best_accuracy actually holds an error rate until it is flipped below.
    rf_lower, rf_upper = rf_param_range
    best_accuracy, best_params, _ = HPO(
        SearchAgents=5,
        Max_iterations=10,
        lowerbound=rf_lower,
        upperbound=rf_upper,
        dimension=1,
        model=model,
        X_train=X_train,
        y_train=y_train,
        X_test=X_test,
        y_test=y_test,
    )

    print(f"Best Parameters for {model.__class__.__name__}: {best_params}")
    print(f"Best Accuracy: {1 - best_accuracy}")


Best Parameters for RandomForestClassifier: [98.83715726]
Best Accuracy: 0.4942381562099871
