# ML Pipeline Test - Multiple Models Comparison
This notebook demonstrates the usage of our ML pipeline module with multiple models.

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
from src.main import run_model_pipeline
import matplotlib.pyplot as plt
import seaborn as sns
from openpyxl import load_workbook
from openpyxl.styles import PatternFill, Alignment, Font
from src.Exel_Modules.excel_utils import save_predictions_and_metrics_to_excel



## Define Models
We'll test multiple models with different configurations.

# Define models to test

In [3]:

models = {
    'Random Forest': {
        'library': 'sklearn.ensemble',
        'function': 'RandomForestClassifier',
        'params': {'n_estimators': 100, 'max_depth': 10}
    },
    'Gradient Boosting': {
        'library': 'sklearn.ensemble',
        'function': 'GradientBoostingClassifier',
        'params': {'n_estimators': 100, 'learning_rate': 0.1}
    },
    'SVM': {
        'library': 'sklearn.svm',
        'function': 'SVC',
        'params': {'kernel': 'rbf', 'C': 1.0}
    },
    'Logistic Regression': {
        'library': 'sklearn.linear_model',
        'function': 'LogisticRegression',
        'params': {'max_iter': 1000}
    }
}

In [4]:


# Data parameters
DATA_PATH = "data/FraudDetectionDataset.xlsx"
TARGET_COLUMN = "Fraudulent"

# Store results for all models
all_results = {}

# Run pipeline for each model
for model_name, model_config in models.items():
    print(f"\nTraining {model_name}...")
    results = run_model_pipeline(
        data_path=DATA_PATH,
        target_column=TARGET_COLUMN,
        model_library=model_config['library'],
        model_function=model_config['function'],
        model_params=model_config['params']
    )
    all_results[model_name] = results
# print(all_results["Random Forest"])


Training Random Forest...
[Train] Accuracy: 0.9683 | Precision: 0.9702 | Recall: 0.9659 | f1_score: 0.9680
[Test ] Accuracy: 0.4712 | Precision: 0.4560 | Recall: 0.4500 | f1_score: 0.4530
[All  ] Accuracy: 0.8688 | Precision: 0.8698 | Recall: 0.8644 | f1_score: 0.8671

Training Gradient Boosting...
[Train] Accuracy: 0.7039 | Precision: 0.7113 | Recall: 0.6811 | f1_score: 0.6959
[Test ] Accuracy: 0.5083 | Precision: 0.4945 | Recall: 0.4737 | f1_score: 0.4839
[All  ] Accuracy: 0.6648 | Precision: 0.6686 | Recall: 0.6403 | f1_score: 0.6542

Training SVM...
[Train] Accuracy: 0.6302 | Precision: 0.6376 | Recall: 0.5941 | f1_score: 0.6151
[Test ] Accuracy: 0.4930 | Precision: 0.4781 | Recall: 0.4605 | f1_score: 0.4692
[All  ] Accuracy: 0.6028 | Precision: 0.6054 | Recall: 0.5678 | f1_score: 0.5860

Training Logistic Regression...
[Train] Accuracy: 0.5309 | Precision: 0.5299 | Recall: 0.5019 | f1_score: 0.5156
[Test ] Accuracy: 0.5122 | Precision: 0.4986 | Recall: 0.4684 | f1_score: 0.4830
[

# ==== Save Predictions to Excel ====

In [None]:


# Call the function
save_predictions_and_metrics_to_excel(
    results_dict=all_results,
    model_name="Random Forest",
    excel_path="data/FraudDetectionDataset.xlsx"
)


In [1]:
import numpy as np

def initialize_population(num_agents, dim, lower_bound, upper_bound):
    """
    مقداردهی اولیه جمعیت با تعداد عامل‌ها، ابعاد و حدود مشخص

    Args:
        num_agents (int): تعداد اسب‌های آبی (عامل‌ها)
        dim (int): تعداد ابعاد فضای جست‌وجو
        lower_bound (float or array): حد پایین هر بعد
        upper_bound (float or array): حد بالا هر بعد

    Returns:
        np.ndarray: ماتریسی با شکل (num_agents, dim) از جمعیت اولیه
    """
    population = np.random.uniform(low=lower_bound, high=upper_bound, size=(num_agents, dim))
    return population


In [2]:
# پارامترهای آزمایشی
num_agents = 5
dimensions = 3
LB = 0
UB = 100

pop = initialize_population(num_agents, dimensions, LB, UB)
print(pop)


[[6.76538703e+01 7.48691226e-01 4.17337993e-02]
 [1.12075240e+01 2.74188889e+01 8.91410909e+01]
 [9.40567652e+01 6.34153036e+01 1.01973438e+01]
 [4.33876012e+01 2.15478017e+00 3.12457283e+01]
 [4.18118057e+01 6.12047321e+01 3.37693500e+01]]


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import uniform, randint

# مسیر فایل
DATA_PATH = "data/FraudDetectionDataset.xlsx"

# 1. خواندن داده
df = pd.read_excel(DATA_PATH)
X = df.drop(columns=['Fraudulent'])
y = df['Fraudulent']

# 2. پیش‌پردازش
X = pd.get_dummies(X)  # تبدیل داده‌های غیرعددی به عددی
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 3. تقسیم‌بندی داده‌ها
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# 4. تعریف مدل MLP
mlp = MLPClassifier(max_iter=500, random_state=42)

# 5. تعریف فضای جستجوی ابرپارامترها
param_dist = {
    'hidden_layer_sizes': [(50,), (100,), (100, 50), (150, 100)],
    'activation': ['relu', 'tanh', 'logistic'],
    'solver': ['adam', 'sgd'],
    'alpha': uniform(0.0001, 0.01),
    'learning_rate': ['constant', 'adaptive']
}

# 6. اجرای RandomizedSearchCV
random_search = RandomizedSearchCV(mlp, param_distributions=param_dist,
                                   n_iter=20, cv=3, scoring='accuracy', random_state=42, n_jobs=-1)
random_search.fit(X_train, y_train)

# 7. ارزیابی مدل
best_model = random_search.best_estimator_
y_pred = best_model.predict(X_test)
print("Best Parameters:", random_search.best_params_)
print("\nClassification Report:")
print(classification_report(y_test, y_pred))


Best Parameters: {'activation': 'logistic', 'alpha': np.float64(0.006199966577826209), 'hidden_layer_sizes': (150, 100), 'learning_rate': 'constant', 'solver': 'adam'}

Classification Report:
              precision    recall  f1-score   support

           0       0.53      0.64      0.58       401
           1       0.51      0.40      0.45       380

    accuracy                           0.52       781
   macro avg       0.52      0.52      0.51       781
weighted avg       0.52      0.52      0.52       781



: 

In [4]:
import pandas as pd

DATA_PATH = "data/FraudDetectionDataset.xlsx"
df = pd.read_excel(DATA_PATH)

print("ستون‌های موجود در فایل:")
print(df.columns)


ستون‌های موجود در فایل:
Index(['Transaction_ID', 'User_ID', 'Transaction_Amount', 'Transaction_Type',
       'Time_of_Transaction', 'Device_Used', 'Location',
       'Previous_Fraudulent_Transactions', 'Account_Age',
       'Number_of_Transactions_Last_24H', 'Payment_Method', 'Fraudulent'],
      dtype='object')
