# Libraries

In [74]:
import json
import os
import warnings
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import RandomizedSearchCV

warnings.filterwarnings('ignore')

# Loading the processed data

In [50]:
# Root of the project checkout. The default is the original author's local
# path; set the FRAUD_PROJECT_ROOT environment variable to run the notebook
# from any other location without editing the cells.
PROJECT_ROOT = Path(os.environ.get(
    'FRAUD_PROJECT_ROOT',
    '/Users/raya/Desktop/fraud-detection/european-dataset',
))
PROCESSED_DIR = PROJECT_ROOT / 'data' / 'processed'

# Feature matrices stay as DataFrames; each label file is reduced to its
# 'Class' column so y_train / y_test are one-dimensional.
X_train = pd.read_csv(PROCESSED_DIR / 'X_train.csv')
X_test = pd.read_csv(PROCESSED_DIR / 'X_test.csv')
y_train = pd.read_csv(PROCESSED_DIR / 'y_train.csv')['Class']
y_test = pd.read_csv(PROCESSED_DIR / 'y_test.csv')['Class']

# Fail early (and readably) if features and labels are out of sync, instead
# of deep inside the cross-validated fit.
assert len(X_train) == len(y_train), 'X_train / y_train row counts differ'
assert len(X_test) == len(y_test), 'X_test / y_test row counts differ'


In [51]:
# Flatten the labels into plain 1-D NumPy arrays before handing them to
# scikit-learn. np.ravel replaces Series.ravel(), which is deprecated since
# pandas 2.2; it accepts both a Series and an ndarray, so re-running this
# cell after the names have already been converted is harmless.
y_train = np.ravel(y_train)
y_test = np.ravel(y_test)

# Training

In [52]:
# Untuned base estimator that the hyper-parameter search copies and
# reconfigures; the fixed seed makes repeated runs build the same forest.
base_forest_kwargs = {'random_state': 42}
model = RandomForestClassifier(**base_forest_kwargs)

In [53]:
# Candidate values sampled by the randomized hyper-parameter search.
#
# 'auto' is deliberately absent from max_features: for classifiers it was
# only an alias of 'sqrt', it was deprecated in scikit-learn 1.1 and removed
# in 1.3, where every candidate that draws it fails to fit and is scored NaN
# (silently here, because warnings are filtered).
param_dist = {
    'n_estimators': [50, 100, 200, 500],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': ['sqrt', 'log2'],
}

In [54]:
# Randomized search over param_dist: 20 sampled configurations, each scored
# with 5-fold cross-validation, using every available core.
#
# Candidates are ranked by F1 on the positive class (label 1) instead of the
# default accuracy, so model selection uses the same kind of metric the
# evaluation section reports.
# NOTE(review): assumes the positive class is rare enough that accuracy would
# be dominated by the majority class -- confirm against the processed
# training set.
random_search = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_dist,
    n_iter=20,
    scoring='f1',
    cv=5,
    random_state=42,
    n_jobs=-1,
)

In [55]:
# Run the search on the training split. Kept as the cell's final expression
# so the fitted search object is displayed as the cell output.
random_search.fit(X=X_train, y=y_train)

In [56]:
# The search above leaves `refit` at its default (True), so it has already
# retrained the winning configuration -- same base estimator, same seed -- on
# the full training set. Reuse that fitted forest instead of training an
# identical one a second time.
best_model = random_search.best_estimator_

# Final expression: display the selected model and its hyper-parameters.
best_model

In [66]:
# Hard class predictions for the hold-out split, consumed by the evaluation
# cells below.
y_pred = best_model.predict(X=X_test)

# Evaluation

In [67]:
# Score the hold-out predictions. Precision, recall and F1 use scikit-learn's
# binary defaults, i.e. they describe the class labelled 1.
# zero_division=0 pins the value returned when a denominator is empty (for
# example, the model never predicts the positive class) instead of relying on
# the warn-and-return-0 default.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, zero_division=0)
recall = recall_score(y_test, y_pred, zero_division=0)
f1 = f1_score(y_test, y_pred, zero_division=0)

# labels=[0, 1] forces a 2x2 matrix even when one class is missing from both
# arrays; without it the matrix collapses to 1x1 and the four-way unpacking
# raises ValueError.
# NOTE(review): assumes the classes are encoded as 0 / 1 -- confirm.
tn, fp, fn, tp = confusion_matrix(y_test, y_pred, labels=[0, 1]).ravel()

In [76]:
# Persist the headline metrics and a small sample of predictions.
# The default location is the original author's local path; set the
# FRAUD_PROJECT_ROOT environment variable to write somewhere else.
REPORT_DIR = Path(os.environ.get(
    'FRAUD_PROJECT_ROOT',
    '/Users/raya/Desktop/fraud-detection/european-dataset',
)) / 'reports' / 'random_forest'
# Create the folder on first run so the writes below cannot fail on a
# missing directory.
REPORT_DIR.mkdir(parents=True, exist_ok=True)

# float()/int() convert NumPy scalars into plain Python numbers that the json
# module can serialise. Counts are stored as integers, and all four
# confusion-matrix cells are reported (previously tn and fn were computed but
# never written out).
metrics = {
    'Accuracy': float(accuracy),
    'Precision': float(precision),
    'Recall': float(recall),
    'F1-score': float(f1),
    'True Positives': int(tp),
    'False Positives': int(fp),
    'True Negatives': int(tn),
    'False Negatives': int(fn),
}
with open(REPORT_DIR / 'metrics.json', 'w') as f:
    json.dump(metrics, f, indent=4)

# Only the first 30 actual/predicted pairs are exported, as a quick
# eyeball sample rather than the full prediction set.
pd.DataFrame({'Actual': y_test, 'Predicted': y_pred}).head(30).to_csv(REPORT_DIR / 'results.csv')