In [3]:
import pandas as pd
import numpy as np
from sklearn.ensemble import IsolationForest
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, f1_score, precision_recall_curve, auc
from xgboost import XGBClassifier

# Read the full training table from disk.
df = pd.read_csv("Train.csv")  # Replace with your actual dataset

# Column 0 is used as the label; every remaining column is used as a predictor.
X = df.iloc[:, 1:]
y = df.iloc[:, 0]

# Hold out 20% of the rows for evaluation. The split is stratified on the
# label and seeded, so the class mix and row assignment repeat across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Isolation Forest fitted on the training features only, so held-out rows
# never influence the trees.
# NOTE(review): n_estimators / contamination look like outputs of an earlier
# tuning run -- confirm where they came from.
iso_forest = IsolationForest(
    n_estimators=310,
    contamination=0.08362674314960916,
    random_state=42
)
iso_forest.fit(X_train)

# Score both splits BEFORE attaching the new column: the forest was fitted on
# the original feature set, so it must be scored on that same set of columns.
train_anomaly_score = iso_forest.decision_function(X_train)
test_anomaly_score = iso_forest.decision_function(X_test)

# Attach the score as a new feature. `.assign` returns a new frame instead of
# writing a column into the frames handed back by train_test_split, which
# avoids pandas' SettingWithCopyWarning and leaves the split frames untouched.
X_train = X_train.assign(anomaly_score=train_anomaly_score)
X_test = X_test.assign(anomaly_score=test_anomaly_score)

# Gradient-boosted classifier trained on the augmented training features.
# The positive-class weight is the count of label 0 divided by the count of
# label 1 in the training labels, computed from a single value_counts() call.
class_counts = y_train.value_counts()
xgb_model = XGBClassifier(
    n_estimators=995,
    max_depth=7,
    learning_rate=0.06784517813469125,
    scale_pos_weight=class_counts[0] / class_counts[1],
    eval_metric="logloss",
    random_state=42,
)
xgb_model.fit(X_train, y_train)

# Predict Probabilities
# Keep column 1 of the predict_proba output (the label-1 column when the
# target is encoded as 0/1 -- TODO confirm the label encoding).
y_pred_prob = xgb_model.predict_proba(X_test)[:, 1]

# Adjust Decision Threshold
# NOTE(review): if this value is tuned, tune it on a validation split rather
# than on X_test, otherwise the reported metrics become optimistic.
threshold = 0.4  # Can be tuned further
y_pred = (y_pred_prob >= threshold).astype(int)

# Evaluation
# accuracy / f1 use the thresholded labels; the precision-recall curve uses
# the raw probabilities.
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
# Bind the thresholds to a real name rather than `_`: in IPython/Jupyter `_`
# is the "last output" variable, and assigning to it stops it being updated.
precision, recall, pr_thresholds = precision_recall_curve(y_test, y_pred_prob)
pr_auc = auc(recall, precision)

# Print Results
print(f"Accuracy: {accuracy:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"Precision-Recall AUC: {pr_auc:.4f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

Accuracy: 0.9808
F1 Score: 0.6038
Precision-Recall AUC: 0.6078

Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.99      0.99      1060
           1       0.73      0.52      0.60        31

    accuracy                           0.98      1091
   macro avg       0.86      0.76      0.80      1091
weighted avg       0.98      0.98      0.98      1091

