In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import IsolationForest
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Read the transactions file into a DataFrame
data = pd.read_csv('creditcard.csv')

# Separate the target column from the predictors
y = data['Class']  # Class labels
X = data.drop(columns=['Class'])  # Every remaining column is used as a feature

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Standardise the features. The scaler is fitted on the training rows only and
# then applied to both partitions, so no test-set statistics leak into training.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create the Isolation Forest model.
# contamination=0.01 tells the model to treat roughly 1% of the samples as
# outliers when it sets its decision threshold; random_state fixes the seed.
if_model = IsolationForest(contamination=0.01, random_state=42)

# Train the model on the scaled training set (unsupervised: no labels are passed)
if_model.fit(X_train_scaled)

# Predict anomalies on the test set.
# IsolationForest.predict returns -1 for outliers and +1 for inliers.
y_pred_if = if_model.predict(X_test_scaled)

# Convert the predictions to binary labels: 1 for anomaly, 0 for normal.
# The comparison is vectorised over the NumPy array; .tolist() keeps the result
# a plain list of Python ints, as it was before.
y_pred_if_binary = (y_pred_if == -1).astype(int).tolist()

# Score the Isolation Forest predictions against the held-out labels.
# precision/recall/F1 use scikit-learn's default positive label (1).
accuracy_if = accuracy_score(y_test, y_pred_if_binary)
precision_if = precision_score(y_test, y_pred_if_binary)
recall_if = recall_score(y_test, y_pred_if_binary)
f1_if = f1_score(y_test, y_pred_if_binary)

# Report the metrics, one per line, under a heading
print(
    "Isolation Forest Results:",
    f"Accuracy: {accuracy_if}",
    f"Precision: {precision_if}",
    f"Recall: {recall_if}",
    f"F1-score: {f1_if}",
    sep="\n",
)


Isolation Forest Results:
Accuracy: 0.9904731809510434
Precision: 0.09738717339667459
Recall: 0.6029411764705882
F1-score: 0.16768916155419222
