# In [None]:
# Required Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, accuracy_score, precision_recall_curve, roc_auc_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.ensemble import IsolationForest
from sklearn.svm import OneClassSVM
from imblearn.over_sampling import SMOTE
from pycaret.classification import *

# Mount Google Drive at /content/drive (Colab only).
from google.colab import drive
drive.mount('/content/drive')

# Read the credit-card transactions CSV from the mounted Drive.
df = pd.read_csv("/content/drive/MyDrive/creditcard.csv")

# Quick sanity checks: first rows, (rows, columns) shape, and the number of
# missing values per column.
for summary in (df.head(), df.shape, df.isnull().sum()):
    print(summary)

# Visualize Class Distribution
# value_counts() sorts by frequency, so the bar order would depend on which
# class happens to be larger. Reindex by class label (0 = normal, 1 = fraud)
# so the hard-coded tick labels below always sit under the matching bar.
fraud_check = df['Class'].value_counts().reindex([0, 1], fill_value=0)
fraud_check.plot(kind='bar', rot=0, color='r')
plt.title("Normal and Fraud Distribution")
plt.xlabel("Class")
plt.ylabel("Frequency")
plt.xticks([0, 1], ['Normal', 'Fraud'])
plt.show()

# Partition the transactions by label: Class == 1 is fraud, Class == 0 is normal.
fraud_df = df.loc[df['Class'].eq(1)]
normal_df = df.loc[df['Class'].eq(0)]

# Summary statistics of the transaction amount, fraud first and then normal.
for subset in (fraud_df, normal_df):
    print(subset['Amount'].describe())

# Amount Distribution
# Two stacked histograms (fraud on top, normal below) sharing the x axis so
# the amount ranges can be compared directly.
fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(10, 6))
fig.suptitle('Transaction Amount Distribution by Class')

ax1.hist(fraud_df['Amount'], bins=50)
ax1.set_title('Fraud Transactions')

ax2.hist(normal_df['Amount'], bins=50)
ax2.set_title('Normal Transactions')

# pyplot-level calls (plt.ylabel / plt.yscale) only touch the current axes,
# i.e. the bottom panel. Configure both panels explicitly so they use the same
# log scale and both carry a y label.
for ax in (ax1, ax2):
    ax.set_ylabel('Number of Transactions')
    ax.set_yscale('log')

# The x axis is shared, so a single label on the bottom panel is enough.
ax2.set_xlabel('Amount ($)')
plt.show()

# Annotated heatmap of the pairwise correlations between all columns of df.
corr_matrix = df.corr()
plt.figure(figsize=(20, 20))
sns.heatmap(corr_matrix, annot=True, fmt='.2f', cmap='coolwarm')
plt.show()

# Separate the label column from the predictor columns.
y = df['Class']
X = df.drop(columns=['Class'])

# Hold out 30% of the rows for testing; stratify on the label so both splits
# keep the same class proportions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)

# Oversample with SMOTE on the training split only; the test split is left
# untouched.
sm = SMOTE(random_state=42)
X_res, y_res = sm.fit_resample(X_train, y_train)

# Isolation Forest Model
# Isolation Forest is an unsupervised anomaly detector that relies on the
# anomalies being rare. It is therefore fit on the original training split,
# not the SMOTE-balanced one, and `contamination` (the expected share of
# anomalies) is taken from the observed fraud rate in the training labels
# instead of a hard-coded 0.172 (= 17.2%).
# Assumes Class is coded 0 = normal / 1 = fraud, so the mean is the fraud share.
# scikit-learn requires contamination in (0, 0.5], hence the clip.
fraud_rate = float(np.clip(y_train.mean(), 1e-6, 0.5))
iso_forest = IsolationForest(n_estimators=100, max_samples='auto', contamination=fraud_rate, random_state=42)
iso_forest.fit(X_train)

# Predictions
# predict() returns +1 for inliers and -1 for outliers; map to 0 = normal, 1 = fraud.
y_pred = iso_forest.predict(X_test)
y_pred = np.where(y_pred == 1, 0, 1)
# decision_function() is lower for more abnormal samples; negate it so that a
# higher score means "more likely fraud", as roc_auc_score expects.
iso_scores = -iso_forest.decision_function(X_test)

# Evaluation
print("Isolation Forest")
print("Accuracy:", accuracy_score(y_test, y_pred))
# ROC AUC is a ranking metric, so it is computed from the continuous anomaly
# scores rather than from the thresholded 0/1 predictions.
print("ROC AUC Score:", roc_auc_score(y_test, iso_scores))
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

# One-Class SVM Model
# A one-class SVM learns the boundary of a single "normal" class, so it is fit
# on the non-fraud rows of the original training split only. Fitting on the
# SMOTE-balanced data would teach it that (synthetic) fraud is normal and would
# also roughly double the training size for a kernel method.
# NOTE(review): the features are not scaled here; an RBF kernel with gamma=0.1
# is sensitive to feature scale -- consider standardising before fitting.
svm = OneClassSVM(kernel='rbf', gamma=0.1, nu=0.05)
svm.fit(X_train[y_train == 0])

# Predictions
# predict() returns +1 for inliers and -1 for outliers; map to 0 = normal, 1 = fraud.
y_pred_svm = svm.predict(X_test)
y_pred_svm = np.where(y_pred_svm == 1, 0, 1)
# decision_function() is positive for inliers and negative for outliers;
# negate it so that a higher score means "more likely fraud".
svm_scores = -svm.decision_function(X_test)

# Evaluation
print("One-Class SVM")
print("Accuracy:", accuracy_score(y_test, y_pred_svm))
# ROC AUC is computed from the continuous scores, not the thresholded labels.
print("ROC AUC Score:", roc_auc_score(y_test, svm_scores))
print("Classification Report:\n", classification_report(y_test, y_pred_svm))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_svm))

# PyCaret Model
df['Class'] = df['Class'].astype('int')

# Reuse the existing train/test split instead of handing PyCaret the full
# DataFrame: with data=df the X_test rows would be part of the data PyCaret
# trains and tunes on, and the evaluation below would be leaking.
train_data = X_train.assign(Class=y_train.astype('int'))
test_data = X_test.assign(Class=y_test.astype('int'))
# NOTE(review): `silent=True` and the 'Label' output column used below are
# PyCaret 2.x API; PyCaret 3.x is believed to have removed `silent` and renamed
# the column to 'prediction_label' -- confirm against the installed version.
clf_setup = setup(data=train_data, test_data=test_data, target='Class', session_id=42, silent=True, log_experiment=True, experiment_name='credit_card_fraud')

# Compare Models
best_model = compare_models()

# Tune the Best Model
tuned_model = tune_model(best_model)

# Predict on Test Set
# Evaluate the tuned model, not the finalized one: finalize_model refits on
# the hold-out data as well, so scoring it on X_test would be measuring
# performance on rows it was trained on.
pred_holdout = predict_model(tuned_model, data=X_test)
print(pred_holdout)

# Evaluate PyCaret Model
pycaret_pred = pred_holdout['Label']
print("PyCaret Model")
print("Accuracy:", accuracy_score(y_test, pycaret_pred))
print("ROC AUC Score:", roc_auc_score(y_test, pycaret_pred))
print("Classification Report:\n", classification_report(y_test, pycaret_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, pycaret_pred))

# Finalize Model
# Done last, after the unbiased evaluation above; the finalized model is the
# one to use for deployment / future predictions.
final_model = finalize_model(tuned_model)
