# Anomaly Detection with PyCaret
## Credit Card Fraud Detection

- **Dataset:** Credit Card Fraud (284,807 transactions)
- **Goal:** Detect fraudulent transactions
- **Challenge:** Highly imbalanced (0.17% fraud)

In [None]:
!pip install pycaret[full] -q

In [None]:
# Data handling and plotting libraries.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Wildcard import: later cells call setup, models, create_model, plot_model,
# assign_model and save_model unqualified, presumably relying on this line to
# bring them into scope.
from pycaret.anomaly import *
import warnings
# Suppress all warnings for the rest of the session to keep cell output compact.
warnings.filterwarnings('ignore')
print('✓ Libraries imported')

In [None]:
# Load the credit-card transactions CSV from a public GitHub mirror.
# NOTE(review): whether this mirror holds the complete Kaggle dataset cannot be
# verified from here; the original data is published on Kaggle.
url = 'https://raw.githubusercontent.com/nsethi31/Kaggle-Data-Credit-Card-Fraud-Detection/master/creditcard.csv'
df = pd.read_csv(url)

# Work on a reproducible random subset so the later cells run quickly.
# Capping the size at len(df) avoids the ValueError that DataFrame.sample
# raises when n exceeds the number of rows (sampling without replacement).
sample_size = min(10000, len(df))
df_sample = df.sample(n=sample_size, random_state=42)
print(f'Shape: {df_sample.shape}')
df_sample.head()

In [None]:
# Show how the labels split in the sample: row counts per Class value, then
# the mean of the Class column as a percentage
# (assumes Class is a 0/1 flag with 1 = fraud — confirm against the data).
label_col = df_sample['Class']
fraud_pct = label_col.mean() * 100
print('Class distribution:')
print(label_col.value_counts())
print(f'\nFraud %: {fraud_pct:.2f}%')

In [None]:
# The detector is trained without labels, so the Class column is removed
# before the features are handed to PyCaret's setup.
df_features = df_sample.drop(columns='Class')
setup(
    data=df_features,
    session_id=42,
    normalize=True,
    verbose=True,
)

In [None]:
# Display the table of anomaly-detection estimators that create_model accepts.
models()

In [None]:
# Fit an Isolation Forest ('iforest') on the data registered by setup.
# NOTE(review): fraction=0.01 is presumably the expected share of outliers; if
# so it is roughly six times the 0.17% fraud rate quoted in the notebook
# header — confirm this over-flagging is intended.
iforest = create_model('iforest', fraction=0.01)

In [None]:
# Visualise the fitted model using the 'tsne' plot type.
plot_model(iforest, plot='tsne')

In [None]:
# Visualise the fitted model using the 'umap' plot type.
# NOTE(review): this likely needs the optional umap-learn package — confirm it
# is installed in the kernel environment.
plot_model(iforest, plot='umap')

In [None]:
# Attach the model's output to the data; the returned frame carries the
# 'Anomaly' column that the following cells read.
results = assign_model(iforest)
results.head()

In [None]:
# Tally the detector's output: row counts per Anomaly value, then the number
# of rows whose flag equals 1.
anomaly_flags = results['Anomaly']
n_flagged = (anomaly_flags == 1).sum()
print('Anomaly distribution:')
print(anomaly_flags.value_counts())
print(f'\nAnomalies detected: {n_flagged}')

In [None]:
# Compare the detector's flags with the true labels that were withheld from
# training.
from sklearn.metrics import classification_report

# NOTE(review): the comparison is positional, so it assumes `results` keeps
# the same row order as `df_sample` — confirm.
y_true = df_sample['Class']
y_pred = results['Anomaly']
print(classification_report(y_true, y_pred))

In [None]:
# Persist the fitted model under the name 'fraud_detection'
# (presumably written as a pickle file in the working directory — confirm).
save_model(iforest, 'fraud_detection')
print('✓ Model saved')

In [None]:
# Closing banner: a fixed, hard-coded recap framed by 60-character rules.
# None of the text is derived from the model's actual results.
banner = '=' * 60
summary_lines = (
    banner,
    'ANOMALY DETECTION SUMMARY',
    banner,
    'Algorithm: Isolation Forest',
    'Anomalies: Detected successfully',
    'Use case: Fraud detection',
    '✓ COMPLETED',
    banner,
)
print('\n'.join(summary_lines))