[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/googlecolab/colab-samples/blob/main/notebooks/basic_notebook_features/text_cells.ipynb)




In [None]:
# Install PyCaret from GitHub master branch
!pip install git+https://github.com/pycaret/pycaret.git@master --upgrade -q

In [None]:
# Load credit card dataset for anomaly detection
import pandas as pd
import warnings

# Silence library warnings to keep the notebook output readable.
# NOTE(review): this hides *all* warnings for the rest of the session.
warnings.filterwarnings('ignore')

SAMPLE_SIZE = 2000                    # transactions kept for faster processing
SAMPLE_SEED = 42                      # fixed seed: the same rows are drawn on every run
NON_FEATURE_COLUMNS = ['Class', 'Time']

# Download the full table of credit card transactions
url = 'https://raw.githubusercontent.com/nsethi31/Kaggle-Data-Credit-Card-Fraud-Detection/master/creditcard.csv'
df_full = pd.read_csv(url)

# Fail early, with a readable message, if the download is not the expected table
missing_columns = [col for col in NON_FEATURE_COLUMNS if col not in df_full.columns]
if missing_columns:
    raise KeyError(f"Downloaded dataset is missing expected columns: {missing_columns}")

# Work on a reproducible random sample; keep it under its own name so that
# `df` is derived in a single step from a clearly identified input
df_sample = df_full.sample(n=SAMPLE_SIZE, random_state=SAMPLE_SEED).reset_index(drop=True)

# Anomaly detection is unsupervised, so the fraud label ('Class') is removed
# from the features and kept separately for validation later on
fraud_labels = df_sample['Class'].copy()
df = df_sample.drop(columns=NON_FEATURE_COLUMNS)  # Remove Class and Time

print(f"✅ Credit Card Dataset loaded: {df.shape}")
print(f"Features: {len(df.columns)} transaction features (V1-V28, Amount)")
print(f"\n🔍 Known frauds in sample: {fraud_labels.sum()} out of {len(df)}")
df.head()

In [None]:
# Quick exploration of transaction data
print("📊 Transaction Statistics:")
# Rich display keeps the wide summary table readable; print() would wrap it
# into several blocks of plain text.
display(df.describe())

print("\n💡 Dataset Info:")
print(f"   • Total Transactions: {len(df)}")
print("   • Features: V1-V28 (PCA components) + Amount")
print("   • Task: Detect fraudulent transactions (anomalies)")

In [None]:
from pycaret.anomaly import *

# Set up the anomaly detection experiment.
# The options are gathered in one mapping so they can be reviewed at a glance
# before being handed to setup().
setup_options = {
    "data": df,
    "session_id": 999,
    "normalize": True,
    "pca": True,
    "pca_components": 10,
    "verbose": False,
}
anom_exp = setup(**setup_options)

In [None]:
# Check if GPU is available: asks `nvidia-smi` for the GPU name(s) only,
# formatted as CSV without a header row.
# The output is informational -- it is not captured in a variable by this cell.
!nvidia-smi --query-gpu=name --format=csv,noheader

In [None]:
# Create Isolation Forest model for anomaly detection
# Single source of truth for the share of rows the model is asked to flag,
# so the model configuration and the message below cannot drift apart.
ANOMALY_FRACTION = 0.05

print("🤖 Creating Isolation Forest model...")
iforest_model = create_model('iforest', fraction=ANOMALY_FRACTION)
print("✅ Isolation Forest created!")
print(f"\nModel will flag approximately {ANOMALY_FRACTION * 100}% of data as anomalies")

In [None]:
# Assign anomaly labels to transactions
anomaly_results = assign_model(iforest_model)

# Build the "flagged as anomaly" mask once and reuse it for the counts,
# the rate and the preview below
is_anomaly = anomaly_results['Anomaly'] == 1

# Count detected anomalies
n_anomalies = is_anomaly.sum()
n_normal = (anomaly_results['Anomaly'] == 0).sum()

print("✅ Anomaly Detection Complete!")
print("\n📊 Results:")
print(f"   • Normal Transactions: {n_normal}")
print(f"   • Anomalies Detected: {n_anomalies}")
print(f"   • Anomaly Rate: {(is_anomaly.mean() * 100):.2f}%")

print("\n🔍 Sample of detected anomalies:")
anomaly_results[is_anomaly].head(5)

In [None]:
# Visualize anomalies using t-SNE
print("📈 Visualizing anomalies in 2D space...")
plot_model(iforest_model, plot='tsne')

In [None]:
# Compare detected anomalies with actual fraud labels
# Labels are attached by position (.values), not by index.
# NOTE(review): this assumes assign_model() returns rows in the same order as
# the data passed to setup() -- confirm.
anomaly_results['True_Fraud'] = fraud_labels.values

# Compute each mask once instead of repeating the comparisons in every line
flagged = anomaly_results['Anomaly'] == 1
not_flagged = anomaly_results['Anomaly'] == 0
is_fraud = anomaly_results['True_Fraud'] == 1
is_legit = anomaly_results['True_Fraud'] == 0

true_positives = (flagged & is_fraud).sum()
false_positives = (flagged & is_legit).sum()
missed_frauds = (not_flagged & is_fraud).sum()

print("🎯 Validation Against Known Fraud:")
print(f"\nDetected as Anomaly & Actually Fraud: {true_positives}")
print(f"Detected as Anomaly but Normal: {false_positives}")
print(f"Missed Frauds (detected as normal): {missed_frauds}")

# Calculate detection rate
# Fraud is rare, so a small random sample may contain no fraud at all; in that
# case the rate is undefined (0/0) and is reported as such rather than as "nan%".
total_frauds = fraud_labels.sum()
if total_frauds > 0:
    fraud_detection_rate = true_positives / total_frauds
    print(f"\n✅ Fraud Detection Rate: {fraud_detection_rate*100:.1f}%")
else:
    fraud_detection_rate = float('nan')
    print("\n⚠️ No known frauds in this sample - fraud detection rate is undefined.")

In [None]:
# Try alternative anomaly detection algorithm
print("🧩 Creating KNN-based anomaly detector...")
# The fraction is spelled out (0.05 is also create_model's documented default)
# so that the anomaly count is directly comparable with a detector that was
# configured to flag 5% of the data.
knn_model = create_model('knn', fraction=0.05)

knn_results = assign_model(knn_model)
n_knn_anomalies = (knn_results['Anomaly'] == 1).sum()

print(f"\nKNN detected {n_knn_anomalies} anomalies")
plot_model(knn_model, plot='tsne')

In [None]:
# Test on new unseen transactions
print("🔮 Testing on new transactions...")

N_NEW_TRANSACTIONS = 10
feature_columns = list(df.columns)

# `df` is the data the model was fitted on, so sampling from it would not give
# unseen rows. Instead, anti-join the full dataset against `df`: keep only the
# rows whose feature values do not occur in the fitted sample.
unseen_transactions = (
    df_full[feature_columns]
    .merge(df.drop_duplicates(), how='left', on=feature_columns, indicator=True)
    .loc[lambda merged: merged['_merge'] == 'left_only', feature_columns]
)

if unseen_transactions.empty:
    # Nothing is left outside the fitted sample; fall back to in-sample rows
    # and say so, rather than failing.
    print("⚠️ No held-out transactions available - predicting on rows the model has already seen.")
    unseen_transactions = df

new_transactions = unseen_transactions.sample(
    min(N_NEW_TRANSACTIONS, len(unseen_transactions)), random_state=123
)
predictions = predict_model(iforest_model, data=new_transactions)

print(f"\n📊 Predictions for {len(new_transactions)} new transactions:")
print(f"   Normal: {(predictions['Anomaly'] == 0).sum()}")
print(f"   Suspicious: {(predictions['Anomaly'] == 1).sum()}")

predictions[['Amount', 'Anomaly', 'Anomaly_Score']]