# Fraud Detection - Exploratory Data Analysis

This notebook explores the synthetic fraud detection dataset and provides insights into transaction patterns.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats

# Pandas display: do not cap the number of columns shown for a DataFrame.
pd.options.display.max_columns = None

# Plot appearance: select matplotlib's 'default' style sheet first, then
# set seaborn's 'husl' colour palette on top of it.
plt.style.use('default')
sns.set_palette(palette='husl')

print('✅ Libraries imported successfully!')

In [None]:
# Generate data first.
# The generator script is executed inside this notebook's namespace, so any
# names it defines remain available to later cells.
GENERATOR_SCRIPT = '../src/data_processing/generate_data.py'

# Read the script through a context manager so the file handle is closed
# promptly, and decode it as UTF-8 (Python's default source encoding) rather
# than whatever the platform locale happens to be.
with open(GENERATOR_SCRIPT, encoding='utf-8') as generator_file:
    generator_source = generator_file.read()

# Compiling with the real path makes tracebacks point at the script file
# instead of an anonymous '<string>'.
exec(compile(generator_source, GENERATOR_SCRIPT, 'exec'))

# Load the dataset
df = pd.read_csv('../data/raw/fraud_data.csv')
print(f'Dataset loaded: {df.shape}')

# Last expression of the cell: the notebook renders the first rows.
df.head()

In [None]:
# Overview of the loaded frame: dimensions and deep memory footprint in MB.
frame_size_mb = df.memory_usage(deep=True).sum() / 1024**2
print('=== DATASET OVERVIEW ===')
print(f'Shape: {df.shape}')
print(f'Memory usage: {frame_size_mb:.2f} MB')

# Fraud summary derived from the "Class" column.
# NOTE(review): the counts below treat "Class" as a 0/1 label (its sum is
# reported as the number of fraud cases) - confirm against the generator.
class_labels = df["Class"]
fraud_count = class_labels.sum()
normal_count = (class_labels == 0).sum()
fraud_rate = class_labels.mean()

print('\n=== FRAUD STATISTICS ===')
print(f'Total transactions: {len(df):,}')
print(f'Fraud cases: {fraud_count:,}')
print(f'Normal cases: {normal_count:,}')
print(f'Fraud rate: {fraud_rate:.4f} ({fraud_rate*100:.2f}%)')