# EDA: Credit Card Fraud Detection
This notebook performs an exploratory data analysis (EDA) of the credit card transaction data used for fraud detection.

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Plot configuration: apply seaborn's 'whitegrid' style to the figures drawn below
sns.set_style('whitegrid')
# IPython magic: render matplotlib figures inline in the notebook output
%matplotlib inline

In [None]:
# Read the raw transactions CSV; the path is relative to the notebook's working directory
data_dir = Path('../data/raw')
credit_data = pd.read_csv(data_dir.joinpath('creditcard.csv'))

# Report the frame's (rows, columns) shape, then pandas' per-column summary
print("Credit Card Data Shape:", credit_data.shape)
print("\nCredit Card Data Info:")
credit_data.info()

In [None]:
# Basic statistics: pandas' default describe() summary of the loaded frame,
# shown as the cell's rich output
credit_data.describe()

In [None]:
# Count missing entries per column (a column with 0 has no gaps)
credit_data.isna().sum()

In [None]:
# Class distribution
# `class_dist` holds each class's share of rows as a fraction in [0, 1]
# (value_counts(normalize=True)); it is kept in that form for any later use.
class_dist = credit_data['Class'].value_counts(normalize=True)

# The y-axis is labelled as a percentage, so convert the fractions to percent
# before plotting. Sorting by class label fixes the bar order (value_counts
# otherwise orders by frequency).
class_pct = (class_dist * 100).sort_index()

plt.figure(figsize=(8, 6))
ax = sns.barplot(x=class_pct.index, y=class_pct.values, order=list(class_pct.index))
# Write the exact value above each bar: a very small share is otherwise
# impossible to read off the axis. Bars sit at positions 0, 1, ... in `order`.
for bar_pos, pct in enumerate(class_pct.values):
    ax.text(bar_pos, pct, f'{pct:.2f}%', ha='center', va='bottom')
plt.title('Class Distribution')
plt.xlabel('Class (0: Legitimate, 1: Fraudulent)')
plt.ylabel('Percentage')
plt.show()

In [None]:
# Distribution of transaction amounts, overlaid per class
amounts = credit_data['Amount']
legit_amounts = amounts[credit_data['Class'] == 0]
fraud_amounts = amounts[credit_data['Class'] == 1]

# Use ONE set of bin edges for both histograms. Passing bins=50 to each call
# separately would bin each subset over its own min/max, giving the two
# classes different bin widths and making the overlaid counts incomparable.
# NaNs are dropped because the edge computation needs a finite range.
bin_edges = np.histogram_bin_edges(amounts.dropna(), bins=50)

plt.figure(figsize=(10, 6))
sns.histplot(legit_amounts, bins=bin_edges, label='Legitimate', alpha=0.5, color='blue')
sns.histplot(fraud_amounts, bins=bin_edges, label='Fraudulent', alpha=0.5, color='red')
plt.title('Transaction Amount Distribution by Class')
plt.xlabel('Amount')
plt.ylabel('Count (log scale)')
plt.legend()
# Log-scaled counts keep sparsely populated bins visible next to the tallest ones
plt.yscale('log')
plt.show()