# Exploratory Data Analysis (EDA) - UPI Fraud Detection

This notebook analyzes the UPI transaction dataset to understand patterns, distributions, and the characteristics of fraudulent transactions.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import yaml

# Read the project configuration. The relative path is resolved against the
# kernel's current working directory.
with open('../07_configs/config.yaml', 'r') as config_file:
    config = yaml.safe_load(config_file)

# Build the raw-data path from the config entry, then load the CSV.
raw_data_path = '../' + config['paths']['raw_data']
df = pd.read_csv(raw_data_path)

# Parse the 'Timestamp' column into datetime values.
df['Timestamp'] = pd.to_datetime(df['Timestamp'])

# Preview the first rows of the loaded frame.
df.head()

## 1. Class Distribution
Checking the balance between normal and fraudulent transactions.

In [None]:
# Bar chart with one bar per distinct 'IsFraud' value.
fig, ax = plt.subplots(figsize=(6, 4))
sns.countplot(data=df, x='IsFraud', ax=ax)
ax.set_title('Class Distribution')
plt.show()

# Relative frequency of each 'IsFraud' value (normalize=True yields fractions).
class_shares = df['IsFraud'].value_counts(normalize=True)
print(class_shares)

## 2. Transaction Amount Analysis
Do fraudulent transactions have higher amounts?

In [None]:
# Box plot of 'Amount' for each 'IsFraud' value.
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=df, x='IsFraud', y='Amount', ax=ax)

# Switch the y-axis to a log scale after drawing.
# NOTE(review): non-positive values cannot be shown on a log axis -- confirm
# that 'Amount' is strictly positive.
ax.set_yscale('log')
ax.set_title('Transaction Amount by Class (Log Scale)')
plt.show()

## 3. Temporal Patterns
Are fraudulent transactions more common at certain hours of the day?

In [None]:
# Derive the hour of day (0-23) from each transaction's timestamp and store it
# on `df` under the 'Hour' column.
df['Hour'] = df['Timestamp'].dt.hour

fig, ax = plt.subplots(figsize=(12, 6))

# 'Hour' is integer-valued, so use discrete=True: one unit-width bar per hour,
# centered on that hour. Automatically chosen bin edges do not line up with
# integers and can merge or split hours, producing artificial spikes and gaps.
# common_norm=False normalizes each 'IsFraud' group on its own, so the shapes
# of the two distributions can be compared regardless of group size.
sns.histplot(
    data=df,
    x='Hour',
    hue='IsFraud',
    common_norm=False,
    stat='density',
    discrete=True,
    kde=True,
    ax=ax,
)
ax.set_title('Transaction Hour Distribution')
ax.set_xlabel('Hour of day')
ax.set_xticks(range(24))  # one tick per hour so each bar is labelled
plt.show()