In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import os

# Characters that are invalid in Windows file names or act as path separators.
_UNSAFE_FILENAME_CHARS = str.maketrans({ch: "_" for ch in '<>:"/\\|?*'})


def _safe_stem(name):
    """Return ``str(name)`` with filename-unsafe characters replaced by ``_``."""
    return str(name).translate(_UNSAFE_FILENAME_CHARS)


def _downsample(df, limit, random_state):
    """Return *df* unchanged, or a random sample of *limit* rows if it is larger."""
    if len(df) > limit:
        return df.sample(n=limit, random_state=random_state)
    return df


class _SavedFigure:
    """Context manager: open a figure, save it on success, always close it."""

    def __init__(self, path, figsize):
        self._path = path
        self._figsize = figsize
        self._fig = None

    def __enter__(self):
        # plt.figure() also makes the new figure current, so seaborn's
        # axes-level functions called inside the `with` body draw onto it.
        self._fig = plt.figure(figsize=self._figsize)
        return self._fig

    def __exit__(self, exc_type, exc, tb):
        try:
            if exc_type is None:
                # "tight" keeps rotated tick labels and long titles inside the image.
                self._fig.savefig(self._path, bbox_inches="tight")
        finally:
            # Close even when plotting or saving failed so figures do not pile up.
            plt.close(self._fig)
        return False  # never swallow the caller's exception


def create_visualizations(df, save_dir, *, random_state=None):
    """Save a set of exploratory plots for *df* as PNG files in *save_dir*.

    The following files are written (each only when its inputs are present):

    * ``<column>_distribution.png`` -- a 50-bin histogram for every numeric
      column and a count plot of the 20 most frequent values for every
      ``object`` column.
    * ``spending_by_merchant.png`` -- box plot of ``amount`` per ``merchant``
      (at most 2000 sampled rows); needs both columns.
    * ``age_vs_income.png`` -- scatter plot of ``income`` against ``age``
      coloured by ``gender`` (at most 3000 sampled rows); needs all three
      columns.
    * ``correlation_matrix.png`` -- heatmap of the correlations between the
      numeric columns; skipped when there are no numeric columns.

    Args:
        df: pandas DataFrame to visualise.
        save_dir: Output directory; created if it does not exist.
        random_state: Optional seed forwarded to ``DataFrame.sample`` so the
            down-sampled plots are reproducible. ``None`` (the default) keeps
            the sampling random on every call.

    Returns:
        None. The function is called for its side effect of writing files.
    """
    # Ensure save directory exists
    os.makedirs(save_dir, exist_ok=True)

    sns.set_style("whitegrid")

    # Select the numeric columns once; reused for histograms and correlations.
    numeric_df = df.select_dtypes(include=np.number)

    # Visualize numerical columns
    for col in numeric_df.columns:
        target = os.path.join(save_dir, f"{_safe_stem(col)}_distribution.png")
        with _SavedFigure(target, (10, 6)):
            sns.histplot(numeric_df[col], bins=50)
            plt.title(f"Distribution of {col}")

    # Visualize categorical columns
    for col in df.select_dtypes(include=object).columns:
        target = os.path.join(save_dir, f"{_safe_stem(col)}_distribution.png")
        # value_counts() sorts by frequency, so the slice keeps the top 20.
        top_categories = df[col].value_counts().index[:20]
        with _SavedFigure(target, (10, 6)):
            sns.countplot(y=col, data=df, order=top_categories)
            plt.title(f"Distribution of {col}")

    # Spending patterns by merchant
    if 'merchant' in df.columns and 'amount' in df.columns:
        target = os.path.join(save_dir, "spending_by_merchant.png")
        with _SavedFigure(target, (12, 8)):
            sns.boxplot(x='merchant', y='amount',
                        data=_downsample(df, 2000, random_state))
            plt.title("Spending Patterns by Merchant")
            plt.xticks(rotation=45)

    # Customer demographics: Age vs. Income
    if 'age' in df.columns and 'income' in df.columns and 'gender' in df.columns:
        target = os.path.join(save_dir, "age_vs_income.png")
        with _SavedFigure(target, (12, 8)):
            sns.scatterplot(x='age', y='income', hue='gender',
                            data=_downsample(df, 3000, random_state), alpha=0.5)
            plt.title("Customer Demographics: Age vs. Income")

    # Correlation matrix
    corr = numeric_df.corr()
    # With no numeric columns the matrix is empty; skip the plot instead of
    # asking seaborn to draw a zero-size heatmap.
    if not corr.empty:
        target = os.path.join(save_dir, "correlation_matrix.png")
        with _SavedFigure(target, (12, 10)):
            # No `fmt` here: it only formats annotations, which are disabled.
            sns.heatmap(corr, cmap='coolwarm')
            plt.title("Correlation Matrix")


# ✅ Paths
# Absolute, machine-specific Windows locations, written as raw strings so the
# backslashes are not interpreted as escape sequences.
# file_path: CSV file that is loaded below.
file_path = r"C:\Users\ajroy\OneDrive\Desktop\mypython.py\git practice\credit-card-frauad-detection-project\data\credit_card_transactions.csv"
# save_dir: directory passed to create_visualizations() as the output folder.
save_dir = r"C:\Users\ajroy\OneDrive\Desktop\mypython.py\git practice\credit-card-frauad-detection-project\docs"

# Load data & create visualizations
# The whole CSV is read into memory with pandas' default parsing options.
df = pd.read_csv(file_path)
create_visualizations(df, save_dir)

# NOTE(review): the message names 'credit/docs/', but the plots are written to
# `save_dir` above -- confirm which wording is intended.
print("✅ Fast Visualizations created and saved inside 'credit/docs/' folder.")


✅ Fast Visualizations created and saved inside 'credit/docs/' folder.
