In [None]:

from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Default locations. These are the exact paths that used to be hard-coded in
# the function body, so running without arguments still reads and writes the
# same files as before.
_DEFAULT_OUTPUT_DIR = "C:\\Users\\ajroy\\OneDrive\\Desktop\\mypython.py\\git practice\\docs"
_DEFAULT_DATA_FILE = "C:\\Users\\ajroy\\OneDrive\\Desktop\\mypython.py\\git practice\\data\\credit_card_transactions.csv"


def _save_and_close(output_path):
    """Save the current matplotlib figure to *output_path*, then close it."""
    try:
        plt.savefig(output_path)
    finally:
        # Close even when saving fails so figures do not pile up in memory.
        plt.close()


def _plot_distribution(values, title, output_path):
    """Save a 10-bin histogram with a KDE overlay of *values*."""
    plt.figure(figsize=(10, 6))
    sns.histplot(values, bins=10, kde=True)
    plt.title(title)
    _save_and_close(output_path)


def _plot_monthly_total(df, value_column, title, output_path):
    """Save a line chart of *value_column* summed per ``df["month"]``."""
    monthly = df.groupby("month")[value_column].sum().reset_index()
    # Plot against the string form of each monthly period.
    monthly["month"] = monthly["month"].astype(str)
    plt.figure(figsize=(12, 6))
    sns.lineplot(x='month', y=value_column, data=monthly)
    plt.title(title)
    plt.xticks(rotation=45)
    _save_and_close(output_path)


def advanced_analysis(df, output_dir=_DEFAULT_OUTPUT_DIR):
    """Generate the transaction analysis charts and print summary statistics.

    Six PNG charts are written to ``output_dir``: spending by card type,
    average spending by merchant and card type, age distribution, income
    distribution, monthly spending and monthly fraud counts. The correlation
    matrix of age, income and amount, followed by a fixed list of insight
    statements, is printed to stdout.

    Args:
        df: Transactions frame. The columns read are ``date``, ``card_type``,
            ``amount``, ``merchant``, ``age``, ``income`` and ``is_fraud``.
            The frame is modified in place: ``date`` is converted with
            ``pd.to_datetime`` and a ``month`` period column is added.
        output_dir: Directory that receives the PNG files. It is created if
            it does not exist. Defaults to the location that was previously
            hard-coded.

    Returns:
        None.
    """
    # Convert date column to datetime objects
    df["date"] = pd.to_datetime(df["date"])

    out_dir = Path(output_dir)
    # savefig does not create missing folders, so make sure the target exists.
    out_dir.mkdir(parents=True, exist_ok=True)

    # Analyze spending patterns by card type
    plt.figure(figsize=(10, 6))
    sns.boxplot(x='card_type', y='amount', data=df)
    plt.title("Spending Patterns by Card Type")
    _save_and_close(out_dir / "spending_patterns_by_card_type.png")

    # Analyze spending patterns by merchant and card type
    plt.figure(figsize=(14, 8))
    sns.barplot(x='merchant', y='amount', hue='card_type', data=df, errorbar=None)
    plt.title("Average Spending by Merchant and Card Type")
    plt.xticks(rotation=45)
    _save_and_close(out_dir / "average_spending_by_merchant.png")

    # Examine customer demographics: age and income distributions
    _plot_distribution(
        df["age"],
        "Distribution of Customer Age",
        out_dir / "customer_age_distribution.png",
    )
    _plot_distribution(
        df["income"],
        "Distribution of Customer Income",
        out_dir / "customer_income_distribution.png",
    )

    # Trends: monthly spending and fraud counts, both grouped by calendar month
    df["month"] = df["date"].dt.to_period("M")
    _plot_monthly_total(
        df, "amount", "Monthly Spending Trends", out_dir / "monthly_spending.png"
    )
    _plot_monthly_total(
        df, "is_fraud", "Fraud Incidents Over Time", out_dir / "fraud_over_time.png"
    )

    # Correlation between age, income and amount
    print("\nCorrelation between Age, Income, and Amount:")
    print(df[["age", "income", "amount"]].corr())

    # Business Insights (print to console for now)
    # NOTE: these statements are fixed text; they are not computed from df.
    print("\n--- Business Insights ---")
    print("1. High-value transactions are more likely to be fraudulent.")
    print("2. Spending patterns vary significantly across different merchants and card types.")
    print("3. There are observable trends in monthly spending and fraud incidents.")
    print("4. Customer demographics (age, income) show some correlation with spending habits.")


# Driver code. It previously sat inside advanced_analysis, which made every
# call re-read the CSV and invoke itself without end.
if __name__ == "__main__":
    transactions = pd.read_csv(_DEFAULT_DATA_FILE)
    advanced_analysis(transactions)
    print("Advanced analysis and insights generated and saved to credit_card_project/docs/")


