In [None]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Fraud Detection EDA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "\n",
    "# Load the raw transaction table.\n",
    "df = pd.read_csv('../data/raw_data.csv')\n",
    "\n",
    "# Clean data\n",
    "# Every pattern below is literal text, so regex=False is passed explicitly.\n",
    "# pandas releases before 2.0 default str.replace to regex=True, where \"e$_\"\n",
    "# can never match because '$' anchors the end of the string.\n",
    "# astype(int) raises if any age is not an integer string once the quotes\n",
    "# are stripped.\n",
    "df['age'] = df['age'].str.replace(\"'\", \"\", regex=False).astype(int)\n",
    "df['category'] = (\n",
    "    df['category']\n",
    "    .str.replace(\"e$_\", \"\", regex=False)\n",
    "    .str.replace(\"*\", \"\", regex=False)\n",
    ")\n",
    "\n",
    "# Basic info\n",
    "# DataFrame.info() writes its report to stdout and returns None, so wrapping\n",
    "# it in print() would append a stray 'None' line.\n",
    "df.info()\n",
    "print(df.describe())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Class balance: number of rows for each value of the 'fraud' column\n",
    "fig, ax = plt.subplots(figsize=(8, 5))\n",
    "sns.countplot(data=df, x='fraud', ax=ax)\n",
    "ax.set_title('Distribution of Fraudulent Transactions')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Spread of 'amount' for each 'fraud' value, drawn on a log-scaled y-axis\n",
    "fig, ax = plt.subplots(figsize=(10, 6))\n",
    "sns.boxplot(data=df, x='fraud', y='amount', ax=ax)\n",
    "ax.set_yscale('log')\n",
    "ax.set_title('Transaction Amount by Fraud Status')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Row counts per category, split by 'fraud' value (horizontal bars)\n",
    "fig, ax = plt.subplots(figsize=(12, 6))\n",
    "sns.countplot(data=df, y='category', hue='fraud', ax=ax)\n",
    "ax.set_title('Transaction Category by Fraud Status')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Correlation matrix\n",
    "# same_zip is 1 when the row's zipcodeOn equals its zipMerchant, else 0.\n",
    "df['same_zip'] = (df['zipcodeOn'] == df['zipMerchant']).astype(int)\n",
    "corr = df[['age', 'amount', 'same_zip', 'fraud']].corr()\n",
    "plt.figure(figsize=(8, 6))\n",
    "# Pin the colour scale to [-1, 1] centred on 0 so the neutral colour of the\n",
    "# diverging 'coolwarm' map means 'no correlation'; otherwise the scale is\n",
    "# stretched over the observed range and the sign of a cell cannot be read\n",
    "# from its colour.\n",
    "sns.heatmap(corr, annot=True, cmap='coolwarm', vmin=-1, vmax=1, center=0)\n",
    "plt.title('Correlation Matrix')\n",
    "plt.show()"
   ]
  }
 ]
}