In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from config.settings import DATA_PATHS

# Read the processed dataset (features included) from the configured path
df = pd.read_parquet(DATA_PATHS['processed'])

# 1. Correlation analysis
# Only float64/int64 columns take part in the pairwise correlation.
numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns
corr_matrix = df.loc[:, numeric_cols].corr()

# Annotated heatmap drawn on an explicit Axes; the diverging palette is
# centred on zero so positive and negative correlations are told apart.
heatmap_fig, heatmap_ax = plt.subplots(figsize=(15, 10))
sns.heatmap(
    corr_matrix,
    annot=True,
    cmap='coolwarm',
    center=0,
    ax=heatmap_ax,
)
heatmap_ax.set_title('Feature Correlation Matrix')
plt.show()

# 2. Feature distributions
# Four panels on one 2x2 figure:
#   (1) amount by day of week, (2) user mean amount vs. amount,
#   (3) time since last transaction, (4) amount z-score by merchant category.

# The scatter panel plots a subsample of at most 1000 rows. DataFrame.sample
# raises ValueError when n exceeds the number of rows (sampling is without
# replacement), so cap n at len(df). A fixed seed keeps the figure
# reproducible between runs.
scatter_sample = df.sample(n=min(1000, len(df)), random_state=42)

plt.figure(figsize=(15, 10))

plt.subplot(2, 2, 1)
sns.boxplot(x='transaction_dow', y='amount', data=df)
plt.title('Transaction Amount by Day of Week')

plt.subplot(2, 2, 2)
sns.scatterplot(x='user_mean_amount', y='amount', data=scatter_sample)
plt.title('User Mean Amount vs Transaction Amount')

plt.subplot(2, 2, 3)
# Missing values are dropped before binning.
sns.histplot(df['time_since_last_txn'].dropna(), bins=50)
plt.title('Time Since Last Transaction (hours)')

plt.subplot(2, 2, 4)
sns.boxplot(x='merchant_category', y='amount_zscore', data=df)
# Rotate the category tick labels on this panel.
plt.xticks(rotation=45)
plt.title('Amount Z-Score by Merchant Category')

plt.tight_layout()
plt.show()