# In [7]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Make sure the directory that receives the saved figures exists;
# exist_ok=True turns this into a no-op when it is already present.
os.makedirs("../outputs/figures", exist_ok=True)

# Read the raw credit score CSV into a DataFrame.
df = pd.read_csv("../data/raw/credit_score_data.csv")

# Optional: swap every space in a column name for an underscore so columns
# can be referenced by underscore-separated names (e.g. "Credit_Score").
df.columns = [column.replace(" ", "_") for column in df.columns]

# Class distribution: count of rows for each distinct Credit_Score value,
# drawn on an explicit figure/axes pair and written to disk as a PNG.
fig, ax = plt.subplots(figsize=(6, 4))
sns.countplot(data=df, x="Credit_Score", ax=ax)
ax.set_title("Distribution of Credit Score")
fig.savefig("../outputs/figures/credit_score_distribution.png")
plt.close(fig)  # Release this figure so the next plot starts clean

# Numeric correlation heatmap: pairwise correlations between the numeric
# columns only (non-numeric columns are dropped by select_dtypes), with each
# cell annotated with its value.
fig, ax = plt.subplots(figsize=(10, 6))
numeric_corr = df.select_dtypes(include=["number"]).corr()
sns.heatmap(numeric_corr, annot=True, cmap="coolwarm", ax=ax)
ax.set_title("Correlation Heatmap")
fig.savefig("../outputs/figures/correlation_heatmap.png")
plt.close(fig)  # Release this figure so the next plot starts clean

# Income vs Credit Score: one box of Income values per distinct
# Credit_Score value, saved to disk as a PNG.
fig, ax = plt.subplots(figsize=(8, 5))
sns.boxplot(data=df, x="Credit_Score", y="Income", ax=ax)
ax.set_title("Income vs Credit Score")
fig.savefig("../outputs/figures/income_vs_credit_score.png")
plt.close(fig)  # Release the figure once it has been written
