In [None]:
# 02_Preprocessing_FeatureEng.ipynb

# 📦 Import required libraries
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from scripts.feature_engineering import load_and_engineer

# 📁 Step 1: Run feature engineering pipeline
# load_and_engineer() is imported from scripts.feature_engineering; every step
# below uses its result as a pandas DataFrame.
print("Running feature engineering script...")
df_fe = load_and_engineer()

# ✅ Step 2: Preview processed dataset
print("✅ Preview of feature-engineered dataset:")
display(df_fe.head())

# ✅ Step 3: Check data types and nulls
print("ℹ️ Dataset Info:")
# DataFrame.info() prints its report itself and returns None, so it must not be
# wrapped in display() -- doing so only renders a stray "None" under the report.
df_fe.info()

print("🔍 Missing Values:")
# Per-column count of missing values, columns with the most gaps listed first.
display(df_fe.isna().sum().sort_values(ascending=False))

# ✅ Step 4: Summary statistics of engineered features
print("📊 Feature Summary:")
display(df_fe.describe())

# ✅ Step 5: Visualize correlations
# Make sure the target folder exists first: savefig() does not create missing
# directories and would fail with FileNotFoundError on a fresh checkout.
output_dir = Path("../outputs")
output_dir.mkdir(parents=True, exist_ok=True)

plt.figure(figsize=(12, 8))
# numeric_only=True restricts the correlation to float/int/bool columns.
# Since pandas 2.0 the default is False, so a single string or categorical
# column in df_fe would make a bare corr() call raise instead of being skipped.
sns.heatmap(df_fe.corr(numeric_only=True), cmap="coolwarm", annot=False)
plt.title("Feature Correlation Heatmap")
plt.tight_layout()
# Save before show(): with some backends the figure is cleared once shown.
plt.savefig(output_dir / "feature_correlation_heatmap.png")
plt.show()

# ✅ Step 6: Visualize Age and ChronicCount distributions
# Make sure the target folder exists first: savefig() does not create missing
# directories and would fail with FileNotFoundError on a fresh checkout.
output_dir = Path("../outputs")
output_dir.mkdir(parents=True, exist_ok=True)

# One row, two panels: Age histogram on the left, ChronicCount bars on the right.
fig, ax = plt.subplots(1, 2, figsize=(14, 5))

# Histogram of the 'Age' column with a kernel-density curve overlaid.
sns.histplot(df_fe['Age'], kde=True, ax=ax[0], color='skyblue')
ax[0].set_title("Age Distribution")

# Number of rows for each distinct 'ChronicCount' value.
sns.countplot(x='ChronicCount', data=df_fe, ax=ax[1], palette="Set2")
ax[1].set_title("Chronic Condition Count")

plt.tight_layout()
# Save before show(): with some backends the figure is cleared once shown.
plt.savefig(output_dir / "age_chronic_dist.png")
plt.show()

print("✅ Feature Engineering Notebook Completed.")
