In [None]:
# Step 1: Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Better plots
# `%matplotlib inline` is an IPython/Jupyter magic, not plain Python: this line
# only parses when the file is run as a notebook cell.
%matplotlib inline
# Apply seaborn's "whitegrid" style to the plots drawn below.
sns.set(style="whitegrid")

# Step 2: Load cleaned data
# The path is relative to the working directory the notebook is run from.
df = pd.read_csv("../data/processed/final.csv")
# A bare `df.head()` is only rendered when it is the last expression of a
# notebook cell (and never in a plain script), so print the preview explicitly.
print(df.head())  # Show first 5 rows

# Step 3: Basic info
# DataFrame.info() writes its report straight to stdout and returns None, so
# wrapping it in print() would append a stray "None" line to the output.
df.info()
# describe() returns a summary-statistics DataFrame, which does need printing.
print(df.describe())

# Step 4: Check missing values
# Build a boolean mask (True where a value is missing) and draw it as a
# heatmap so gaps in the data stand out visually.
missing_mask = df.isna()
ax = sns.heatmap(missing_mask, cmap="viridis", cbar=False)
ax.set_title("Missing Values Heatmap")
plt.show()

# Step 5: Target column distribution (if exists)
if 'target' in df.columns:
    # Name the column via keywords: seaborn treats the first positional
    # argument as `data` (0.12+) and warns about positional `x` in 0.11, so
    # passing the Series positionally does not reliably plot class counts.
    sns.countplot(x='target', data=df)
    plt.title("Target Distribution")
    plt.show()

# Step 6: Numeric feature distributions
# Select the int64/float64 columns and draw one histogram per column.
numeric_cols = df.select_dtypes(include=['int64','float64']).columns
if numeric_cols.empty:
    # DataFrame.hist raises ValueError when there is nothing numeric to plot,
    # which would abort the rest of the analysis; report and move on instead.
    print("No numeric columns found; skipping feature distributions.")
else:
    df[numeric_cols].hist(figsize=(12,10), bins=20)
    plt.suptitle("Numeric Feature Distributions")
    plt.show()

# Step 7: Correlation matrix
# Restrict the correlation to numeric columns: since pandas 2.0, DataFrame.corr
# defaults to numeric_only=False and raises on string/object columns.
# (The `numeric_only` keyword requires pandas >= 1.5.)
corr_matrix = df.corr(numeric_only=True)
plt.figure(figsize=(10,8))
# annot=True writes each coefficient inside its cell.
sns.heatmap(corr_matrix, annot=True, cmap="coolwarm")
plt.title("Feature Correlation Matrix")
plt.show()
