# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import fetch_california_housing

# Fetch the California Housing dataset in DataFrame form and keep the
# DataFrame exposed through the returned object's ``frame`` attribute.
housing_data = fetch_california_housing(as_frame=True)
data = housing_data.frame

# Preview the top of the table (DataFrame.head() with its default row count).
preview = data.head()
print(preview)

# Collect the labels of the columns stored as float64 or int64.
numeric_frame = data.select_dtypes(include=['float64', 'int64'])
numerical_features = numeric_frame.columns
print(f"Numerical features: {list(numerical_features)}")

# Plot the distribution of every numerical feature, one 8x4 figure per column.
hist_style = {"bins": 30, "color": 'skyblue', "edgecolor": 'black'}
for column_name in numerical_features:
    fig, ax = plt.subplots(figsize=(8, 4))
    ax.hist(data[column_name], **hist_style)
    ax.set_title(f"Histogram of {column_name}")
    ax.set_xlabel(column_name)
    ax.set_ylabel("Frequency")
    ax.grid(True)
    fig.tight_layout()
    plt.show()

# For each numerical feature: draw a box plot in its own figure, then count
# the rows whose value lies outside the 1.5 * IQR fences.
for feature in numerical_features:
    values = data[feature]

    fig, ax = plt.subplots(figsize=(8, 4))
    sns.boxplot(x=values, color='orange', ax=ax)
    ax.set_title(f"Box Plot of {feature}")
    ax.set_xlabel(feature)
    fig.tight_layout()
    plt.show()

    # Quartiles and interquartile range of the current column.
    Q1 = values.quantile(0.25)
    Q3 = values.quantile(0.75)
    IQR = Q3 - Q1

    # A row is an outlier when it is strictly below the lower fence or
    # strictly above the upper fence.
    lower_fence = Q1 - 1.5 * IQR
    upper_fence = Q3 + 1.5 * IQR
    outliers = data[(values < lower_fence) | (values > upper_fence)]

    print(f"\nOutliers detected in '{feature}': {len(outliers)}")

