In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import fetch_california_housing

# Fetch the California housing dataset and assemble it into one DataFrame:
# one column per feature name reported by the loader, plus a trailing
# 'target' column holding the loader's target values (median house value).
california_housing = fetch_california_housing()
housing_data = pd.DataFrame(
    data=california_housing.data,
    columns=california_housing.feature_names,
).assign(target=california_housing.target)

# Exploratory Data Analysis (EDA)
# Print a quick textual overview of the DataFrame: a sample of rows, the
# column/dtype/non-null report, and per-column summary statistics.
print(housing_data.head())  # Preview the first few rows

# DataFrame.info() writes its report to stdout itself and returns None, so it
# must be called directly; wrapping it in print() appends a stray "None" line.
housing_data.info()  # Column names, dtypes, non-null counts, memory usage

print(housing_data.describe())  # Summary statistics for the numeric columns

# Visualizations
# Scatter plot of the target (median house value) against median income.
# A fresh 10x6 figure is opened first so seaborn draws onto it; the title and
# axis labels are then applied in one call on the Axes seaborn returns.
plt.figure(figsize=(10, 6))
sns.scatterplot(data=housing_data, x='MedInc', y='target').set(
    title='Median House Value vs. Median Income',
    xlabel='Median Income',
    ylabel='Median House Value',
)
plt.show()

# Pair plot: grid of pairwise plots across every column of the DataFrame
# (features and target), for spotting relationships between variables.
# NOTE(review): this plots every row for every column pair, so it is likely
# the slowest cell in this analysis - confirm that is acceptable.
sns.pairplot(data=housing_data)
plt.show()

# Box plot: distribution of the target (median house value) grouped by the
# HouseAge column, drawn on a fresh 10x6 figure. Title and axis labels are
# applied in one call on the Axes seaborn returns.
plt.figure(figsize=(10, 6))
sns.boxplot(data=housing_data, x='HouseAge', y='target').set(
    title='Median House Value by House Age',
    xlabel='House Age',
    ylabel='Median House Value',
)
plt.show()

# Heatmap: pairwise correlation between the numeric columns.
# Pick out the float/int columns by dtype, compute their correlation matrix,
# and render it as an annotated heatmap with a diverging colour map.
numeric_features = housing_data.select_dtypes(include=[float, int]).columns
corr_matrix = housing_data.loc[:, numeric_features].corr()

plt.figure(figsize=(10, 8))
sns.heatmap(
    data=corr_matrix,
    annot=True,       # write each coefficient inside its cell
    cmap='coolwarm',
).set_title('Correlation Matrix')
plt.show()

# Statistical summary: heading line followed by the describe() table.
# A single print with a newline separator emits the same two-part output.
print('Summary Statistics:', housing_data.describe(), sep='\n')