# California Housing Price Prediction
**Author:** Katie McGaughey
**Date:** 3-15-2025
**Objective:** Predict the median house value (`MedHouseVal`) of California districts from the features in scikit-learn's California housing dataset.

In [None]:
# Import necessary libraries
import pandas as pd  # For data manipulation
import numpy as np  # For numerical computations
import matplotlib.pyplot as plt  # For static visualizations
import seaborn as sns  # For statistical data visualization

# Import the California housing dataset from sklearn
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split  # Splitting data
from sklearn.linear_model import LinearRegression  # Linear regression model
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score  # Model evaluation

In [None]:
# Exploratory plots: each section below builds one figure from `data_frame`
# and displays it before moving on.
# NOTE(review): `data_frame` is created in a cell not shown here; it must
# contain at least the 'MedInc' and 'MedHouseVal' columns used below.

# One 30-bin histogram per column, arranged on a single 12x8 figure
data_frame.hist(figsize=(12, 8), bins=30)
plt.suptitle("Feature Distributions")
plt.show()

# Box plots of every column on shared axes, tick labels rotated 45 degrees
boxplot_fig, boxplot_ax = plt.subplots(figsize=(12, 8))
data_frame.boxplot(ax=boxplot_ax, rot=45)
boxplot_ax.set_title("Feature Boxplots")
plt.show()

# Annotated heatmap of the pairwise column correlations (two decimals)
correlation_matrix = data_frame.corr()
heatmap_fig, heatmap_ax = plt.subplots(figsize=(10, 6))
sns.heatmap(
    correlation_matrix,
    annot=True,
    fmt='.2f',
    cmap='coolwarm',
    ax=heatmap_ax,
)
heatmap_ax.set_title("Feature Correlation Heatmap")
plt.show()

# Median income against median house value; semi-transparent markers keep
# dense regions readable
income_fig, income_ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(
    data=data_frame,
    x='MedInc',
    y='MedHouseVal',
    alpha=0.5,
    ax=income_ax,
)
income_ax.set_xlabel('Median Income')
income_ax.set_ylabel('Median House Value')
income_ax.set_title("Median Income vs. House Value")
plt.show()

In [None]:
# Diagnostic plots comparing predictions against the true targets.
# NOTE(review): `y_test` and `y_pred` are defined in cells not shown here;
# presumably the held-out targets and the model's predictions for them.

# Actual vs. predicted values, one semi-transparent marker per sample
prediction_fig, prediction_ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(x=y_test, y=y_pred, alpha=0.5, ax=prediction_ax)
prediction_ax.set_xlabel('Actual House Values')
prediction_ax.set_ylabel('Predicted House Values')
prediction_ax.set_title("Actual vs Predicted House Values")
plt.show()

# Distribution of the residuals (actual minus predicted): 30-bin histogram
# with a kernel density estimate drawn on top
residuals = y_test - y_pred
residual_fig, residual_ax = plt.subplots(figsize=(8, 6))
sns.histplot(residuals, bins=30, kde=True, ax=residual_ax)
residual_ax.set_xlabel('Residuals')
residual_ax.set_title("Residual Distribution")
plt.show()