In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

In [None]:


# Load the dataset
california = fetch_california_housing()

# Pick the single predictor by name instead of a hard-coded column position:
# the choice documents itself, and list.index() raises ValueError if the
# expected feature is absent rather than silently selecting another column.
FEATURE_NAME = 'MedInc'  # median income
feature_idx = list(california.feature_names).index(FEATURE_NAME)

# reshape(-1, 1) turns the 1-D column into a 2-D (n_samples, 1) array,
# the layout the estimator is given for both fitting and prediction.
X = california.data[:, feature_idx].reshape(-1, 1)
y = california.target  # Target variable (house prices)

# Split the data into training and testing sets: 20% of the rows are held
# out for testing, and the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build the linear regression estimator and train it on the training split
# in one step (fit() hands back the fitted estimator).
model = LinearRegression().fit(X_train, y_train)

# Predict on the held-out split, then score those predictions against the
# true targets with mean squared error.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)

# Report the error rounded to two decimal places
print(f"Mean Squared Error: {mse:.2f}")

# Visualization
# Use an explicit Figure/Axes pair rather than the implicit pyplot state
# machine, so every call below is unambiguous about which plot it targets.
fig, ax = plt.subplots(figsize=(10, 6))

# Scatter plot of the test data. Partial transparency keeps dense regions
# readable: thousands of opaque markers would otherwise merge into a blob.
ax.scatter(X_test, y_test, color='blue', alpha=0.3, label='Actual prices')

# Plot the regression line. A fitted line is fully determined by two points,
# so draw it between the smallest and largest test-set x-values instead of
# threading a path through every (unsorted) test point.
x_line = np.array([X_test.min(), X_test.max()]).reshape(-1, 1)
ax.plot(x_line, model.predict(x_line), color='red', linewidth=2, label='Regression line')

# Labels and title
# NOTE(review): the axis labels carry no units — confirm the units against the
# dataset description and consider adding them.
ax.set_xlabel('Median Income')
ax.set_ylabel('House Price')
ax.set_title('Linear Regression on California Housing Dataset (Single Feature)')
ax.legend()

# Show the plot
plt.show()
