In [19]:
from sklearn.datasets import fetch_california_housing
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

#fetch the Housing data
housing = fetch_california_housing(as_frame = True)

#set Features(X) and Target(y)
X = housing.data
y = housing.target

#Combine into one dataframe
df = pd.concat([X, y], axis = 1)

#Split the data into 80% for training and 20% for testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

# Initialize the model
model = LinearRegression()

#fit on training data
model.fit(X_test, y_test)

#predict y on test set
y_pred = model.predict(X_test)

#Compare the actual test values and the model predictions
comparisons = pd.DataFrame({"Actual": y_test, "Predicted": y_pred})

#Model metrics
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)


print(f"R2 Score: {r2}\nMean Absolute Error: {mae}\nMean Squared Error: {mse}\nRoot Mean Squared Error: {rmse}")

R2 Score: 0.596054650433006
Mean Absolute Error: 0.528942661428345
Mean Squared Error: 0.5293336127912477
Root Mean Squared Error: 0.727553168360394
