In [1]:
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression


# Fetch the California housing dataset that ships with scikit-learn
california_data = fetch_california_housing()

# Assemble one DataFrame: the feature columns plus the target as 'Price'
feature_columns = list(california_data.feature_names)
data = pd.DataFrame(california_data.data, columns=feature_columns).assign(
    Price=california_data.target
)
print(data.head())

# Separate the predictors from the value being predicted
X = data[feature_columns]
y = data['Price']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

# Fit an ordinary least-squares model on the training portion
# (fit() returns the estimator itself, so construction and fitting can be chained)
model = LinearRegression().fit(X_train, y_train)

# Predictions for the held-out rows
y_pred = model.predict(X_test)

# NOTE: for a regressor, score() is the coefficient of determination (R^2) on the
# given data, not a classification accuracy; the "Accuracy" label below therefore
# reports R^2 expressed as a percentage.
score = model.score(X_test, y_test)
print("Accuracy:", score * 100, "%")
print("Predicted values:", y_pred[:5])  # first 5 predictions, for a quick sanity check


   MedInc  HouseAge  AveRooms  AveBedrms  Population  AveOccup  Latitude  \
0  8.3252      41.0  6.984127   1.023810       322.0  2.555556     37.88   
1  8.3014      21.0  6.238137   0.971880      2401.0  2.109842     37.86   
2  7.2574      52.0  8.288136   1.073446       496.0  2.802260     37.85   
3  5.6431      52.0  5.817352   1.073059       558.0  2.547945     37.85   
4  3.8462      52.0  6.281853   1.081081       565.0  2.181467     37.85   

   Longitude  Price  
0    -122.23  4.526  
1    -122.22  3.585  
2    -122.24  3.521  
3    -122.25  3.413  
4    -122.25  3.422  
Accuracy: 59.43232652466215 %
Predicted values: [2.28110738 2.79009128 1.90332794 1.01760331 2.94852425]
