In [1]:
#importing libraries
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

In [2]:
# Fetch the California housing data; as_frame=True exposes it as pandas objects
housing = fetch_california_housing(as_frame=True)

# `frame` is the single table that holds both the feature columns and MedHouseVal
df = housing.frame

# Preview the first rows (left as the last expression so the cell renders it)
df.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,MedHouseVal
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23,4.526
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22,3.585
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24,3.521
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,3.413
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,3.422


In [3]:
# Separate the target column (y) from the remaining predictor columns (X)
y = df['MedHouseVal']
X = df.drop(columns='MedHouseVal')

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [4]:
# Create a LinearRegression estimator and fit it on the training split only
model = LinearRegression()
model.fit(X=X_train, y=y_train)

In [5]:
# Predict the target for the held-out test features
y_pred = model.predict(X=X_test)

In [6]:
# Score the test-set predictions against the true test targets
r2 = r2_score(y_true=y_test, y_pred=y_pred)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)

print(f"Mean Squared Error: {mse:.4}\nR2 Score:{r2:.4}\n")

# Raw fitted parameters, exactly as stored on the estimator
print("Model Coefficents:")
print(f"Intercept: {model.intercept_}\nCoefficents: {model.coef_}")

# The same coefficients as a one-column table indexed by feature name
coef_df = pd.DataFrame(
    data=model.coef_,
    index=X.columns,
    columns=['Coefficent'],
)

print("Coefficent for each feature")
print(coef_df)

Mean Squared Error: 0.5559
R2 Score:0.5758

Model Coefficents:
Intercept: -37.023277706064064
Coefficents: [ 4.48674910e-01  9.72425752e-03 -1.23323343e-01  7.83144907e-01
 -2.02962058e-06 -3.52631849e-03 -4.19792487e-01 -4.33708065e-01]
Coefficent for each feature
            Coefficent
MedInc        0.448675
HouseAge      0.009724
AveRooms     -0.123323
AveBedrms     0.783145
Population   -0.000002
AveOccup     -0.003526
Latitude     -0.419792
Longitude    -0.433708


In [15]:
# `df` is the complete frame loaded from the dataset, not the training split
# produced by train_test_split, so the heading is labelled accordingly
# (it previously read "Training data summary").
print("\n📈 Full dataset summary:")
print(df.describe())


📈 Training data summary:
             MedInc      HouseAge      AveRooms     AveBedrms    Population  \
count  20640.000000  20640.000000  20640.000000  20640.000000  20640.000000   
mean       3.870671     28.639486      5.429000      1.096675   1425.476744   
std        1.899822     12.585558      2.474173      0.473911   1132.462122   
min        0.499900      1.000000      0.846154      0.333333      3.000000   
25%        2.563400     18.000000      4.440716      1.006079    787.000000   
50%        3.534800     29.000000      5.229129      1.048780   1166.000000   
75%        4.743250     37.000000      6.052381      1.099526   1725.000000   
max       15.000100     52.000000    141.909091     34.066667  35682.000000   

           AveOccup      Latitude     Longitude   MedHouseVal  
count  20640.000000  20640.000000  20640.000000  20640.000000  
mean       3.070655     35.631861   -119.569704      2.068558  
std       10.386050      2.135952      2.003532      1.153956  
min   

In [None]:
#Function to get the input from the user
def get_user_input():
    """Interactively collect the feature values for one house.

    Prompts once per feature, in the order of the ``prompts`` mapping below.
    Each prompt is repeated until the answer parses as a finite number, so a
    typo or an empty answer no longer aborts the cell with ``ValueError`` and
    discards the values already entered.

    Returns:
        pandas.DataFrame: a single row with one float column per feature,
        with columns in prompt order.

    Raises:
        EOFError: propagated from ``input()`` if the input stream ends, which
            also guarantees the retry loop cannot spin forever when the
            notebook is run non-interactively.
    """
    # Feature name -> prompt text; dict order defines the column order of the result.
    prompts = {
        'MedInc': "Median Income (e.g., 4.0): ",
        'HouseAge': "House Age (e.g., 30): ",
        'AveRooms': "Average Rooms (e.g., 5.5): ",
        'AveBedrms': "Average Bedrooms (e.g., 1.0): ",
        'Population': "Population (e.g., 1200): ",
        'AveOccup': "Average Occupants (e.g., 3.0): ",
        'Latitude': "Latitude (e.g., 35.0): ",
        'Longitude': "Longitude (e.g., -119.0): ",
    }

    print("Enter values for the new house:")
    data = {}
    for feature, prompt in prompts.items():
        while True:
            answer = input(prompt)
            try:
                value = float(answer)
            except ValueError:
                print(f"'{answer}' is not a number, please try again.")
                continue
            # float() also parses "nan" and "inf", which are not usable feature values.
            if not np.isfinite(value):
                print(f"'{answer}' is not a finite number, please try again.")
                continue
            data[feature] = value
            break
    return pd.DataFrame([data])



In [17]:
# Ask the user for the feature values of one house
new_data = get_user_input()

# Take the single prediction and floor it at zero so a negative amount is never shown
predicted_price = max(model.predict(new_data)[0], 0)

# The printed dollar amount is the prediction multiplied by 100000
print(f"\n🏠 Predicted Median House Value: ${predicted_price * 100000:.2f}")


Enter details for house price prediction:

🏠 Predicted Median House Value: $3570369.79
