<a href="https://colab.research.google.com/github/mmarushika/sdc-lab/blob/main/LinearRegressionRealDataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# California Housing Price Prediction using Linear Regression

# Step 1: Import libraries
import pandas as pd
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

# Step 2: Fetch the California housing data as a single pandas DataFrame
california = fetch_california_housing(as_frame=True)
df = california.frame  # predictors and the MedHouseVal target side by side
# One print call with a newline separator yields the same two-part output:
# the heading line followed by the first five rows.
print("Sample Data:", df.head(), sep="\n")

# Step 3: Separate the target vector (y) from the feature matrix (X)
target_column = "MedHouseVal"  # median house value in $100,000s
y = df[target_column]
X = df.drop(columns=target_column)  # every remaining column is a predictor

# Step 4: Train-test split, then feature scaling
# Split BEFORE fitting the scaler so that held-out rows do not contribute to the
# mean/std used for standardization; fitting the scaler on all of X would leak
# test-set statistics into the preprocessing step.
# The row assignment is the same as splitting the scaled array: it depends only
# on the number of samples, test_size and random_state.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)  # learn mean/std from training rows only
X_test = scaler.transform(X_test_raw)  # apply the training statistics unchanged
# Kept so any code that still reads X_scaled keeps working; it is the full
# feature matrix standardized with the training-set statistics.
X_scaled = scaler.transform(X)

# Step 5: Fit a linear regression model on the training split
# (fit() returns the estimator itself, so construction and fitting can be chained)
model = LinearRegression().fit(X_train, y_train)

# Step 6: Score the fitted model on the held-out split
y_pred = model.predict(X_test)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f"\nModel trained. Mean Squared Error: {mse:.4f}")

# Step 7: Prediction Input Prompt
print("\n--- Predict Housing Price ---")
print("Enter the following values:")

# Prompt text keyed by the feature column it fills. Keying by column name lets
# the input row be assembled by name instead of by hand-maintained position.
feature_prompts = {
    "MedInc": "Median Income (in 10k USD): ",
    "HouseAge": "House Age (in years): ",
    "AveRooms": "Average number of rooms: ",
    "AveBedrms": "Average number of bedrooms: ",
    "Population": "Block population: ",
    "AveOccup": "Average household occupancy: ",
    "Latitude": "Latitude: ",
    "Longitude": "Longitude: ",
}

try:
    user_values = {}
    for feature, prompt in feature_prompts.items():
        value = float(input(prompt))
        # float() accepts "nan" and "inf"; reject them here with a clear message
        # rather than letting them reach the scaler.
        if not np.isfinite(value):
            raise ValueError(f"{feature} must be a finite number, got {value}")
        user_values[feature] = value
except ValueError as e:
    # Only malformed user input is reported and skipped. Errors raised by the
    # scaler or the model are real bugs and are allowed to propagate.
    print(f"Error: {e}")
else:
    # Build a one-row DataFrame whose columns match X, which is what the scaler
    # was fitted on; this avoids the feature-name mismatch warning that a bare
    # ndarray triggers and guarantees the column order.
    input_features = pd.DataFrame([user_values], columns=X.columns)
    input_scaled = scaler.transform(input_features)
    predicted_price = model.predict(input_scaled)[0] * 100000  # target is in $100,000s

    print(f"\nEstimated Median House Value: ${predicted_price:,.2f}")


Sample Data:
   MedInc  HouseAge  AveRooms  AveBedrms  Population  AveOccup  Latitude  \
0  8.3252      41.0  6.984127   1.023810       322.0  2.555556     37.88   
1  8.3014      21.0  6.238137   0.971880      2401.0  2.109842     37.86   
2  7.2574      52.0  8.288136   1.073446       496.0  2.802260     37.85   
3  5.6431      52.0  5.817352   1.073059       558.0  2.547945     37.85   
4  3.8462      52.0  6.281853   1.081081       565.0  2.181467     37.85   

   Longitude  MedHouseVal  
0    -122.23        4.526  
1    -122.22        3.585  
2    -122.24        3.521  
3    -122.25        3.413  
4    -122.25        3.422  

Model trained. Mean Squared Error: 0.5559

--- Predict Housing Price ---
Enter the following values:
Median Income (in 10k USD): 6
House Age (in years): 30
Average number of rooms: 6.5
Average number of bedrooms: 1
Block population: 1200
Average household occupancy: 3
Latitude: 34
Longitude: -118

Estimated Median House Value: $283,363.53


