In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error
import numpy as np

# ---------------------------------------------------------------------------
# Load the dataset
# ---------------------------------------------------------------------------
# NOTE(review): this is an absolute, machine-specific path. Point it at your
# own copy of Housing.csv before running the notebook on another machine.
data_path = "/Users/npurwar/Downloads/Housing.csv"

try:
    data = pd.read_csv(data_path)
except FileNotFoundError as err:
    # Report the path that was actually tried and keep the original error chained.
    raise FileNotFoundError(
        f"The file 'Housing.csv' was not found at '{data_path}'. "
        "Please ensure it is correctly uploaded to the environment."
    ) from err

# ---------------------------------------------------------------------------
# Preprocess the data
# ---------------------------------------------------------------------------
target_column = "price"
categorical_columns = ["mainroad", "guestroom", "basement", "hotwaterheating", "airconditioning", "prefarea", "furnishingstatus"]

# Fail early, with one readable message, if the CSV lacks a column this cell needs.
missing_columns = [col for col in [target_column, *categorical_columns] if col not in data.columns]
if missing_columns:
    raise KeyError(f"Housing.csv is missing expected column(s): {missing_columns}")

# Separate features and target.
features = data.drop(columns=target_column)
y = data[target_column]

# One-hot encode the categorical variables, dropping the first level of each.
# The encoder is left at its default sparse output and densified with
# .toarray(): the `sparse=` keyword was renamed to `sparse_output=` in
# scikit-learn 1.2 and removed in 1.4, so passing either name breaks on some
# versions, while this form works on all of them.
encoder = OneHotEncoder(drop="first")
categorical_encoded = encoder.fit_transform(features[categorical_columns]).toarray()

# Remaining (non-categorical) columns followed by the encoded indicator columns.
remaining_features = features.drop(columns=categorical_columns)
X_unscaled = np.concatenate([remaining_features.values, categorical_encoded], axis=1)

# Scale the features.
# NOTE(review): the scaler is fit on every row here, i.e. before the
# train/test split performed later in the notebook, so test-set statistics
# take part in the scaling. Consider fitting it on the training rows only.
scaler = StandardScaler()
X = scaler.fit_transform(X_unscaled)



In [12]:
# Hold out a test portion (test_size=0.2); the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# Fit a linear regression on the training portion only.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

In [13]:
# Score the fitted model on the held-out test rows.
y_pred_test = model.predict(X_test)

# Mean absolute error and mean squared error of the test predictions.
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred_test)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred_test)

print(f"Test Loss (MSE): {mse:.4f}")
print(f"Test MAE: {mae:.4f}")

Test Loss (MSE): 1754318687330.6692
Test MAE: 970043.4039


In [14]:
# Display the first few predictions next to the actual prices.
# Slicing (instead of indexing positions 0..4) prints at most five rows and
# does not raise IndexError when the test set holds fewer than five samples.
for predicted_price, actual_price in zip(y_pred_test[:5], y_test.iloc[:5]):
    print(f"Predicted Price: {predicted_price:,.2f}, Actual Price: {actual_price:,.2f}")

Predicted Price: 5,164,653.90, Actual Price: 4,060,000.00
Predicted Price: 7,224,722.30, Actual Price: 6,650,000.00
Predicted Price: 3,109,863.24, Actual Price: 3,710,000.00
Predicted Price: 4,612,075.33, Actual Price: 6,440,000.00
Predicted Price: 3,294,646.26, Actual Price: 2,800,000.00
