In [37]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

In [39]:
# Loading the dataset.
# The CSV location can be overridden with the HOUSING_CSV environment variable so the
# notebook is not tied to one machine; the original local path is kept as the fallback
# so existing runs behave exactly as before.
DATA_PATH = os.environ.get('HOUSING_CSV', '/Users/sathwik/Downloads/Housing.csv')
df = pd.read_csv(DATA_PATH)

In [41]:
# Converting the yes/no categorical columns to 1/0.
# The cell is safe to re-run: a column that is already numeric is left untouched
# (re-mapping 1/0 through the yes/no table would turn every value into NaN).
binary_columns = ['mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'prefarea']
yes_no_codes = {'yes': 1, 'no': 0}
for col in binary_columns:
    if pd.api.types.is_numeric_dtype(df[col]):
        continue  # already encoded by a previous run of this cell
    encoded = df[col].map(yes_no_codes)
    # Series.map yields NaN for any value missing from the table; flag values that
    # were present in the data but not recognised, instead of silently losing them.
    # Values that were already missing stay NaN, as before.
    unmapped = encoded.isna() & df[col].notna()
    if unmapped.any():
        raise ValueError(
            f"Column {col!r} contains values other than 'yes'/'no': "
            f"{sorted(df.loc[unmapped, col].astype(str).unique())}"
        )
    df[col] = encoded

In [43]:
# Converting furnishingstatus to numeric using one-hot encoding.
# drop_first=True drops the first category level, leaving the remaining levels
# as indicator columns named 'furnishingstatus_<level>'.
# The guard makes the cell safe to re-run: once the raw column has been replaced by
# its indicator columns, calling get_dummies on it again would raise a KeyError.
if 'furnishingstatus' in df.columns:
    df = pd.get_dummies(df, columns=['furnishingstatus'], drop_first=True)
elif not any(str(name).startswith('furnishingstatus_') for name in df.columns):
    # Neither the raw column nor its encoded form exists: fail loudly rather than
    # continue with a feature silently missing.
    raise KeyError(
        "'furnishingstatus' column not found and no encoded 'furnishingstatus_*' columns are present"
    )

In [45]:
# Separate the response from the predictors: 'price' is the target,
# every other column of df is used as a feature
y = df['price']
X = df.drop(columns=['price'])

In [47]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [49]:
# Simple linear regression: price modelled from 'area' alone.
# The double brackets select a one-column DataFrame rather than a Series.
X_train_simple, X_test_simple = X_train[['area']], X_test[['area']]
simple_lr = LinearRegression()
simple_lr.fit(X_train_simple, y_train)

In [51]:
# Predict on the held-out rows and score the area-only model
y_pred_simple = simple_lr.predict(X_test_simple)
mae_simple, mse_simple, r2_simple = (
    metric(y_test, y_pred_simple)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
)

In [53]:
# Report the test-set metrics and the fitted line of the area-only model,
# one item per output line
print(
    "Simple Linear Regression (using area):",
    f"MAE: {mae_simple:.2f}",
    f"MSE: {mse_simple:.2f}",
    f"R²: {r2_simple:.2f}",
    f"Coefficient (area): {simple_lr.coef_[0]:.2f}",
    f"Intercept: {simple_lr.intercept_:.2f}",
    sep="\n",
)

Simple Linear Regression (using area):
MAE: 1474748.13
MSE: 3675286604768.19
R²: 0.27
Coefficient (area): 425.73
Intercept: 2512254.26


In [55]:
# Plot the held-out points together with the model's predictions and save the figure
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(X_test_simple, y_test, color='blue', label='Actual')
ax.plot(X_test_simple, y_pred_simple, color='red', label='Regression Line')
ax.set_xlabel('Area (sq ft)')
ax.set_ylabel('Price')
ax.set_title('Simple Linear Regression: Price vs Area')
ax.legend()
fig.savefig('simple_linear_regression.png')
plt.close(fig)  # release the figure once it has been written to disk

In [57]:
# Multiple Linear Regression
# Fits price against every column of X_train (all features left after dropping 'price'),
# in contrast to the area-only model fitted earlier.
multiple_lr = LinearRegression()
multiple_lr.fit(X_train, y_train)

In [59]:
# Predict on the held-out rows and score the all-features model
y_pred_multiple = multiple_lr.predict(X_test)
mae_multiple, mse_multiple, r2_multiple = (
    metric(y_test, y_pred_multiple)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
)

In [61]:
# Report the test-set metrics of the all-features model, then list each
# feature name alongside its fitted coefficient, followed by the intercept
print(
    "\nMultiple Linear Regression:",
    f"MAE: {mae_multiple:.2f}",
    f"MSE: {mse_multiple:.2f}",
    f"R²: {r2_multiple:.2f}",
    "\nCoefficients:",
    sep="\n",
)
for feature, coef in zip(X.columns, multiple_lr.coef_):
    print(f"{feature}: {coef:.2f}")
print(f"Intercept: {multiple_lr.intercept_:.2f}")


Multiple Linear Regression:
MAE: 970043.40
MSE: 1754318687330.66
R²: 0.65

Coefficients:
area: 235.97
bedrooms: 76778.70
bathrooms: 1094444.79
stories: 407476.59
mainroad: 367919.95
guestroom: 231610.04
basement: 390251.18
hotwaterheating: 684649.89
airconditioning: 791426.74
parking: 224841.91
prefarea: 629890.57
furnishingstatus_semi-furnished: -126881.82
furnishingstatus_unfurnished: -413645.06
Intercept: 260032.36


In [63]:
# Scatter predicted against actual test prices and save the figure.
# The dashed red diagonal runs from the smallest to the largest actual price and
# marks where predicted == actual.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(y_test, y_pred_multiple, color='blue')
price_span = [y_test.min(), y_test.max()]
ax.plot(price_span, price_span, 'r--', lw=2)
ax.set_xlabel('Actual Price')
ax.set_ylabel('Predicted Price')
ax.set_title('Multiple Linear Regression: Actual vs Predicted Prices')
fig.savefig('multiple_linear_regression.png')
plt.close(fig)  # release the figure once it has been written to disk