In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score

In [10]:
# Load dataset
# The CSV is read relative to the notebook's working directory. Later cells
# rely on the 'Area (sq ft)', 'Bedrooms' and 'Price (in $1000s)' columns.
df = pd.read_csv('housePrediction.csv')
# Preview only the leading rows rather than dumping the whole frame, so the
# cell output stays small if the file grows.
df.head()

Unnamed: 0,Area (sq ft),Bedrooms,Price (in $1000s)
0,1800,3,250
1,2200,4,320
2,1500,2,180
3,3000,5,450
4,1200,2,160


In [11]:
# Features and target variable
# Predictors are the floor area and bedroom count; the target is the price
# column (in thousands of dollars, per its header).
feature_columns = ['Area (sq ft)', 'Bedrooms']
target_column = 'Price (in $1000s)'

X = df[feature_columns]
y = df[target_column]

In [15]:
# Split the data
# Hold out 40% of the rows for evaluation; the fixed seed keeps the split
# reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=42,
)



# LINEAR REGRESSION

In [16]:
# --- Linear Regression ---
# Fit on the training split, predict the held-out split, then score those
# predictions with mean squared error and R2.
lin_reg = LinearRegression().fit(X_train, y_train)
y_pred_lin = lin_reg.predict(X_test)

lin_mse = mean_squared_error(y_true=y_test, y_pred=y_pred_lin)
lin_r2 = r2_score(y_true=y_test, y_pred=y_pred_lin)

# LOGISTIC REGRESSION

In [17]:
# --- Logistic Regression (Not Ideal for Regression) ---
# LogisticRegression is a classifier, so the integer-cast prices are used as
# class labels and each distinct price becomes its own class. The model is
# kept only as a contrast to the regressors.
# NOTE(review): accuracy counts exact price matches. When every row has a
# unique price (as in the displayed data), a test price is never among the
# training classes, which is consistent with the recorded accuracy of 0.0.
y_train_classes = y_train.astype(int)
y_test_classes = y_test.astype(int)

log_reg = LogisticRegression().fit(X_train, y_train_classes)
y_pred_log = log_reg.predict(X_test)
log_acc = accuracy_score(y_test_classes, y_pred_log)

# RANDOM FOREST REGRESSION

In [18]:
# --- Random Forest Regression ---
# 100 trees with a fixed seed so repeated runs build the same forest. The
# model is scored on the same held-out split and with the same metrics as
# the linear model.
rf_reg = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)
y_pred_rf = rf_reg.predict(X_test)

rf_mse = mean_squared_error(y_true=y_test, y_pred=y_pred_rf)
rf_r2 = r2_score(y_true=y_test, y_pred=y_pred_rf)

In [19]:
# --- Model Comparison ---
# The two regressors are reported with MSE and R2 on the test split; the
# classifier only has an accuracy figure, which is not comparable to them.
print(
    "Linear Regression MSE:", lin_mse,
    "R2 Score:", lin_r2,
)
print(
    "Random Forest Regression MSE:", rf_mse,
    "R2 Score:", rf_r2,
)
print(
    "Logistic Regression Accuracy (Not Suitable for Regression):",
    log_acc,
)

Linear Regression MSE: 99.99999999999943 R2 Score: 0.9843750000000001
Random Forest Regression MSE: 2377.8849999999984 R2 Score: 0.6284554687500002
Logistic Regression Accuracy (Not Suitable for Regression): 0.0


In [20]:
# Selecting the best model
# Only the two regressors are compared, using their test-set R2. Random
# Forest must be strictly better to win; a tie goes to Linear Regression.
if rf_r2 > lin_r2:
    best_model = "Random Forest Regression"
else:
    best_model = "Linear Regression"
print("Best model based on R2 Score:", best_model)


Best model based on R2 Score: Linear Regression
