In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


In [None]:
# Read the house-price CSV into a DataFrame and preview its first rows.
csv_path = "/content/bengaluru_house_prices.csv"
df = pd.read_csv(csv_path)
df.head()


Unnamed: 0,area_type,availability,location,size,society,total_sqft,bath,balcony,price
0,Super built-up Area,19-Dec,Electronic City Phase II,2 BHK,Coomee,1056,2.0,1.0,39.07
1,Plot Area,Ready To Move,Chikka Tirupathi,4 Bedroom,Theanmp,2600,5.0,3.0,120.0
2,Built-up Area,Ready To Move,Uttarahalli,3 BHK,,1440,2.0,3.0,62.0
3,Super built-up Area,Ready To Move,Lingadheeranahalli,3 BHK,Soiewre,1521,3.0,1.0,95.0
4,Super built-up Area,Ready To Move,Kothanur,2 BHK,,1200,2.0,1.0,51.0


In [None]:
# Binary target: 1 when a row's price is strictly greater than the median of
# the whole price column, 0 otherwise.
median_price = df['price'].median()
is_above_median = df['price'].gt(median_price)
df['expensive'] = is_above_median.astype(int)


In [None]:
def _leading_number(size_label):
    """Return the first space-separated token of `size_label` as a number, or NaN if it is not numeric."""
    first_token = str(size_label).split(' ')[0]
    return pd.to_numeric(first_token, errors='coerce')


# 'size' values look like "2 BHK" / "4 Bedroom"; keep only the leading count.
df['bhk'] = df['size'].map(_leading_number)

def clean_total_sqft(value):
    """Convert one raw `total_sqft` entry to a float.

    Parameters
    ----------
    value : object
        A single cell value. Strings are parsed; anything else (e.g. a float
        or NaN that pandas already produced) is returned unchanged.

    Returns
    -------
    float or object
        * ``"1056"``        -> ``1056.0``
        * ``"2100 - 2850"`` -> ``2475.0`` (mean of the first two '-'-separated parts)
        * unparseable text  -> ``np.nan``
        * non-string input  -> the input itself
    """
    # Guard first: the string operations below ('-' in ..., .split) would raise
    # TypeError on floats/NaN, so non-strings must leave before reaching them.
    if not isinstance(value, str):
        return value

    text = value.strip()

    if '-' in text:
        # Range such as "2100 - 2850": represent it by the midpoint.
        parts = text.split('-')
        try:
            return (float(parts[0]) + float(parts[1])) / 2
        except ValueError:
            return np.nan

    try:
        return float(text)
    except ValueError:
        # Text that float() cannot parse (e.g. a number with a unit suffix).
        return np.nan

# Parse every total_sqft entry, then coerce whatever is still non-numeric to NaN.
sqft_parsed = df['total_sqft'].apply(clean_total_sqft)
df['total_sqft'] = pd.to_numeric(sqft_parsed, errors='coerce')

# Discard rows that are missing any of the model's input features.
feature_columns = ['total_sqft', 'bath', 'bhk']
df.dropna(subset=feature_columns, inplace=True)

# Feature matrix and binary target used by the cells below.
X = df[feature_columns]
y = df['expensive']

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts


In [None]:
# Fit the scaler on the training features only, then apply that same fitted
# transform to both the training and the test features.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Train a logistic-regression classifier (library defaults) on the scaled training data.
model = LogisticRegression().fit(X_train, y_train)
model


In [None]:
# Predicted class labels for the held-out test features (compared with y_test below).
y_pred = model.predict(X_test)


In [None]:
# Compute the evaluation metrics on the test split first, then report them.
test_accuracy = accuracy_score(y_test, y_pred)
test_confusion = confusion_matrix(y_test, y_pred)
test_report = classification_report(y_test, y_pred)

print("Accuracy:", test_accuracy)
print("\nConfusion Matrix:\n", test_confusion)
print("\nClassification Report:\n", test_report)


Accuracy: 0.8182506626277926

Confusion Matrix:
 [[1179  179]
 [ 301  982]]

Classification Report:
               precision    recall  f1-score   support

           0       0.80      0.87      0.83      1358
           1       0.85      0.77      0.80      1283

    accuracy                           0.82      2641
   macro avg       0.82      0.82      0.82      2641
weighted avg       0.82      0.82      0.82      2641



In [None]:
# Classify a single hand-written example.
# The scaler was fitted on a DataFrame with named columns, so the sample is
# built as a DataFrame with the same column names and order. Passing a bare
# ndarray makes scikit-learn emit an "X does not have valid feature names"
# warning and leaves the column order implicit.
new_house_raw = pd.DataFrame(
    [[120000, 10, 10]],   # sqft, bath, bhk
    columns=['total_sqft', 'bath', 'bhk'],
)
new_house = scaler.transform(new_house_raw)

prediction = model.predict(new_house)

# Class 1 is the "expensive" label (price above the median).
print("The house is Expensive" if prediction[0] == 1 else "The house is Not Expensive")


The house is Expensive


