<a href="https://colab.research.google.com/github/mmarushika/sdc-lab/blob/main/LogisticRegressionSyntheticDataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Housing Price Classification using Logistic Regression (with synthetic dataset)

# Step 1: Import libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt

# Step 2: Generate synthetic dataset
# Seed NumPy's legacy global RNG so every run draws the same samples.
# NOTE: the draws below consume the global stream in order; reordering or
# inserting np.random calls changes the generated data.
np.random.seed(42)
n_samples = 1000

area = np.random.normal(2000, 500, n_samples)          # mean 2000, std 500
bedrooms = np.random.randint(1, 6, n_samples)          # integers 1..5 (upper bound is exclusive)
bathrooms = np.random.randint(1, 4, n_samples)         # integers 1..3
age = np.random.randint(0, 50, n_samples)              # integers 0..49
location_score = np.random.uniform(0, 10, n_samples)   # floats in [0, 10)

# Create a synthetic price as a linear combination of the features,
# then add Gaussian noise (std 10,000) so the classes are not perfectly separable.
price = (area * 150) + (bedrooms * 10000) + (bathrooms * 5000) - (age * 200) + (location_score * 10000)
price = price + np.random.normal(0, 10000, n_samples)

# Categorize price: 0 = Low, 1 = Medium, 2 = High
# The lowest edge is -inf rather than 0: pd.cut uses right-closed intervals, so
# with a lower edge of 0 any price <= 0 (possible because area and the noise are
# unbounded normals) would become NaN and make .astype(int) below raise.
bins = [-np.inf, 250000, 450000, np.inf]
labels = [0, 1, 2]
price_category = pd.cut(price, bins=bins, labels=labels)

# Create DataFrame: five feature columns plus the integer-coded target
df = pd.DataFrame({
    'Area': area,
    'Bedrooms': bedrooms,
    'Bathrooms': bathrooms,
    'Age': age,
    'LocationScore': location_score,
    'PriceCategory': price_category.astype(int)
})

print("Sample data:")
print(df.head())

# Step 3: Prepare data for training
# Features are every column except the target; the target is the integer price category.
X = df.drop(columns=['PriceCategory'])
y = df['PriceCategory']

# Step 4: Split data and train model
# Split BEFORE scaling so the held-out rows never influence the scaler's
# mean/variance (fitting the scaler on the full dataset leaks test-set
# statistics into preprocessing and makes the evaluation optimistic).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training rows only, then apply the same transform everywhere.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Scaled copy of the full feature matrix, kept so the name X_scaled stays available.
X_scaled = scaler.transform(X)

# `multi_class` is deprecated in recent scikit-learn releases; with the lbfgs
# solver and more than two classes a multinomial model is fitted by default,
# so the argument is omitted to avoid the FutureWarning.
model = LogisticRegression(solver='lbfgs', max_iter=1000)
model.fit(X_train, y_train)

# Step 5: Evaluate model
# Predict on the held-out split and print per-class precision/recall/F1.
y_pred = model.predict(X_test)
print("\nClassification Report:\n")
# Pass `labels` explicitly so the three target_names always line up with
# classes 0/1/2; without it the report raises if a class happens to be
# missing from both y_test and y_pred.
print(classification_report(y_test, y_pred, labels=[0, 1, 2], target_names=["Low", "Medium", "High"]))

# Step 6: Prediction prompt
# Read one house's features interactively, scale them with the fitted scaler,
# and print the predicted price category.
print("\n--- Predict Price Category ---")
try:
    # Use input_* names so the dataset arrays (area, bedrooms, ...) created
    # during data generation are not overwritten with scalars.
    input_area = float(input("Enter area (in sq ft): "))
    input_bedrooms = int(input("Enter number of bedrooms: "))
    input_bathrooms = int(input("Enter number of bathrooms: "))
    input_age = int(input("Enter age of house (in years): "))
    input_location_score = float(input("Enter location score (0-10): "))

    # Build a one-row DataFrame with the training column names: the scaler was
    # fitted on a DataFrame, so a bare ndarray triggers a feature-name warning
    # and silently relies on positional column order.
    input_features = pd.DataFrame(
        [[input_area, input_bedrooms, input_bathrooms, input_age, input_location_score]],
        columns=X.columns,
    )
    input_scaled = scaler.transform(input_features)
    prediction = model.predict(input_scaled)[0]

    category_map = {0: "Low", 1: "Medium", 2: "High"}
    print(f"\nPredicted Price Category: {category_map[prediction]}")

except KeyboardInterrupt:
    # KeyboardInterrupt is not an Exception subclass; handle it here so that
    # cancelling the prompt does not leave a traceback in the cell output.
    print("\nPrediction cancelled by user.")
except Exception as e:
    # Non-numeric input (ValueError from float()/int()) and any other failure
    # in the prompt are reported instead of aborting the cell.
    print(f"Error in input: {e}")




Sample data:
          Area  Bedrooms  Bathrooms  Age  LocationScore  PriceCategory
0  2248.357077         4          3   32       0.143814              1
1  1930.867849         1          3   49       7.643534              1
2  2323.844269         3          1   37       6.237431              1
3  2761.514928         5          3   26       7.623026              2
4  1882.923313         3          3   33       0.389380              1

Classification Report:

              precision    recall  f1-score   support

         Low       1.00      0.50      0.67        12
      Medium       0.92      1.00      0.96       145
        High       1.00      0.84      0.91        43

    accuracy                           0.94       200
   macro avg       0.97      0.78      0.85       200
weighted avg       0.94      0.94      0.93       200


--- Predict Price Category ---


KeyboardInterrupt: Interrupted by user