In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

In [None]:
# Read the raw dataset from the CSV file in the working directory.
data = pd.read_csv(filepath_or_buffer="new_data.csv")
# Show the first five rows as a quick sanity check of the loaded frame.
data.head(n=5)

In [None]:
# Target: the price-per-square-foot column the model is trained to predict.
y = data["Price Per Sq Ft (INR)"]
# Features: every remaining column once the target has been removed.
X = data.drop(columns=["Price Per Sq Ft (INR)"])

In [None]:
# Columns that hold categorical values and must be one-hot encoded before
# they can be fed to the regressor.
categorial_cols = ["Locality", "Land Type", "Zone"]

# handle_unknown="ignore": a category that was not seen during fit (e.g. a
# locality that only appears in the test split or at inference time) is
# encoded as an all-zero row instead of raising ValueError in transform().
one_hot = OneHotEncoder(handle_unknown="ignore")

# Encode the categorical columns; every other column is passed through
# to the next pipeline step unchanged.
transformer = ColumnTransformer(
    [("one_hot", one_hot, categorial_cols)],
    remainder="passthrough",
)

In [None]:
# Full pipeline: preprocessing (one-hot encoding) followed by ordinary
# least-squares linear regression.
model = Pipeline(steps=[("transformer", transformer), ("regressor", LinearRegression())])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# mean_squared_error returns the MSE (squared units); take the square root so
# the value reported as "RMSE" really is the root-mean-squared error, in the
# same units as the target.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)

print("RMSE:", rmse)
print("R² Score:", r2)
# Summary statistics of the target, printed for comparison with the error above.
print(data["Price Per Sq Ft (INR)"].describe())

In [None]:
from sklearn.metrics import mean_absolute_error

# Mean absolute error of the held-out predictions, in the target's units.
mae = mean_absolute_error(y_test, y_pred)
print(f"MAE: ₹{mae:.2f}")

# Compute the root-mean-squared error directly from the predictions so the
# normalisation below is guaranteed to use RMSE (target units) rather than
# MSE (squared units).
rmse_value = np.sqrt(mean_squared_error(y_test, y_pred))

# Normalised RMSE: RMSE expressed as a fraction of the mean observed target.
nrmse = rmse_value / y_test.mean()
print(f"NRMSE: {nrmse * 100:.2f}%")

In [None]:
import pickle

# Serialise the pipeline object to disk; the handle is opened in binary
# write mode and closed automatically when the block exits.
with open("land_price_model.pkl", mode="wb") as model_file:
    pickle.dump(model, model_file)