In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

In [None]:
# Load the Delhi house-price dataset.
# The CSV location can be overridden with the DELHI_HOUSE_PRICE_CSV environment
# variable so the notebook runs on other machines; the original local Windows
# path stays as the default, so existing runs behave exactly as before.
import os

DATA_PATH = os.environ.get(
    "DELHI_HOUSE_PRICE_CSV",
    r"C:\Users\user\Downloads\DelhiHousePrice.csv",
)
df = pd.read_csv(DATA_PATH)

In [None]:
# Show the freshly loaded DataFrame as this cell's output.
df

In [None]:
# Preview the first 15 rows.
df.head(15)

In [None]:
# Preview the last 15 rows.
df.tail(15)

In [None]:
# Print the column names, non-null counts and dtypes.
df.info()

In [None]:
# (rows, columns) of the loaded frame.
df.shape

In [None]:
# Print a heading followed by describe()'s summary statistics on the next line.
print("Initial Data Overview:", df.describe(), sep="\n")

In [None]:
# Discard every row that has at least one missing value (the dropna defaults,
# spelled out explicitly).
df = df.dropna(axis=0, how='any')

In [None]:
# Remove fully duplicated rows, keeping the first occurrence of each
# (the drop_duplicates defaults, spelled out explicitly).
df = df.drop_duplicates(subset=None, keep='first')

In [None]:
# Show the DataFrame after rows with missing values and duplicate rows were dropped.
df

In [None]:
# Label-encode every text column so the whole frame is numeric.
#
# Columns are detected with the pandas dtype helpers rather than
# `dtype == 'object'`, so columns stored with the dedicated string dtype are
# encoded too (a plain object-dtype check skips them).
#
# `category_mappings` records, per encoded column, which integer code each
# original label received. pd.factorize assigns codes in order of first
# appearance, so these mappings are the only way to reproduce the encoding
# later (e.g. when preparing inputs for a saved model).
category_mappings = {}
for col in df.columns:
    column = df[col]
    if pd.api.types.is_object_dtype(column) or pd.api.types.is_string_dtype(column):
        codes, uniques = pd.factorize(column)
        df[col] = codes
        category_mappings[col] = {label: code for code, label in enumerate(uniques)}

In [None]:
# Pairwise relationship grid across all columns of the encoded frame.
sns.pairplot(data=df)
plt.show()

In [None]:
# Annotated correlation matrix of all columns, drawn on an explicit Axes.
heatmap_fig, heatmap_ax = plt.subplots(figsize=(10, 8))
sns.heatmap(df.corr(), annot=True, cmap='coolwarm', ax=heatmap_ax)
heatmap_ax.set_title('Feature Correlation Heatmap')
plt.show()

In [None]:
# Target is the 'Price' column; every other column is a feature.
Y = df['Price']
X = df.drop(columns=['Price'])

# Hold out 20% of the rows for evaluation, with a fixed seed for repeatability.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Training features produced by the split.
X_train

In [None]:
# Training targets ('Price') produced by the split.
Y_train

In [None]:
# Held-out test features produced by the split.
X_test

In [None]:
# Held-out test targets ('Price') produced by the split.
Y_test

In [None]:
# Baseline: ordinary least squares on the training split, scored on the test split.
lr_model = LinearRegression().fit(X_train, Y_train)  # fit() returns the estimator itself
lr_predictions = lr_model.predict(X_test)

print("Linear Regression Results:")
for metric_label, metric in (
    ("R-squared:", r2_score),
    ("Mean Squared Error:", mean_squared_error),
):
    print(metric_label, metric(Y_test, lr_predictions))

In [None]:
# Decision-tree regressor on the training split, scored on the test split.
# random_state is pinned (same seed as the split and the random forest) so the
# reported metrics are reproducible across Restart & Run All; without it the
# tree's tie-breaking between equally good splits is random.
dt_model = DecisionTreeRegressor(random_state=42)
dt_model.fit(X_train, Y_train)
dt_predictions = dt_model.predict(X_test)
print("\nDecision Tree Results:")
print("R-squared:", r2_score(Y_test, dt_predictions))
print("Mean Squared Error:", mean_squared_error(Y_test, dt_predictions))

In [None]:
# Random forest (100 trees, fixed seed) on the training split, scored on the test split.
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, Y_train)
rf_predictions = rf_model.predict(X_test)

rf_r2 = r2_score(Y_test, rf_predictions)
rf_mse = mean_squared_error(Y_test, rf_predictions)

print("\nRandom Forest Results:")
print("R-squared:", rf_r2)
print("Mean Squared Error:", rf_mse)

In [None]:
# Random-forest predictions against the true test prices.
parity_fig, parity_ax = plt.subplots(figsize=(10, 6))
parity_ax.scatter(Y_test, rf_predictions, alpha=0.6, color='red')

# Dashed y = x reference line spanning the full price range of Y.
price_range = [Y.min(), Y.max()]
parity_ax.plot(price_range, price_range, 'k--', lw=2)

parity_ax.set_xlabel('Actual Prices')
parity_ax.set_ylabel('Predicted Prices')
parity_ax.set_title('Actual vs. Predicted Prices')
plt.show()

In [None]:
import pickle
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier  # no longer used here; kept in case other cells reference it

# Train and pickle the model consumed by the Streamlit app.
#
# 'Price' is a continuous target, so the exported estimator must be a
# regressor. A DecisionTreeClassifier would either reject a float target or
# treat every distinct price as its own class.
#
# The app standardises its inputs before calling predict(), so the exported
# model is fitted on standardised features as well. The scaler is fitted on
# the same X_train as the scaler that the notebook pickles afterwards, so it
# has identical statistics and the app's scaling matches what the model saw.
export_scaler = StandardScaler().fit(X_train)
model_dt = DecisionTreeRegressor(random_state=42)
model_dt.fit(export_scaler.transform(X_train), Y_train)

with open('house_price_model.pkl', 'wb') as model_file:
    pickle.dump(model_dt, model_file)

# Report success only after the file has been written and closed.
print("Model Saved Successfully")

In [None]:
# Standardise the features with scikit-learn.
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()

# Learn the scaling statistics from the training split only ...
sc.fit(X_train)

# ... then apply that same transformation to both splits.
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [None]:
# Persist the fitted scaler to 'scdelhi.pkl' so it can be reloaded later.
with open('scdelhi.pkl', mode='wb') as scaler_file:
    pickle.Pickler(scaler_file).dump(sc)

In [None]:
import os

# Source code of the Streamlit app, written to disk below.
#
# Fixes relative to the previous version of this cell:
#   * the app body appeared twice in the string, which made Streamlit create
#     every widget twice with identical parameters (a duplicate-widget error);
#   * the app loaded 'scaler.pkl', but the notebook pickles the scaler as
#     'scdelhi.pkl'.
#
# NOTE(review): the categorical encodings hard-coded in the app are not derived
# from the pd.factorize codes used for training - confirm they match the
# training data before trusting predictions.
# NOTE(review): the app standardises its inputs, so the pickled model must have
# been trained on standardised features - confirm.
streamlit_code = """
import streamlit as st
import pickle
import numpy as np

# Load the trained model and scaler
with open('house_price_model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

with open('scdelhi.pkl', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

# Create the web app
st.title('Delhi House Price Prediction App')

# Input fields
st.subheader("Enter the property details:")
area = st.number_input('Area (in sq. ft.)', min_value=0.0, max_value=10000.0, value=1000.0, step=10.0)
bhk = st.number_input('Number of Bedrooms (BHK)', min_value=1, max_value=10, value=2, step=1)
bathroom = st.number_input('Number of Bathrooms', min_value=1, max_value=10, value=1, step=1)
furnishing = st.selectbox('Furnishing', ['Unfurnished', 'Semi-Furnished', 'Fully-Furnished'])
locality = st.selectbox('Locality', ['South Delhi', 'North Delhi', 'East Delhi', 'West Delhi', 'Central Delhi'])
parking = st.number_input('Number of Parking Spaces', min_value=0, max_value=5, value=1, step=1)
status = st.selectbox('Status', ['Ready to Move', 'Under Construction'])
transaction = st.selectbox('Transaction Type', ['New Property', 'Resale'])
property_type = st.selectbox('Property Type', ['Apartment', 'Independent House', 'Villa', 'Builder Floor'])
per_sqft = st.number_input('Per Square Foot Rate (₹)', min_value=0.0, max_value=100000.0, value=5000.0, step=100.0)

# Encoding categorical features
furnishing_encoded = {'Unfurnished': 0, 'Semi-Furnished': 1, 'Fully-Furnished': 2}[furnishing]
locality_encoded = {
    'South Delhi': 0,
    'North Delhi': 1,
    'East Delhi': 2,
    'West Delhi': 3,
    'Central Delhi': 4
}[locality]
status_encoded = {'Ready to Move': 0, 'Under Construction': 1}[status]
transaction_encoded = {'New Property': 0, 'Resale': 1}[transaction]
property_type_encoded = {
    'Apartment': 0,
    'Independent House': 1,
    'Villa': 2,
    'Builder Floor': 3
}[property_type]

# Prepare the feature vector
features = np.array([[area, bhk, bathroom, furnishing_encoded, locality_encoded, parking, status_encoded, transaction_encoded, property_type_encoded, per_sqft]], dtype=np.float64)

# Scale the features
features_scaled = scaler.transform(features)

# Predict the house price
predicted_price = model.predict(features_scaled)

# Display the result
st.write(f'Predicted House Price: ₹{predicted_price[0]:,.2f}')
"""

# Keep writing to the original directory when it exists; on any other machine
# fall back to the current working directory, which is also where the model
# and scaler pickles are written (the app opens them by relative path).
preferred_dir = 'C:\\Users\\user'
target_dir = preferred_dir if os.path.isdir(preferred_dir) else os.getcwd()
file_path = os.path.join(target_dir, 'housepricepredictionapp.py')

try:
    # UTF-8 is required because the app source contains the rupee sign.
    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(streamlit_code)
    print(f"File '{file_path}' has been saved.")
except OSError as e:
    # Only file-system failures are expected here; anything else should surface.
    print(f"Error saving file: {e}")