In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from xgboost import XGBRegressor
import joblib
import json

# Load and preprocess data
df = pd.read_csv('clean_islamabad_properties.csv')

# Feature Engineering
df['Price_per_SqFt'] = df['Price_Millions'] * 1000000 / df['Area_SqFt']
df['Beds_to_Area_Ratio'] = df['Beds_Num'] / df['Area_SqFt']

# Prepare data for modeling
X = df[['Beds_Num', 'Area_SqFt', 'Location_Encoded', 'Price_per_SqFt', 'Beds_to_Area_Ratio']]
y = df['Price_Millions']

# Split and scale the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

# Train the model
model = XGBRegressor(n_estimators=100, learning_rate=0.1, random_state=42)
model.fit(X_train_scaled, y_train)

# Save the model and scaler
joblib.dump(model, 'xgb_model.joblib')
joblib.dump(scaler, 'scaler.joblib')

print("Model and scaler saved successfully.")

Model and scaler saved successfully.


In [5]:
import streamlit as st
import pandas as pd
import numpy as np
import joblib
import json

# Load the saved model, scaler, and location mapping
@st.cache_resource
def load_model_and_data():
    model = joblib.load('xgb_model.joblib')
    scaler = joblib.load('scaler.joblib')
    with open('location_encoding.json', 'r') as f:
        location_mapping = json.load(f)
    return model, scaler, location_mapping

model, scaler, location_mapping = load_model_and_data()

# Prediction function
def predict_price(beds, area, location):
    price_per_sqft = 0  # We'll update this after the first prediction
    beds_to_area_ratio = beds / area
    features = np.array([[beds, area, location, price_per_sqft, beds_to_area_ratio]])
    scaled_features = scaler.transform(features)
    predicted_price = model.predict(scaled_features)[0]
    
    # Update price_per_sqft and predict again
    price_per_sqft = predicted_price * 1000000 / area
    features = np.array([[beds, area, location, price_per_sqft, beds_to_area_ratio]])
    scaled_features = scaler.transform(features)
    final_predicted_price = model.predict(scaled_features)[0]
    
    return final_predicted_price

# Streamlit app
st.title('Islamabad Property Price Predictor')

# User inputs
area = st.number_input('Area (in square feet)', min_value=100, max_value=10000, value=1000)
beds = st.number_input('Number of Bedrooms', min_value=1, max_value=10, value=3)
location = st.selectbox('Location', list(location_mapping.keys()))

if st.button('Predict Price'):
    location_encoded = location_mapping[location]
    price = predict_price(beds, area, location_encoded)
    st.success(f'The predicted price is {price:.2f} million PKR')

# Optional: Add some information about the app
st.info('This app predicts property prices in Islamabad based on area, number of bedrooms, and location.')

2024-07-30 02:31:51.959 
  command:

    streamlit run C:\Users\hp\anaconda3\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]


DeltaGenerator()

In [11]:
streamlit run C:\Users\hp\anaconda3\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]

SyntaxError: invalid syntax (799773559.py, line 1)