In [None]:
# Environment setup: install the app's Python dependencies into the kernel's
# environment (%pip rather than !pip) and the localtunnel CLI used to expose it.
%pip install streamlit scikit-learn
!npm install localtunnel
# Upgrade MarkupSafe together with Jinja2. The former `markupsafe==2.0.1` pin was
# dropped because this upgrade immediately overrode it.
%pip install --upgrade markupsafe Jinja2
# Both profiling packages are installed because the app imports from each of them.
%pip install pandas-profiling
%pip install ydata_profiling

Collecting streamlit
  Downloading streamlit-1.43.2-py2.py3-none-any.whl.metadata (8.9 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.43.2-py2.py3-none-any.whl (9.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.7/9.7 MB[0m [31m64.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m91.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[

In [None]:
%%writefile Housing_Price_prediction_app.py
import io
import os

import streamlit as st
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.graph_objects as go  # For radar chart
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer

from pandas_profiling import ProfileReport
from ydata_profiling import ProfileReport
import streamlit.components.v1 as components

# Title of the App
# Rendered as raw HTML (hence unsafe_allow_html) to get a centered, colored heading.
st.markdown("""
    <h1 style='text-align: center; color: #FF5733; font-family: Verdana, sans-serif; font-size: 40px;'>
        'Housing Price Prediction Using Linear Regression'
    </h1>
""", unsafe_allow_html=True)

# Three columns with a wider middle one so the banner image sits in the center.
col1, col2, col3 = st.columns([1, 2, 1])

# The banner is decorative and lives at an environment-specific absolute path.
# Skip it when the file is absent instead of letting st.image abort the whole app.
BANNER_IMAGE_PATH = "/content/housing.png"

with col2:  # Center column
    if os.path.isfile(BANNER_IMAGE_PATH):
        st.image(BANNER_IMAGE_PATH, caption="Predicting House Prices", width=300)

# File uploader for dataset
uploaded_file = st.file_uploader('Upload your housing dataset (CSV)', type='csv')

# Everything below only runs once the user has supplied a CSV file.
if uploaded_file is not None:
    df = pd.read_csv(uploaded_file)

    st.write('Data Preview:')
    st.dataframe(df.head(), use_container_width=True)
    # The editor's return value is not captured, so edits made in this widget
    # do not feed into the analysis below.
    st.data_editor(df, use_container_width=True)

    # DataFrame.info() writes its summary to a text buffer and returns None, so
    # capture the text explicitly; passing its return value to st.write would
    # only display "None".
    st.write('Data Information:')
    info_buffer = io.StringIO()
    df.info(buf=info_buffer)
    st.text(info_buffer.getvalue())

    st.write('Missing Values:')
    st.write(df.isnull().sum())

    if st.button("Generate EDA Report"):
        # Build the profiling report, save it as HTML, then embed that HTML
        # in the page inside a scrollable frame.
        profile = ProfileReport(df, explorative=True)
        profile_path = "eda_report.html"
        profile.to_file(profile_path)

        with open(profile_path, "r", encoding="utf-8") as f:
            html_content = f.read()
        components.html(html_content, height=800, scrolling=True)

    # Handle missing values
    # Numeric columns are filled with the column mean, text columns with the
    # most frequent value.
    # NOTE(review): the numeric group covers every float64/int64 column, so a
    # numeric target column is mean-imputed as well — confirm this is intended.
    num_cols = df.select_dtypes(include=['float64', 'int64']).columns
    cat_cols = df.select_dtypes(include=['object']).columns

    # SimpleImputer rejects an empty column selection, so only impute a group
    # when the dataset actually contains columns of that kind.
    if len(num_cols) > 0:
        num_imputer = SimpleImputer(strategy='mean')
        df[num_cols] = num_imputer.fit_transform(df[num_cols])

    if len(cat_cols) > 0:
        cat_imputer = SimpleImputer(strategy='most_frequent')
        df[cat_cols] = cat_imputer.fit_transform(df[cat_cols])

    st.write('Data after handling missing values:')
    st.dataframe(df.head())

    # Columns the model is trained on, plus the target it predicts.
    feature_cols = ['Avg. Area Income', 'Avg. Area House Age', 'Avg. Area Number of Rooms',
                    'Avg. Area Number of Bedrooms', 'Area Population', 'City', 'State', 'Zip_Code']
    target_col = 'price'

    # Fail fast with a readable message when the uploaded CSV lacks a required
    # column; otherwise the lookups below raise a bare KeyError.
    missing_cols = [col for col in feature_cols + [target_col] if col not in df.columns]
    if missing_cols:
        st.error(f"Uploaded dataset is missing required column(s): {', '.join(missing_cols)}")
        st.stop()

    # Label encode categorical variables
    # One encoder per column is kept so user input can be encoded with the
    # same mapping later.
    le_city, le_state, le_zip = LabelEncoder(), LabelEncoder(), LabelEncoder()
    df['City'] = le_city.fit_transform(df['City'])
    df['State'] = le_state.fit_transform(df['State'])
    df['Zip_Code'] = le_zip.fit_transform(df['Zip_Code'])

    # Selecting relevant features
    x = df[feature_cols]
    y = df[target_col]

    # Hold out 20% of the rows for evaluation; the fixed seed keeps the split
    # identical from one run to the next.
    split = train_test_split(x, y, test_size=0.2, random_state=42)
    features_train, features_test, target_train, target_test = split

    # Fit a linear regression on the training portion.
    model = LinearRegression().fit(features_train, target_train)

    # Score the fitted model on the held-out rows.
    test_predictions = model.predict(features_test)
    test_r2 = model.score(features_test, target_test)
    test_mse = mean_squared_error(target_test, test_predictions)

    st.write("### Model Performance")
    for metric_line in (
        f"Model Test R2 Score: {test_r2:.2f}",
        f"Model Test MSE: {test_mse:.2f}",
    ):
        st.write(metric_line)

    # --- User Input for Prediction ---
    st.header('Predict Housing Price')
    avg_income = st.number_input('Average Area Income', min_value=10000.0, max_value=200000.0, value=50000.0)
    house_age = st.number_input('Average Area House Age', min_value=0.0, max_value=50.0, value=10.0)
    num_rooms = st.number_input('Average Number of Rooms', min_value=1.0, max_value=10.0, value=5.0)
    num_bedrooms = st.number_input('Average Number of Bedrooms', min_value=1.0, max_value=6.0, value=3.0)
    population = st.number_input('Area Population', min_value=1000.0, max_value=100000.0, value=30000.0)
    city = st.text_input('City', value="New York")
    state = st.text_input('State', value="NY")
    zip_code = st.text_input('Zip Code', value="10001")

    def encode_or_unknown(encoder, value, label):
        """Return the encoder's code for ``value``, or -1 if it was never seen.

        Args:
            encoder: A fitted LabelEncoder for one categorical column.
            value: The raw text the user typed for that column.
            label: Human-readable column name used in the warning message.

        Returns:
            The integer code assigned during fitting, or -1 for unseen values.
            A warning is shown in the unseen case because -1 lies outside the
            codes the model was trained on, so the prediction is unreliable.
        """
        if value in encoder.classes_:
            return encoder.transform([value])[0]
        st.warning(f"{label} '{value}' was not found in the uploaded dataset; "
                   "the prediction treats it as an unknown category.")
        return -1

    # Encode user input
    city_encoded = encode_or_unknown(le_city, city, 'City')
    state_encoded = encode_or_unknown(le_state, state, 'State')
    zip_encoded = encode_or_unknown(le_zip, zip_code, 'Zip Code')

    # Single-row frame with the same column names/order the model was fitted on.
    input_data = pd.DataFrame([[avg_income, house_age, num_rooms, num_bedrooms, population, city_encoded, state_encoded, zip_encoded]],
                                  columns=x.columns)

    prediction = model.predict(input_data)

    st.subheader(f'Predicted House Price: ₹{prediction[0]:,.2f}')

    # --- Radar Chart ---
    # Plots the user's inputs, min-max scaled against the dataset, alongside
    # reference rings for the dataset minimum (0) and maximum (1).
    st.header('Radar Chart of User Input Compared to Dataset')

    max_values = x.max()
    min_values = x.min()

    input_values = np.array([avg_income, house_age, num_rooms, num_bedrooms, population, city_encoded, state_encoded, zip_encoded])

    # A feature that is constant in the dataset has max == min; substitute a
    # span of 1 so the division below cannot produce NaN or inf.
    value_span = (max_values - min_values).replace(0, 1)

    # Clip to the chart's [0, 1] radial range: inputs outside the dataset's
    # range (including the -1 code used for unseen categories) would otherwise
    # fall off the visible axis.
    normalized_input = ((input_values - min_values) / value_span).clip(0, 1)

    categories = ['Avg. Area Income', 'Avg. Area House Age', 'Avg. Area Number of Rooms',
            'Avg. Area Number of Bedrooms', 'Area Population', 'City', 'State', 'Zip_Code']

    fig = go.Figure()

    # Filled polygon for the user's scaled inputs.
    fig.add_trace(go.Scatterpolar(
        r=normalized_input,
        theta=categories,
        fill='toself',
        name='User Input'
    ))

    # Dashed outer ring at 1.0 marks the dataset maximum for every feature.
    fig.add_trace(go.Scatterpolar(
        r=np.ones_like(input_values),
        theta=categories,
        fill='none',
        name='Max Dataset Value',
        line=dict(color='red', dash='dash')
    ))

    # Dashed trace at 0.0 marks the dataset minimum for every feature.
    fig.add_trace(go.Scatterpolar(
        r=np.zeros_like(input_values),
        theta=categories,
        fill='none',
        name='Min Dataset Value',
        line=dict(color='blue', dash='dash')
    ))

    fig.update_layout(
        polar=dict(
            radialaxis=dict(visible=True, range=[0, 1])
        ),
        showlegend=True
    )

    st.plotly_chart(fig)

Writing Housing_Price_prediction_app.py


In [None]:
# Start the Streamlit app in the background, sending its output to a log file.
# The script name must match the file created by the %%writefile cell
# (Housing_Price_prediction_app.py); the previous name pointed at a file this
# notebook never writes.
# The curl call prints this machine's public IPv4 address
# (presumably needed as the localtunnel access password — confirm).
!streamlit run Housing_Price_prediction_app.py &>/content/logs.txt & curl ipv4.icanhazip.com


34.75.91.245


In [None]:
# Open a localtunnel to local port 8501 and print the public URL it assigns
# (presumably the port the Streamlit app started above listens on — confirm).
!npx localtunnel --port 8501

[1G[0K⠙[1G[0Kyour url is: https://nasty-bags-tan.loca.lt
^C
