In [1]:
# Importing Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [2]:
# Loading the Dataset
data = pd.read_csv('Real_Estate.csv')

In [3]:
# Displaying Top 5 rows of the Dataset
data.head()

Unnamed: 0,Transaction date,House age,Distance to the nearest MRT station,Number of convenience stores,Latitude,Longitude,House price of unit area
0,2012-09-02 16:42:30.519336,13.3,4082.015,8,25.007059,121.561694,6.488673
1,2012-09-04 22:52:29.919544,35.5,274.0144,2,25.012148,121.54699,24.970725
2,2012-09-05 01:10:52.349449,1.1,1978.671,10,25.00385,121.528336,26.694267
3,2012-09-05 13:26:01.189083,22.2,1055.067,5,24.962887,121.482178,38.091638
4,2012-09-06 08:29:47.910523,8.5,967.4,6,25.011037,121.479946,21.65471


In [4]:
# Checking the Dataset Information
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 414 entries, 0 to 413
Data columns (total 7 columns):
 #   Column                               Non-Null Count  Dtype  
---  ------                               --------------  -----  
 0   Transaction date                     414 non-null    object 
 1   House age                            414 non-null    float64
 2   Distance to the nearest MRT station  414 non-null    float64
 3   Number of convenience stores         414 non-null    int64  
 4   Latitude                             414 non-null    float64
 5   Longitude                            414 non-null    float64
 6   House price of unit area             414 non-null    float64
dtypes: float64(5), int64(1), object(1)
memory usage: 22.8+ KB


In [5]:
# Selecting features and the target variable
features = data.drop(columns= 'House price of unit area')
target = data['House price of unit area'] 

In [6]:
# Defining the X and Y
X = features
y = target

In [7]:
# Splitting the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= 0.2, random_state= 42)

In [8]:
# Check for non-numeric columns
print(X_train.dtypes)

# Drop non-numeric columns like 'Date' if present
X_train = X_train.select_dtypes(include=[np.number])
X_test = X_test.select_dtypes(include=[np.number])


Transaction date                        object
House age                              float64
Distance to the nearest MRT station    float64
Number of convenience stores             int64
Latitude                               float64
Longitude                              float64
dtype: object


In [9]:
# Model Initialization
model = LinearRegression()

In [10]:
# Drop any non-numeric columns
X_train = X_train.select_dtypes(include=[np.number])
X_test = X_test.select_dtypes(include=[np.number])

In [11]:
# Check for missing values
X_train = X_train.dropna()
X_test = X_test.dropna()

In [12]:
# Train and Fit the Model
model.fit(X_train, y_train)

In [13]:
# Now Let's create end to end solution for our Machine Learning Model
import dash
from dash import html, dcc, Input, Output, State

In [14]:
# Initialize Dash app
app = dash.Dash(__name__)

In [15]:
# Define the layout of the app
app.layout = html.Div([
    html.Div([
        html.H1('Real Estate Price Prediction', style = {'text-align': 'center'}),

        html.Div([
            dcc.Input(id = 'distance_to_mrt', type = 'number', placeholder = 'Distance to MRT Station (meters)',
                     style = {'margin':'10px', 'padding': '10px'}),
            dcc.Input(id = 'num_convenience_stores', type = 'number', placeholder= 'Number of Convenience Stores',
                     style = {'margin': '10px', 'padding': '10px'}),
            dcc.Input(id= 'latitude', type = 'number', placeholder= 'Latitude',
                     style = {'margin': '10px', 'padding': '10px'}),
            dcc.Input(id = 'longitude', type = 'number', placeholder= 'Longitude',
                     style = {'margin': '10px', 'padding': '10px'}),
            html.Button('Predict Price', id = 'predict_button', n_clicks = 0,
                       style = {'margin': '10px', 'padding': '10px', 'background-color':'#07Bff', 'color':'white'}),
            
        ], style = {'text-align': 'center'}),

        html.Div(id = 'prediction_output', style = {'text-align':'center', 'font-size':'20px', 'margin-top': '20px'})
    ], style = {'width': '50%', 'margin': '0 auto', 'border': '2px solid # 007Bff', 'padding': '20px', 'border-radius': '10px'})    
])

In [16]:
# Define callback to update output
@app.callback(
    Output('prediction_output', 'children'),
    [Input('predict_button', 'n_clicks')],
    [State('distance_to_mrt', 'value'),
        State('num_convenience_stores', 'value'),
        State('latitude', 'value'),
        State('longitude', 'value')])


def update_output(n_clicks, distance_to_mrt, num_convenience_stores, latitude, longitude):
    if n_clicks and n_clicks > 0:
        if all(v is not None for v in [distance_to_mrt, num_convenience_stores, latitude, longitude]):
            features = pd.DataFrame(
    [[distance_to_mrt, num_convenience_stores, latitude, longitude]],
    columns=[
        'Distance to the nearest MRT station',
        'Number of convenience stores',
        'Latitude',
        'Longitude'
    ]
)
            prediction = model.predict(features)[0]
            return f'Predicted House Price of Unit Area: {prediction:.2f}'
        else:
            return 'Please enter all values to get a prediction'
    return ''

# Run the app
if __name__ == '__main__':
    app.run_server(debug=True)