In [1]:
import seaborn as sns
import pandas as pd
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.metrics import mean_absolute_error
import matplotlib.pyplot as plt
from sklearn.impute import SimpleImputer
from sklearn.pipeline import make_pipeline
from ipywidgets import Dropdown, FloatSlider, IntSlider, interact
from sklearn.utils.validation import check_is_fitted

In [2]:
# Load the raw housing-price table from its CSV file.
housing_csv_path = "D:/ML DATA/dataset/housing_price_dataset.csv"
df = pd.read_csv(housing_csv_path)

In [3]:
# Preview the first rows of the loaded frame as a quick sanity check.
df.head()

Unnamed: 0,SquareFeet,Bedrooms,Bathrooms,Neighborhood,YearBuilt,Price
0,2126,4,1,Rural,1969,215355.283618
1,2459,3,2,Rural,1980,195014.221626
2,1860,2,1,Suburb,1970,306891.012076
3,2294,2,1,Urban,1996,206786.787153
4,2130,5,2,Suburb,2001,272436.239065


In [4]:
# Summarise column names, non-null counts and dtypes of the frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50000 entries, 0 to 49999
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   SquareFeet    50000 non-null  int64  
 1   Bedrooms      50000 non-null  int64  
 2   Bathrooms     50000 non-null  int64  
 3   Neighborhood  50000 non-null  object 
 4   YearBuilt     50000 non-null  int64  
 5   Price         50000 non-null  float64
dtypes: float64(1), int64(4), object(1)
memory usage: 2.3+ MB


In [5]:
# Remove the columns that are not used as model inputs.
# Reassigning (instead of mutating in place) together with errors="ignore"
# keeps this cell idempotent: running it again after the columns are already
# gone is a no-op rather than a KeyError.
df = df.drop(columns=["Neighborhood", "YearBuilt"], errors="ignore")

In [6]:
# Summarise the frame again to check which columns are present at this point.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50000 entries, 0 to 49999
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   SquareFeet  50000 non-null  int64  
 1   Bedrooms    50000 non-null  int64  
 2   Bathrooms   50000 non-null  int64  
 3   Price       50000 non-null  float64
dtypes: float64(1), int64(3)
memory usage: 1.5 MB


In [7]:

# Name of the column to predict and the columns used as predictors.
target = "Price"
feature = ["SquareFeet", "Bedrooms", "Bathrooms"]

# Split the frame into the feature matrix and the target vector.
y_train = df[target]
X_train = df[feature]

In [9]:
# Baseline: always predict the mean training price, then measure the mean
# absolute error of that constant guess against the training targets.
y_mean = y_train.mean()
y_pred_basealine = [y_mean] * len(y_train)
baseline_mae = mean_absolute_error(y_train, y_pred_basealine)

# Use the built-in round(): it accepts both NumPy scalars and plain Python
# floats, whereas a plain float has no .round() method.
print("Y Mean:  ", round(y_mean, 2))
print("Y Prediction mean absolute error: ", round(baseline_mae, 2))

Y Mean:   224827.33
Y Prediction mean absolute error:  62179.68


In [10]:
# Create a linear regression estimator and fit it on the training features
# and target.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

In [11]:
# Verify that `model` has been fitted before it is used for prediction.
check_is_fitted(model)

In [13]:
# Score the fitted model on the same data it was trained on.
y_train_predict = model.predict(X_train)
training_mae = mean_absolute_error(y_train, y_train_predict)

# Built-in round() works whether the metric comes back as a NumPy scalar or a
# plain Python float; calling .round() on a plain float raises AttributeError.
print("Training Mean Absolute Error:", round(training_mae, 2))

Training Mean Absolute Error: 39904.35


In [14]:
# Read the test feature table and run the fitted model on it.
test_csv_path = "D:/ML DATA/X_Test.csv"
X_test = pd.read_csv(test_csv_path)

# Wrap the raw prediction array in a Series and preview the first values.
test_predictions = model.predict(X_test)
y_pred_test = pd.Series(test_predictions)
y_pred_test.head()

0    236439.368395
1    267283.621310
2    199862.646099
3    242985.410190
4    244740.425094
dtype: float64

In [15]:
# Deployment of model
def make_prediction(area, bedrooms, bathrooms):
    """Predict the price of a single home with the notebook-level ``model``.

    Parameters
    ----------
    area : int or float
        Value passed to the model as the ``SquareFeet`` feature.
    bedrooms : int or float
        Value passed to the model as the ``Bedrooms`` feature.
    bathrooms : int or float
        Value passed to the model as the ``Bathrooms`` feature.

    Returns
    -------
    str
        Message containing the predicted price rounded to two decimals.
    """
    # The column names must match the features the model was trained on
    # (SquareFeet, Bedrooms, Bathrooms), in the same order. The frame gets its
    # own name so it does not shadow the notebook-level `df`.
    features = pd.DataFrame(
        {
            "SquareFeet": area,
            "Bedrooms": bedrooms,
            "Bathrooms": bathrooms,
        },
        index=[0],
    )
    prediction = model.predict(features).round(2)[0]
    return f"Predicted apartment price: ${prediction}"

In [16]:
# Build one slider per model input. Each slider is bounded by the range seen
# in the training data and starts at the training mean of its column.
sqft_col = X_train["SquareFeet"]
bedroom_col = X_train["Bedrooms"]
bathroom_col = X_train["Bathrooms"]

area_slider = IntSlider(
    min=sqft_col.min(),
    max=sqft_col.max(),
    value=sqft_col.mean(),
)
bedroom_slider = FloatSlider(
    min=bedroom_col.min(),
    max=bedroom_col.max(),
    step=0.01,
    value=bedroom_col.mean(),
)
bathroom_slider = FloatSlider(
    min=bathroom_col.min(),
    max=bathroom_col.max(),
    step=0.01,
    value=bathroom_col.mean(),
)

# Wire the sliders to make_prediction; the trailing semicolon suppresses the
# cell's return-value repr.
interact(
    make_prediction,
    area=area_slider,
    bedrooms=bedroom_slider,
    bathrooms=bathroom_slider,
);

interactive(children=(IntSlider(value=2006, description='area', max=2999, min=1000), FloatSlider(value=3.4987,…