In [1]:
import joblib
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split


In [2]:
# Load the raw house listings from the CSV file and preview the first five rows.
data = pd.read_csv("data/house_data.csv")
data.head(n=5)

Unnamed: 0,Id,Area,Bedrooms,Bathrooms,Floors,YearBuilt,Location,Condition,Garage,Price
0,1,1360,5,4,3,1970,Downtown,Excellent,No,149919
1,2,4272,5,4,3,1958,Downtown,Excellent,No,424998
2,3,3592,2,2,3,1938,Downtown,Good,No,266746
3,4,966,4,2,2,1902,Suburban,Fair,Yes,244020
4,5,4926,1,4,2,1975,Downtown,Fair,Yes,636056


In [3]:
# Encode the categorical columns in one chain:
#   * 'Garage' labels 'No'/'Yes' become 0/1 (any other label becomes NaN),
#   * 'Location' and 'Condition' are one-hot encoded; drop_first=True omits the
#     first category of each, which then acts as the all-zero baseline.
# NOTE(review): `data` is rebound here, so re-running this cell without first
# re-running the load cell will not work on the already-encoded frame.
data = (
    data
    .assign(Garage=data['Garage'].map({'No': 0, 'Yes': 1}))
    .pipe(pd.get_dummies, columns=['Location', 'Condition'], drop_first=True)
)
data.head()


Unnamed: 0,Id,Area,Bedrooms,Bathrooms,Floors,YearBuilt,Garage,Price,Location_Rural,Location_Suburban,Location_Urban,Condition_Fair,Condition_Good,Condition_Poor
0,1,1360,5,4,3,1970,0,149919,False,False,False,False,False,False
1,2,4272,5,4,3,1958,0,424998,False,False,False,False,False,False
2,3,3592,2,2,3,1938,0,266746,False,False,False,False,True,False
3,4,966,4,2,2,1902,1,244020,False,True,False,True,False,False
4,5,4926,1,4,2,1975,1,636056,False,False,False,True,False,False


In [4]:
# Target is the 'Price' column; every remaining column is used as a feature.
# NOTE(review): this keeps 'Id' in the feature matrix. It looks like a row
# identifier rather than a property of the house -- confirm whether it should
# be excluded (predict_house_price currently sends an 'Id' column as well).
y = data['Price']
X = data.drop(columns='Price')

In [5]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

In [6]:
# Fit an ordinary least-squares regression on the training split.
# fit() returns the estimator itself, so it can be bound directly to `model`.
model = LinearRegression().fit(X_train, y_train)
model


0,1,2
,"fit_intercept  fit_intercept: bool, default=True Whether to calculate the intercept for this model. If set to False, no intercept will be used in calculations (i.e. data is expected to be centered).",True
,"copy_X  copy_X: bool, default=True If True, X will be copied; else, it may be overwritten.",True
,"tol  tol: float, default=1e-6 The precision of the solution (`coef_`) is determined by `tol` which specifies a different convergence criterion for the `lsqr` solver. `tol` is set as `atol` and `btol` of :func:`scipy.sparse.linalg.lsqr` when fitting on sparse training data. This parameter has no effect when fitting on dense data. .. versionadded:: 1.7",1e-06
,"n_jobs  n_jobs: int, default=None The number of jobs to use for the computation. This will only provide speedup in case of sufficiently large problems, that is if firstly `n_targets > 1` and secondly `X` is sparse or if `positive` is set to `True`. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details.",
,"positive  positive: bool, default=False When set to ``True``, forces the coefficients to be positive. This option is only supported for dense arrays. For a comparison between a linear regression model with positive constraints on the regression coefficients and a linear regression without such constraints, see :ref:`sphx_glr_auto_examples_linear_model_plot_nnls.py`. .. versionadded:: 0.24",False


In [7]:
# Score the held-out rows. r2_score is the coefficient of determination, not a
# classification accuracy, so it is labelled as such: 1.0 is a perfect fit,
# 0.0 matches always predicting the mean of y_test, and negative values mean
# the model does worse than that constant baseline.
y_pred = model.predict(X_test)
print("R^2 score:", r2_score(y_test, y_pred))


Accuracy: -0.006181784611834162


In [8]:
def predict_house_price(area, bedrooms, bathrooms, floors,
                         year_built, garage, location, condition):
    """Predict the price of a single house with the fitted global ``model``.

    Builds a one-row DataFrame laid out like the training features (the
    numeric columns plus the ``Location_*`` / ``Condition_*`` indicator
    columns created by ``pd.get_dummies(..., drop_first=True)``) and returns
    the model's prediction for it.

    Parameters
    ----------
    area, bedrooms, bathrooms, floors, year_built
        Values for the 'Area', 'Bedrooms', 'Bathrooms', 'Floors' and
        'YearBuilt' feature columns.
    garage
        Either the encoded value (0 or 1) or the raw label 'No' / 'Yes',
        which is encoded as 0 / 1 exactly like the training data.
    location
        One of 'Downtown', 'Rural', 'Suburban', 'Urban'. 'Downtown' is the
        baseline category (all ``Location_*`` indicators stay 0).
    condition
        One of 'Excellent', 'Fair', 'Good', 'Poor'. 'Excellent' is the
        baseline category (all ``Condition_*`` indicators stay 0).

    Returns
    -------
    int
        The predicted price, truncated toward zero by ``int()``.

    Raises
    ------
    ValueError
        If ``location`` or ``condition`` is not one of the labels above, if
        ``garage`` is a string other than 'No' / 'Yes', or if the model
        expects a feature column this function does not build.
    """
    # Map each category label to its indicator column; None marks the
    # baseline category that drop_first=True removed during encoding.
    location_columns = {
        'Downtown': None,
        'Rural': 'Location_Rural',
        'Suburban': 'Location_Suburban',
        'Urban': 'Location_Urban',
    }
    condition_columns = {
        'Excellent': None,
        'Fair': 'Condition_Fair',
        'Good': 'Condition_Good',
        'Poor': 'Condition_Poor',
    }

    # Reject unknown labels instead of silently scoring them as the baseline
    # (e.g. a typo such as 'urban' used to be priced as 'Downtown').
    if location not in location_columns:
        raise ValueError(
            f"Unknown location {location!r}; expected one of {sorted(location_columns)}"
        )
    if condition not in condition_columns:
        raise ValueError(
            f"Unknown condition {condition!r}; expected one of {sorted(condition_columns)}"
        )

    # Accept the raw 'No'/'Yes' labels as well as the already-encoded 0/1.
    if isinstance(garage, str):
        garage_codes = {'No': 0, 'Yes': 1}
        if garage not in garage_codes:
            raise ValueError(
                f"Unknown garage value {garage!r}; expected 'No', 'Yes', 0 or 1"
            )
        garage = garage_codes[garage]

    input_data = {
        # Placeholder identifier; only kept below if the model was trained
        # with an 'Id' column.
        'Id': 0,
        'Area': area,
        'Bedrooms': bedrooms,
        'Bathrooms': bathrooms,
        'Floors': floors,
        'YearBuilt': year_built,
        'Garage': garage,
        'Location_Rural': 0,
        'Location_Suburban': 0,
        'Location_Urban': 0,
        'Condition_Fair': 0,
        'Condition_Good': 0,
        'Condition_Poor': 0
    }

    for indicator in (location_columns[location], condition_columns[condition]):
        if indicator is not None:
            input_data[indicator] = 1

    input_df = pd.DataFrame([input_data])

    # Align the row with the columns the model was actually fitted on, so the
    # prediction does not depend on the key order above or on whether 'Id'
    # was part of the training features.
    expected_columns = getattr(model, 'feature_names_in_', None)
    if expected_columns is not None:
        expected_columns = list(expected_columns)
        missing = [name for name in expected_columns if name not in input_df.columns]
        if missing:
            raise ValueError(
                f"Model expects feature columns this function does not build: {missing}"
            )
        input_df = input_df[expected_columns]

    return int(model.predict(input_df)[0])


In [9]:
# Example prediction for one house, using the same category labels as the data.
price = predict_house_price(
    area=2200, bedrooms=4, bathrooms=3, floors=2,
    year_built=2018, garage=1, location='Urban', condition='Good',
)

print("Predicted House Price:", price)

Predicted House Price: 521684


In [10]:
# Persist the fitted estimator to disk (joblib is imported in the top import cell).
# NOTE: the file is a pickle-based format; only load it back from trusted sources.
joblib.dump(model, "house_price_model.pkl")
print("Model saved successfully")


Model saved successfully
