# **Import Libraries**

In [1]:
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

# **Read Dataset**

In [3]:
# Load the housing data; the relative path is resolved against the
# notebook's working directory.
DATA_FILE = 'house_dataset.csv'
df = pd.read_csv(DATA_FILE)

In [4]:
# Preview the first five rows to check the columns that were loaded.
df.head(n=5)

Unnamed: 0,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,sqft_above,sqft_basement,yr_built,yr_renovated,street,city
0,313000,3,1,1340,7912,1,0,0,3,1340,0,1955,2005,18810 Densmore Ave N,36
1,2384000,5,2,3650,9050,2,0,4,5,3370,280,1921,0,709 W Blaine St,35
2,342000,3,2,1930,11947,1,0,0,4,1930,0,1966,0,26206-26214 143rd Ave SE,18
3,420000,3,2,2000,8030,1,0,0,4,1000,1000,1963,0,857 170th Pl NE,3
4,550000,4,2,1940,10500,1,0,0,4,1140,800,1976,1992,9105 170th Ave NE,31


# **Split Dataset**

In [15]:
# Separate the regression target from the predictors.
#
# `street` holds free-text addresses, which LinearRegression cannot fit on.
# It is excluded here explicitly so the notebook survives Restart & Run All
# without depending on a separate column-dropping cell. errors='ignore' keeps
# this cell working when `street` has already been removed from `df`.
TARGET_COLUMN = 'price'
NON_FEATURE_COLUMNS = [TARGET_COLUMN, 'street']

X = df.drop(columns=NON_FEATURE_COLUMNS, errors='ignore')
y = df[TARGET_COLUMN]

In [16]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# **Train Model**

In [17]:
# Fit a linear regression on the training split.
# fit() is left as the cell's last expression so the fitted estimator is
# still what the cell displays.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

In [12]:
# Re-display the first five rows.
# NOTE(review): this cell's execution count (12) is lower than the cells
# around it, and its recorded output no longer contains the `street` column
# that the first preview shows - presumably a column-dropping cell was run
# that is not present in the notebook. Confirm how `street` was removed.
df.head(n=5)

Unnamed: 0,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,sqft_above,sqft_basement,yr_built,yr_renovated,city
0,313000,3,1,1340,7912,1,0,0,3,1340,0,1955,2005,36
1,2384000,5,2,3650,9050,2,0,4,5,3370,280,1921,0,35
2,342000,3,2,1930,11947,1,0,0,4,1930,0,1966,0,18
3,420000,3,2,2000,8030,1,0,0,4,1000,1000,1963,0,3
4,550000,4,2,1940,10500,1,0,0,4,1140,800,1976,1992,31


# **Model Prediction**

In [18]:
# Sanity-check the fitted model on the first held-out row.
# iloc[[0]] (list indexer) returns a one-row DataFrame, so the column names
# the model was fitted with are preserved. A bare NumPy array would drop them
# and make scikit-learn emit a feature-name warning.
model.predict(X_test.iloc[[0]])



array([311796.84307922])

In [19]:
 y_pred = model.predict(X_test)

# **Evaluate the Model Error**

In [20]:
# Report the error metrics for the held-out predictions, one per line.
# Each metric is computed and printed in turn, in the order listed.
for metric_label, metric_fn in (
    ('Mean Squared Error:', mean_squared_error),
    ('R2 Score:', r2_score),
):
    print(metric_label, metric_fn(y_test, y_pred))

Mean Squared Error: 986420820512.6879
R2 Score: 0.03277505830568417


In [27]:
# Inspect the fitted model: coefficients, then the intercept, each followed
# by a dotted divider line, and finally the model's score on the test split.
DIVIDER = ".................................."

print(model.coef_, DIVIDER, model.intercept_, DIVIDER, sep="\n")
print(model.score(X_test, y_test))

[-5.97121153e+04  3.18231732e+04  1.88015634e+02 -4.91278201e-01
  6.11633751e+04  3.85032446e+05  4.52448741e+04  3.22021985e+04
  1.00406234e+02  8.76093999e+01 -2.41334764e+03  7.23607067e+00
  1.56577250e+03]
..................................
4580419.7968808245
..................................
0.03277505830568417


# **Testing the Model**

In [24]:
# Hand-written sample to score. The values are listed in the same order as
# the training columns (they match row 0 of the dataset preview).
input_data = [[3, 1, 1340, 7912, 1, 0, 0, 3, 1340, 0, 1955, 2005, 36]]

# Label the values with the training column names so the model receives the
# same named features it was fitted on. pandas raises here if the number of
# values does not match the number of training columns.
input_frame = pd.DataFrame(input_data, columns=X_train.columns)
print(model.predict(input_frame))

[326256.39446227]




# **Save the Model**

In [25]:
# Persist the fitted estimator so it can be reloaded without retraining.
# `pickle` is imported in the import cell at the top of the notebook.
#
# NOTE: only unpickle files from a trusted source (loading a pickle can run
# arbitrary code), and reload with the same scikit-learn version that wrote
# the file - compatibility across versions is not guaranteed.
MODEL_PATH = 'model.pkl'

with open(MODEL_PATH, 'wb') as model_file:
    pickle.dump(model, model_file)