In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [8]:
# Read the housing data from a CSV file located next to the notebook.
DATA_PATH = "housing_dataset.csv"
df = pd.read_csv(DATA_PATH)

In [9]:
# Preview the first five rows to sanity-check the columns that were loaded.
df.head()

Unnamed: 0,Home,Price,SqFt,Bedrooms,Bathrooms,Offers,Brick,Neighborhood
0,1,114300,1790,2,2,2,No,East
1,2,114200,2030,4,2,3,No,East
2,3,114800,1740,3,2,1,No,East
3,4,94700,1980,3,2,3,No,East
4,5,119800,2130,3,3,3,No,East


In [11]:
# Count missing values per column (isna is the canonical name for isnull).
df.isna().sum()

Home            0
Price           0
SqFt            0
Bedrooms        0
Bathrooms       0
Offers          0
Brick           0
Neighborhood    0
dtype: int64

In [12]:
# (rows, columns) of the loaded frame.
df.shape

(128, 8)

In [13]:
# Display the loaded frame (pandas abbreviates the middle rows in the output).
df

Unnamed: 0,Home,Price,SqFt,Bedrooms,Bathrooms,Offers,Brick,Neighborhood
0,1,114300,1790,2,2,2,No,East
1,2,114200,2030,4,2,3,No,East
2,3,114800,1740,3,2,1,No,East
3,4,94700,1980,3,2,3,No,East
4,5,119800,2130,3,3,3,No,East
...,...,...,...,...,...,...,...,...
123,124,119700,1900,3,3,3,Yes,East
124,125,147900,2160,4,3,3,Yes,East
125,126,113500,2070,2,2,2,No,North
126,127,149900,2020,3,3,1,No,West


### Data cleaning

In [15]:
 # Discard the columns that are not used as predictors below.
 # Reassigning (instead of inplace=True) together with errors='ignore' keeps the
 # cell safe to re-run: columns that are already gone are skipped rather than
 # raising KeyError.
 df = df.drop(columns=['Offers', 'Brick', 'Neighborhood'], errors='ignore')

In [16]:
# Inspect the frame after removing Offers, Brick and Neighborhood.
df

Unnamed: 0,Home,Price,SqFt,Bedrooms,Bathrooms
0,1,114300,1790,2,2
1,2,114200,2030,4,2
2,3,114800,1740,3,2
3,4,94700,1980,3,2
4,5,119800,2130,3,3
...,...,...,...,...,...
123,124,119700,1900,3,3
124,125,147900,2160,4,3
125,126,113500,2070,2,2
126,127,149900,2020,3,3


In [17]:
# 'Home' is excluded from modelling (it looks like a running row id — confirm).
# Reassign rather than mutate in place, and ignore an already-missing column,
# so that re-running this cell does not raise KeyError.
df = df.drop(columns=['Home'], errors='ignore')

In [18]:
# Inspect the frame after removing Home: Price plus three numeric columns remain.
df


Unnamed: 0,Price,SqFt,Bedrooms,Bathrooms
0,114300,1790,2,2
1,114200,2030,4,2
2,114800,1740,3,2
3,94700,1980,3,2
4,119800,2130,3,3
...,...,...,...,...
123,119700,1900,3,3
124,147900,2160,4,3
125,113500,2070,2,2
126,149900,2020,3,3


### Independent and dependent features

In [19]:
# Predictor matrix: the three numeric columns used to explain Price.
feature_columns = ['SqFt', 'Bedrooms', 'Bathrooms']
X = df[feature_columns]

In [20]:
# Target vector: the sale price column.
y = df.loc[:, 'Price']

In [21]:
# Display the predictor frame to confirm only SqFt, Bedrooms and Bathrooms remain.
X

Unnamed: 0,SqFt,Bedrooms,Bathrooms
0,1790,2,2
1,2030,4,2
2,1740,3,2
3,1980,3,2
4,2130,3,3
...,...,...,...
123,1900,3,3
124,2160,4,3
125,2070,2,2
126,2020,3,3


In [22]:
# (samples, features) of the predictor frame; no array conversion is needed.
X.shape

(128, 3)

In [23]:
# Length of the target vector; it must match the number of rows in X.
y.shape

(128,)

### Train Test Split

In [32]:
from sklearn.model_selection import train_test_split

In [33]:
# Hold out 25% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

In [34]:
# Inspect the training predictors; the shuffled index shows the rows were sampled randomly.
X_train

Unnamed: 0,SqFt,Bedrooms,Bathrooms
126,2020,3,3
24,2210,4,3
67,2040,4,3
111,1740,2,2
89,2010,2,2
...,...,...,...
106,2130,3,2
14,2590,4,3
92,2110,3,2
51,1860,2,2


In [35]:
# Inspect the training targets; their index matches the rows of X_train.
y_train

126    149900
24     156400
67     151900
111    114900
89      97800
        ...  
106    108500
14     176800
92     142600
51      91100
102    136800
Name: Price, Length: 96, dtype: int64

In [36]:
# (training samples, features) after the split.
X_train.shape

(96, 3)

In [37]:
# Number of training targets; it must equal the number of rows in X_train.
y_train.shape

(96,)

### Standardization

In [38]:
from sklearn.preprocessing import StandardScaler

In [40]:
# Scaler that will be fitted on the training features and reused for the test features.
scaler = StandardScaler()

In [41]:
# Fit the scaler on the training features only, then standardize them.
# NOTE(review): this overwrites X_train (now a NumPy array), so re-running the
# cell re-fits the scaler on already-scaled data. Restart and run in order.
X_train = scaler.fit_transform(X_train)

In [64]:
# Inspect the standardized training features (columns: SqFt, Bedrooms, Bathrooms).
X_train

array([[ 0.0478753 , -0.11396058,  0.96076892],
       [ 0.95750607,  1.25356634,  0.96076892],
       [ 0.14362591,  1.25356634,  0.96076892],
       [-1.2926332 , -1.48148749, -0.96076892],
       [ 0.        , -1.48148749, -0.96076892],
       [-1.10113198,  1.25356634, -0.96076892],
       [ 1.2926332 ,  1.25356634,  0.96076892],
       [-1.53200972, -0.11396058, -0.96076892],
       [-0.33512713, -0.11396058,  0.96076892],
       [-2.681017  , -1.48148749, -0.96076892],
       [-0.19150121, -1.48148749, -0.96076892],
       [ 1.14900729,  1.25356634,  0.96076892],
       [ 1.00538138,  1.25356634,  0.96076892],
       [-0.38300243, -0.11396058,  0.96076892],
       [-1.3405085 , -0.11396058,  0.96076892],
       [ 1.2926332 ,  2.62109326,  0.96076892],
       [ 0.95750607, -0.11396058,  0.96076892],
       [ 1.3405085 ,  1.25356634,  0.96076892],
       [-0.52662834, -0.11396058,  0.96076892],
       [-1.96288745, -1.48148749, -0.96076892],
       [-0.0478753 , -1.48148749, -0.960

In [42]:
# Apply the scaling learned from the training set to the test features (no re-fitting).
# NOTE(review): X_test is overwritten, so running this cell twice scales it twice.
X_test = scaler.transform(X_test)

### Apply multiple linear regression

In [43]:
from sklearn.linear_model import LinearRegression

In [61]:
# Shows the default hyper-parameters only; this instance is discarded.
# The model that is actually fitted is created in the next cell.
LinearRegression()

0,1,2
,fit_intercept,True
,copy_X,True
,tol,1e-06
,n_jobs,
,positive,False


In [62]:
# Ordinary least-squares regressor with default settings.
model = LinearRegression()

In [63]:
# Fit on the standardized training features against the training prices.
model.fit(X_train, y_train)

0,1,2
,fit_intercept,True
,copy_X,True
,tol,1e-06
,n_jobs,
,positive,False


In [65]:
# Fitted coefficients in training-column order (SqFt, Bedrooms, Bathrooms).
# The features were standardized, so each value is the change in predicted Price
# per one standard deviation of that feature.
model.coef_

array([9531.59006298, 7624.85109685, 5433.58476214])

In [66]:
# Predicted Price when every standardized feature is 0, i.e. at the training means.
model.intercept_

np.float64(131618.75)

### Training Data best fit line

In [60]:
# Re-check the shape of the scaled training features used to fit the model.
X_train.shape

(96, 3)

In [57]:
# Re-check the number of training targets used to fit the model.
y_train.shape

(96,)

### Prediction for test data

In [67]:
# Predict prices for the held-out rows; X_test must already be scaled.
y_predict = model.predict(X_test)

### Performance metrics

In [70]:
from sklearn.metrics import mean_squared_error, mean_absolute_error,r2_score

In [72]:
# Compare the held-out prices with the model's predictions.
# Keyword arguments make the (truth, prediction) order explicit.
MSE = mean_squared_error(y_true=y_test, y_pred=y_predict)
MAE = mean_absolute_error(y_true=y_test, y_pred=y_predict)
Score = r2_score(y_true=y_test, y_pred=y_predict)

In [73]:
# Mean squared error on the test set, in squared Price units.
print(MSE)

359055280.1906175


In [74]:
# Mean absolute error on the test set, in the same units as Price.
print(MAE)

15518.953485723323


In [76]:
# R^2 on the test set: the share of Price variance explained by the model.
print(Score)

0.41428156294959395


In [78]:
# `feature` was never defined, so this cell raised NameError.
# Take the feature names from the predictor frame X; they are listed in the
# same order as the entries of model.coef_.
feature = list(X.columns)
print(feature)

['SqFt', 'Bedrooms', 'Bathrooms']