In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

In [2]:
dataset = pd.read_csv('C:/Users/miant/Desktop/Python/house_prices_dataset.csv')
dataset.head()

Unnamed: 0,Size,Number of Bedrooms,Number of Bathrooms,Location,Age of the House,Garage Size,Sale Price
0,3974,1,2,Countryside,81,2,594026
1,1660,3,2,Downtown,97,1,847135
2,2094,2,1,Suburb,74,3,163776
3,1930,2,2,Countryside,75,2,230494
4,1895,1,1,Downtown,48,1,242971


In [3]:
#Check how many missing values per column
dataset.isnull().sum()

Size                   0
Number of Bedrooms     0
Number of Bathrooms    0
Location               0
Age of the House       0
Garage Size            0
Sale Price             0
dtype: int64

In [4]:
#We determine the X and y

X = dataset.iloc[:, :-1].values
y = dataset.iloc[:,-1].values


In [5]:
#We encode the categorical variable

ct = ColumnTransformer(transformers = [('encoder', OneHotEncoder(), [3])], remainder ='passthrough')
X = np.array(ct.fit_transform(X))
X

array([[1.0, 0.0, 0.0, ..., 2, 81, 2],
       [0.0, 1.0, 0.0, ..., 2, 97, 1],
       [0.0, 0.0, 1.0, ..., 1, 74, 3],
       ...,
       [1.0, 0.0, 0.0, ..., 2, 61, 0],
       [0.0, 0.0, 1.0, ..., 1, 96, 2],
       [0.0, 0.0, 1.0, ..., 3, 18, 1]], dtype=object)

In [6]:
# Splitting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [7]:
#  Multiple Linear Regression
model = LinearRegression()
model.fit(X_train, y_train)

In [8]:
#Y pred
y_pred = model.predict(X_test)

In [9]:
r2_score(y_test, y_pred)

-0.038702464202095

In [10]:
####### POLYNOMIAL ##############

In [11]:
# Training the Polynomial Regression model on the Training set
poly_reg = PolynomialFeatures(degree=6)
X_poly = poly_reg.fit_transform(X_train)
regressor = LinearRegression()
regressor.fit(X_poly, y_train)


In [12]:
# Predicting the Test set results
y_pred = regressor.predict(poly_reg.transform(X_test))
np.set_printoptions(precision=2)
print(np.concatenate((y_pred.reshape(len(y_pred), 1), y_test.reshape(len(y_test), 1)), 1))

[[ 412941.23  565249.  ]
 [ 759913.73  613634.  ]
 [ 361576.92  972056.  ]
 [ 406197.98  148064.  ]
 [ 750651.94  161462.  ]
 [ 782483.59  133384.  ]
 [ 417350.31  336073.  ]
 [ 425386.58  912605.  ]
 [ 569466.13  390414.  ]
 [ 399322.48  984550.  ]
 [ 943086.36  376284.  ]
 [ 540833.93  820974.  ]
 [ 775547.56  100519.  ]
 [ 615097.32  490015.  ]
 [ 457524.81  537387.  ]
 [ 393710.32  222502.  ]
 [ 362083.53  590277.  ]
 [ 885685.73  509158.  ]
 [ 516416.34  286371.  ]
 [ 533282.22  936788.  ]
 [ 578940.41  645376.  ]
 [ 793964.98  513685.  ]
 [ 617492.75  906874.  ]
 [ 314043.96  407776.  ]
 [ 443347.2   396030.  ]
 [ 619421.24  408105.  ]
 [ 538453.51  474724.  ]
 [ 220441.11  539415.  ]
 [ 614205.51  565795.  ]
 [ 476817.91  847135.  ]
 [ 490398.7   922437.  ]
 [ 550963.12  972564.  ]
 [ 695515.64  869497.  ]
 [ 504123.98  941490.  ]
 [ 520965.48  359509.  ]
 [ 403821.58  680532.  ]
 [ 477976.03  992585.  ]
 [ 474552.62  528697.  ]
 [ 546597.97  994556.  ]
 [ 577752.46  331373.  ]


In [13]:
r2_score(y_test, y_pred)

-0.9272664553570718