# Housing price regression

#### Imports

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import ensemble
from sklearn.metrics import mean_absolute_error

In [3]:
# Load the raw housing table from the CSV that sits next to the notebook.
df = pd.read_csv(filepath_or_buffer="housing_price_dataset.csv")

#### Data exploration and preparation

In [4]:
# Peek at the first five rows of the raw table.
df.head(n=5)

Unnamed: 0,SquareFeet,Bedrooms,Bathrooms,Neighborhood,YearBuilt,Price
0,2126,4,1,Rural,1969,215355.283618
1,2459,3,2,Rural,1980,195014.221626
2,1860,2,1,Suburb,1970,306891.012076
3,2294,2,1,Urban,1996,206786.787153
4,2130,5,2,Suburb,2001,272436.239065


In [5]:
# One-hot encode the categorical Neighborhood column into Neighborhood_* indicator columns.
# dtype=int pins the indicators to 0/1 integers: pandas >= 2.0 would otherwise emit
# True/False columns, which would no longer match the 0/1 rows shown below or the
# 0/1 feature lists passed to model.predict later in the notebook.
# NOTE: this overwrites df, so re-running the cell without reloading the CSV fails
# because the Neighborhood column is already gone.
df = pd.get_dummies(df, columns=['Neighborhood'], dtype=int)

In [6]:
# Re-inspect the first five rows to confirm the Neighborhood_* indicator columns.
df.head(n=5)

Unnamed: 0,SquareFeet,Bedrooms,Bathrooms,YearBuilt,Price,Neighborhood_Rural,Neighborhood_Suburb,Neighborhood_Urban
0,2126,4,1,1969,215355.283618,1,0,0
1,2459,3,2,1980,195014.221626,1,0,0
2,1860,2,1,1970,306891.012076,0,1,0
3,2294,2,1,1996,206786.787153,0,0,1
4,2130,5,2,2001,272436.239065,0,1,0


In [8]:
# List the column labels; everything except 'Price' becomes a model feature below.
df.columns

Index(['SquareFeet', 'Bedrooms', 'Bathrooms', 'YearBuilt', 'Price',
       'Neighborhood_Rural', 'Neighborhood_Suburb', 'Neighborhood_Urban'],
      dtype='object')

In [10]:
# Feature matrix: every column except the target.
X = df.drop(columns='Price')

In [11]:
# Target vector: the sale price the model learns to predict.
y = df["Price"]

In [12]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
    shuffle=True,
)

#### Model training and evaluation

In [13]:
# Gradient-boosted regression trees with Huber loss.
# max_features=0.6 makes every split consider a random subset of the features, so
# fitting is stochastic; random_state is fixed (same seed as the train/test split)
# so the reported errors and predictions can be reproduced on a fresh kernel.
model = ensemble.GradientBoostingRegressor(
    n_estimators=350,
    learning_rate=0.1,
    max_depth=5,
    min_samples_split=4,
    min_samples_leaf=6,
    max_features=0.6,
    loss='huber',
    random_state=42,
)

In [14]:
# Train the regressor on the training split only.
model.fit(X=X_train, y=y_train)

In [15]:
# Mean absolute error on the data the model was fitted on.
mae_train = mean_absolute_error(
    y_true=y_train,
    y_pred=model.predict(X_train),
)
print("Training Set Mean Absolute Error: %.2f" % mae_train)

# Mean absolute error on the held-out rows; compare against the training
# figure to gauge how much the model overfits.
mae_test = mean_absolute_error(
    y_true=y_test,
    y_pred=model.predict(X_test),
)
print("Test Set Mean Absolute Error: %.2f" % mae_test)


Training Set Mean Absolute Error: 37632.13
Test Set Mean Absolute Error: 40175.06


In [24]:
# Show the first two rows; their feature values are reused for the spot-check predictions below.
df.head(n=2)

Unnamed: 0,SquareFeet,Bedrooms,Bathrooms,YearBuilt,Price,Neighborhood_Rural,Neighborhood_Suburb,Neighborhood_Urban
0,2126,4,1,1969,215355.283618,1,0,0
1,2459,3,2,1980,195014.221626,1,0,0


In [20]:
# Feature values copied from row 0 of df (a Rural property built in 1969),
# listed in the same order as the columns of X.
new_property = [2126,4,1,1969,1,0,0]
# The model was fitted on a DataFrame with named columns, so pass a one-row
# DataFrame carrying the same column labels instead of a bare list. This avoids
# scikit-learn's feature-name mismatch warning and raises immediately if the
# number of values does not match the number of feature columns.
new_pred = model.predict(pd.DataFrame([new_property], columns=X.columns))
new_pred



array([243977.37306398])

In [21]:
# predict returns a one-element array here; index 0 extracts the scalar predicted price.
new_pred[0]

243977.373063981

In [23]:
# Residual (actual - predicted) for the first row of df. The actual price is read
# from the data rather than retyped from the displayed table, so the value is not
# limited to the rounded digits shown in the output and stays correct if the data changes.
df['Price'].iloc[0] - new_pred[0]

-28622.089445981022

In [25]:
# Feature values copied from row 1 of df (a Rural property built in 1980),
# listed in the same order as the columns of X.
new_property = [2459,3,2,1980,1,0,0]
# Pass a one-row DataFrame with X's column labels so the input matches the
# named columns the model was fitted on (a bare list carries no feature names).
new_pred = model.predict(pd.DataFrame([new_property], columns=X.columns))
new_pred



array([257382.70147229])