In [14]:
import numpy as np
import pandas as pd
house = pd.read_csv('Housing.csv')
house.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished


In [15]:
#data prep
var = ['mainroad','guestroom','basement','airconditioning','hotwaterheating','prefarea']
house[var] = house[var].apply(lambda x : x.map({'yes':1,'no':0}))
make = pd.get_dummies(house['furnishingstatus'],drop_first=True)
house = pd.concat([house,make],axis=1)
house.drop(['furnishingstatus'],axis=1,inplace=True)
house.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,semi-furnished,unfurnished
0,13300000,7420,4,2,3,1,0,0,0,1,2,1,0,0
1,12250000,8960,4,4,4,1,0,0,0,1,3,0,0,0
2,12250000,9960,3,2,2,1,0,1,0,0,2,1,1,0
3,12215000,7500,4,2,2,1,0,1,0,1,3,1,0,0
4,11410000,7420,4,1,2,1,1,1,0,1,2,0,0,0


In [16]:
from sklearn.model_selection import train_test_split
house_tr,house_te = train_test_split(house,train_size = 0.7,test_size=0.3,random_state=42)

In [17]:
from sklearn.preprocessing import MinMaxScaler
scalar = MinMaxScaler()
num_variables = ['area','bedrooms','bathrooms','stories','parking','price']
house[num_variables] = scalar.fit_transform(house[num_variables])

In [18]:
y_train = house_tr.pop('price')
x_train = house_tr

In [19]:
from sklearn.feature_selection import RFE
from sklearn.linear_model import LinearRegression

lm = LinearRegression()
lm.fit(x_train,y_train)
rfe = RFE(lm,10)
rfe = rfe.fit(x_train,y_train)

In [23]:
list(zip(x_train.columns,rfe.support_,rfe.ranking_))

[('area', False, 4),
 ('bedrooms', False, 3),
 ('bathrooms', True, 1),
 ('stories', True, 1),
 ('mainroad', True, 1),
 ('guestroom', True, 1),
 ('basement', True, 1),
 ('hotwaterheating', True, 1),
 ('airconditioning', True, 1),
 ('parking', True, 1),
 ('prefarea', True, 1),
 ('semi-furnished', False, 2),
 ('unfurnished', True, 1)]

In [27]:
col = x_train.columns[rfe.support_]
col

Index(['bathrooms', 'stories', 'mainroad', 'guestroom', 'basement',
       'hotwaterheating', 'airconditioning', 'parking', 'prefarea',
       'unfurnished'],
      dtype='object')

In [30]:
import statsmodels.api as sm
x_train_lm = sm.add_constant(x_train[col])
lr = sm.OLS(y_train,x_train_lm).fit()
lr.params

const              8.669825e+05
bathrooms          1.321651e+06
stories            3.919812e+05
mainroad           7.367739e+05
guestroom          4.495158e+05
basement           3.848779e+05
hotwaterheating    6.352658e+05
airconditioning    8.717235e+05
parking            4.605750e+05
prefarea           6.763702e+05
unfurnished       -3.496540e+05
dtype: float64

In [31]:
lr.summary()

0,1,2,3
Dep. Variable:,price,R-squared:,0.616
Model:,OLS,Adj. R-squared:,0.606
Method:,Least Squares,F-statistic:,59.43
Date:,"Fri, 05 Apr 2019",Prob (F-statistic):,8.210000000000001e-71
Time:,19:32:47,Log-Likelihood:,-5839.9
No. Observations:,381,AIC:,11700.0
Df Residuals:,370,BIC:,11750.0
Df Model:,10,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,8.67e+05,2.46e+05,3.528,0.000,3.84e+05,1.35e+06
bathrooms,1.322e+06,1.31e+05,10.075,0.000,1.06e+06,1.58e+06
stories,3.92e+05,7.77e+04,5.047,0.000,2.39e+05,5.45e+05
mainroad,7.368e+05,1.74e+05,4.226,0.000,3.94e+05,1.08e+06
guestroom,4.495e+05,1.64e+05,2.745,0.006,1.27e+05,7.72e+05
basement,3.849e+05,1.4e+05,2.759,0.006,1.11e+05,6.59e+05
hotwaterheating,6.353e+05,2.62e+05,2.424,0.016,1.2e+05,1.15e+06
airconditioning,8.717e+05,1.37e+05,6.374,0.000,6.03e+05,1.14e+06
parking,4.606e+05,7.19e+04,6.409,0.000,3.19e+05,6.02e+05

0,1,2,3
Omnibus:,56.522,Durbin-Watson:,1.962
Prob(Omnibus):,0.0,Jarque-Bera (JB):,135.334
Skew:,0.744,Prob(JB):,4.0999999999999996e-30
Kurtosis:,5.512,Cond. No.,14.1
