In [7]:
%matplotlib inline
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.formula.api import ols
from IPython.display import HTML, display
import matplotlib.pyplot as plt
from statsmodels.regression.quantile_regression import QuantReg


In [8]:
data = pd.read_csv('rent_clean.csv')
data.head()


Unnamed: 0,id,web_alberlet,web_ingatlan,web_tower,apartment,house,sale,rent,price_per_month,size(sqm),...,orientation_northeast,orientation_northwest,orientation_southwest,orientation_southeast,garden_access,garden_AC,attic,attic_loft_conversion,attic_penthouse,attic_topfloor
0,0,1.0,0.0,0.0,1,0,0,1,386,35,...,,,,,,,,,,
1,1,1.0,0.0,0.0,1,0,0,1,773,56,...,,,,,,,,,,
2,2,1.0,0.0,0.0,1,0,0,1,464,53,...,,,,,,,,,,
3,3,1.0,0.0,0.0,1,0,0,1,835,75,...,,,,,,,,,,
4,4,1.0,0.0,0.0,1,0,0,1,773,69,...,,,,,,,,,,


In [9]:
housing_model = smf.quantreg("price_per_sqm ~ apartment + number_of_whole_rooms + number_of_half_rooms + floor", data=data).fit()

# summarize our model
housing_model_summary = housing_model.summary()

# convert our table to HTML and add colors to headers for explanatory purposes
HTML(
(housing_model_summary
    .as_html()
    .replace('<th>  Adj. R-squared:    </th>', '<th style="background-color:#aec7e8;"> Adj. R-squared: </th>')
    .replace('<th>coef</th>', '<th style="background-color:#ffbb78;">coef</th>')
    .replace('<th>std err</th>', '<th style="background-color:#c7e9c0;">std err</th>')
    .replace('<th>P>|t|</th>', '<th style="background-color:#bcbddc;">P>|t|</th>')
    .replace('<th>[0.025</th>    <th>0.975]</th>', '<th style="background-color:#ff9896;">[0.025</th>    <th style="background-color:#ff9896;">0.975]</th>'))
)


0,1,2,3
Dep. Variable:,price_per_sqm,Pseudo R-squared:,0.01335
Model:,QuantReg,Bandwidth:,0.752
Method:,Least Squares,Sparsity:,7.923
Date:,"Thu, 18 Apr 2019",No. Observations:,9759.0
Time:,07:20:40,Df Residuals:,9754.0
,,Df Model:,4.0

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,7.9858,0.460,17.346,0.000,7.083,8.888
apartment,2.9494,0.437,6.746,0.000,2.092,3.806
number_of_whole_rooms,0.0582,0.042,1.385,0.166,-0.024,0.140
number_of_half_rooms,-0.8257,0.077,-10.770,0.000,-0.976,-0.675
floor,0.0867,0.019,4.521,0.000,0.049,0.124


In [10]:
housing_model = smf.quantreg("price_per_sqm ~ apartment + number_of_whole_rooms + number_of_half_rooms + floor + lift + air_conditioner + furnish_furnished", data=data).fit()

# summarize our model
housing_model_summary = housing_model.summary()

# convert our table to HTML and add colors to headers for explanatory purposes
HTML(
(housing_model_summary
    .as_html()
    .replace('<th>  Adj. R-squared:    </th>', '<th style="background-color:#aec7e8;"> Adj. R-squared: </th>')
    .replace('<th>coef</th>', '<th style="background-color:#ffbb78;">coef</th>')
    .replace('<th>std err</th>', '<th style="background-color:#c7e9c0;">std err</th>')
    .replace('<th>P>|t|</th>', '<th style="background-color:#bcbddc;">P>|t|</th>')
    .replace('<th>[0.025</th>    <th>0.975]</th>', '<th style="background-color:#ff9896;">[0.025</th>    <th style="background-color:#ff9896;">0.975]</th>'))
)


0,1,2,3
Dep. Variable:,price_per_sqm,Pseudo R-squared:,0.1019
Model:,QuantReg,Bandwidth:,0.9969
Method:,Least Squares,Sparsity:,8.079
Date:,"Thu, 18 Apr 2019",No. Observations:,3884.0
Time:,07:20:41,Df Residuals:,3876.0
,,Df Model:,7.0

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,9.2483,2.370,3.902,0.000,4.602,13.895
apartment,-0.0477,2.350,-0.020,0.984,-4.655,4.559
number_of_whole_rooms,0.0326,0.072,0.452,0.652,-0.109,0.174
number_of_half_rooms,-0.8114,0.125,-6.512,0.000,-1.056,-0.567
floor,-0.0903,0.036,-2.493,0.013,-0.161,-0.019
lift,1.1386,0.155,7.350,0.000,0.835,1.442
air_conditioner,2.1377,0.142,15.011,0.000,1.858,2.417
furnish_furnished,1.2646,0.139,9.091,0.000,0.992,1.537


In [16]:
# basic specifications + basic decoration + basic view + district
# fit our model with .fit() and show results
# we use statsmodels' formula API to invoke the syntax below,
# where we write out the formula using ~
housing_model = smf.quantreg("price_per_sqm ~ apartment + number_of_whole_rooms + number_of_half_rooms + floor + lift + air_conditioner + furnish_furnished + view_garden + view_panoramic + view_street + district_i + district_ii + district_iii + district_iv + district_v + district_vi + district_vii + district_viii + district_ix + district_x + district_xi + district_xii + district_xiii + district_xiv + district_xv + district_xvi + district_xvii + district_xviii + district_xix + district_xx + district_xxi + district_xxii + district_xxiii", data=data).fit(q=.5)

# summarize our model
housing_model_summary = housing_model.summary()

# convert our table to HTML and add colors to headers for explanatory purposes
HTML(
(housing_model_summary
    .as_html()
    .replace('<th>  Adj. R-squared:    </th>', '<th style="background-color:#aec7e8;"> Adj. R-squared: </th>')
    .replace('<th>coef</th>', '<th style="background-color:#ffbb78;">coef</th>')
    .replace('<th>std err</th>', '<th style="background-color:#c7e9c0;">std err</th>')
    .replace('<th>P>|t|</th>', '<th style="background-color:#bcbddc;">P>|t|</th>')
    .replace('<th>[0.025</th>    <th>0.975]</th>', '<th style="background-color:#ff9896;">[0.025</th>    <th style="background-color:#ff9896;">0.975]</th>'))
)


ValueError: operands could not be broadcast together with shapes (34,) (33,) 

In [13]:
# basic specifications + basic decoration + basic view + district + heating 
# fit our model with .fit() and show results
# we use statsmodels' formula API to invoke the syntax below,
# where we write out the formula using ~
housing_model = smf.quantreg("price_per_sqm ~ apartment + number_of_whole_rooms + number_of_half_rooms + floor + lift + air_conditioner + furnish_furnished + view_garden + view_panoramic + view_street + district_i + district_ii + district_iii + district_iv + district_v + district_vi + district_vii + district_viii + district_ix + district_x + district_xi + district_xii + district_xiii + district_xiv + district_xv + district_xvi + district_xvii + district_xviii + district_xix + district_xx + district_xxi + district_xxii + district_xxiii + heating_gas + heating_wall_heating + heating_combination + heating_ceiling + heating_central_heating + heating_electric + heating_floor + heating_circulating + heating_district", data=data).fit()

# summarize our model
housing_model_summary = housing_model.summary()

# convert our table to HTML and add colors to headers for explanatory purposes
HTML(
(housing_model_summary
    .as_html()
    .replace('<th>  Adj. R-squared:    </th>', '<th style="background-color:#aec7e8;"> Adj. R-squared: </th>')
    .replace('<th>coef</th>', '<th style="background-color:#ffbb78;">coef</th>')
    .replace('<th>std err</th>', '<th style="background-color:#c7e9c0;">std err</th>')
    .replace('<th>P>|t|</th>', '<th style="background-color:#bcbddc;">P>|t|</th>')
    .replace('<th>[0.025</th>    <th>0.975]</th>', '<th style="background-color:#ff9896;">[0.025</th>    <th style="background-color:#ff9896;">0.975]</th>'))
)


ValueError: operands could not be broadcast together with shapes (43,) (42,) 