In [1]:
import pandas as pd
import numpy as np
from sklearn import datasets
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import RidgeCV, LassoCV, Lasso, Ridge
from sklearn.model_selection import train_test_split, GridSearchCV, KFold , cross_val_score
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures

import statsmodels.api as sm
import statsmodels.formula.api as smf

import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
import datetime

plt.style.use('seaborn')
sns.set_palette("husl")
%matplotlib inline
%config InlineBackend.figure_format = 'svg'

In [2]:
abnb_df = pd.read_csv('data/listing_final.csv')
abnb_df = abnb_df.drop(columns= ['Unnamed: 0'])

In [3]:
abnb_df.rename(index = str, columns ={'f': 'not_superhost', 't': 'is_superhost', 'zvi' :'rent_index', 'strict_14_with_grace_period' :'strict_14',\
                                     'super_strict_30': 'strict_30', 'super_strict_60': 'strict_60'}, inplace = True)

abnb_df.rename(str.lower , axis ='columns', inplace=True)

In [4]:
abnb_df.columns

Index(['host_response_rate', 'accommodates', 'bathrooms', 'bedrooms', 'beds',
       'price', 'security_deposit', 'cleaning_fee', 'guests_included',
       'extra_people', 'availability_60', 'availability_90',
       'number_of_reviews', 'review_scores_rating', 'review_scores_accuracy',
       'review_scores_cleanliness', 'review_scores_checkin',
       'review_scores_communication', 'review_scores_location',
       'review_scores_value', 'rent_index', 'number_of_days_as_host',
       'number_of_amenities', 'review_period', 'not_superhost', 'is_superhost',
       'moderate', 'strict', 'strict_14', 'strict_30', 'strict_60'],
      dtype='object')

In [7]:
X = abnb_df[['host_response_rate', 'accommodates', 'bathrooms', 'bedrooms', 'beds',
       'security_deposit', 'cleaning_fee', 'guests_included',
       'extra_people',
       'number_of_reviews', 'review_scores_cleanliness','rent_index', 'number_of_days_as_host',
       'number_of_amenities', 'review_period', 'not_superhost', 'is_superhost']]
y = abnb_df['price']
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size =0.3, random_state = 42)

In [6]:
degree = 4
model_poly1 = make_pipeline(PolynomialFeatures(degree), LinearRegression())
model_poly1.fit(X, y)
s = model_poly1.score(X, y)
r = np.sqrt(mean_squared_error(y, model_poly1.predict(X)))
print('R^2:', s, 'RMSE:' , r)

R^2: 0.8941168048520444 RMSE: 38.072107400972435


In [8]:
model_poly1.get_params()

{'memory': None,
 'steps': [('polynomialfeatures',
   PolynomialFeatures(degree=4, include_bias=True, interaction_only=False)),
  ('linearregression',
   LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False))],
 'polynomialfeatures': PolynomialFeatures(degree=4, include_bias=True, interaction_only=False),
 'linearregression': LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False),
 'polynomialfeatures__degree': 4,
 'polynomialfeatures__include_bias': True,
 'polynomialfeatures__interaction_only': False,
 'linearregression__copy_X': True,
 'linearregression__fit_intercept': True,
 'linearregression__n_jobs': 1,
 'linearregression__normalize': False}

In [18]:
poly1=PolynomialFeatures(4)
X_poly=poly1.fit_transform(X)

model = LinearRegression()

model.fit(X_poly,y)
betas = model.coef_

betas [ 1.87409696e-05 -6.48585703e+01 -6.33276503e+00 ...  0.00000000e+00
  0.00000000e+00  4.24897986e-01]


In [28]:
model.intercept_
betas = model.coef_
len(betas)

5985

In [None]:
['host_response_rate', 'accommodates', 'bathrooms', 'bedrooms', 'beds',
       'security_deposit', 'cleaning_fee', 'guests_included',
       'extra_people',
       'number_of_reviews', 'review_scores_cleanliness','rent_index', 'number_of_days_as_host',
       'number_of_amenities', 'review_period', 'not_superhost', 'is_superhost']

### Statsmodel.api

In [17]:
# Define the model
lm1 = smf.ols('price ~ host_response_rate + accommodates + bathrooms + bedrooms + beds + security_deposit + cleaning_fee + guests_included + extra_people + number_of_reviews + review_scores_cleanliness + rent_index + number_of_days_as_host + number_of_amenities + review_period + not_superhost + is_superhost', data = abnb_df)

# Fit your model to your training set
fit_lm1 = lm1.fit()

# Print summary statistics of the model's performance
fit_lm1.summary()

0,1,2,3
Dep. Variable:,price,R-squared:,0.378
Model:,OLS,Adj. R-squared:,0.376
Method:,Least Squares,F-statistic:,274.9
Date:,"Thu, 11 Oct 2018",Prob (F-statistic):,0.0
Time:,00:30:24,Log-Likelihood:,-45857.0
No. Observations:,7715,AIC:,91750.0
Df Residuals:,7697,BIC:,91880.0
Df Model:,17,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-114.0062,92.969,-1.226,0.220,-296.251,68.239
host_response_rate,-0.1483,0.035,-4.251,0.000,-0.217,-0.080
accommodates,16.8625,0.730,23.102,0.000,15.432,18.293
bathrooms,13.2872,1.711,7.764,0.000,9.932,16.642
bedrooms,9.0213,1.798,5.017,0.000,5.497,12.546
beds,-7.5325,1.095,-6.876,0.000,-9.680,-5.385
security_deposit,0.0062,0.007,0.885,0.376,-0.008,0.020
cleaning_fee,0.5364,0.029,18.731,0.000,0.480,0.592
guests_included,-1.3198,0.808,-1.634,0.102,-2.903,0.264

0,1,2,3
Omnibus:,4341.068,Durbin-Watson:,1.61
Prob(Omnibus):,0.0,Jarque-Bera (JB):,76199.529
Skew:,2.326,Prob(JB):,0.0
Kurtosis:,17.676,Cond. No.,348000.0


In [None]:
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.tools.tools import add_constant



In [29]:
fit_poly = sm.GLS(y, X_poly).fit()

In [31]:
fit_poly.summary().tables[0]

0,1,2,3
Dep. Variable:,price,R-squared:,0.892
Model:,GLS,Adj. R-squared:,0.801
Method:,Least Squares,F-statistic:,9.734
Date:,"Thu, 11 Oct 2018",Prob (F-statistic):,0.0
Time:,08:57:22,Log-Likelihood:,-39096.0
No. Observations:,7715,AIC:,85280.0
Df Residuals:,4170,BIC:,109900.0
Df Model:,3544,,
Covariance Type:,nonrobust,,


In [32]:
fit_poly.summary().tables[1]

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.0003,7.84e-05,-3.265,0.001,-0.000,-0.000
x1,-14.0441,4.329,-3.244,0.001,-22.532,-5.556
x2,-0.1577,0.658,-0.240,0.811,-1.447,1.132
x3,0.1573,0.378,0.416,0.677,-0.584,0.899
x4,0.4654,0.412,1.128,0.259,-0.343,1.274
x5,-0.0187,0.264,-0.071,0.944,-0.536,0.499
x6,-5.1163,3.510,-1.458,0.145,-11.998,1.766
x7,-7.7834,2.949,-2.639,0.008,-13.565,-2.002
x8,-0.0237,0.153,-0.155,0.877,-0.324,0.276


In [10]:
import pprint
poly_features = PolynomialFeatures(4)
poly_ft = poly_features.fit_transform(X)
poly_model3 = LinearRegression()
poly_model3.fit(poly_ft, y)
poly_model3_coef = poly_model3.coef_
pprint.pprint(sorted(dict(zip(poly_features.get_feature_names(list(X.columns)),poly_model3_coef)).items(), key=lambda kv:kv[1]))

{'1': 1.8740969633705772e-05,
 'accommodates': -6.332765028575909,
 'accommodates bathrooms': -18.496773721648644,
 'accommodates bathrooms bedrooms': 34.812550132481974,
 'accommodates bathrooms bedrooms beds': -9.482157211417293,
 'accommodates bathrooms bedrooms cleaning_fee': -0.16344477671609775,
 'accommodates bathrooms bedrooms extra_people': -0.9953307691028829,
 'accommodates bathrooms bedrooms guests_included': 16.344753516561745,
 'accommodates bathrooms bedrooms is_superhost': 56.820162119529606,
 'accommodates bathrooms bedrooms not_superhost': -22.007251326783972,
 'accommodates bathrooms bedrooms number_of_amenities': 0.7076899615696582,
 'accommodates bathrooms bedrooms number_of_days_as_host': 0.04712025486595778,
 'accommodates bathrooms bedrooms number_of_reviews': -0.41741862641054245,
 'accommodates bathrooms bedrooms rent_index': 0.0015238153698562895,
 'accommodates bathrooms bedrooms review_period': -0.06156922712794412,
 'accommodates bathrooms bedrooms review_

 'accommodates review_scores_cleanliness rent_index number_of_days_as_host': -2.6452512526611047e-06,
 'accommodates review_scores_cleanliness rent_index review_period': 5.041635341125625e-06,
 'accommodates review_scores_cleanliness rent_index^2': 1.6954988750969325e-05,
 'accommodates review_scores_cleanliness review_period': 0.1134047539675991,
 'accommodates review_scores_cleanliness review_period is_superhost': 0.053079325360686734,
 'accommodates review_scores_cleanliness review_period not_superhost': 0.06027670555164896,
 'accommodates review_scores_cleanliness review_period^2': 8.230077768702457e-06,
 'accommodates review_scores_cleanliness^2': -2.7738831977818035,
 'accommodates review_scores_cleanliness^2 is_superhost': -1.7290601930110525,
 'accommodates review_scores_cleanliness^2 not_superhost': -1.044757994154355,
 'accommodates review_scores_cleanliness^2 number_of_amenities': 0.0010440695283657601,
 'accommodates review_scores_cleanliness^2 number_of_days_as_host': 0.00

 'bathrooms number_of_reviews rent_index number_of_days_as_host': -4.6541841707784404e-07,
 'bathrooms number_of_reviews rent_index review_period': -6.567467688870318e-07,
 'bathrooms number_of_reviews rent_index^2': -3.691866911944685e-06,
 'bathrooms number_of_reviews review_period': -0.023634129627079697,
 'bathrooms number_of_reviews review_period is_superhost': -0.012556554254528684,
 'bathrooms number_of_reviews review_period not_superhost': -0.011014966475894741,
 'bathrooms number_of_reviews review_period^2': -1.9824188682846574e-07,
 'bathrooms number_of_reviews review_scores_cleanliness': 6.430779001949747,
 'bathrooms number_of_reviews review_scores_cleanliness is_superhost': 3.42337542348838,
 'bathrooms number_of_reviews review_scores_cleanliness not_superhost': 3.0074928533647025,
 'bathrooms number_of_reviews review_scores_cleanliness number_of_amenities': -0.004888049586441626,
 'bathrooms number_of_reviews review_scores_cleanliness number_of_days_as_host': -0.001945684

 'bedrooms review_period is_superhost^2': -0.5812423984216807,
 'bedrooms review_period not_superhost': -1.0502331441223713,
 'bedrooms review_period not_superhost is_superhost': 2.0836665726164938e-12,
 'bedrooms review_period not_superhost^2': -1.0502382644539432,
 'bedrooms review_period^2': -0.000689266910930434,
 'bedrooms review_period^2 is_superhost': -0.00030487860089064103,
 'bedrooms review_period^2 not_superhost': -0.0004397049520208043,
 'bedrooms review_period^3': 1.8146740100345726e-07,
 'bedrooms review_scores_cleanliness': -37.98747043080298,
 'bedrooms review_scores_cleanliness is_superhost': 10.980876988489143,
 'bedrooms review_scores_cleanliness is_superhost^2': 10.980780883738552,
 'bedrooms review_scores_cleanliness not_superhost': -48.98372012411559,
 'bedrooms review_scores_cleanliness not_superhost is_superhost': 6.447287148603209e-12,
 'bedrooms review_scores_cleanliness not_superhost^2': -48.9836565502578,
 'bedrooms review_scores_cleanliness number_of_amenit

 'beds security_deposit number_of_reviews review_scores_cleanliness': 0.002432602459976838,
 'beds security_deposit number_of_reviews^2': -2.9387696801699167e-05,
 'beds security_deposit rent_index': -0.002349463727557483,
 'beds security_deposit rent_index is_superhost': -0.0011711921383955222,
 'beds security_deposit rent_index not_superhost': -0.0011442289252399137,
 'beds security_deposit rent_index number_of_amenities': 9.973877770459902e-06,
 'beds security_deposit rent_index number_of_days_as_host': -2.4082218232860525e-07,
 'beds security_deposit rent_index review_period': -5.346755016688749e-07,
 'beds security_deposit rent_index^2': 7.575449236585981e-07,
 'beds security_deposit review_period': -0.0032167849455901026,
 'beds security_deposit review_period is_superhost': -0.001881916039218475,
 'beds security_deposit review_period not_superhost': -0.0013489595021400089,
 'beds security_deposit review_period^2': 1.1245800025667246e-07,
 'beds security_deposit review_scores_clea

 'cleaning_fee number_of_reviews rent_index': 0.00036168019522616474,
 'cleaning_fee number_of_reviews rent_index is_superhost': 0.00016254888876644465,
 'cleaning_fee number_of_reviews rent_index not_superhost': 0.00016627608918827775,
 'cleaning_fee number_of_reviews rent_index number_of_amenities': 1.6711680309740951e-06,
 'cleaning_fee number_of_reviews rent_index number_of_days_as_host': -1.9565189801618033e-08,
 'cleaning_fee number_of_reviews rent_index review_period': -7.33391536979211e-08,
 'cleaning_fee number_of_reviews rent_index^2': -3.610408613394234e-08,
 'cleaning_fee number_of_reviews review_period': -7.505066821018725e-05,
 'cleaning_fee number_of_reviews review_period is_superhost': -4.938586762687093e-05,
 'cleaning_fee number_of_reviews review_period not_superhost': -4.7652427742659615e-05,
 'cleaning_fee number_of_reviews review_period^2': -3.5686716505500815e-08,
 'cleaning_fee number_of_reviews review_scores_cleanliness': 0.0220819256406545,
 'cleaning_fee numbe

 'guests_included rent_index number_of_amenities review_period': -1.7369793762406202e-06,
 'guests_included rent_index number_of_amenities^2': -0.00032205733391938823,
 'guests_included rent_index number_of_days_as_host': -9.723682869692391e-05,
 'guests_included rent_index number_of_days_as_host is_superhost': -2.574438962205262e-05,
 'guests_included rent_index number_of_days_as_host not_superhost': 1.303625255812238e-06,
 'guests_included rent_index number_of_days_as_host number_of_amenities': 1.0653606430602466e-06,
 'guests_included rent_index number_of_days_as_host review_period': -9.360596602524252e-08,
 'guests_included rent_index number_of_days_as_host^2': 2.5101439867641906e-08,
 'guests_included rent_index review_period': -0.00015479592549504242,
 'guests_included rent_index review_period is_superhost': -0.00013095637118836317,
 'guests_included rent_index review_period not_superhost': -9.585336028397951e-05,
 'guests_included rent_index review_period^2': 7.101435000288203e-

 'host_response_rate cleaning_fee rent_index not_superhost': 0.0006225125943292706,
 'host_response_rate cleaning_fee rent_index number_of_amenities': 5.0377241569824795e-06,
 'host_response_rate cleaning_fee rent_index number_of_days_as_host': 8.436338842354035e-09,
 'host_response_rate cleaning_fee rent_index review_period': -4.682435997965806e-09,
 'host_response_rate cleaning_fee rent_index^2': -9.723517165570461e-08,
 'host_response_rate cleaning_fee review_period': -9.517554409923378e-05,
 'host_response_rate cleaning_fee review_period is_superhost': -0.00010552121643031221,
 'host_response_rate cleaning_fee review_period not_superhost': -0.00010478345575807095,
 'host_response_rate cleaning_fee review_period^2': -8.798028793000068e-08,
 'host_response_rate cleaning_fee review_scores_cleanliness': -0.01641107303323425,
 'host_response_rate cleaning_fee review_scores_cleanliness is_superhost': -0.017814905560915178,
 'host_response_rate cleaning_fee review_scores_cleanliness not_s

 'host_response_rate^2 guests_included^2': 0.004216132651993054,
 'host_response_rate^2 is_superhost': 0.31601533996653525,
 'host_response_rate^2 is_superhost^2': 0.3190967044784796,
 'host_response_rate^2 not_superhost': 0.21634795083364106,
 'host_response_rate^2 not_superhost is_superhost': 3.6335777633278866e-06,
 'host_response_rate^2 not_superhost^2': 0.22319428663742794,
 'host_response_rate^2 number_of_amenities': 0.013996595959719426,
 'host_response_rate^2 number_of_amenities is_superhost': 0.007585861810512605,
 'host_response_rate^2 number_of_amenities not_superhost': 0.004224975283138821,
 'host_response_rate^2 number_of_amenities review_period': -1.3428256769004151e-06,
 'host_response_rate^2 number_of_amenities^2': 3.139417250354818e-06,
 'host_response_rate^2 number_of_days_as_host': 0.00014246989042052106,
 'host_response_rate^2 number_of_days_as_host is_superhost': -0.00035180967574875553,
 'host_response_rate^2 number_of_days_as_host not_superhost': -0.0003020523220

 'security_deposit guests_included review_period not_superhost': 0.0007673065563110656,
 'security_deposit guests_included review_period^2': 3.1233360157055523e-07,
 'security_deposit guests_included review_scores_cleanliness': -0.06988821762413877,
 'security_deposit guests_included review_scores_cleanliness is_superhost': -0.042651962233446264,
 'security_deposit guests_included review_scores_cleanliness not_superhost': -0.027211655145714296,
 'security_deposit guests_included review_scores_cleanliness number_of_amenities': -0.000479163456147464,
 'security_deposit guests_included review_scores_cleanliness number_of_days_as_host': -4.460480226779784e-05,
 'security_deposit guests_included review_scores_cleanliness rent_index': 7.907800802010279e-05,
 'security_deposit guests_included review_scores_cleanliness review_period': -0.00019122743180136648,
 'security_deposit guests_included review_scores_cleanliness^2': 0.0032373106911460945,
 'security_deposit guests_included^2': -0.114060

In [12]:
pprint.pprint(sorted(dict(zip(poly_features.get_feature_names(list(X.columns)),poly_model3_coef)).items(), key=lambda kv:kv[1]))

[('guests_included review_scores_cleanliness', -95.84692273372256),
 ('accommodates beds is_superhost^2', -87.35739488838313),
 ('accommodates beds is_superhost', -87.35726757476323),
 ('bathrooms guests_included^2', -82.59159759757642),
 ('guests_included review_scores_cleanliness is_superhost', -74.12023287852715),
 ('guests_included review_scores_cleanliness is_superhost^2',
  -74.12022257004568),
 ('bedrooms^2 not_superhost^2', -72.2292222458426),
 ('bedrooms^2 not_superhost', -72.2291922098124),
 ('bedrooms^2 beds', -68.03544598909914),
 ('accommodates bedrooms not_superhost', -65.21070086523336),
 ('accommodates bedrooms not_superhost^2', -65.21069232931875),
 ('host_response_rate', -64.8585702833838),
 ('bathrooms number_of_amenities', -62.450913063356495),
 ('bathrooms^3', -61.60775803188805),
 ('beds^2', -60.5614332608087),
 ('bedrooms beds guests_included', -57.30349366035393),
 ('bathrooms bedrooms review_scores_cleanliness', -56.30670856364064),
 ('accommodates bedrooms', -

  -0.008694318102750011),
 ('host_response_rate^2 accommodates not_superhost', -0.008615609716285264),
 ('number_of_days_as_host number_of_amenities', -0.008585602025382926),
 ('bathrooms guests_included review_scores_cleanliness number_of_days_as_host',
  -0.008572556645221005),
 ('host_response_rate bedrooms security_deposit', -0.008543948517579924),
 ('accommodates beds cleaning_fee number_of_reviews', -0.00851622794176336),
 ('host_response_rate bedrooms security_deposit is_superhost',
  -0.008509541359760085),
 ('host_response_rate cleaning_fee number_of_amenities', -0.008463567755073688),
 ('extra_people number_of_reviews number_of_amenities', -0.008452267409460635),
 ('bathrooms bedrooms security_deposit number_of_reviews',
  -0.008409370083209588),
 ('rent_index^3 not_superhost', -0.00803515041405515),
 ('rent_index^3 is_superhost', -0.008034958332334677),
 ('host_response_rate bathrooms guests_included number_of_amenities',
  -0.00797120782239827),
 ('bathrooms beds security_d

 ('beds security_deposit guests_included extra_people', -0.0003256917847978548),
 ('accommodates extra_people number_of_days_as_host is_superhost',
  -0.00032391209946985925),
 ('beds^2 extra_people review_period', -0.0003236258177026432),
 ('extra_people number_of_days_as_host not_superhost', -0.00032358255481177085),
 ('rent_index number_of_amenities^2 not_superhost', -0.00032329166295909275),
 ('guests_included rent_index number_of_amenities^2', -0.00032205733391938823),
 ('security_deposit^2 review_scores_cleanliness is_superhost',
  -0.0003217356198294303),
 ('host_response_rate bathrooms extra_people rent_index',
  -0.0003216995983724889),
 ('host_response_rate^2 beds cleaning_fee', -0.00031923719546674104),
 ('extra_people review_scores_cleanliness number_of_days_as_host not_superhost',
  -0.0003186266871315158),
 ('extra_people number_of_days_as_host not_superhost^2',
  -0.00031789982798132094),
 ('beds extra_people^2 number_of_amenities', -0.0003146088709300021),
 ('cleaning_f

 ('host_response_rate extra_people review_scores_cleanliness rent_index',
  -6.592522650231186e-06),
 ('guests_included number_of_reviews review_period is_superhost',
  -6.519970660356272e-06),
 ('accommodates beds security_deposit review_period', -6.417010650698874e-06),
 ('host_response_rate security_deposit guests_included rent_index',
  -6.264636717019705e-06),
 ('review_scores_cleanliness^2 rent_index^2', -6.154436701069846e-06),
 ('security_deposit number_of_amenities^3', -6.1038759068039475e-06),
 ('extra_people number_of_days_as_host number_of_amenities is_superhost',
  -6.018451211939538e-06),
 ('cleaning_fee^2 review_scores_cleanliness rent_index', -5.90626026395307e-06),
 ('bedrooms security_deposit^2 number_of_reviews', -5.858938669556698e-06),
 ('host_response_rate accommodates extra_people review_period',
  -5.738054775892189e-06),
 ('bedrooms extra_people number_of_reviews number_of_days_as_host',
  -5.723391729937543e-06),
 ('beds number_of_reviews^2 review_period', -5.

 ('accommodates not_superhost is_superhost^2', -3.247230815836555e-09),
 ('accommodates security_deposit not_superhost is_superhost',
  -2.9123357996041488e-09),
 ('rent_index number_of_amenities^2 review_period', -2.8655809058649763e-09),
 ('rent_index number_of_days_as_host number_of_amenities review_period',
  -2.6954766127462726e-09),
 ('host_response_rate security_deposit^2 number_of_days_as_host',
  -2.6164176103460335e-09),
 ('cleaning_fee number_of_days_as_host review_period^2',
  -2.463852455392157e-09),
 ('number_of_reviews^2 rent_index^2', -2.334259764646962e-09),
 ('host_response_rate review_period^3', -2.3216246425317166e-09),
 ('security_deposit cleaning_fee^2 number_of_days_as_host',
  -2.199168614575031e-09),
 ('number_of_reviews^2 rent_index number_of_days_as_host',
  -2.0516551790805693e-09),
 ('security_deposit extra_people rent_index number_of_days_as_host',
  -1.9499111283960424e-09),
 ('number_of_days_as_host number_of_amenities review_period^2',
  -1.847701908141

  3.381818319985365e-05),
 ('bedrooms^2 security_deposit number_of_days_as_host', 3.394047605184518e-05),
 ('security_deposit review_scores_cleanliness number_of_days_as_host '
  'not_superhost',
  3.450943296701607e-05),
 ('bedrooms extra_people^2 rent_index', 3.4765141985106943e-05),
 ('cleaning_fee extra_people number_of_days_as_host is_superhost',
  3.497355773917538e-05),
 ('guests_included^2 rent_index^2', 3.498708240464808e-05),
 ('bedrooms security_deposit^2 number_of_amenities', 3.5028840143737834e-05),
 ('bedrooms beds review_period^2', 3.5793028268049964e-05),
 ('accommodates guests_included rent_index^2', 3.580060545741939e-05),
 ('extra_people number_of_reviews review_period is_superhost',
  3.591906877733244e-05),
 ('host_response_rate bathrooms security_deposit rent_index',
  3.61276561839042e-05),
 ('security_deposit number_of_reviews^2 not_superhost', 3.618566433656318e-05),
 ('host_response_rate cleaning_fee number_of_amenities^2',
  3.624617517941009e-05),
 ('securit

 ('beds number_of_days_as_host number_of_amenities is_superhost',
  0.001137949153021051),
 ('beds number_of_reviews review_scores_cleanliness number_of_days_as_host',
  0.0011384348178067802),
 ('guests_included number_of_amenities review_period not_superhost',
  0.0011395804895862841),
 ('bathrooms^2 extra_people number_of_days_as_host', 0.0011472330626542868),
 ('host_response_rate guests_included extra_people number_of_reviews',
  0.0011670801400517586),
 ('accommodates security_deposit review_period is_superhost',
  0.001169444767029979),
 ('host_response_rate accommodates number_of_reviews number_of_amenities',
  0.001170540857994883),
 ('accommodates cleaning_fee number_of_amenities^2', 0.001184213246035369),
 ('number_of_reviews rent_index number_of_days_as_host not_superhost',
  0.0012063250955678614),
 ('number_of_reviews rent_index number_of_days_as_host is_superhost',
  0.0012079523803174815),
 ('bedrooms cleaning_fee guests_included number_of_amenities',
  0.00121354300839

  0.053079325360686734),
 ('accommodates number_of_days_as_host', 0.053334125889080486),
 ('bedrooms cleaning_fee number_of_amenities not_superhost',
  0.053368436241402105),
 ('host_response_rate number_of_reviews review_scores_cleanliness',
  0.05343147728980527),
 ('cleaning_fee guests_included^3', 0.05366056723837815),
 ('host_response_rate bathrooms extra_people not_superhost',
  0.0536912150301894),
 ('bedrooms security_deposit number_of_reviews', 0.05381711152100198),
 ('beds guests_included number_of_days_as_host not_superhost',
  0.054217123461081936),
 ('host_response_rate bedrooms cleaning_fee is_superhost', 0.05435072977365955),
 ('bathrooms extra_people number_of_reviews', 0.05461300532405447),
 ('host_response_rate bathrooms bedrooms guests_included', 0.05554215924880368),
 ('beds rent_index is_superhost^2', 0.05561778226314391),
 ('beds rent_index is_superhost', 0.05562340019554244),
 ('host_response_rate^2 bathrooms not_superhost', 0.05588019622893205),
 ('host_response

In [None]:
polynomial_predict_residual = Y_train.values - polynom_predict #polynom_predict is prediction values
plt.scatter(Y_train,polynomial_predict_residual, marker='+') #shows nonlinear pattern
plt.scatter(Y_train,Y_pred, marker='+') #shows nonlinear pattern