# Airbnb House Analysis and Recommender Application

##### Jie Bao, Kuangyi Zhang, Lanny Xu
##### Dr. Bamshad Mobasher, Spring 2018

### Import Library

In [1]:
%pylab inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn import *
from sklearn import neighbors, tree, naive_bayes, cross_validation
from sklearn.cross_validation import KFold
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet, SGDRegressor

from sklearn import preprocessing
from sklearn import feature_selection
from sklearn import cross_validation
import operator

Populating the interactive namespace from numpy and matplotlib




### Read in data and preprocessing

#### Read in data

In [2]:
listings = pd.read_table("listings_edited.csv", index_col=0, header='infer', delimiter=",")
print listings.shape
listings.head(5)

(5207, 38)


Unnamed: 0_level_0,listing_url,name,summary,picture_url,host_response_time,host_response_rate,host_is_superhost,host_identity_verified,neighbourhood_cleansed,state,...,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,instant_bookable,cancellation_policy,reviews_per_month
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
13824783,https://www.airbnb.com/rooms/13824783,Full floor of a city cottage (up to 4 guests),This 3rd-floor garrett apartment (built-out at...,https://a0.muscache.com/im/pictures/510d45f8-e...,within an hour,100%,t,t,Lincoln Square,IL,...,99.0,10.0,10.0,10.0,10.0,10.0,10.0,t,moderate,2.57
16740225,https://www.airbnb.com/rooms/16740225,Guest Room of Two Bedroom Condo,Warm and secure room in a Two Bedroom Condo lo...,https://a0.muscache.com/im/pictures/697182d3-d...,within an hour,100%,f,t,Lincoln Square,IL,...,,,,,,,,t,strict,
18125245,https://www.airbnb.com/rooms/18125245,"Cozy, spacious 2 flat in Lincoln Square!","This cute, spacious 2 flat in Lincoln Square i...",https://a0.muscache.com/im/pictures/bf761217-c...,within an hour,100%,f,t,Lincoln Square,IL,...,95.0,10.0,10.0,10.0,10.0,10.0,10.0,f,moderate,4.0
8362570,https://www.airbnb.com/rooms/8362570,Lincoln Square Ravenswood,"Lincoln square, Ravenswood and Andersonville g...",https://a0.muscache.com/im/pictures/224a38c0-a...,within a few hours,100%,t,t,Lincoln Square,IL,...,98.0,10.0,10.0,10.0,10.0,10.0,10.0,f,flexible,0.87
789867,https://www.airbnb.com/rooms/789867,Cozy Private Room in a Classic Chicago Appartm...,$40 OFF COUPON FOR ALL NEW AIRBNB GUESTS - VIS...,https://a0.muscache.com/im/pictures/72031963/0...,within an hour,100%,f,t,Lincoln Square,IL,...,94.0,9.0,9.0,10.0,10.0,10.0,9.0,t,strict,1.96


In [3]:
listings.describe(include="all").T

Unnamed: 0,count,unique,top,freq,mean,std,min,25%,50%,75%,max
listing_url,5207,5207.0,https://www.airbnb.com/rooms/6558625,1.0,,,,,,,
name,5206,5169.0,Wells St Suites - Old Town 3 Bed,3.0,,,,,,,
summary,5125,4841.0,This property offers luxury in Chicago’s trend...,23.0,,,,,,,
picture_url,5207,5202.0,https://a0.muscache.com/im/pictures/70087089/b...,3.0,,,,,,,
host_response_time,4951,4.0,within an hour,3257.0,,,,,,,
host_response_rate,4951,55.0,100%,3873.0,,,,,,,
host_is_superhost,5207,2.0,f,3840.0,,,,,,,
host_identity_verified,5207,2.0,t,4093.0,,,,,,,
neighbourhood_cleansed,5207,72.0,West Town,729.0,,,,,,,
state,5207,3.0,IL,5204.0,,,,,,,


In [4]:
listings.dtypes

listing_url                     object
name                            object
summary                         object
picture_url                     object
host_response_time              object
host_response_rate              object
host_is_superhost               object
host_identity_verified          object
neighbourhood_cleansed          object
state                           object
city                            object
zipcode                         object
property_type                   object
room_type                       object
accommodates                     int64
bathrooms                      float64
bedrooms                       float64
beds                           float64
bed_type                        object
amenities                       object
price                           object
security_deposit                object
cleaning_fee                    object
guests_included                  int64
extra_people                    object
minimum_nights           

#### Drop unwanted attributes

In [5]:
# state, city, zipcode, drop neighbourhood_cleansed, reviews_per_month
listings_edit = listings.drop(['state', 'city', 'zipcode', 'neighbourhood_cleansed', 'reviews_per_month'], axis=1)

#### Handle missing values

Remove listing without review data

In [6]:
listings_edit = listings_edit.dropna(subset=['host_response_time','host_response_rate','review_scores_rating', 'review_scores_accuracy', 'review_scores_cleanliness', 'review_scores_checkin', 'review_scores_communication', 'review_scores_location', 'review_scores_value'])

In [7]:
listings_edit.shape

(4273, 33)

Fill in missing values

In [8]:
# rooms
listings_edit = listings_edit.fillna({"bathrooms": "0", "bedrooms": "0", "beds": "0"})

In [9]:
# fee
listings_edit = listings_edit.fillna({"price": "$0", "security_deposit": "$0", "cleaning_fee": "$0", "extra_people": "$0"})

#### Transfer object to numeric values

In [10]:
# list all object columns
obj_listings_edit = listings_edit.select_dtypes(include=['object']).copy()
obj_listings_edit.head()

Unnamed: 0_level_0,listing_url,name,summary,picture_url,host_response_time,host_response_rate,host_is_superhost,host_identity_verified,property_type,room_type,...,bedrooms,beds,bed_type,amenities,price,security_deposit,cleaning_fee,extra_people,instant_bookable,cancellation_policy
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
13824783,https://www.airbnb.com/rooms/13824783,Full floor of a city cottage (up to 4 guests),This 3rd-floor garrett apartment (built-out at...,https://a0.muscache.com/im/pictures/510d45f8-e...,within an hour,100%,t,t,House,Private room,...,1,2,Real Bed,"{TV,""Cable TV"",""Wireless Internet"",""Air condit...",$56.00,$0,$30.00,$15.00,t,moderate
18125245,https://www.airbnb.com/rooms/18125245,"Cozy, spacious 2 flat in Lincoln Square!","This cute, spacious 2 flat in Lincoln Square i...",https://a0.muscache.com/im/pictures/bf761217-c...,within an hour,100%,f,t,Townhouse,Private room,...,2,3,Real Bed,"{TV,""Cable TV"",Internet,""Wireless Internet"",""A...",$80.00,$150.00,$65.00,$15.00,f,moderate
8362570,https://www.airbnb.com/rooms/8362570,Lincoln Square Ravenswood,"Lincoln square, Ravenswood and Andersonville g...",https://a0.muscache.com/im/pictures/224a38c0-a...,within a few hours,100%,t,t,Condominium,Private room,...,1,1,Real Bed,"{TV,""Cable TV"",Internet,""Wireless Internet"",""A...",$80.00,$0,$10.00,$10.00,f,flexible
789867,https://www.airbnb.com/rooms/789867,Cozy Private Room in a Classic Chicago Appartm...,$40 OFF COUPON FOR ALL NEW AIRBNB GUESTS - VIS...,https://a0.muscache.com/im/pictures/72031963/0...,within an hour,100%,f,t,Apartment,Private room,...,1,1,Real Bed,"{TV,""Cable TV"",Internet,""Wireless Internet"",""A...",$20.00,$0,$0,$10.00,t,strict
16701336,https://www.airbnb.com/rooms/16701336,Master Bedroom of Two Bedroom Condo,Warm and secure room in a Two Bedroom Condo lo...,https://a0.muscache.com/im/pictures/83ff781b-f...,within an hour,100%,f,t,Condominium,Private room,...,1,1,Real Bed,"{TV,""Wireless Internet"",""Air conditioning"",Kit...",$28.00,$0,$35.00,$20.00,t,strict


Transfer categorical to numerical values using manually input

In [11]:
# host_response_time
# check counts for each type
listings_edit["host_response_time"].value_counts()

within an hour        2865
within a few hours     802
within a day           580
a few days or more      26
Name: host_response_time, dtype: int64

In [12]:
# replace the value manually
response_time_num = {"host_response_time": {"within an hour": 0, "within a few hours": 1, 
                                                "within a day": 2, "a few days or more": 3, "none": 4}}
listings_edit.replace(response_time_num, inplace=True)

In [13]:
listings_edit["host_response_time"].value_counts()

0    2865
1     802
2     580
3      26
Name: host_response_time, dtype: int64

In [14]:
# host_is_superhost
listings_edit["host_is_superhost"].value_counts()

f    2989
t    1284
Name: host_is_superhost, dtype: int64

In [15]:
superhost_num = {"host_is_superhost": {"t": 1, "f": 0}}
listings_edit.replace(superhost_num, inplace=True)

In [16]:
print listings_edit["host_is_superhost"].value_counts()

0    2989
1    1284
Name: host_is_superhost, dtype: int64


Transfer categorical to numerical values using sklearn.LabelEncoder

In [17]:
from sklearn.preprocessing import LabelEncoder  

In [18]:
# host_id_verified
le = LabelEncoder()
print listings_edit["host_identity_verified"].value_counts()
listings_edit["host_identity_verified"] = le.fit_transform(listings_edit["host_identity_verified"])
print listings_edit["host_identity_verified"].value_counts()

t    3488
f     785
Name: host_identity_verified, dtype: int64
1    3488
0     785
Name: host_identity_verified, dtype: int64


In [19]:
# property_type_code
print listings_edit["property_type"].value_counts()
listings_edit["property_type"] = le.fit_transform(listings_edit["property_type"])
print listings_edit["property_type"].value_counts()

Apartment             2911
House                  639
Condominium            466
Loft                    88
Townhouse               72
Other                   26
Guesthouse              13
Bed & Breakfast         12
Dorm                    12
Boat                     8
Guest suite              5
Bungalow                 5
Hostel                   4
Boutique hotel           4
Villa                    3
In-law                   2
Timeshare                1
Vacation home            1
Serviced apartment       1
Name: property_type, dtype: int64
0     2911
10     639
5      466
12      88
16      72
13      26
8       13
6       12
1       12
2        8
7        5
4        5
3        4
9        4
18       3
11       2
15       1
14       1
17       1
Name: property_type, dtype: int64


In [20]:
# room_type_code
print listings_edit["room_type"].value_counts()
listings_edit["room_type"] = le.fit_transform(listings_edit["room_type"])
print listings_edit["room_type"].value_counts()

Entire home/apt    2544
Private room       1602
Shared room         127
Name: room_type, dtype: int64
0    2544
1    1602
2     127
Name: room_type, dtype: int64


In [21]:
# bed_type_code
print listings_edit["bed_type"].value_counts()
listings_edit["bed_type"] = le.fit_transform(listings_edit["bed_type"])
print listings_edit["bed_type"].value_counts()

Real Bed         4126
Airbed             56
Futon              48
Pull-out Sofa      27
Couch              16
Name: bed_type, dtype: int64
4    4126
0      56
2      48
3      27
1      16
Name: bed_type, dtype: int64


Transfer categorical to numerical values using pandas LabelEncoding

In [22]:
# instant_bookable
print listings_edit["instant_bookable"].value_counts()
listings_edit["instant_bookable"] = listings_edit["instant_bookable"].astype('category')
listings_edit["instant_bookable"] = listings_edit["instant_bookable"].cat.codes
print listings_edit["instant_bookable"].value_counts()

f    3035
t    1238
Name: instant_bookable, dtype: int64
0    3035
1    1238
Name: instant_bookable, dtype: int64


In [23]:
# cancellation_policy
print listings_edit["cancellation_policy"].value_counts()

listings_edit["cancellation_policy"] = listings_edit["cancellation_policy"].astype('category')
listings_edit["cancellation_policy"] = listings_edit["cancellation_policy"].cat.codes

print listings_edit["cancellation_policy"].value_counts()

strict             1946
moderate           1372
flexible            949
super_strict_30       5
super_strict_60       1
Name: cancellation_policy, dtype: int64
2    1946
1    1372
0     949
3       5
4       1
Name: cancellation_policy, dtype: int64


Transfer strings to integers

In [24]:
listings_edit['bathrooms'] = listings_edit['bathrooms'].astype('int')
listings_edit['bedrooms'] = listings_edit['bedrooms'].astype('int')
listings_edit[ 'beds'] = listings_edit[ 'beds'].astype('int')

Transfer percentages to integers

In [25]:
listings_edit['host_response_rate'] = listings_edit['host_response_rate'].str[:-1].astype('int')

In [26]:
listings_edit['host_response_rate'].head()

id
13824783    100
18125245    100
8362570     100
789867      100
16701336    100
Name: host_response_rate, dtype: int32

Transfer dollar prices to floats

In [27]:
listings_edit[['price']] = (listings_edit['price'].replace( '[\$,)]','', regex=True ).astype(float))
listings_edit[['security_deposit']] = (listings_edit['security_deposit'].replace( '[\$,)]','', regex=True ).astype(float))
listings_edit[['cleaning_fee']] = (listings_edit['cleaning_fee'].replace( '[\$,)]','', regex=True ).astype(float))
listings_edit[['extra_people']] = (listings_edit['extra_people'].replace( '[\$,)]','', regex=True ).astype(float))

#### Extract amenities values and add new columns

In [28]:
# TV, wireless internet, air condition, heating, pets, washer, dryer
attrs = ['TV', 'Internet', 'Air conditioning', 'Kitchen' , 'Heating', 'Washer', 'Dryer']
rows = listings_edit.shape[0]
for attr in attrs:
    listings_edit[attr] = pd.Series(np.zeros(rows), index=listings_edit.index).astype(integer)
print listings_edit.amenities[13824783]
print listings_edit.TV[13824783]
print listings_edit.Kitchen[13824783]

{TV,"Cable TV","Wireless Internet","Air conditioning",Kitchen,"Free parking on premises",Breakfast,"Pets live on this property",Dog(s),Heating,"Family/kid friendly",Washer,Dryer,"Smoke detector","Fire extinguisher",Essentials,Shampoo,"Lock on bedroom door","24-hour check-in",Hangers,"Hair dryer",Iron,"Laptop friendly workspace","Room-darkening shades"}
0
0


In [29]:
for index, row in listings_edit.iterrows():
    for attr in attrs:
        if (row['amenities'].find(attr)>=0):
            listings_edit.set_value(index, attr, 1)
print listings_edit.amenities[13824783]
print listings_edit.TV[13824783]
print listings_edit.Kitchen[13824783]

{TV,"Cable TV","Wireless Internet","Air conditioning",Kitchen,"Free parking on premises",Breakfast,"Pets live on this property",Dog(s),Heating,"Family/kid friendly",Washer,Dryer,"Smoke detector","Fire extinguisher",Essentials,Shampoo,"Lock on bedroom door","24-hour check-in",Hangers,"Hair dryer",Iron,"Laptop friendly workspace","Room-darkening shades"}
1
1


In [30]:
#check with the objects now
obj_listings_edit = listings_edit.select_dtypes(include=['object']).copy()
obj_listings_edit.head()

Unnamed: 0_level_0,listing_url,name,summary,picture_url,amenities
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
13824783,https://www.airbnb.com/rooms/13824783,Full floor of a city cottage (up to 4 guests),This 3rd-floor garrett apartment (built-out at...,https://a0.muscache.com/im/pictures/510d45f8-e...,"{TV,""Cable TV"",""Wireless Internet"",""Air condit..."
18125245,https://www.airbnb.com/rooms/18125245,"Cozy, spacious 2 flat in Lincoln Square!","This cute, spacious 2 flat in Lincoln Square i...",https://a0.muscache.com/im/pictures/bf761217-c...,"{TV,""Cable TV"",Internet,""Wireless Internet"",""A..."
8362570,https://www.airbnb.com/rooms/8362570,Lincoln Square Ravenswood,"Lincoln square, Ravenswood and Andersonville g...",https://a0.muscache.com/im/pictures/224a38c0-a...,"{TV,""Cable TV"",Internet,""Wireless Internet"",""A..."
789867,https://www.airbnb.com/rooms/789867,Cozy Private Room in a Classic Chicago Appartm...,$40 OFF COUPON FOR ALL NEW AIRBNB GUESTS - VIS...,https://a0.muscache.com/im/pictures/72031963/0...,"{TV,""Cable TV"",Internet,""Wireless Internet"",""A..."
16701336,https://www.airbnb.com/rooms/16701336,Master Bedroom of Two Bedroom Condo,Warm and secure room in a Two Bedroom Condo lo...,https://a0.muscache.com/im/pictures/83ff781b-f...,"{TV,""Wireless Internet"",""Air conditioning"",Kit..."


#### Extract attributes for feature analysis

In [31]:
listings_eval = listings_edit.drop(['listing_url','name','summary','picture_url','amenities', 'review_scores_accuracy', 'review_scores_cleanliness', 'review_scores_checkin', 'review_scores_communication', 'review_scores_location', 'review_scores_value'], axis=1)

In [32]:
listings_eval.head()

Unnamed: 0_level_0,host_response_time,host_response_rate,host_is_superhost,host_identity_verified,property_type,room_type,accommodates,bathrooms,bedrooms,beds,...,review_scores_rating,instant_bookable,cancellation_policy,TV,Internet,Air conditioning,Kitchen,Heating,Washer,Dryer
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
13824783,0,100,1,1,10,1,4,1,1,2,...,99.0,1,1,1,1,1,1,1,1,1
18125245,0,100,0,1,16,1,6,1,2,3,...,95.0,0,1,1,1,1,1,1,1,1
8362570,1,100,1,1,5,1,2,1,1,1,...,98.0,0,0,1,1,1,1,1,1,1
789867,0,100,0,1,0,1,3,1,1,1,...,94.0,1,2,1,1,1,1,1,1,1
16701336,0,100,0,1,5,1,2,1,1,1,...,100.0,1,2,1,1,1,1,1,1,1


## Feature Selection

#### Split data and target attributes

In [33]:
# Feature analysis
fa_target = np.array(listings_eval['review_scores_rating'])
data = listings_eval.drop(['review_scores_rating'], axis=1)
print data.T.shape
fa_train = np.array(data)

(28, 4273)


#### Define a function to compute the optimal percentile for FS

In [61]:
# Take as input the training data, target variable, the model and any other parameters
# return the optimal percentage of the most informative features to use
def calc_percent(train, target, model, interval, K):
    percentiles = range(1, 100, interval)
    results = []
    minScore = 100
    print "Percentile\tMAE"
    for i in range(1, 100, interval):
        fs = feature_selection.SelectPercentile(feature_selection.f_regression, percentile=i)
        train_fs = fs.fit_transform(train, target)
        scores = abs(cross_validation.cross_val_score(model, train_fs, target, cv=K, scoring='neg_mean_absolute_error'))
        # cross validation, return score array for each croos-validation
        # neg_mean_absolute_error returns negative MAE
        if (scores.mean() < minScore):
            minScore = scores.mean()
            optimal_percentile = i
        results = np.append(results, scores.mean())
        print "%d\t\t%.4f" %(i,scores.mean())
        
    print "\nOptimal percentile of features:{0}".format(optimal_percentile)
    optimal_num_features = int(optimal_percentile*len(train.T)/100)
    print "Optimal number of features:{0}".format(optimal_num_features)
    
    return optimal_percentile

In [62]:
# Try Linear regression
linreg = LinearRegression()
linreg.fit(fa_train, fa_target)

lr_opPer = calc_percent(fa_train, fa_target, linreg, 5, 5)

Percentile	MAE
1		3.9073
6		3.8844
11		3.8668
16		3.8581
21		3.8570
26		3.8347
31		3.8360
36		3.8345
41		3.8165
46		3.8167
51		3.8131
56		3.8212
61		3.7853
66		3.7844
71		3.7895
76		3.7913
81		3.7916
86		3.7915
91		3.7848
96		3.7866

Optimal percentile of features:66
Optimal number of features:18


In [63]:
fs = feature_selection.SelectPercentile(feature_selection.f_regression, percentile=lr_opPer)
train_lr_fs = fs.fit(fa_train, fa_target)
print "Selcted features:"
print data.columns[fs.get_support()].values

Selcted features:
['host_response_rate' 'host_is_superhost' 'host_identity_verified'
 'accommodates' 'beds' 'bed_type' 'price' 'minimum_nights'
 'number_of_reviews' 'instant_bookable' 'cancellation_policy' 'TV'
 'Internet' 'Air conditioning' 'Kitchen' 'Heating' 'Washer' 'Dryer']


In [64]:
# Try Ridge regression
# 这里还可以试试不同的参数，像作业那样用GridSearch系统分析
rireg = Ridge(fit_intercept=True, alpha = 0.3)
rireg.fit(fa_train, fa_target)

rr_opPer= calc_percent(fa_train, fa_target, rireg, 5, 5)

Percentile	MAE
1		3.9073
6		3.8845
11		3.8667
16		3.8581
21		3.8569
26		3.8346
31		3.8359
36		3.8344
41		3.8164
46		3.8166
51		3.8130
56		3.8211
61		3.7852
66		3.7843
71		3.7894
76		3.7912
81		3.7915
86		3.7914
91		3.7847
96		3.7865

Optimal percentile of features:66
Optimal number of features:18


In [65]:
fs = feature_selection.SelectPercentile(feature_selection.f_regression, percentile=rr_opPer)
train_rr_fs = fs.fit(fa_train, fa_target)
print "Selcted features:"
print data.columns[fs.get_support()].values

Selcted features:
['host_response_rate' 'host_is_superhost' 'host_identity_verified'
 'accommodates' 'beds' 'bed_type' 'price' 'minimum_nights'
 'number_of_reviews' 'instant_bookable' 'cancellation_policy' 'TV'
 'Internet' 'Air conditioning' 'Kitchen' 'Heating' 'Washer' 'Dryer']


In [66]:
# Try Lasso regression
lareg = Lasso(fit_intercept=True, alpha = 0.3)
lareg.fit(fa_train, fa_target)

la_opPer= calc_percent(fa_train, fa_target, lareg, 5, 5)

Percentile	MAE
1		3.9713
6		3.9661
11		3.9661
16		3.9661
21		3.9661
26		3.9592
31		3.9568
36		3.9564
41		3.9302
46		3.9302
51		3.9302
56		3.9346
61		3.9091
66		3.9091
71		3.9091
76		3.9091
81		3.9098
86		3.9098
91		3.9098
96		3.9134

Optimal percentile of features:61
Optimal number of features:17


In [67]:
fs = feature_selection.SelectPercentile(feature_selection.f_regression, percentile=la_opPer)
train_la_fs = fs.fit(fa_train, fa_target)
print "Selcted features:"
print data.columns[fs.get_support()].values

Selcted features:
['host_response_rate' 'host_is_superhost' 'accommodates' 'beds' 'bed_type'
 'price' 'minimum_nights' 'number_of_reviews' 'instant_bookable'
 'cancellation_policy' 'TV' 'Internet' 'Air conditioning' 'Kitchen'
 'Heating' 'Washer' 'Dryer']
