## Importing Necessary Libraries

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import csv
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error

## Reading in Data Files
The original datasets are too large to upload to Github. Therefore, I merged the three datasets and downloaded a smaller sample labeled "airbnb_combined.csv". This sample is less than 25 MB which is the size limit for Github.

In [3]:
#calendar = pd.read_csv("calendar.csv")
#listings = pd.read_csv("listings.csv")
#reviews = pd.read_csv("reviews.csv")

## Merging datasets via "id" and "listing_id"

In [9]:
#merge datasets
#listing_reviews = listings.merge(reviews,on='id',how='left')
#all_df = listing_reviews.merge(calendar, on='listing_id',how='left')

In [48]:
#sample_all_df = all_df.sample(n=5500)
#sample_all_df.to_csv('airbnb_combined.csv')
all_df = pd.read_csv("airbnb_combined.csv")

## 1. Can I accurately predict price using just neighborhood location and size (max accommodation)?

In [49]:
#price_x, #neighborhood_group_cleaned
price_loc = all_df[['price_x','neighbourhood_group_cleansed','accommodates']].copy()

### Identifying NaN Values

In [50]:
#Determining percentage of missing values in the dataset
def missing(ml_dataset):
    '''
    Provides the percentage of missing rows per column for a dataframe.
    
    Parameters:
    ml_dataset: The dataframe you want to find the percentage of missing values for
    
    Returns:
    Print statement that shows each column in the dataframe and the percentage of null values per column
    '''
    print("Missing values in %")
    print((round((ml_dataset.isnull().sum() * 100/ len(ml_dataset)),2).sort_values(ascending=False)))

In [51]:
missing(price_loc)

Missing values in %
price_x                         0.0
neighbourhood_group_cleansed    0.0
accommodates                    0.0
dtype: float64


In [52]:
price_loc['neighbourhood_group_cleansed'].unique()

array(['Central Area', 'Beacon Hill', 'Downtown', 'Seward Park',
       'Northgate', 'Delridge', 'Ballard', 'Queen Anne', 'Magnolia',
       'University District', 'Other neighborhoods', 'Capitol Hill',
       'Cascade', 'West Seattle', 'Rainier Valley', 'Lake City',
       'Interbay'], dtype=object)

### Encoding Categorical Variables

In [53]:
dummies = pd.get_dummies(price_loc['neighbourhood_group_cleansed'])
price_loc = pd.concat([price_loc.drop('neighbourhood_group_cleansed', axis=1),dummies], axis=1)
price_loc

Unnamed: 0,price_x,accommodates,Ballard,Beacon Hill,Capitol Hill,Cascade,Central Area,Delridge,Downtown,Interbay,Lake City,Magnolia,Northgate,Other neighborhoods,Queen Anne,Rainier Valley,Seward Park,University District,West Seattle
0,$129.00,4,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
1,$95.00,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
2,$85.00,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,$250.00,6,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
4,$34.00,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5495,$118.00,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
5496,$95.00,6,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
5497,$95.00,6,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
5498,$80.00,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [54]:
price_loc['price_x'] = pd.to_numeric(price_loc['price_x'].str.replace('[^-.0-9]', ''))

  price_loc['price_x'] = pd.to_numeric(price_loc['price_x'].str.replace('[^-.0-9]', ''))


### Model Development

In [55]:
X = price_loc.drop(['price_x'], axis=1)
y = price_loc['price_x']

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=3)

lr_model = LinearRegression(normalize=True)
lr_model.fit(X_train, y_train)

y_test_preds = lr_model.predict(X_test)
"The r-squared score for the model using only quantitative variables was {} on {} values.".format(r2_score(y_test, y_test_preds), len(y_test))

If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), LinearRegression())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)




'The r-squared score for the model using only quantitative variables was 0.5661762396437526 on 1375 values.'

## 2. Is there a relationship between review score rating and annual availability?

### Identifying NaN Values

In [56]:
review_available = all_df[['review_scores_rating','availability_365']]
missing(review_available)

Missing values in %
review_scores_rating    14.47
availability_365         0.00
dtype: float64


In [57]:
#dropping null values in the column we are trying to predict
review_available = review_available.dropna(axis=0)
missing(review_available)

Missing values in %
review_scores_rating    0.0
availability_365        0.0
dtype: float64


### Model Development

In [58]:
X2 = pd.DataFrame(review_available[['review_scores_rating']])
y2 = review_available['availability_365']

X2_train, X2_test, y2_train, y2_test = train_test_split(X2, y2, random_state=3)

lr_model2 = LinearRegression(normalize=True)
lr_model2.fit(X2_train, y2_train)

y2_test_preds = lr_model2.predict(X2_test)
"The r-squared score for the model using only quantitative variables was {} on {} values.".format(r2_score(y2_test, y2_test_preds), len(y2_test))

If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), LinearRegression())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)




'The r-squared score for the model using only quantitative variables was -0.00047657688631774775 on 1176 values.'

## 3. Can the data be used to predict review scores? If so, what variables are the greatest contributors to a high review score?

In [59]:
all_df.head()

Unnamed: 0.1,Unnamed: 0,id,listing_url,scrape_id,last_scraped,name,summary,space,description,experiences_offered,neighborhood_overview,notes,transit,thumbnail_url,medium_url,picture_url,xl_picture_url,host_id,host_url,host_name,host_since,host_location,host_about,host_response_time,host_response_rate,host_acceptance_rate,host_is_superhost,host_thumbnail_url,host_picture_url,host_neighbourhood,host_listings_count,host_total_listings_count,host_verifications,host_has_profile_pic,host_identity_verified,street,neighbourhood,neighbourhood_cleansed,neighbourhood_group_cleansed,city,state,zipcode,market,smart_location,country_code,country,latitude,longitude,is_location_exact,property_type,room_type,accommodates,bathrooms,bedrooms,beds,bed_type,amenities,square_feet,price_x,weekly_price,monthly_price,security_deposit,cleaning_fee,guests_included,extra_people,minimum_nights,maximum_nights,calendar_updated,has_availability,availability_30,availability_60,availability_90,availability_365,calendar_last_scraped,number_of_reviews,first_review,last_review,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,requires_license,license,jurisdiction_names,instant_bookable,cancellation_policy,require_guest_profile_picture,require_guest_phone_verification,calculated_host_listings_count,reviews_per_month,listing_id,date_x,reviewer_id,reviewer_name,comments,date_y,available,price_y
0,2451,8016610,https://www.airbnb.com/rooms/8016610,20160104002432,2016-01-04,Cute 2 BR/1 BA in Central Seattle,"This cute two bedroom, one bathroom house in C...","The Space This cute two bedroom, one bathroom ...","This cute two bedroom, one bathroom house in C...",none,,,,https://a0.muscache.com/ac/pictures/105984563/...,https://a0.muscache.com/im/pictures/105984563/...,https://a0.muscache.com/ac/pictures/105984563/...,https://a0.muscache.com/ac/pictures/105984563/...,31672756,https://www.airbnb.com/users/show/31672756,Simon,2015-04-21,"Seattle, Washington, United States",,within an hour,100%,100%,f,https://a1.muscache.com/ac/users/31672756/prof...,https://a1.muscache.com/ac/users/31672756/prof...,Leschi,1.0,1.0,"['email', 'phone', 'reviews', 'jumio']",t,t,"28th Avenue South, Seattle, WA 98144, United S...",Leschi,Leschi,Central Area,Seattle,WA,98144,Seattle,"Seattle, WA",US,United States,47.59481,-122.296164,t,House,Entire home/apt,4,1.0,2.0,2.0,Real Bed,"{TV,Internet,""Wireless Internet"",Kitchen,Heati...",,$129.00,,,$200.00,$78.00,4,$50.00,2,15,2 days ago,t,24,54,84,229,2016-01-04,7,2015-09-07,2015-12-29,100.0,10.0,10.0,10.0,10.0,9.0,10.0,f,,WASHINGTON,f,strict,f,f,1,1.75,,,,,,,,
1,1755,3959442,https://www.airbnb.com/rooms/3959442,20160104002432,2016-01-04,"Modern Studio, Lovely Neighborhood","Just outside the bustle of Capitol Hill, our h...",The little studio (which accommodates one or t...,"Just outside the bustle of Capitol Hill, our h...",none,Just outside the lively bustle of Capitol Hill...,"My partner designed and built our home, and I'...",We provide one bicycle (with helmet) for you t...,https://a2.muscache.com/ac/pictures/51049675/b...,https://a2.muscache.com/im/pictures/51049675/b...,https://a2.muscache.com/ac/pictures/51049675/b...,https://a2.muscache.com/ac/pictures/51049675/b...,20519441,https://www.airbnb.com/users/show/20519441,Rebecca,2014-08-25,"Seattle, Washington, United States",I work in real estate and am also a poet (an o...,within an hour,100%,100%,t,https://a0.muscache.com/ac/users/20519441/prof...,https://a0.muscache.com/ac/users/20519441/prof...,,1.0,1.0,"['email', 'phone', 'google', 'reviews', 'kba']",t,t,"15th Avenue, Seattle, WA 98122, United States",,Minor,Central Area,Seattle,WA,98122,Seattle,"Seattle, WA",US,United States,47.609502,-122.311772,t,Apartment,Entire home/apt,2,1.0,0.0,1.0,Real Bed,"{""Wireless Internet"",Kitchen,""Free Parking on ...",,$95.00,$550.00,"$2,000.00",,$50.00,1,$0.00,3,32,a week ago,t,20,50,80,351,2016-01-04,68,2014-09-14,2015-12-27,98.0,10.0,10.0,10.0,10.0,10.0,10.0,f,,WASHINGTON,f,moderate,f,f,1,4.27,365550.0,2013-03-31,4881286.0,Ivy,Dirk&Jaq responded to all our inquiries and co...,2016-07-27,f,
2,4122,7800238,https://www.airbnb.com/rooms/7800238,20160104002432,2016-01-04,Private stand alone garden cottage,The cottage is like being in the country in th...,A large fig tree separates the cottage from th...,The cottage is like being in the country in th...,none,"Beacon Hill is very residential, multi ethnic ...",,The cottage is located between the light rail ...,https://a0.muscache.com/ac/pictures/106473940/...,https://a0.muscache.com/im/pictures/106473940/...,https://a0.muscache.com/ac/pictures/106473940/...,https://a0.muscache.com/ac/pictures/106473940/...,41056418,https://www.airbnb.com/users/show/41056418,Andy And Betty,2015-08-10,"Seattle, Washington, United States",We enjoy sharing our space in the city. Life-...,within an hour,100%,100%,f,https://a2.muscache.com/ac/users/41056418/prof...,https://a2.muscache.com/ac/users/41056418/prof...,North Beacon Hill,1.0,1.0,"['email', 'phone', 'reviews', 'kba']",t,t,"South Dawson Street, Seattle, WA 98108, United...",North Beacon Hill,Mid-Beacon Hill,Beacon Hill,Seattle,WA,98108,Seattle,"Seattle, WA",US,United States,47.557082,-122.309203,t,Cabin,Entire home/apt,2,1.0,1.0,1.0,Real Bed,"{TV,""Cable TV"",Internet,""Wireless Internet"",Ki...",,$85.00,$500.00,"$1,700.00",,,1,$0.00,2,90,4 days ago,t,8,34,62,320,2016-01-04,18,2015-10-19,2015-12-29,99.0,10.0,10.0,10.0,10.0,10.0,10.0,f,,WASHINGTON,t,flexible,f,f,1,6.92,,,,,,,,
3,3058,607788,https://www.airbnb.com/rooms/607788,20160104002432,2016-01-04,"condo Seattle, Wa.",studio (2 ppl) through 2 bedroom units (6 ppl)...,studio through 2 bedroom units available. The...,studio (2 ppl) through 2 bedroom units (6 ppl)...,none,,,,https://a1.muscache.com/ac/pictures/7677277/e9...,https://a1.muscache.com/im/pictures/7677277/e9...,https://a1.muscache.com/ac/pictures/7677277/e9...,https://a1.muscache.com/ac/pictures/7677277/e9...,103427,https://www.airbnb.com/users/show/103427,Magalie,2010-04-04,"West Hollywood, California, United States",Hi everyone!! I am a bay area native and I lov...,within a day,87%,100%,f,https://a1.muscache.com/ac/users/103427/profil...,https://a1.muscache.com/ac/users/103427/profil...,Anaheim,84.0,84.0,"['email', 'phone', 'facebook', 'reviews', 'jum...",t,t,"9th Ave, Seattle, WA 98101, United States",Central Business District,Central Business District,Downtown,Seattle,WA,98101,Seattle,"Seattle, WA",US,United States,47.613135,-122.331764,t,Apartment,Private room,6,2.0,1.0,3.0,Real Bed,"{""Cable TV"",""Elevator in Building"",Heating,Was...",,$250.00,,,$500.00,$50.00,1,$0.00,1,5,5 months ago,t,30,60,90,365,2016-01-04,2,2012-07-27,2013-10-17,83.0,4.0,8.0,10.0,10.0,10.0,6.0,f,,WASHINGTON,f,strict,f,f,2,0.05,5682.0,2011-10-08,759299.0,Cathy,Hostess with the most-est! The room was exactl...,2016-04-20,t,$53.00
4,4886,8409941,https://www.airbnb.com/rooms/8409941,20160104002432,2016-01-04,Bird's Nest Queen Room w/Breakfast,"This clean, quiet & cozy queen bedroom shares ...","Your room is on the upper floor, up the stairs...","This clean, quiet & cozy queen bedroom shares ...",none,This Seward Park neighborhood is a quiet Jewis...,Allessandro is the on site host at the house n...,There are a multitude of ways to travel to our...,https://a2.muscache.com/ac/pictures/1378ed05-4...,https://a2.muscache.com/im/pictures/1378ed05-4...,https://a2.muscache.com/ac/pictures/1378ed05-4...,https://a2.muscache.com/ac/pictures/1378ed05-4...,26967583,https://www.airbnb.com/users/show/26967583,Dario,2015-01-30,"Seattle, Washington, United States","Welcome, my name is Dario.\r\n\r\nI was born i...",within an hour,98%,100%,f,https://a2.muscache.com/ac/users/26967583/prof...,https://a2.muscache.com/ac/users/26967583/prof...,University District,21.0,21.0,"['email', 'phone', 'facebook', 'reviews']",t,f,"Morgan St. south, Seattle, WA 98118, United St...",Seward Park,Seward Park,Seward Park,Seattle,WA,98118,Seattle,"Seattle, WA",US,United States,47.543614,-122.267834,t,House,Private room,2,1.0,1.0,1.0,Real Bed,"{Internet,""Wireless Internet"",Kitchen,""Free Pa...",,$34.00,,,$500.00,$10.00,1,$10.00,1,1125,3 days ago,t,16,44,73,73,2016-01-04,12,2015-11-02,2015-12-24,96.0,10.0,10.0,10.0,10.0,10.0,9.0,f,,WASHINGTON,t,strict,f,f,21,5.63,,,,,,,,


In [60]:
#host_response_time, host_response_rate, host_is_superhost, neighbourhood_group_cleansed, property_type, bathrooms, beds, price_x
review_var = all_df[['host_response_time','host_response_rate','host_is_superhost','neighbourhood_group_cleansed','property_type','bathrooms','beds','price_x','review_scores_rating']]
review_var.head()

Unnamed: 0,host_response_time,host_response_rate,host_is_superhost,neighbourhood_group_cleansed,property_type,bathrooms,beds,price_x,review_scores_rating
0,within an hour,100%,f,Central Area,House,1.0,2.0,$129.00,100.0
1,within an hour,100%,t,Central Area,Apartment,1.0,1.0,$95.00,98.0
2,within an hour,100%,f,Beacon Hill,Cabin,1.0,1.0,$85.00,99.0
3,within a day,87%,f,Downtown,Apartment,2.0,3.0,$250.00,83.0
4,within an hour,98%,f,Seward Park,House,1.0,1.0,$34.00,96.0


### Identifying NaN Values

In [61]:
missing(review_var)

Missing values in %
review_scores_rating            14.47
host_response_time              12.89
host_response_rate              12.89
bathrooms                        0.24
host_is_superhost                0.04
property_type                    0.02
neighbourhood_group_cleansed     0.00
beds                             0.00
price_x                          0.00
dtype: float64


#### A. Dropping NaN values from the variable we're trying to predict

In [62]:
review_var = review_var.dropna(subset='review_scores_rating', axis=0)

#### B. Filling NaN values in the Response Time column with "no time given"

In [63]:
review_var['host_response_time'].value_counts()

within an hour        2024
within a few hours    1477
within a day           644
a few days or more      22
Name: host_response_time, dtype: int64

In [64]:
review_var['host_response_time'].fillna(value='no time given', inplace=True)

In [65]:
review_var['host_response_time'].value_counts()

within an hour        2024
within a few hours    1477
within a day           644
no time given          537
a few days or more      22
Name: host_response_time, dtype: int64

#### C. Imputing NaN values in response rate with the mean

In [66]:
review_var['host_response_rate'].value_counts().head()

100%    2436
90%      400
98%      307
75%      302
87%      287
Name: host_response_rate, dtype: int64

In [67]:
review_var['host_response_rate'] = review_var['host_response_rate'].str.rstrip('%').astype('float') / 100.0

In [68]:
review_var['host_response_rate'].fillna(value=review_var['host_response_rate'].mean(), inplace=True)

#### D. Imputing NaN values in "bathrooms" with the median

In [69]:
review_var['bathrooms'].value_counts()

1.0    3838
2.0     515
1.5     151
2.5      82
3.0      41
3.5      40
0.5      16
0.0       3
4.0       3
8.0       2
4.5       2
Name: bathrooms, dtype: int64

In [70]:
review_var['bathrooms'].fillna(value=1, inplace=True)

In [71]:
missing(review_var)

Missing values in %
host_response_time              0.0
host_response_rate              0.0
host_is_superhost               0.0
neighbourhood_group_cleansed    0.0
property_type                   0.0
bathrooms                       0.0
beds                            0.0
price_x                         0.0
review_scores_rating            0.0
dtype: float64


In [72]:
review_var.dtypes

host_response_time               object
host_response_rate              float64
host_is_superhost                object
neighbourhood_group_cleansed     object
property_type                    object
bathrooms                       float64
beds                            float64
price_x                          object
review_scores_rating            float64
dtype: object

#### E. Changing price_x data type to float

In [73]:
review_var['price_x'] = review_var['price_x'].str.lstrip('$').str.replace(',','').astype('float')

### Encoding Categorical Variables

In [74]:
review_var['property_type'].value_counts()

House              2526
Apartment          1669
Condominium         343
Townhouse            70
Loft                 24
Other                17
Bed & Breakfast      17
Cabin                16
Camper/RV             8
Bungalow              6
Boat                  3
Yurt                  1
Treehouse             1
Tent                  1
Chalet                1
Dorm                  1
Name: property_type, dtype: int64

#### A. Condensing property types

In [75]:
review_var['property_type'] = review_var['property_type'].replace(to_replace=["Camper/RV","Bungalow","Boat","Tent","Treehouse","Dorm","Chalet","Yurt"], 
                                    value="Other")

In [76]:
review_var['property_type'].value_counts()

House              2526
Apartment          1669
Condominium         343
Townhouse            70
Other                39
Loft                 24
Bed & Breakfast      17
Cabin                16
Name: property_type, dtype: int64

#### B. Creating dummy variables

In [77]:
def get_dummies(df):
    """
    Creates dummy variables for categorical columns in a dataframe
    
    Parameters:
    df: dataframe of choice
    
    Returns:
    df: dataframe with categorical variables replaced with dummy variables
    
    """
    categorical_columns = df.select_dtypes(include=['object']).columns.tolist()
    for col in categorical_columns:
        df = pd.concat([df.drop(columns=[col], axis=1), pd.get_dummies(df[col], prefix=col, prefix_sep='_', drop_first=True)], axis=1)
    return df

In [78]:
cleaned_review_var = get_dummies(review_var)

In [79]:
cleaned_review_var.head()

Unnamed: 0,host_response_rate,bathrooms,beds,price_x,review_scores_rating,host_response_time_no time given,host_response_time_within a day,host_response_time_within a few hours,host_response_time_within an hour,host_is_superhost_t,neighbourhood_group_cleansed_Beacon Hill,neighbourhood_group_cleansed_Capitol Hill,neighbourhood_group_cleansed_Cascade,neighbourhood_group_cleansed_Central Area,neighbourhood_group_cleansed_Delridge,neighbourhood_group_cleansed_Downtown,neighbourhood_group_cleansed_Interbay,neighbourhood_group_cleansed_Lake City,neighbourhood_group_cleansed_Magnolia,neighbourhood_group_cleansed_Northgate,neighbourhood_group_cleansed_Other neighborhoods,neighbourhood_group_cleansed_Queen Anne,neighbourhood_group_cleansed_Rainier Valley,neighbourhood_group_cleansed_Seward Park,neighbourhood_group_cleansed_University District,neighbourhood_group_cleansed_West Seattle,property_type_Bed & Breakfast,property_type_Cabin,property_type_Condominium,property_type_House,property_type_Loft,property_type_Other,property_type_Townhouse
0,1.0,1.0,2.0,129.0,100.0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
1,1.0,1.0,1.0,95.0,98.0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,1.0,1.0,1.0,85.0,99.0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
3,0.87,2.0,3.0,250.0,83.0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0.98,1.0,1.0,34.0,96.0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0


## Model Development

In [80]:
X3 = cleaned_review_var.drop(columns=['review_scores_rating'])
y3 = cleaned_review_var['review_scores_rating']

X3_train, X3_test, y3_train, y3_test = train_test_split(X3,y3, random_state=3)

lr_model3 = RandomForestRegressor()
lr_model3.fit(X3_train, y3_train)

y3_test_preds = lr_model3.predict(X3_test)
"The r-squared score for the model using only quantitative variables was {} on {} values.".format(r2_score(y3_test, y3_test_preds), len(y3_test))

'The r-squared score for the model using only quantitative variables was 0.2875022230069286 on 1176 values.'

#### Feature Importance

In [81]:
feature_importances = pd.DataFrame(lr_model3.feature_importances_, index=X3_train.columns.tolist(), columns=['importance'])
feature_importances.sort_values('importance', ascending=False)

Unnamed: 0,importance
price_x,0.314062
host_response_rate,0.166599
neighbourhood_group_cleansed_Downtown,0.085948
host_response_time_within a day,0.06675
beds,0.050339
bathrooms,0.04431
host_is_superhost_t,0.032701
neighbourhood_group_cleansed_University District,0.028593
property_type_Condominium,0.025438
property_type_House,0.023764
