-------------------------------------------------------------------------------------------------------------------
# 1. Import libraries
-------------------------------------------------------------------------------------------------------------------

In [1]:
# Dataframe manipulation
import numpy as np
import pandas as pd
import math

# Visualization
import seaborn as sns
import matplotlib.pyplot as plt
# sns.set(style = 'whitegrid',context = 'notebook')

# Preprocessing
from sklearn.preprocessing import MinMaxScaler, StandardScaler, Imputer, LabelEncoder

# Modelling Helpers:
from sklearn.preprocessing import Imputer, Normalizer, scale
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import RFECV
from sklearn.model_selection import GridSearchCV, KFold, cross_val_score, ShuffleSplit, cross_validate
from sklearn import model_selection
from sklearn.model_selection import train_test_split

# Classification
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from lightgbm import LGBMClassifier
from sklearn.svm import SVC, LinearSVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from xgboost import XGBClassifier
from sklearn.neural_network import MLPClassifier
# Evaluation metrics for Classification
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score, classification_report, roc_auc_score, roc_curve, precision_recall_curve, average_precision_score

# Regression
from sklearn.linear_model import LinearRegression,Ridge,Lasso,RidgeCV,ElasticNet,LogisticRegression
from sklearn.ensemble import RandomForestRegressor,BaggingRegressor,GradientBoostingRegressor,AdaBoostRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from xgboost import XGBRegressor
# Evaluation metrics for Regression 
from sklearn.metrics import mean_squared_log_error, mean_squared_error, r2_score, mean_absolute_error

# Configuration: raise pandas' display limits so wide/long frames are not truncated
for option_name, option_value in (
    ('display.max_rows', 500),
    ('display.max_columns', None),
    ('display.width', 500),
):
    pd.set_option(option_name, option_value)

# Suppress warnings (note: this silences every warning category for the whole session)
import warnings
warnings.filterwarnings("ignore")

print("Setup complete...")

Setup complete...


-------------------------------------------------------------------------------------------------------------------
# 2. Common function
-------------------------------------------------------------------------------------------------------------------

In [2]:
def Check_Missing_Data(df):
    """Summarise the missing values of every column of ``df``.

    Returns a DataFrame indexed by column name with two columns:
    'Total'   - number of null entries in the column
    'Percent' - null entries divided by the number of rows (a 0-1 fraction)
    Both series are sorted in descending order before being combined.
    """
    null_mask = df.isnull()
    null_counts = null_mask.sum()
    # count() on the boolean mask is the number of rows for each column
    row_counts = null_mask.count()

    total = null_counts.sort_values(ascending=False)
    percent = (null_counts / row_counts).sort_values(ascending=False)
    return pd.concat([total, percent], axis=1, keys=['Total', 'Percent'])

-------------------------------------------------------------------------------------------------------------------
# 3. Import dataset
-------------------------------------------------------------------------------------------------------------------

In [3]:
# Read the input CSV into df, then show its dimensions and the first rows
df = pd.read_csv(filepath_or_buffer='explored_ds.csv')
print(df.shape)
df.head()

(77096, 64)


Unnamed: 0,host_response_time,host_neighbourhood,state,neighbourhood,zipcode,market,host_location,city,host_identity_verified,host_is_superhost,host_name,host_verifications,street,neighbourhood_cleansed,experiences_offered,is_location_exact,smart_location,country_code,cancellation_policy,instant_bookable,has_availability,calendar_updated,bed_type,room_type,property_type,country,neighborhood_overview,transit,amenities,summary,space,description,host_response_rate,security_deposit,cleaning_fee,review_scores_value,review_scores_location,review_scores_checkin,review_scores_accuracy,review_scores_communication,review_scores_cleanliness,review_scores_rating,reviews_per_month,bathrooms,beds,bedrooms,host_total_listings_count,host_listings_count,number_of_reviews,availability_365,availability_90,availability_60,availability_30,maximum_nights,minimum_nights,extra_people,guests_included,price,accommodates,calculated_host_listings_count,longitude,latitude,id,host_since
0,within a few hours,LB of Haringey,,LB of Haringey,N8 0EY,London,"London, England, United Kingdom",London,f,t,Guy,"['email', 'phone', 'manual_online', 'reviews',...","London, United Kingdom",Haringey,none,f,"London, United Kingdom",GB,strict_14_with_grace_period,f,t,a week ago,Real Bed,Private room,Apartment,United Kingdom,Details to follow..,details to follow when i get a chance..,"{TV,Internet,Wifi,Kitchen,""Smoking allowed"",Br...",PLEASE CONTACT ME BEFORE BOOKING Homely apartm...,"Hello people, This is a bright, comfortable ro...",PLEASE CONTACT ME BEFORE BOOKING Homely apartm...,100.0,,7.0,10.0,9.0,10.0,10.0,10.0,9.0,97.0,1.65,,1.0,1.0,4.0,4.0,133,291,18,18,18,730,1,15.0,1,35.0,2,4,-0.105666,51.587767,9554,2009-08-14
1,within a day,LB of Ealing,Greater London,LB of Ealing,W13 8,London,"London, England, United Kingdom",Ealing,f,f,Rosa,"['email', 'phone', 'facebook', 'reviews', 'wor...","Ealing, Greater London, United Kingdom",Ealing,none,t,"Ealing, United Kingdom",GB,strict_14_with_grace_period,t,t,4 months ago,Real Bed,Private room,Apartment,United Kingdom,"Ealing Broadway, as short walk from our place ...",extemely good transport links to central londo...,"{TV,""Cable TV"",Internet,Wifi,Breakfast,""Pets l...",The room has a double bed and a single foldawa...,This Listing is for The Sanctury The accommoda...,The room has a double bed and a single foldawa...,67.0,,,9.0,9.0,10.0,8.0,9.0,9.0,90.0,0.09,,1.0,1.0,6.0,6.0,3,0,0,0,0,1125,2,35.0,2,70.0,2,6,-0.314508,51.515645,11076,2009-09-22
2,within a day,LB of Islington,Greater London,LB of Islington,N4 3,London,"London, England, United Kingdom",Islington,f,f,Alina,"['email', 'phone', 'facebook', 'reviews']","Islington, Greater London, United Kingdom",Islington,business,t,"Islington, United Kingdom",GB,moderate,f,t,2 weeks ago,Real Bed,Private room,Apartment,United Kingdom,Finsbury Park is a friendly melting pot commun...,The flat only a 10 minute walk to Finsbury Par...,"{TV,""Cable TV"",Wifi,Kitchen,""Paid parking off ...",My bright double bedroom with a large window h...,"Hello Everyone, I'm offering my lovely double ...",My bright double bedroom with a large window h...,67.0,100.0,15.0,9.0,9.0,9.0,9.0,10.0,10.0,95.0,0.14,1.0,0.0,1.0,4.0,4.0,14,364,89,59,29,29,1,15.0,1,65.0,2,3,-0.111208,51.568017,13913,2009-11-16
3,within an hour,Fitzrovia,Fitzrovia,Fitzrovia,W1T4BP,London,"London, England, United Kingdom",London,t,t,Liz,"['email', 'phone', 'reviews', 'jumio', 'offlin...","London, Fitzrovia, United Kingdom",Westminster,none,t,"London, United Kingdom",GB,strict_14_with_grace_period,f,t,yesterday,Real Bed,Entire home/apt,Apartment,United Kingdom,"Location, location, location! You won't find b...",You can walk to tourist London or take numerou...,"{TV,Wifi,Kitchen,""Paid parking off premises"",E...","Open from June 2018 after a 3-year break, we a...",Ready again from June 2018 for bookings after ...,"Open from June 2018 after a 3-year break, we a...",91.0,350.0,65.0,9.0,10.0,9.0,10.0,9.0,9.0,93.0,0.42,2.0,3.0,3.0,15.0,15.0,39,360,86,56,26,365,3,10.0,4,300.0,6,13,-0.140024,51.520982,17402,2010-01-04
4,,Battersea,,Battersea,SW11 5GX,London,"Florence, Tuscany, Italy",London,t,f,Joe,"['email', 'phone', 'reviews', 'jumio', 'offlin...","London, United Kingdom",Wandsworth,family,t,"London, United Kingdom",GB,strict_14_with_grace_period,f,t,9 months ago,Real Bed,Entire home/apt,Townhouse,United Kingdom,"- Battersea is a quiet family area, easy acces...","- 5 mins walk to Battersea Park, 15 mins walk ...","{TV,""Cable TV"",Internet,Wifi,Kitchen,""Free par...","Artist house, high ceiling bedrooms, private p...",- End of terrace two bedroom house close to So...,"Artist house, high ceiling bedrooms, private p...",,250.0,70.0,9.0,9.0,10.0,10.0,10.0,10.0,98.0,0.94,1.5,2.0,2.0,2.0,2.0,92,363,88,58,28,90,90,15.0,2,150.0,4,1,-0.163764,51.472981,24328,2009-09-28


### remove redundant features

In [4]:
# Columns named in remove_list are dropped from df.
# The list is currently empty, so every column is kept.
remove_list = []
selected_features = [col for col in df.columns.values if col not in remove_list]
df = df.loc[:, selected_features]
df.shape

(77096, 64)

-------------------------------------------------------------------------------------------------------------------
# 4. Fixing incorrect data
-------------------------------------------------------------------------------------------------------------------

The incorrect data handled below was found during the exploration stage.

### Remove outliers (price > 1000)

In [5]:
# Keep only the rows whose price does not exceed 1000
within_price_cap = df['price'] <= 1000
df = df[within_price_cap]
df.shape

(76889, 64)

### Converting format of price

In [122]:
# df['price'] = (df['price'].str.replace(r'[^-+\d.]','').astype(float))
# df['monthly_price'] = (df['monthly_price'].str.replace(r'[^-+\d.]','').astype(float))
# df['weekly_price'] = (df['weekly_price'].str.replace(r'[^-+\d.]','').astype(float))
# df['extra_people'] = (df['extra_people'].str.replace(r'[^-+\d.]','').astype(float))
# df['cleaning_fee'] = (df['cleaning_fee'].str.replace(r'[^-+\d.]','').astype(float))

### remove rows with target variable as 0

In [6]:
# Report how many rows have a zero price vs. a positive price,
# then keep only the rows with a positive price
has_positive_price = df['price'] > 0
print(df[df['price'] == 0].shape)
print(df[has_positive_price].shape)
df = df[has_positive_price]
df.shape

(33, 64)
(76856, 64)


(76856, 64)

-------------------------------------------------------------------------------------------------------------------
# 5. Construct Input & Output variables
-------------------------------------------------------------------------------------------------------------------

### Selected vars for independent vars

In [7]:
# Predictors: every non-object column of df except the target 'price'
num_vars = [col for col in df.columns if col not in ['price'] and df[col].dtypes != 'O']
print('Number of numerical variables', len(num_vars))
X = df[num_vars]
print(X.shape)
X.head()

Number of numerical variables 30
(76856, 30)


Unnamed: 0,host_response_rate,security_deposit,cleaning_fee,review_scores_value,review_scores_location,review_scores_checkin,review_scores_accuracy,review_scores_communication,review_scores_cleanliness,review_scores_rating,reviews_per_month,bathrooms,beds,bedrooms,host_total_listings_count,host_listings_count,number_of_reviews,availability_365,availability_90,availability_60,availability_30,maximum_nights,minimum_nights,extra_people,guests_included,accommodates,calculated_host_listings_count,longitude,latitude,id
0,100.0,,7.0,10.0,9.0,10.0,10.0,10.0,9.0,97.0,1.65,,1.0,1.0,4.0,4.0,133,291,18,18,18,730,1,15.0,1,2,4,-0.105666,51.587767,9554
1,67.0,,,9.0,9.0,10.0,8.0,9.0,9.0,90.0,0.09,,1.0,1.0,6.0,6.0,3,0,0,0,0,1125,2,35.0,2,2,6,-0.314508,51.515645,11076
2,67.0,100.0,15.0,9.0,9.0,9.0,9.0,10.0,10.0,95.0,0.14,1.0,0.0,1.0,4.0,4.0,14,364,89,59,29,29,1,15.0,1,2,3,-0.111208,51.568017,13913
3,91.0,350.0,65.0,9.0,10.0,9.0,10.0,9.0,9.0,93.0,0.42,2.0,3.0,3.0,15.0,15.0,39,360,86,56,26,365,3,10.0,4,6,13,-0.140024,51.520982,17402
4,,250.0,70.0,9.0,9.0,10.0,10.0,10.0,10.0,98.0,0.94,1.5,2.0,2.0,2.0,2.0,92,363,88,58,28,90,90,15.0,2,4,1,-0.163764,51.472981,24328


### Selected vars for dependent vars

In [8]:
# Target: the raw 'price' column (the log-transformed variant below is left commented out)
## df['log_price'] = np.log(df.price)
## df['log_price'][0:3]
Y = df.loc[:, 'price']

In [9]:
# Rebuild df from the numerical predictors X plus the target Y.
# After this cell df holds only those columns, so the object-dtype columns of the
# loaded dataset are no longer part of df.
# `axis` is passed by keyword: the positional form `pd.concat([X, Y], 1)` is
# deprecated and is rejected by pandas 2.x.
df = pd.concat([X, Y], axis=1)
print(df.shape)
df.head(3)

(76856, 31)


Unnamed: 0,host_response_rate,security_deposit,cleaning_fee,review_scores_value,review_scores_location,review_scores_checkin,review_scores_accuracy,review_scores_communication,review_scores_cleanliness,review_scores_rating,reviews_per_month,bathrooms,beds,bedrooms,host_total_listings_count,host_listings_count,number_of_reviews,availability_365,availability_90,availability_60,availability_30,maximum_nights,minimum_nights,extra_people,guests_included,accommodates,calculated_host_listings_count,longitude,latitude,id,price
0,100.0,,7.0,10.0,9.0,10.0,10.0,10.0,9.0,97.0,1.65,,1.0,1.0,4.0,4.0,133,291,18,18,18,730,1,15.0,1,2,4,-0.105666,51.587767,9554,35.0
1,67.0,,,9.0,9.0,10.0,8.0,9.0,9.0,90.0,0.09,,1.0,1.0,6.0,6.0,3,0,0,0,0,1125,2,35.0,2,2,6,-0.314508,51.515645,11076,70.0
2,67.0,100.0,15.0,9.0,9.0,9.0,9.0,10.0,10.0,95.0,0.14,1.0,0.0,1.0,4.0,4.0,14,364,89,59,29,29,1,15.0,1,2,3,-0.111208,51.568017,13913,65.0


-------------------------------------------------------------------------------------------------------------------
# 6. Separate dataset into train and test
-------------------------------------------------------------------------------------------------------------------

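We hold out a test set so that the model can be evaluated on data it has not seen during training, which lets us detect over-fitting. The split is random, so the seed must be set to make it reproducible.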

In [10]:
# Hold out 10% of the rows as the test set.
# random_state fixes the seed so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.1,
    random_state=0,
)
X_train.shape, X_test.shape

((69170, 30), (7686, 30))

In [11]:
X_train.head()

Unnamed: 0,host_response_rate,security_deposit,cleaning_fee,review_scores_value,review_scores_location,review_scores_checkin,review_scores_accuracy,review_scores_communication,review_scores_cleanliness,review_scores_rating,reviews_per_month,bathrooms,beds,bedrooms,host_total_listings_count,host_listings_count,number_of_reviews,availability_365,availability_90,availability_60,availability_30,maximum_nights,minimum_nights,extra_people,guests_included,accommodates,calculated_host_listings_count,longitude,latitude,id
8135,,250.0,20.0,,,,,,,,,1.0,1.0,1.0,1.0,1.0,0,0,0,0,0,1125,5,0.0,1,4,1,0.01281,51.496015,5434392
12227,,80.0,20.0,8.0,8.0,8.0,7.0,8.0,8.0,73.0,0.92,1.5,1.0,1.0,7.0,7.0,37,345,76,46,16,1125,2,30.0,2,2,7,0.014548,51.52117,7711381
38388,,0.0,0.0,,,,,,,,,1.0,1.0,1.0,1.0,1.0,0,0,0,0,0,14,2,0.0,1,1,1,0.070736,51.586956,19707420
51128,100.0,150.0,5.0,9.0,10.0,10.0,10.0,10.0,9.0,96.0,3.74,1.0,1.0,1.0,1.0,1.0,35,0,0,0,0,1,1,0.0,1,2,1,-0.099387,51.483301,23352798
52695,80.0,,,10.0,9.0,9.0,9.0,9.0,8.0,87.0,1.25,1.0,0.0,1.0,4.0,4.0,11,4,4,4,4,1125,1,0.0,1,1,3,-0.251184,51.467573,23821005


----------------------------------------------------------------------------------------------------------------------

# 7. Handling missing values

### 7.1. Categorical variables

For categorical variables, fill missing information by adding an additional category: "missing"

In [12]:
# Collect the object-dtype (categorical) columns of df and compute their missing-value stats
categorical_list = [col for col in df.columns if df[col].dtypes == 'O']
missing_df = Check_Missing_Data(df[categorical_list])
missing_df.head()

Unnamed: 0,Total,Percent


In [13]:
# Keep only the categorical variables with less than 30% missing values
below_threshold = missing_df['Percent'] < 0.3
missing_df = missing_df[below_threshold]
categorical_list = missing_df.index.values
len(categorical_list)

0

In [14]:
# Replace nulls in categorical variables with an explicit 'Missing' label
def fill_categorical_na(df, var_list):
    """Return a copy of ``df`` in which nulls in the ``var_list`` columns become 'Missing'.

    The input frame is not modified; columns outside ``var_list`` are copied unchanged.
    """
    filled = df.copy()
    filled[var_list] = filled[var_list].fillna('Missing')
    return filled
                                      
# Training set: label missing categorical values as "Missing"
X_train = fill_categorical_na(X_train, categorical_list)
# Sanity check: list any engineered variable that still contains nulls (expected: none)
[col for col in missing_df.index.values if X_train[col].isnull().any()]

[]

In [15]:
# Test set: label missing categorical values as "Missing"
X_test = fill_categorical_na(X_test, missing_df.index.values)
# Sanity check: list any engineered variable that still contains nulls (expected: none)
[col for col in missing_df.index.values if X_test[col].isnull().any()]

[]

### 7.2. Numerical variables

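For numerical variables with less than 30% missing values, replace the missing information in the original variable by the mode, or most frequent value. Variables with 30% or more missing values are left out of the selected features. (No additional missing-indicator variable is created in this notebook.)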

In [16]:
# Numerical (non-object) columns of df, leaving out the identifier and the target
removed_list = ['id','price']
numerical_list = [col for col in df.columns if col not in removed_list and df[col].dtypes != 'O']

# alternative approach
# numerics= ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
# numdf = df.select_dtypes(include=numerics)
# numerical_list = numdf.columns
# numerical_list

# Missing-value stats for those numerical columns
missing_df = Check_Missing_Data(df[numerical_list])
missing_df.shape

(29, 2)

In [17]:
# Keep only the numerical variables with less than 30% missing values
below_threshold = missing_df['Percent'] < 0.3
missing_df = missing_df[below_threshold]
numerical_list = missing_df.index.values
len(numerical_list)

27

In [18]:
# function to replace NA in numerical variables by the mode
def fill_numerical_na(df, var_list, reference_df=None):
    """Return a copy of ``df`` with nulls in ``var_list`` columns replaced by the mode.

    Parameters
    ----------
    df : DataFrame to impute; it is not modified.
    var_list : iterable of column names to impute.
    reference_df : optional DataFrame from which the modes are computed
        (e.g. the training set when imputing a test set). Defaults to ``df`` itself.

    Returns
    -------
    DataFrame: the imputed copy. A column whose reference values are all null has
    no mode and is left unchanged instead of raising.
    """
    X = df.copy()
    source = X if reference_df is None else reference_df
    for var in var_list:
        # mode() ignores nulls and returns the modes in sorted order; take the first
        modes = source[var].mode()
        if modes.empty:
            # every reference value is null -> nothing to impute with
            continue
        # assign the result back: `X[var].fillna(..., inplace=True)` acts on a
        # temporary object and does not update X under pandas copy-on-write
        X[var] = X[var].fillna(modes.iloc[0])
    return X

In [19]:
# Training set: replace missing numerical values by each column's mode
X_train = fill_numerical_na(X_train, numerical_list)
# Sanity check: list any engineered variable that still contains nulls (expected: none)
[col for col in missing_df.index.values if X_train[col].isnull().any()]

[]

In [20]:
# Test set: replace missing numerical values with the modes of the TRAINING set.
# Imputation statistics are learned from the training data only (as the scaler is
# further below), so no information from the test set influences preprocessing.
# Filling X_train's nulls with its own mode does not change that mode, so this
# gives the same values whether or not X_train has already been imputed.
train_modes = {}
for var in missing_df.index.values:
    modes = X_train[var].mode()
    if not modes.empty:  # an all-null training column has no mode to impute with
        train_modes[var] = modes.iloc[0]
X_test = X_test.fillna(value=train_modes)
# check that we have no missing information in the engineered variables
[var for var in missing_df.index.values if X_test[var].isnull().sum()>0]


[]

In [21]:
# Dimensions of the imputed train and test predictors
for split in (X_train, X_test):
    print(split.shape)

(69170, 30)
(7686, 30)


----------------------------------------------------------------------------------------------------------------------

# 8. Data Transformation 

## 8.1 Numerical variables

### Log Transform Non-Gaussian distributed variables

We will log transform the numerical variables that do not contain zeros in order to get a more Gaussian-like distribution. This tends to help linear machine learning models. Note that the transformation is currently disabled: the cell below is commented out.

In [139]:
# df['log_price'] = np.log(df['price'])
# df['log_price'] = np.log(df['price'])

## 8.2. Categorical variables

In [22]:
# One-hot encode the categorical variables on train and test stacked together, so that
# both splits end up with the same dummy columns.
# NOTE(review): in the recorded run categorical_list is empty (its length printed 0), so the
# loop body never executes and cat_df stays an empty frame.
# NOTE(review): train_cat_df / test_cat_df are not referenced again in the visible part of
# this notebook - confirm whether they are meant to be saved.
cat_df = pd.DataFrame()
# rows of X_train first, then rows of X_test
full_ds = pd.concat(objs=[X_train[categorical_list], X_test[categorical_list]], axis=0)
for var in categorical_list:
    # drop_first=True leaves out the first level of each variable
    var_dummies = pd.get_dummies(full_ds[var], prefix=var, prefix_sep='_',drop_first=True)
    cat_df = pd.concat([cat_df, var_dummies], axis=1)
    
# split the stacked frame back by position: the first l rows came from X_train
l = X_train.shape[0]
train_cat_df = cat_df[:l].reset_index(drop=True)
test_cat_df = cat_df[l:].reset_index(drop=True)

## 8.3. Text vars

## 8.4. Temporal vars

## 8.5. Selected features

In [23]:
# The features kept for modelling are the numerical variables that passed the missing-value filter
selected_features = numerical_list
n_selected_features = len(selected_features)
print(n_selected_features)
selected_features

27


array(['cleaning_fee', 'review_scores_value', 'review_scores_location',
       'review_scores_checkin', 'review_scores_accuracy',
       'review_scores_communication', 'review_scores_cleanliness',
       'review_scores_rating', 'reviews_per_month', 'bathrooms', 'beds',
       'bedrooms', 'host_total_listings_count', 'host_listings_count',
       'longitude', 'number_of_reviews', 'availability_365',
       'availability_90', 'availability_60', 'availability_30',
       'maximum_nights', 'minimum_nights', 'extra_people',
       'guests_included', 'accommodates',
       'calculated_host_listings_count', 'latitude'], dtype=object)

In [24]:
[var for var in selected_features if X_train[var].isnull().sum()>0]

[]

In [25]:
[var for var in selected_features if X_test[var].isnull().sum()>0]

[]

# 9. Feature Scaling

In [26]:
# Fit the min-max scaler on the training rows only; fit() returns the fitted scaler itself
scaler = MinMaxScaler().fit(X_train[selected_features])

# Apply the fitted scaler to both splits. The resulting frames carry a fresh 0..n-1 index
# and contain only the selected features.
train_num_df, test_num_df = (
    pd.DataFrame(scaler.transform(split[selected_features]), columns=selected_features)
    for split in (X_train, X_test)
)

In [27]:
train_num_df.head()

Unnamed: 0,cleaning_fee,review_scores_value,review_scores_location,review_scores_checkin,review_scores_accuracy,review_scores_communication,review_scores_cleanliness,review_scores_rating,reviews_per_month,bathrooms,beds,bedrooms,host_total_listings_count,host_listings_count,longitude,number_of_reviews,availability_365,availability_90,availability_60,availability_30,maximum_nights,minimum_nights,extra_people,guests_included,accommodates,calculated_host_listings_count,latitude
0,0.032573,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.059819,0.071429,0.05,0.022727,0.000674,0.000674,0.653508,0.0,0.0,0.0,0.0,0.0,5.234033e-07,0.003559,0.0,0.0,0.157895,0.0,0.516919
1,0.032573,0.75,0.75,0.75,0.625,0.75,0.75,0.6625,0.054985,0.107143,0.05,0.022727,0.004717,0.004717,0.655717,0.069943,0.945205,0.844444,0.766667,0.533333,5.234033e-07,0.00089,0.12766,0.043478,0.052632,0.00553,0.582364
2,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.059819,0.071429,0.05,0.022727,0.000674,0.000674,0.72714,0.0,0.0,0.0,0.0,0.0,6.053597e-09,0.00089,0.0,0.0,0.0,0.0,0.753521
3,0.008143,0.875,1.0,1.0,1.0,1.0,0.875,0.95,0.225378,0.071429,0.05,0.022727,0.000674,0.000674,0.510891,0.066163,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.483841
4,0.032573,1.0,0.875,0.875,0.875,0.875,0.75,0.8375,0.074924,0.071429,0.0,0.022727,0.002695,0.002695,0.317936,0.020794,0.010959,0.044444,0.066667,0.133333,5.234033e-07,0.0,0.0,0.0,0.0,0.001843,0.442923


In [147]:
# Dimensions of the scaled train and test feature frames
for scaled_frame in (train_num_df, test_num_df):
    print(scaled_frame.shape)

(69356, 27)
(7707, 27)


----------------------------------------------------------------------------------------------------------------------

## Save preprocessed dataset

In [30]:
# Assemble id + scaled features + target for each split and write them to CSV.
# The id and target parts get their index reset so that all three parts share the same
# 0..n-1 index as the scaled frames and line up row by row.
train_parts = [
    X_train[['id']].reset_index(drop=True),
    train_num_df,
    y_train.to_frame().reset_index(drop=True),
]
train_ds = pd.concat(train_parts, axis=1)
train_ds.to_csv('preprocessed_train_exp02.csv', index=False)

test_parts = [
    X_test[['id']].reset_index(drop=True),
    test_num_df,
    y_test.to_frame().reset_index(drop=True),
]
test_ds = pd.concat(test_parts, axis=1)
test_ds.to_csv('preprocessed_test_exp02.csv', index=False)

In [31]:
# Dimensions of the saved train and test datasets
for saved_frame in (train_ds, test_ds):
    print(saved_frame.shape)

(69170, 29)
(7686, 29)


In [32]:
# Save the column names of the preprocessed training set
# (this list also contains 'id' and the target column, not only the predictors)
saved_columns = pd.Series(train_ds.columns)
saved_columns.to_csv('selected_features.csv', index=False)

In [33]:
pd.Series(train_ds.columns)

0                                 id
1                       cleaning_fee
2                review_scores_value
3             review_scores_location
4              review_scores_checkin
5             review_scores_accuracy
6        review_scores_communication
7          review_scores_cleanliness
8               review_scores_rating
9                  reviews_per_month
10                         bathrooms
11                              beds
12                          bedrooms
13         host_total_listings_count
14               host_listings_count
15                         longitude
16                 number_of_reviews
17                  availability_365
18                   availability_90
19                   availability_60
20                   availability_30
21                    maximum_nights
22                    minimum_nights
23                      extra_people
24                   guests_included
25                      accommodates
26    calculated_host_listings_count
2