# Predicting the price of a flat given a list of predictors using Airbnb dataset.

# Data Dictionary

- **calculated_host_listings_count** - continious value which is actual number of host listings - another metric to measure host experience or to distinguish buisness from individual
- **review_scores_accuracy** - discrete value - numbers between 2 and 10
- **review_scores_cleanliness** - discrete value - numbers between 2 and 10
- **review_scores_checkin** - discrete value - numbers between 2 and 10
- **review_scores_communication** - discrete value - numbers between 2 and 10
- **review_scores_location** - discrete value - numbers between 2 and 10
- **review_scores_value** - discrete value - numbers between 2 and 10
- **instant_bookable** - categorical value - t or false
- **cancellation_policy** - ordinal value with 5 categories that can be ordered from lowest to highest level of flexibility

In [1]:
import os
import gmaps
import pandas as pd
import numpy as np
from sklearn import metrics
import matplotlib.pyplot as plt
import seaborn as sns; sns.set(style="whitegrid", color_codes=True)
from dotenv import load_dotenv
from pandas_profiling import ProfileReport
from math import radians, cos, sin, asin, sqrt
from pandarallel import pandarallel

pandarallel.initialize(progress_bar=True)
import locale
locale.setlocale(locale.LC_ALL, 'en_US.UTF-8');

  import pandas.util.testing as tm


INFO: Pandarallel will run on 4 workers.
INFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.


In [2]:
data  = pd.read_csv('../data/interest-features.csv', sep=';')
data.shape

(50796, 51)

In [199]:
pd.set_option("display.max_columns",300)

In [4]:
data.sample(5)

Unnamed: 0,host_response_time,host_response_rate,host_acceptance_rate,host_is_superhost,host_total_listings_count,host_has_profile_pic,host_identity_verified,neighbourhood_cleansed,neighbourhood_group_cleansed,smart_location,is_location_exact,property_type,room_type,accommodates,bathrooms,bedrooms,beds,bed_type,amenities,price,security_deposit,cleaning_fee,guests_included,extra_people,minimum_nights,maximum_nights,has_availability,calendar_last_scraped,number_of_reviews,number_of_reviews_ltm,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,requires_license,instant_bookable,is_business_travel_ready,cancellation_policy,require_guest_profile_picture,require_guest_phone_verification,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month,first_review_days,last_review_days,host_since_days
11827,0,0,0,f,1.0,t,f,Ditmars Steinway,Queens,"Queens, NY",t,Apartment,Entire home/apt,5,1.0,2.0,2.0,Real Bed,"{TV,""Cable TV"",Internet,Wifi,""Air conditioning...",250.0,0.0,0,1,$0.00,1,1125,t,2020-03-14,0,0,,9.613552,9.284211,9.734848,9.740066,9.599812,9.386351,f,f,f,flexible,f,f,1,1,0,0,1.282087,0.0,0.0,1594.0
50028,within a few hours,98%,97%,f,42.0,t,t,Hell's Kitchen,Manhattan,"New York, NY",t,Loft,Entire home/apt,4,1.0,0.0,0.0,Real Bed,"{TV,""Cable TV"",Internet,Wifi,""Air conditioning...",180.0,1000.0,$150.00,1,$0.00,30,365,t,2020-03-14,0,0,,9.613552,9.284211,9.734848,9.740066,9.599812,9.386351,f,t,f,flexible,t,f,42,42,0,0,1.282087,0.0,0.0,2935.0
42040,within an hour,100%,67%,t,2345.0,t,t,Financial District,Manhattan,"New York, NY",t,Serviced apartment,Entire home/apt,2,1.0,0.0,0.0,Real Bed,"{TV,""Cable TV"",Internet,Wifi,""Air conditioning...",179.0,500.0,$300.00,1,$0.00,30,1125,t,2020-03-14,0,0,,9.613552,9.284211,9.734848,9.740066,9.599812,9.386351,f,f,f,strict,f,f,103,103,0,0,1.282087,0.0,0.0,1635.0
31649,within an hour,100%,100%,t,3.0,t,t,Bedford-Stuyvesant,Brooklyn,"Brooklyn, NY",t,Apartment,Private room,2,1.0,1.0,1.0,Real Bed,"{TV,Wifi,""Air conditioning"",Kitchen,Heating,Wa...",72.0,500.0,$40.00,1,$25.00,6,60,t,2020-03-14,13,11,98.0,10.0,10.0,10.0,10.0,10.0,10.0,f,t,f,moderate,f,f,3,0,3,0,0.8,525.0,107.0,1309.0
7434,0,0,0,f,1.0,t,t,Clinton Hill,Brooklyn,"Brooklyn, NY",t,Apartment,Entire home/apt,2,1.5,1.0,1.0,Real Bed,"{TV,""Cable TV"",Internet,Wifi,""Air conditioning...",145.0,0.0,0,1,$0.00,2,1125,t,2020-03-14,2,0,90.0,9.0,10.0,10.0,10.0,10.0,8.0,f,f,f,moderate,f,f,1,1,0,0,0.04,1375.0,1353.0,1940.0


In [5]:
data['host_response_time'].value_counts()

within an hour        20445
0                     19006
within a few hours     6009
within a day           4218
a few days or more     1118
Name: host_response_time, dtype: int64

In [6]:
df_interest_features = data

In [7]:
df_interest_features.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50796 entries, 0 to 50795
Data columns (total 51 columns):
 #   Column                                        Non-Null Count  Dtype  
---  ------                                        --------------  -----  
 0   host_response_time                            50796 non-null  object 
 1   host_response_rate                            50796 non-null  object 
 2   host_acceptance_rate                          50796 non-null  object 
 3   host_is_superhost                             50796 non-null  object 
 4   host_total_listings_count                     50796 non-null  float64
 5   host_has_profile_pic                          50796 non-null  object 
 6   host_identity_verified                        50796 non-null  object 
 7   neighbourhood_cleansed                        50796 non-null  object 
 8   neighbourhood_group_cleansed                  50796 non-null  object 
 9   smart_location                                50796 non-null 

### Encode Boolean Features

In [8]:
df_interest_features['host_is_superhost'] = df_interest_features['host_is_superhost'].parallel_apply(lambda x: 1 if x=='t' else 0)
df_interest_features['host_has_profile_pic'] = df_interest_features['host_has_profile_pic'].parallel_apply(lambda x: 1 if x=='t' else 0)
df_interest_features['host_identity_verified'] = df_interest_features['host_identity_verified'].parallel_apply(lambda x: 1 if x=='t' else 0)
df_interest_features['require_guest_profile_picture'] = df_interest_features['require_guest_profile_picture'].parallel_apply(lambda x: 1 if x=='t' else 0)
df_interest_features['require_guest_phone_verification'] = df_interest_features['require_guest_phone_verification'].parallel_apply(lambda x: 1 if x=='t' else 0)
df_interest_features['is_location_exact'] = df_interest_features['is_location_exact'].parallel_apply(lambda x: 1 if x=='t' else 0)
df_interest_features['requires_license'] = df_interest_features['requires_license'].parallel_apply(lambda x: 1 if x=='t' else 0)
df_interest_features['instant_bookable'] = df_interest_features['instant_bookable'].parallel_apply(lambda x: 1 if x=='t' else 0)
df_interest_features['is_business_travel_ready'] = df_interest_features['is_business_travel_ready'].parallel_apply(lambda x: 1 if x=='t' else 0)
df_interest_features['has_availability'] = df_interest_features['has_availability'].parallel_apply(lambda x: 1 if x=='t' else 0)

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=12699), Label(value='0 / 12699')))…

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=12699), Label(value='0 / 12699')))…

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=12699), Label(value='0 / 12699')))…

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=12699), Label(value='0 / 12699')))…

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=12699), Label(value='0 / 12699')))…

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=12699), Label(value='0 / 12699')))…

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=12699), Label(value='0 / 12699')))…

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=12699), Label(value='0 / 12699')))…

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=12699), Label(value='0 / 12699')))…

### Ajusting labels for categorical features

In [9]:
df_interest_features['host_response_time']= df_interest_features['host_response_time'].replace('0', 'no answer') 
df_interest_features['host_response_time'].value_counts()

within an hour        20445
no answer             19006
within a few hours     6009
within a day           4218
a few days or more     1118
Name: host_response_time, dtype: int64

### Convert Numbers as String to Number

In [10]:
df_interest_features['host_response_rate'] = df_interest_features['host_response_rate'].parallel_apply(lambda x: float(x.rstrip("%")))
df_interest_features['host_acceptance_rate'] = df_interest_features['host_acceptance_rate'].parallel_apply(lambda x: float(x.rstrip("%")))

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=12699), Label(value='0 / 12699')))…

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=12699), Label(value='0 / 12699')))…

### Drop Unuseful Columns

In [11]:
df_interest_features = df_interest_features.drop(['smart_location'], axis=1)

In [12]:
df_interest_features['cleaning_fee'] = df_interest_features['cleaning_fee'].parallel_apply(lambda x: re.sub(r'\$', '', x))
df_interest_features['cleaning_fee'] = df_interest_features['cleaning_fee'].parallel_apply(lambda x: float(locale.atof(x)))

df_interest_features['extra_people'] = df_interest_features['extra_people'].parallel_apply(lambda x: re.sub(r'\$', '', x))
df_interest_features['extra_people'] = df_interest_features['extra_people'].parallel_apply(lambda x: float(locale.atof(x)))

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=12699), Label(value='0 / 12699')))…

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=12699), Label(value='0 / 12699')))…

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=12699), Label(value='0 / 12699')))…

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=12699), Label(value='0 / 12699')))…

In [13]:
df_interest_features['review_scores_rating'] = df_interest_features['review_scores_rating'].fillna(0)

In [14]:
df_interest_features['calendar_last_scraped'] = pd.to_datetime(df_interest_features['calendar_last_scraped'])
df_interest_features['calendar_last_scraped_days'] = (pd.datetime.now() - df_interest_features['calendar_last_scraped']).dt.days
df_interest_features = df_interest_features.drop(['calendar_last_scraped'], axis=1)

  


In [15]:
df_interest_features.to_csv('../data/interest-features-2.csv', sep=';', index=False)

In [16]:
data  = pd.read_csv(r'../data/interest-features-2.csv', sep=';')
data.shape

(50796, 50)

In [17]:
with_cat_df = data

### Ordinal Encoder to response_time_dict

In [18]:
response_time_dict = {'no answer' : 1,
                      'within an hour': 2,
                      'within a few hours': 3,
                      'within a day': 4,
                      'a few days or more': 5}

with_cat_df['host_response_time_ordinal'] = with_cat_df['host_response_time'].map(response_time_dict)
with_cat_df['host_response_time'] = with_cat_df.drop(['host_response_time'], axis=1)

### One Hot Encoding to bed_type

In [19]:
with_cat_df = pd.get_dummies(with_cat_df, prefix=['bed_type'], columns=['bed_type'])

### One Hot Encoding to neighbourhood_group

In [20]:
with_cat_df = pd.get_dummies(with_cat_df, prefix=['neighbourhood_group'], columns=['neighbourhood_group_cleansed'])
with_cat_df = with_cat_df.drop(['neighbourhood_cleansed'], axis=1)

### One Hot Encoding to room_type

In [21]:
with_cat_df = pd.get_dummies(with_cat_df, prefix=['room_type'], columns=['room_type'])

### Ordinal Encoder to cancellation_policy

In [22]:
response_time_dict = {'flexible' : 1,
                      'moderate': 2,
                      'strict': 3,
                      'strict_14_with_grace_period': 4,
                      'super_strict_30': 5,
                      'super_strict_60': 6}

with_cat_df['cancellation_policy_ordinal'] = with_cat_df['cancellation_policy'].map(response_time_dict)
with_cat_df['cancellation_policy'] = with_cat_df.drop(['cancellation_policy'], axis=1)

### One Hot Encoding to property_type

In [23]:
with_cat_df = pd.get_dummies(with_cat_df, prefix=['property_type'], columns=['property_type'])

# Features Creation

- **listing_duration** = (last_review - first_review) - value to be evaluated in days
- **hosting_duration** = (last_review - host_since) - value to be evaluated in days
- **price_per_person** - (price/accommodates)

In [24]:
# listing_duration = (last_review - first_review) - value to be evaluated in days
with_cat_df['listing_duration'] = with_cat_df['first_review_days'] - with_cat_df['last_review_days']

# hosting_duration = (last_review - host_since) - value to be evaluated in days
with_cat_df['hosting_duration'] = with_cat_df['host_since_days'] - with_cat_df['last_review_days']

# price_per_person = (price/accommodates)
with_cat_df['price_per_person'] = (with_cat_df['price']/with_cat_df['accommodates'])

In [143]:
amenities_df = pd.DataFrame(columns = ['name'])
amenities_df['name'] = with_cat_df['amenities']

In [144]:
amenities_df['name'] = amenities_df['name'].parallel_apply(lambda x: x.strip('}{').split(','))
amenities_df['name'] = amenities_df['name'].parallel_apply(lambda x: [re.sub("['\"]","", l ) for l in x])

In [177]:
amenities_df.shape

(50796, 1)

In [180]:
import pandas
from sklearn.preprocessing import MultiLabelBinarizer

# Binarise labels
mlb = MultiLabelBinarizer()
expandedLabelData = mlb.fit_transform(amenities_df["name"])
labelClasses = mlb.classes_
expandedLabels = pandas.DataFrame(expandedLabelData, columns=labelClasses)

In [197]:
final_df = pd.concat([with_cat_df, expandedLabels], axis=1).drop(['amenities'], axis=1).reset_index()

In [202]:
final_df.sample(5)

Unnamed: 0,index,host_response_time,host_response_rate,host_acceptance_rate,host_is_superhost,host_total_listings_count,host_has_profile_pic,host_identity_verified,is_location_exact,accommodates,bathrooms,bedrooms,beds,price,security_deposit,cleaning_fee,guests_included,extra_people,minimum_nights,maximum_nights,has_availability,number_of_reviews,number_of_reviews_ltm,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,requires_license,instant_bookable,is_business_travel_ready,cancellation_policy,require_guest_profile_picture,require_guest_phone_verification,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month,first_review_days,last_review_days,host_since_days,calendar_last_scraped_days,host_response_time_ordinal,bed_type_Airbed,bed_type_Couch,bed_type_Futon,bed_type_Pull-out Sofa,bed_type_Real Bed,neighbourhood_group_Bronx,neighbourhood_group_Brooklyn,neighbourhood_group_Manhattan,neighbourhood_group_Queens,neighbourhood_group_Staten Island,room_type_Entire home/apt,room_type_Hotel room,room_type_Private room,room_type_Shared room,cancellation_policy_ordinal,property_type_Aparthotel,property_type_Apartment,property_type_Barn,property_type_Bed and breakfast,property_type_Boat,property_type_Boutique hotel,property_type_Bungalow,property_type_Bus,property_type_Cabin,property_type_Camper/RV,property_type_Casa particular (Cuba),property_type_Castle,property_type_Cave,property_type_Condominium,property_type_Cottage,property_type_Dome house,property_type_Dorm,property_type_Earth house,property_type_Farm stay,property_type_Guest suite,property_type_Guesthouse,property_type_Hostel,property_type_Hotel,property_type_House,property_type_Houseboat,property_type_In-law,property_type_Island,property_type_Lighthouse,property_type_Loft,property_type_Other,property_type_Resort,property_type_Serviced apartment,property_type_Tent,property_type_Timeshare,property_type_Tiny house,property_type_Townhouse,property_type_Train,property_type_Treehouse,property_type_Villa,property_type_Yurt,listing_duration,hosting_duration,price_per_person,Unnamed: 105,toilet,24-hour check-in,Accessible-height bed,Accessible-height toilet,Air conditioning,Air purifier,BBQ grill,Baby bath,Baby monitor,Babysitter recommendations,Baking sheet,Barbecue utensils,Bath towel,Bathrobes,Bathroom essentials,Bathtub,Bathtub with bath chair,Beach essentials,Beachfront,Bed linens,Bedroom comforts,Bluetooth speaker,Body soap,Bottled water,Bread maker,Breakfast,Building staff,Buzzer/wireless intercom,Cable TV,Carbon monoxide detector,Cat(s),Changing table,Children’s books and toys,Children’s dinnerware,Cleaning before checkout,Coffee maker,Cooking basics,Crib,Disabled parking spot,Dishes and silverware,Dishwasher,Dog(s),Doorman,Dryer,EV charger,Electric profiling bed,Elevator,Essentials,Ethernet connection,Extra pillows and blankets,Extra space around bed,Family/kid friendly,Fire extinguisher,Fireplace guards,Firm mattress,First aid kit,Fixed grab bars for shower,Fixed grab bars for toilet,Flat path to guest entrance,Free parking on premises,Free street parking,Full kitchen,Game console,Garden or backyard,Ground floor access,Gym,Hair dryer,Handheld shower head,Hangers,Heating,High chair,Host greets you,Hot tub,Hot water,Hot water kettle,Indoor fireplace,Internet,Iron,Keypad,Kitchen,Kitchenette,Lake access,Laptop friendly workspace,Lock on bedroom door,Lockbox,Long term stays allowed,Luggage dropoff allowed,Microwave,Mini bar,Mini fridge,Mobile hoist,No stairs or steps to enter,Other,Other pet(s),Outlet covers,Oven,Pack ’n Play/travel crib,Paid parking off premises,Paid parking on premises,Patio or balcony,Pets allowed,Pets live on this property,Pocket wifi,Pool,Private bathroom,Private entrance,Private living room,Record player,Refrigerator,Room-darkening shades,Safe,Safety card,Self check-in,Shampoo,Shower chair,Shower gel,Single level home,Ski-in/Ski-out,Slippers,Smart lock,Smoke detector,Smoking allowed,Snacks,Stair gates,Step-free shower,Stove,Suitable for events,TV,Table corner guards,Toilet paper,Trash can,Turndown service,Washer,Washer/Dryer,Waterfront,Well-lit path to entrance,Wheelchair accessible,Wide clearance to shower,Wide doorway to guest bathroom,Wide entrance,Wide entrance for guests,Wide entryway,Wide hallways,Wifi,Window guards,translation missing: en.hosting_amenity_49,translation missing: en.hosting_amenity_50
9928,9928,0,0.0,0.0,0,1.0,1,1,1,3,1.0,0.0,2.0,97.0,200.0,30.0,1,0.0,5,15,t,2,0,90.0,8.0,6.0,9.0,9.0,7.0,7.0,0,0,0,0,0,0,1,1,0,0,0.04,1655.0,1271.0,1964.0,42,1,0,0,0,0,1,0,1,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,384.0,693.0,32.333333,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0
50627,50627,0,0.0,0.0,0,1.0,1,0,1,4,1.0,1.0,2.0,200.0,0.0,90.0,2,50.0,1,1125,t,0,0,0.0,9.613552,9.284211,9.734848,9.740066,9.599812,9.386351,0,0,0,0,0,0,1,1,0,0,1.282087,0.0,0.0,46.0,42,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,4,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,46.0,50.0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
8758,8758,0,0.0,0.0,0,1.0,1,1,1,2,1.0,1.0,1.0,150.0,0.0,50.0,1,0.0,6,10,t,0,0,0.0,9.613552,9.284211,9.734848,9.740066,9.599812,9.386351,0,0,0,0,0,0,1,1,0,0,1.282087,0.0,0.0,1992.0,42,1,0,0,0,0,1,0,1,0,0,0,1,0,0,0,4,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,1992.0,75.0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
28677,28677,98,98.0,99.0,0,36.0,1,0,1,1,2.0,1.0,1.0,60.0,0.0,24.0,1,0.0,2,20,t,38,31,90.0,9.0,9.0,10.0,10.0,9.0,9.0,0,1,0,98,0,0,5,0,5,0,1.96,621.0,59.0,1144.0,42,2,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,562.0,1085.0,60.0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
16411,16411,0,0.0,0.0,0,1.0,1,1,1,2,1.0,1.0,1.0,55.0,0.0,0.0,1,0.0,2,1125,t,1,0,100.0,10.0,8.0,10.0,10.0,10.0,10.0,0,0,0,0,0,0,1,0,1,0,0.02,1352.0,1352.0,2082.0,42,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,730.0,27.5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1


In [203]:
final_df.to_csv('data/final-features.csv', sep=';', index=False)

### It's expensive to travel in the weekends ?
### Which price average in most hosted neighborhood?