# Airbnb Data for Predicting House Prices
- [blog](https://towardsdatascience.com/an-end-to-end-data-science-project-that-will-boost-your-portfolio-c53cfe16f0e3)
- [dataset](https://www.kaggle.com/adityadeshpande23/amsterdam-airbnb)

In [1]:
import os

import pandas as pd
from pandas_profiling import ProfileReport

import ipywidgets as widgets
import gmaps

# Load the raw Amsterdam listings from the zipped JSON export.
data = pd.read_json("data/amsterdam.json.zip")

# Convert price to numeric.
# The raw values are strings that may carry a '$' sign and ',' separators.
# Strip both in a single vectorized pass (missing values pass through as NaN
# instead of raising, unlike a per-element lambda) and then cast to a number.
data['price'] = pd.to_numeric(
    data['price'].str.replace(r'[$,]', '', regex=True)
)

In [4]:
# The dataset contains 13 variables: 11 numeric and 2 categorical
# (room_type and instant_bookable).
data.columns

Index(['host_listings_count', 'accommodates', 'bathrooms', 'bedrooms',
       'guests_included', 'minimum_nights', 'number_of_reviews',
       'calculated_host_listings_count', 'price', 'latitude', 'longitude',
       'room_type', 'instant_bookable'],
      dtype='object')

## 01_Explore_Data

In [7]:
# Summary statistics for the numeric columns: about 15k listings in total
# (15,181 rows). host_listings_count, bathrooms and bedrooms report slightly
# lower counts, i.e. they contain a handful of missing values.
data.describe()

Unnamed: 0,host_listings_count,accommodates,bathrooms,bedrooms,guests_included,minimum_nights,number_of_reviews,calculated_host_listings_count,price,latitude,longitude
count,15178.0,15181.0,15163.0,15169.0,15181.0,15181.0,15181.0,15181.0,15181.0,15181.0,15181.0
mean,3.404994,2.917858,1.122832,1.412222,1.516106,2.882551,17.785456,2.934787,134.799223,52.365353,4.889025
std,11.324173,1.404973,0.356547,0.885127,1.004709,7.215722,34.747999,8.968696,85.751273,0.015738,0.034901
min,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,19.0,52.290308,4.753513
25%,1.0,2.0,1.0,1.0,1.0,2.0,2.0,1.0,90.0,52.355616,4.865016
50%,1.0,2.0,1.0,1.0,1.0,2.0,7.0,1.0,118.0,52.365028,4.886344
75%,1.0,4.0,1.0,2.0,2.0,3.0,18.0,1.0,150.0,52.374829,4.907857
max,106.0,17.0,8.0,10.0,16.0,523.0,496.0,91.0,3142.0,52.426224,5.027689


In [4]:
# Generate an automated profiling report for the listings frame.
profile = ProfileReport(
    data,
    title='Amsterdam Airbnb Profiling Report',
)

# Leaving the report as the cell's last expression renders it inline.
profile

HBox(children=(HTML(value='Summarize dataset'), FloatProgress(value=0.0, max=27.0), HTML(value='')))




HBox(children=(HTML(value='Generate report structure'), FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(HTML(value='Render HTML'), FloatProgress(value=0.0, max=1.0), HTML(value='')))






In [2]:
# The map shows that locations in the city centre are more expensive, while the outskirts are cheaper.

# Google Maps needs an API key to draw its base map. Read it from the
# environment so the key is never stored in the notebook; when the variable
# is unset nothing is configured and the cell behaves exactly as before.
google_api_key = os.environ.get('GOOGLE_API_KEY')
if google_api_key:
    gmaps.configure(api_key=google_api_key)

# Fixed-size map widget that hosts the heatmap layer.
fig = gmaps.Map(
    layout={'width': '1000px',
            'height': '500px',
            'padding': '10px'})

# One heatmap point per listing, weighted by the listing's price.
fig.add_layer(
    gmaps.heatmap_layer(
        data[['latitude', 'longitude']],
        weights=data['price']))

fig

Map(configuration={'api_key': None}, data_bounds=[(52.33387821596733, 4.819224643627479), (52.39682818292789, …