In [29]:
import pandas as pd
import plotly.express as px
import geopandas as gpd

## Summary

### Analysis ideas for this project filtered by Amsterdam or Neighbourhoods

General:
- Total listings (out of x and insert %)

Rooms:
- Number of listings by room type (number and %)

Values:
- Average price per night
- Average nights booked in the last 365 days (use 365 - avg 'availability_365')
- Average income for hosts (avg price * avg nights booked)
- Average minimum cost for booking (avg 'minimum_nights' * avg price)

Hosts:
- Total superhosts (out of x and insert %)
- Histogram with new hosts ('host_since')




### Analysis ideas for this project only filtered by Amsterdam

All of the analyses above, plus:

General:
- Total of listings by Neighbourhood
- Rank of avg prices per night by Neighbourhood

## Load cleansed data

In [30]:
# Cleansed listings table: 'review_scores_rating' is kept as a raw string and
# 'host_since' is parsed into datetimes while reading.
listings = pd.read_csv(
    '../data_cleansed/listings_cleansed.csv',
    converters={'review_scores_rating': str},
    parse_dates=['host_since'],
)

# Raw neighbourhoods GeoJSON, loaded through geopandas.
neighbourhoods = gpd.read_file('../data_raw/neighbourhoods.geojson')

In [39]:
# Selectable options: the "all areas" sentinel first, followed by every
# distinct neighbourhood name in ascending order.
neighbourhood_list = [
    "Amsterdam (all areas)",
    *sorted(listings["neighbourhood_cleansed"].unique()),
]
neighbourhood_list

['Amsterdam (all areas)',
 'Bijlmer-Centrum',
 'Bijlmer-Oost',
 'Bos en Lommer',
 'Buitenveldert - Zuidas',
 'Centrum-Oost',
 'Centrum-West',
 'De Aker - Nieuw Sloten',
 'De Baarsjes - Oud-West',
 'De Pijp - Rivierenbuurt',
 'Gaasperdam - Driemond',
 'Geuzenveld - Slotermeer',
 'IJburg - Zeeburgereiland',
 'Noord-Oost',
 'Noord-West',
 'Oostelijk Havengebied - Indische Buurt',
 'Osdorp',
 'Oud-Noord',
 'Oud-Oost',
 'Slotervaart',
 'Watergraafsmeer',
 'Westerpark',
 'Zuid']

Function to filter the listings by neighbourhood:

In [42]:
def neighbourhood_filter(neighbourhood, data=None):
    """Return the listings that belong to the selected neighbourhood.

    Parameters
    ----------
    neighbourhood : str
        A value of the ``neighbourhood_cleansed`` column, or the sentinel
        ``"Amsterdam (all areas)"`` to select every listing.
    data : pandas.DataFrame, optional
        Table to filter. When omitted, the notebook-level ``listings``
        frame is used, which is what existing one-argument calls rely on.

    Returns
    -------
    pandas.DataFrame
        ``data`` itself (not a copy) for the sentinel; otherwise the rows
        whose ``neighbourhood_cleansed`` equals ``neighbourhood``. A name
        that matches no row yields an empty frame rather than an error.
    """
    if data is None:
        # Fall back to the global frame loaded earlier in the notebook.
        data = listings

    if neighbourhood == "Amsterdam (all areas)":
        # Sentinel: no filtering at all.
        return data

    # Only the neighbourhood is filtered on; no other criterion is applied.
    return data[data["neighbourhood_cleansed"] == neighbourhood]

## EDA

### Neighbourhoods Filter

In [93]:
# Choose any entry of neighbourhood_list (printed above) as the selection
selected_neighbourhood = 'Geuzenveld - Slotermeer'
filtered_listings = neighbourhood_filter(selected_neighbourhood)

### Amsterdam and Neighbourhoods

General:

In [65]:
# Size of the current selection and its share of the full listings table
n_selected = len(filtered_listings)
share_of_city = (n_selected / len(listings)) * 100
print(f"Total Listings: {n_selected} ({share_of_city:.1f}% of Amsterdam's listings)")

Total Listings: 6290 (100.0% of Amsterdam's listings)


Rooms:

In [95]:
# Count and share of listings per room type in the current selection.
# The four types are listed explicitly so that a type missing from the
# selection is still reported as "0 (0.0%)".
room_types = ['Entire home/apt', 'Hotel room', 'Private room', 'Shared room']

# One pass over the column instead of filtering the frame twice per type.
room_type_counts = filtered_listings['room_type'].value_counts()
n_filtered = len(filtered_listings)

for room_type in room_types:
    n_rooms = int(room_type_counts.get(room_type, 0))
    # Guard the empty selection so the share prints as 0.0 instead of nan.
    room_share = n_rooms / n_filtered * 100 if n_filtered else 0.0
    # Double-quoted f-string: nesting the same quote type inside an f-string
    # is only valid from Python 3.12 onwards.
    print(f"{room_type}: {n_rooms} ({room_share:.1f}%)")

Entire home/apt: 57 (67.9%)
Hotel room: 0 (0.0%)
Private room: 25 (29.8%)
Shared room: 2 (2.4%)


Activity:

In [103]:
# Averages over the current selection; each mean is computed once and reused.
avg_price = filtered_listings['price_cleansed'].mean()
avg_minimum_nights = filtered_listings['minimum_nights'].mean()
# Booked nights per year are estimated as 365 minus the average availability.
avg_nights_booked = 365 - filtered_listings['availability_365'].mean()

# Double-quoted f-strings keep the cell valid on Python versions before 3.12,
# where reusing the enclosing quote inside an f-string is a SyntaxError.
print(f"Avg Price p/ Night: €{avg_price:.2f}")
# Product of the two averages (not the average of per-listing products).
print(f"Avg Minimum Cost for Booking: €{avg_price * avg_minimum_nights:.2f}")
# Yearly estimate (avg price * avg nights booked) spread over 12 months.
print(f"Avg Monthly Income of Hosts: €{avg_price * avg_nights_booked / 12:.2f}")

Avg Price p/ Night: €182.68
Avg Minimum Cost for Booking: €524.11
Avg Monthly Income of Hosts: €3184.91


Hosts:

In [118]:
# Superhost count and share, relative to the rows with a non-null flag.
# NOTE(review): both figures are computed over rows of filtered_listings; if a
# host can appear on several rows they are counted once per row - confirm
# whether de-duplicating by host is wanted here.
superhost_flags = filtered_listings['host_is_superhost']
n_superhosts = superhost_flags.sum()
# Double-quoted f-string: same-quote nesting needs Python 3.12 or newer.
print(f"SuperHosts: {n_superhosts} ({n_superhosts / superhost_flags.count() * 100:.1f}%)")

# Histogram of host sign-up dates: rows are first counted per 'host_since'
# date, then plotly bins the dates and sums the counts within each bin.
date_counts = filtered_listings.groupby('host_since').size().reset_index(name='count')
fig = px.histogram(date_counts, x='host_since', y='count', title='New Hosts')
fig.update_layout(
    xaxis_title='Date',
    yaxis_title='New Hosts',
    template='plotly_white',
    bargap=0.1
)
fig.show()

SuperHosts: 22 (26.2%)
