In [155]:
import streamlit as st
import pandas as pd
import psycopg2
import pandas as pds
import numpy as np
import plotly.express as px
import os 
from airbnb_config import ABConfig

# Intro

## Config

In [156]:
# Project settings object; the DB_* fields and MAPBOX_API_KEY are read from it below.
config = ABConfig()
# Pastel qualitative palette from Plotly (not referenced by the cells visible here).
px_color = px.colors.qualitative.Pastel
# Register the Mapbox token once so the scatter_mapbox figures below can use it.
px.set_mapbox_access_token(config.MAPBOX_API_KEY)


2021-11-10 01:01:38.557 INFO    root: Reading configuration file mazelx.config
2021-11-10 01:01:38.561 INFO    root: Complete proxy list has 0 proxies


## Get data

In [167]:
def load_data(nrows=100000):
    """Load listings from the ``room`` table into a DataFrame.

    Args:
        nrows: Number of leading rows of the query result to keep.

    Returns:
        pandas.DataFrame in which missing values are replaced by ``""`` and
        ``host_id`` is cast to ``str``.
    """
    conn = psycopg2.connect(
        database=config.DB_NAME,
        user=config.DB_USER,
        password=config.DB_PASSWORD,
        host=config.DB_HOST,
        port=config.DB_PORT,
    )
    try:
        data = pd.read_sql_query("SELECT * FROM room", con=conn)
    finally:
        # Always release the connection: without this, every re-run of the
        # cell leaves another open session on the database server.
        conn.close()

    data = data.head(nrows)
    # Blank out missing values so later string operations (e.g. on ``license``)
    # see "" instead of NaN.
    data = data.fillna("")
    data["host_id"] = data.host_id.astype(str)
    return data

In [168]:
# Load the listings with the default row cap, then preview the first rows.
df = load_data()
df.head(3)

Unnamed: 0,room_id,host_id,room_type,country,city,neighborhood,address,reviews,overall_satisfaction,accommodates,...,longitude,survey_id,location,coworker_hosted,extra_host_languages,name,property_type,currency,rate_type,license
0,9018945,34938209,Entire home/apt,,,,"Biarritz, Aquitaine, France",537,5.0,2,...,-1.55377,2,0101000020E6100000401878EE3DDCF8BF842A357BA0BD...,,,City Center - Cosy + private courtyard,,,nightly,
1,44087402,88858538,Entire home/apt,,,,"Biarritz, Nouvelle-Aquitaine, France",27,5.0,2,...,-1.558055,2,0101000020E610000092E86514CBEDF8BF9E62D520CCBD...,,,Appartement cosy à 150m de la plage,,,nightly,
2,30008238,225567806,Entire home/apt,,,,"Biarritz, Nouvelle-Aquitaine, France",93,5.0,2,...,-1.55719,2,0101000020E61000002788BA0F40EAF8BF0BEF7211DFBD...,,,"NOUVELLE VAGUE, studio w/ balcony, seaview & p...",,,nightly,6412200041509.0


## Data enrichment

In [210]:
# Data enrichment: build the public listing URL for each room from its id.
room_id_text = df["room_id"].astype(str)
df["room_url"] = "http://www.airbnb.com/rooms/" + room_id_text
df.head(10)

Unnamed: 0,room_id,host_id,room_type,country,city,neighborhood,address,reviews,overall_satisfaction,accommodates,...,coworker_hosted,extra_host_languages,name,property_type,currency,rate_type,license,room_link,has_license,room_url
0,9018945,34938209,Entire home/apt,,,,"Biarritz, Aquitaine, France",537,5.0,2,...,,,City Center - Cosy + private courtyard,,,nightly,,http://www.airbnb.com/rooms/9018945,False,http://www.airbnb.com/rooms/9018945
1,44087402,88858538,Entire home/apt,,,,"Biarritz, Nouvelle-Aquitaine, France",27,5.0,2,...,,,Appartement cosy à 150m de la plage,,,nightly,,http://www.airbnb.com/rooms/44087402,False,http://www.airbnb.com/rooms/44087402
2,30008238,225567806,Entire home/apt,,,,"Biarritz, Nouvelle-Aquitaine, France",93,5.0,2,...,,,"NOUVELLE VAGUE, studio w/ balcony, seaview & p...",,,nightly,6412200041509,http://www.airbnb.com/rooms/30008238,True,http://www.airbnb.com/rooms/30008238
3,50582066,113038136,Entire home/apt,,,,"Biarritz, Nouvelle-Aquitaine, France",1,,3,...,,,Bellevue Plage for 3 persons.,,,nightly,3,http://www.airbnb.com/rooms/50582066,False,http://www.airbnb.com/rooms/50582066
4,913348,4902721,Entire home/apt,,,,"Biarritz, Aquitaine, France",115,4.5,2,...,,,BIARRITZ 30 METERS BEACH AT VICTORIA SURF 1,,,nightly,64122172650VS,http://www.airbnb.com/rooms/913348,True,http://www.airbnb.com/rooms/913348
5,50295878,13319954,Entire home/apt,,,,"Biarritz, Nouvelle-Aquitaine, France",41,4.5,2,...,,,Chez « Alfred »,,,nightly,,http://www.airbnb.com/rooms/50295878,False,http://www.airbnb.com/rooms/50295878
6,39991402,308352152,Entire home/apt,,,,"Biarritz, Nouvelle-Aquitaine, France",86,5.0,2,...,,,Studio Climatisé GrandePlage Biarritz Victoria...,,,nightly,,http://www.airbnb.com/rooms/39991402,False,http://www.airbnb.com/rooms/39991402
7,49992273,403176056,Entire home/apt,,,,"Biarritz, Nouvelle-Aquitaine, France",2,,5,...,,,"Biarritz tout à pied, avec vue mer !",,,nightly,,http://www.airbnb.com/rooms/49992273,False,http://www.airbnb.com/rooms/49992273
8,31720299,140887097,Entire home/apt,,,,"Biarritz, France",210,5.0,2,...,,,Biarritz Grande Plage 25m2 avec Balcon,,,nightly,,http://www.airbnb.com/rooms/31720299,False,http://www.airbnb.com/rooms/31720299
9,50173663,404931065,Entire home/apt,,,,"Biarritz, Nouvelle-Aquitaine, France",21,5.0,2,...,,,Biarritz- Accès direct à la Grande Plage T2 33 m2,,,nightly,64122001942F6,http://www.airbnb.com/rooms/50173663,True,http://www.airbnb.com/rooms/50173663


# Share of entire apartments vs. private rooms?

In [170]:
# Percentage of listings falling into each room type.
px.histogram(df["room_type"], histnorm="percent")

In [171]:
# Plot every listing on the map, coloured by room type; entire homes are
# drawn in a muted grey so private and hotel rooms stand out.
fig = px.scatter_mapbox(
    df,
    lat="latitude",
    lon="longitude",
    color="room_type",
    color_discrete_map={
        "Entire home/apt": "lightgray",
        "Private room": "red",
        "Hotel room": "blue",
    },
    hover_name="name",
    hover_data=["room_type", "room_url", "license"],
    zoom=14,
)

fig.show()

# How many with license ? (for entire apt only)

What is a plausible length for a license number?

In [172]:
# Distribution of the license string length across all listings.
license_lengths = df["license"].str.len()
px.histogram(license_lengths, histnorm="percent")

We will accept license numbers longer than 2 characters, i.e. 3 characters or more (licenses with 5 to 12 characters are likely fake, though).

In [173]:
# A listing counts as licensed when its license text is strictly longer than 2 characters.
df["has_license"] = df["license"].str.len().gt(2)

# Restrict the licensing statistics to entire homes/apartments.
df_entire_apt_only = df[df["room_type"] == "Entire home/apt"]

n_total = len(df_entire_apt_only)
n_licensed = len(df_entire_apt_only[df_entire_apt_only["has_license"]])
pct_licensed = 100 * n_licensed / n_total
print(f"With license (entire apt only) : {n_licensed} on {n_total} ({pct_licensed:.0f}%)")

With license (entire apt only) : 245 on 588 (42%)


Plot rooms with license vs non-licensed

In [174]:
# Map entire homes/apartments only: licensed listings in grey, unlicensed in red.
fig = px.scatter_mapbox(
    df_entire_apt_only,
    lat="latitude",
    lon="longitude",
    color="has_license",
    color_discrete_map={
        True: "lightgray",
        False: "red",
    },
    hover_name="name",
    hover_data=["room_type", "room_url", "license"],
    zoom=14,
)

fig.show()

# What about Hosts with multiple rooms ?

In [195]:
# Distribution of the number of listings per host.
rooms_per_host = df["host_id"].value_counts()
px.histogram(rooms_per_host, histnorm="percent")

In [194]:
# Same distribution, restricted to hosts that have more than one listing.
rooms_per_host = df["host_id"].value_counts()
px.histogram(rooms_per_host[rooms_per_host > 1], histnorm="percent")

Hosts with the most rooms in the area (top 3)

In [202]:
# value_counts() sorts by descending count, so the first three entries are the
# hosts with the most listings (among hosts that have more than one).
rooms_per_host = df["host_id"].value_counts()
top_multi_hosts = rooms_per_host[rooms_per_host > 1].head(3).index

# Map only the listings that belong to those hosts, one colour per host.
top_host_rooms = df[df["host_id"].isin(top_multi_hosts)]
fig = px.scatter_mapbox(
    top_host_rooms,
    lat="latitude",
    lon="longitude",
    color="host_id",
    hover_name="name",
    hover_data=["room_type", "room_url", "license"],
    zoom=14,
)

fig.show()

In [237]:
# `display` and `HTML` are imported from the public IPython.display module;
# IPython.core.display is a deprecated location for `display`.
from IPython.display import display, HTML

# For each of the top multi-listing hosts, print a one-line summary and then
# render a clickable link to every one of that host's listings.
for host_id in top_multi_hosts:
    host_rooms = df[df.host_id == host_id]
    n_unlicensed = len(host_rooms[~host_rooms.has_license])
    print(f"For host {host_id} ({len(host_rooms)} rooms, {n_unlicensed} without license):")
    for url in host_rooms.room_url:
        display(HTML(f"""<a href="{url}">{url}</a>"""))


For host 113038136 (22 rooms, 22 without license):


For host 184945495 (13 rooms, 0 without license):


For host 133058806 (12 rooms, 12 without license):


Next : 
- identify pro accounts ?
- more data