In [1]:
import numpy as np
import pandas as pd
import plotly
import plotly.express as px
import plotly.graph_objects as go

In [2]:
# Load the Airbnb listings extract; the path is relative to the kernel's working directory.
listings = pd.read_csv(filepath_or_buffer="../data/airbnb/listings_subset.csv")

In [3]:
# Order rows by host, then by first-review year within each host.
listings = listings.sort_values(by=['host_id', 'firstreview_year'], ascending=True)

In [4]:
# Preview the first five sorted rows.
listings.head(n=5)

Unnamed: 0,id,neighbourhood,state,host_id,host_since,first_review,host_neighbourhood,property_type,room_type,host_listings_count,monthly_price,firstreview_year,hostsince_year
7,4501,Shaw,DC,1585,2008-08-08,2009-01-21,Shaw,House,Private room,1.0,,2009,2008
1,3362,Shaw,DC,2798,2008-09-07,2009-01-21,Shaw,Townhouse,Entire home/apt,5.0,,2009,2008
29,25366,Shaw,DC,2798,2008-09-07,2010-09-06,Shaw,Townhouse,Entire home/apt,5.0,,2010,2008
2694,14005162,Near Northeast/H Street Corridor,DC,2798,2008-09-07,2016-09-14,Shaw,Apartment,Entire home/apt,5.0,,2016,2008
3844,16587642,Gallaudet,DC,2798,2008-09-07,2017-03-16,Shaw,Townhouse,Entire home/apt,5.0,,2017,2008


In [6]:
# Keep only the columns used for the per-neighbourhood counts.
# .copy() makes `ll` an independent frame rather than a slice of `listings`.
ll = listings.loc[
    :,
    ['id', 'host_id', 'state', 'neighbourhood', 'firstreview_year', 'hostsince_year', 'host_listings_count'],
].copy()


In [7]:
# Preview the first five rows of the column subset.
ll.head(n=5)

Unnamed: 0,id,host_id,state,neighbourhood,firstreview_year,hostsince_year,host_listings_count
7,4501,1585,DC,Shaw,2009,2008,1.0
1,3362,2798,DC,Shaw,2009,2008,5.0
29,25366,2798,DC,Shaw,2010,2008,5.0
2694,14005162,2798,DC,Near Northeast/H Street Corridor,2016,2008,5.0
3844,16587642,2798,DC,Gallaudet,2017,2008,5.0


- To count listings and unique hosts by year, I use the year of a listing's first review as a proxy for when that listing was established.

In [8]:
# Count listings per neighbourhood / first-review year / state.
# NOTE: this rebinds `ll`, so re-running this cell without first re-running the
# column-subset cell would aggregate the already-aggregated frame.
ll = (
    ll.groupby(['neighbourhood', 'firstreview_year', 'state'])
      .firstreview_year.agg('count')
      .to_frame('listcount')
      .reset_index()
)

# Running total of listings within each neighbourhood.
# groupby(...).cumsum() keeps the frame's own index, so the result aligns on
# assignment. The previous apply(lambda x: x.cumsum()) can come back with the
# group key prepended to the index on newer pandas, which does not align.
ll['listings_cumulative'] = ll.groupby('neighbourhood')['listcount'].cumsum()

In [9]:
# A host can have several listings, so keep a single row per host_id.
# keep='first' retains whichever of a host's rows appears first in `listings`.
hostdata = listings.drop_duplicates(subset=["host_id"], keep='first')

In [10]:
# Count hosts per neighbourhood / first-review year from the one-row-per-host frame.
hh = (
    hostdata.groupby(['neighbourhood', 'firstreview_year'])
            .firstreview_year.agg('count')
            .to_frame('hostcount')
            .reset_index()
)

# Running total of hosts within each neighbourhood.
# groupby(...).cumsum() preserves the original index so the assignment aligns;
# apply(lambda x: x.cumsum()) can return a group-keyed index on newer pandas.
hh['hosts_cumulative'] = hh.groupby('neighbourhood')['hostcount'].cumsum()

To create the dropdown plot, I need a list of DataFrames: one per neighbourhood, each holding that neighbourhood's counts by year.

In [12]:
# Dropdown entries: every neighbourhood present in `hh`, then the 'All Neighbourhoods' entry.
labels = hh['neighbourhood'].unique().tolist() + ['All Neighbourhoods']

In [13]:
# One listings frame per dropdown label, in label order.
# Labels with no matching rows in `ll` are skipped.
data_list = [
    frame
    for frame in (ll[ll['neighbourhood'] == name] for name in labels)
    if not frame.empty
]
    

In [14]:
# One hosts frame per dropdown label, in label order.
# Labels with no matching rows in `hh` are skipped.
data_host = [
    frame
    for frame in (hh[hh['neighbourhood'] == name] for name in labels)
    if not frame.empty
]

In [None]:
# TODO: move the dropdown button box down, or move the title up
# TODO: move the hover text to the left

In [16]:
# (rows, columns) of the one-row-per-host frame; the row count is the number of distinct hosts.
hostdata.shape

(6021, 13)

In [17]:
# Hosts per first-review year, plus a running total across years.
# Built from `listings` (one row per host_id, first row kept) rather than from
# the current `hostdata`: this cell rebinds `hostdata`, so aggregating the name
# in place would make a re-run aggregate its own output.
hostdata = (
    listings.drop_duplicates("host_id", keep='first')
            .groupby(['firstreview_year'])
            .firstreview_year.agg('count')
            .to_frame('host_count')
            .reset_index()
)
hostdata['host_total'] = hostdata['host_count'].cumsum()

In [18]:
# Listings per first-review year (every listing, not de-duplicated by host).
listdata = (
    listings.groupby(['firstreview_year'])
            .firstreview_year.agg('count')
            .to_frame('list_count')
            .reset_index()
)

# Attach the running listing total to `hostdata` by matching on firstreview_year.
# Assigning the cumsum Series directly aligns on the positional index, which is
# only correct while both frames contain exactly the same years in the same rows.
list_total_by_year = listdata.set_index('firstreview_year')['list_count'].cumsum()
hostdata['list_total'] = hostdata['firstreview_year'].map(list_total_by_year)

In [19]:
# Work on an independent copy of the yearly totals.
df = hostdata.copy(deep=True)

In [20]:
# Inspect the last five rows of the yearly totals.
df.tail(n=5)

Unnamed: 0,firstreview_year,host_count,host_total,list_total
7,2016,921,1927,2598
8,2017,1253,3180,4365
9,2018,1069,4249,6126
10,2019,627,4876,7274
11,NaT,1145,6021,9081


In [38]:

# Dropdown figure: cumulative listings and hosts for each neighbourhood, plus the
# overall totals from `df`, with one dropdown entry per label.

ALL_LABEL = 'All Neighbourhoods'

fig = go.Figure()
trace_labels = []  # dropdown label each trace belongs to, in trace order

# (per-neighbourhood frames, their y column, `df` column holding the overall total,
#  marker colour, trace name, hover template)
series_specs = [
    (data_list, "listings_cumulative", "list_total",
     "#601A4A", "listings", "type:LISTING %{y} Unique Listings"),
    (data_host, "hosts_cumulative", "host_total",
     "#EE442F", "hosts", "type:HOST %{y} unique hosts"),
]

for frames, y_col, total_col, color, trace_name, hover in series_specs:
    # One trace per neighbourhood. The label is read from the frame itself, so
    # traces and dropdown entries stay matched even if some label had no rows.
    for d in frames:
        label = d['neighbourhood'].iloc[0]
        fig.add_trace(go.Scatter(
            x=d["firstreview_year"],
            y=d[y_col],
            mode="lines+markers",
            marker_color=color,
            text=d['neighbourhood'],
            showlegend=True,
            legendgroup=label,
            name=trace_name,
            hovertemplate=hover,
            # Start hidden unless this trace belongs to the initially selected entry.
            visible=(label == ALL_LABEL),
        ))
        trace_labels.append(label)

    # Overall total across all neighbourhoods for this series.
    fig.add_trace(go.Scatter(
        x=df["firstreview_year"],
        y=df[total_col],
        mode="lines+markers",
        marker_color=color,
        text=ALL_LABEL,
        showlegend=True,
        legendgroup=ALL_LABEL,
        name=trace_name,
        hovertemplate=hover,
        visible=True,
    ))
    trace_labels.append(ALL_LABEL)

# One dropdown button per label. Each mask has one entry per trace, so the
# listings trace and the hosts trace of the chosen label are both switched on
# explicitly. 'restyle' takes only trace-level updates; the layout dict that
# used to be passed as a second argument was never applied, so it is dropped.
buttons = [
    dict(
        label=label,
        method='restyle',
        args=[{'visible': [owner == label for owner in trace_labels]}],
    )
    for label in labels
]

updatemenus = [
    dict(
        x=1,
        y=1.15,
        yanchor='top',
        # Keep the entry shown in the dropdown consistent with the traces that
        # are visible when the figure first renders.
        active=labels.index(ALL_LABEL),
        showactive=False,
        buttons=buttons,
    )
]

fig.update_layout(
    title=dict(
        text='<b>Airbnb Listings & Hosts by DC Neighbourhood</b>',
        x=0.099,
        y=0.9,
        xref='container',
        yref='container',
    ),
    xaxis=dict(title=dict(text='Year'), gridcolor='#d3d3d3'),
    yaxis=dict(title=dict(text='Count'), type='linear', gridcolor='#d3d3d3'),
    plot_bgcolor="#FDFEFE",
    width=900,
    height=500,
    showlegend=False,
    updatemenus=updatemenus,
)

# The second positional parameter of plotly.offline.plot is `show_link`, not the
# output path, so the file name must be passed as `filename=`; otherwise the
# figure is written to the default 'temp-plot.html'.
plotly.offline.plot(fig, filename="listings.html")
fig.show()
    

In [48]:
# Remove the 'NaT' bucket from the yearly totals (presumably listings with no
# first review; confirm). Filtering on the value rather than on row label 11
# keeps the cell safe to re-run and independent of how many year rows precede it.
# NOTE(review): the saved output of the next cell also lacks the 2019 row, which
# this cell does not remove; confirm whether 2019 should be excluded as well.
df = df[df['firstreview_year'].astype(str) != 'NaT']

In [49]:
# Display the yearly host/listing totals after the drop in the previous cell.
df

Unnamed: 0,firstreview_year,host_count,host_total,list_total
0,2009,9,9,10
1,2010,13,22,28
2,2011,21,43,56
3,2012,50,93,120
4,2013,104,197,258
5,2014,267,464,614
6,2015,542,1006,1348
7,2016,921,1927,2598
8,2017,1253,3180,4365
9,2018,1069,4249,6126


In [50]:
# Totals figure: cumulative hosts and cumulative listings per first-review year.
fig = go.Figure()

# (`df` column, marker colour, trace name, hover template)
for total_col, color, trace_name, hover in [
    ('host_total', "#EE442F", "Hosts", "%{y} unique hosts in year %{x}"),
    ('list_total', "#601A4A", "Listings", "%{y} unique listings in year %{x}"),
]:
    fig.add_trace(go.Scatter(
        x=df['firstreview_year'],
        y=df[total_col],
        marker_color=color,
        showlegend=True,
        name=trace_name,
        hovertemplate=hover,
    ))

fig.update_layout(
    title=dict(
        text='<b>Airbnb Listings & Hosts in DC</b>',
        x=0.099,
        y=0.9,
        xref='container',
        yref='container',
    ),
    xaxis=dict(title=dict(text='Year'), gridcolor='#d3d3d3'),
    yaxis=dict(title=dict(text='Count'), type='linear', gridcolor='#d3d3d3'),
    plot_bgcolor="#FDFEFE",
    width=900,
    height=500,
    showlegend=False,
)

# The second positional parameter of plotly.offline.plot is `show_link`, not the
# output path, so the file name must be passed as `filename=`; otherwise the
# figure is written to the default 'temp-plot.html'.
plotly.offline.plot(fig, filename="totals.html")
fig.show()


In [101]:
# First-review years to slice by: 2009 through 2019 inclusive (the stop value is exclusive).
# numpy is imported in the notebook's top import cell.
years = np.arange(2009, 2020, 1)


In [104]:
# Turn the year sequence into a plain Python list (elements are kept as-is).
years = [year for year in years]

In [147]:
# One frame of per-neighbourhood listing counts for each year in `years`.
# The comparison is done on strings: the yearly aggregate shown earlier has a
# 'NaT' key next to the years, which suggests firstreview_year holds text rather
# than integers (confirm with ll.dtypes). Text values never equal the integer
# years, which would leave every frame empty.
year_as_text = ll['firstreview_year'].astype(str)
year_list = [ll[year_as_text == str(year)] for year in years]
    

In [152]:
# Preview the first five rows of the per-neighbourhood yearly counts.
ll.head(n=5)

Unnamed: 0,neighbourhood,firstreview_year,state,listcount,listings_cumulative
0,16th Street Heights,2009,DC,1,1
1,16th Street Heights,2013,DC,3,4
2,16th Street Heights,2014,DC,8,12
3,16th Street Heights,2015,DC,13,25
4,16th Street Heights,2016,DC,24,49
