# Exploring AirBnB in Europe

### Requirements
Note: for the dropdown widget to work, the following command should be run in a terminal first:
`jupyter nbextension enable --py widgetsnbextension`

In [7]:
# Third-party packages the notebook needs; written to requirements.txt so the
# next cell can install them with %pip.
required_packages = [
  'pandas',
  'numpy',
  'plotly',
  'geopandas',
  'dash',
  'shapely',
  'IPython',
  'ipywidgets',
  'altair',
  'tabulate',  # optional pandas dependency required by DataFrame.to_markdown()
]

with open('requirements.txt', 'w') as f:
  f.writelines(f"{package}\n" for package in required_packages)

In [8]:
%pip install -r requirements.txt

Note: you may need to restart the kernel to use updated packages.




In [9]:
import os
import socket
import threading
import webbrowser

import altair as alt
import dash
import geopandas as gpd
import ipywidgets as widgets
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from dash import dcc, html, Input, Output
from IPython.display import display, Markdown, HTML, clear_output
from shapely.geometry import Point

## **1. AirBnB Listings**
The dataframe `listings_full_data` holds all the columns needed from the CSV files. The other dataframes use subsets of these columns to perform the described tasks.

In [10]:
# Folder that holds the input CSV files.
data_dir = 'data'

# Cities covered by the analysis.
cities = [
    'Amsterdam', 'Athens', 'Barcelona', 'Berlin', 'Copenhagen', 'Dublin',
    'Lisbon', 'Madrid', 'Rome', 'Venice', 'Vienna',
]

# Columns read from every listings CSV.
usecols = [
    'id',
    'name',
    'latitude',
    'longitude',
    'neighbourhood_cleansed',
    'price',
    'number_of_reviews_ltm',
    'first_review',
    'review_scores_rating',
    'room_type',
    'minimum_nights',
    'license',
    'host_id',
    'host_name',
    'calculated_host_listings_count',
    'calculated_host_listings_count_entire_homes',
    'calculated_host_listings_count_private_rooms',
    'calculated_host_listings_count_shared_rooms',
]

# Explicit dtypes, grouped by type. 'price' and 'first_review' are not listed:
# the read_csv call that consumes this mapping handles them through a converter
# and parse_dates respectively.
dtype_dict = {
    **dict.fromkeys(
        [
            'id',
            'number_of_reviews_ltm',
            'minimum_nights',
            'host_id',
            'calculated_host_listings_count',
            'calculated_host_listings_count_entire_homes',
            'calculated_host_listings_count_private_rooms',
            'calculated_host_listings_count_shared_rooms',
        ],
        'int64',
    ),
    **dict.fromkeys(['latitude', 'longitude', 'review_scores_rating'], 'float64'),
    **dict.fromkeys(
        ['name', 'neighbourhood_cleansed', 'room_type', 'license', 'host_name'],
        'string',
    ),
}

def clean_price(x): #pandas converter to clean the price from $ symbol
    """Convert a raw price cell such as '$1,234.00' into a float.

    Parameters
    ----------
    x : object
        Raw cell value handed over by the CSV reader.

    Returns
    -------
    float
        The price without the '$' sign and thousands separators, or NaN when
        the cell is blank or not a string. NaN is returned explicitly (rather
        than falling through to None) so the resulting column stays numeric.
    """
    if isinstance(x, str) and x.strip():
        return float(x.replace('$', '').replace(',', ''))
    return float('nan')

# os.listdir() returns entries in arbitrary, filesystem-dependent order, so the
# listing files are sorted before being paired with `cities`.
# NOTE(review): this assumes the sorted file names line up with the order of
# `cities` (e.g. one `listings_<city>.csv` per city) -- confirm against the
# actual file names in `data_dir`.
listing_files = sorted(
    (
        file_name
        for file_name in os.listdir(data_dir)
        if file_name.startswith('listings_') and file_name.endswith('.csv')
    ),
    key=str.lower,
)

if len(listing_files) > len(cities):
    raise ValueError(
        f"Found {len(listing_files)} listings files in '{data_dir}' "
        f"but only {len(cities)} cities are configured."
    )

li = []

for city, file_name in zip(cities, listing_files):
    file_path = os.path.join(data_dir, file_name)
    df = pd.read_csv(file_path, usecols=usecols, dtype=dtype_dict, converters={'price': clean_price}, parse_dates=['first_review'])
    df['city'] = city  # label every row with the city its file was paired with
    li.append(df)

listings_full_data = pd.concat(li, ignore_index=True)

# Slim copy of the data for the listing-count and density visualizations:
# review, licence and host columns are not needed there.
cols_to_exclude = [
    'number_of_reviews_ltm', 'first_review', 'review_scores_rating',
    'minimum_nights', 'license', 'host_id', 'host_name',
    'calculated_host_listings_count',
    'calculated_host_listings_count_entire_homes',
    'calculated_host_listings_count_private_rooms',
    'calculated_host_listings_count_shared_rooms',
]
listings = listings_full_data.drop(columns=cols_to_exclude)

# One row per city with its number of listings; `city` is cast to categorical
# for the Altair chart that consumes this frame.
total_listings = (
    listings.groupby('city')
    .size()
    .reset_index(name='total_listings')
    .assign(city=lambda counts: counts['city'].astype('category'))
)

display(Markdown(total_listings.to_markdown()))

|    | city       |   total_listings |
|---:|:-----------|-----------------:|
|  0 | Amsterdam  |             9659 |
|  1 | Athens     |            13274 |
|  2 | Barcelona  |            19482 |
|  3 | Berlin     |            13759 |
|  4 | Copenhagen |            20909 |
|  5 | Dublin     |             6175 |
|  6 | Lisbon     |            24204 |
|  7 | Madrid     |            26980 |
|  8 | Rome       |            34061 |
|  9 | Venice     |             8322 |
| 10 | Vienna     |            14396 |

### Listings visualization
The horizontal bar chart showcases the AirBnB listings per city, utilizing the AirBnB theme colors (source: https://usbrandcolors.com/airbnb-colors/)

In [11]:
# Bars are grey by default and switch to the accent colour while hovered.
default_color = '#767676'
hover_color = '#FF5A5F'

# Point selection on `city`, driven by mouse-over; empty=False means nothing is
# selected (and therefore highlighted) until the pointer is over the chart.
highlight = alt.selection_point(fields=['city'], on='mouseover', nearest=True, empty=False)

bar_color = alt.condition(highlight, alt.value(hover_color), alt.value(default_color))
bar_tooltip = [
    alt.Tooltip('city', title='City'),
    alt.Tooltip('total_listings', title='Total Listings'),
]

bar_chart = (
    alt.Chart(total_listings)
    .mark_bar()
    .encode(
        y=alt.Y('city:N', sort='-x', title='City'),  # longest bar first
        x=alt.X('total_listings:Q', title='AirBnB Listings'),
        color=bar_color,
        tooltip=bar_tooltip,
    )
    .add_params(highlight)
    .properties(title='Total Airbnb Listings per City')
)

display(bar_chart)

## **2. AirBnB Densities**
To estimate the density, we use population data per cell, where each cell represents 1 km² of area defined by coordinates. The population data for the European cities are provided by 'WorldPop', a research programme of the University of Southampton (source: https://hub.worldpop.org/project/categories?id=3).
<p>As a first step, let's isolate the population data for the cities in question instead of using each country's full dataset.</p>

In [12]:
# ISO-3 country code for every entry of `cities`, in the same order. Some
# countries appear twice (Spain for Barcelona/Madrid, Italy for Rome/Venice).
country_codes = ['NLD', 'GRC', 'ESP', 'DEU', 'DNK', 'IRL', 'PRT', 'ESP', 'ITA', 'ITA', 'AUT']
city_to_country = dict(zip(cities, country_codes))  # drives the population file name of each city

# Bounding box of each city, taken from the range of its listings' coordinates.
boundaries = listings.groupby('city').agg(
    latitude_min=('latitude', 'min'),
    latitude_max=('latitude', 'max'),
    longitude_min=('longitude', 'min'),
    longitude_max=('longitude', 'max')
).reset_index()

pop = []
country_pop_cache = {}  # country code -> population frame, so shared files are read once

for city in cities:
    country_code = city_to_country.get(city)
    file_path = os.path.join(data_dir, f"{country_code}_2020_1km_Aggregated.csv")

    # A city whose population file is missing is skipped silently.
    if not os.path.isfile(file_path):
        continue

    if country_code not in country_pop_cache:
        country_pop_cache[country_code] = pd.read_csv(file_path)
    country_pop = country_pop_cache[country_code]

    # Keep only the cells whose X/Y fall inside the city's bounding box (inclusive).
    city_bounds = boundaries[boundaries['city'] == city].iloc[0]
    inside_box = (
        country_pop['X'].between(city_bounds['longitude_min'], city_bounds['longitude_max'])
        & country_pop['Y'].between(city_bounds['latitude_min'], city_bounds['latitude_max'])
    )
    city_cells = country_pop[inside_box].copy()
    city_cells['city'] = city
    pop.append(city_cells)

del country_pop_cache  # the country-wide frames are not needed past this point

city_pop_per_cell = pd.concat(pop, ignore_index=True)

The AirBnB listings are geographically organized by neighbourhood. In order to calculate the density for each one, we need to estimate its population. The main problem is that a neighbourhood has a different "shape" than the cells defined in the population data, so one neighbourhood consists of multiple cells. We tackle this by performing a spatial join between the population cells and the neighbourhoods. This operation checks which population cells fall within the boundaries of each neighbourhood.
<p>To perform a spatial join between the two, both have to be converted to GeoDataFrames.</p>

In [13]:
def create_geojson(listings):
  """Approximate every neighbourhood by the convex hull of its listings.

  Parameters
  ----------
  listings : pandas.DataFrame
      Must contain 'longitude', 'latitude' and 'neighbourhood_cleansed'.
      Side effect: a 'geometry' column with one point per listing is added to
      this frame in place.

  Returns
  -------
  geopandas.GeoDataFrame
      Columns 'neighbourhood_cleansed' and 'geometry' (the hull), with CRS
      EPSG:4326.

  Notes
  -----
  Listings are grouped by neighbourhood name only.
  NOTE(review): if two cities share a neighbourhood name, their points end up
  in the same hull -- confirm whether names are unique across cities.
  """
  # Vectorized point construction instead of a row-wise apply.
  listings['geometry'] = gpd.points_from_xy(listings['longitude'], listings['latitude'])
  gdf_listings = gpd.GeoDataFrame(listings, geometry='geometry')

  def _hull(points):
    # Newer geopandas deprecates `unary_union` in favour of `union_all()`;
    # use whichever the installed version offers.
    merged = points.union_all() if hasattr(points, 'union_all') else points.unary_union
    return merged.convex_hull  # simple boundary for an irregularly shaped neighbourhood

  neighborhoods = gdf_listings.groupby('neighbourhood_cleansed')['geometry'].apply(_hull).reset_index()
  neighborhoods.columns = ['neighbourhood_cleansed', 'geometry']

  gdf_neighborhoods = gpd.GeoDataFrame(neighborhoods, geometry='geometry')

  gdf_neighborhoods.set_crs(epsg=4326, inplace=True)  # WGS84 projection for coordinates (source: Inside AirBnB's data dictionary, long and lat descriptions)
  return gdf_neighborhoods

def process_city(city, listings_coords, city_pop_per_cell, neighborhoods):
  """Compute listings per 1000 inhabitants for every listing of one city.

  Parameters
  ----------
  city : str
      City to process.
  listings_coords : pandas.DataFrame
      Listings with 'city' and 'neighbourhood_cleansed' columns.
  city_pop_per_cell : pandas.DataFrame
      Population cells with 'X', 'Y', 'Z' and 'city' columns. 'Z' is summed
      per neighbourhood and used as its population.
  neighborhoods : geopandas.GeoDataFrame
      Neighbourhood polygons with 'neighbourhood_cleansed' and 'geometry'.

  Returns
  -------
  pandas.DataFrame or None
      One row per listing of the city, in the listings' original order, with
      columns 'city', 'neighbourhood_cleansed', 'listings_density_per_1000'.
      The density is NaN for neighbourhoods that contain no population cell or
      whose summed population is zero. None is returned when the city has no
      listings.
  """
  city_listings = listings_coords.loc[listings_coords['city'] == city].copy()
  if city_listings.empty:
      return None

  city_pop = city_pop_per_cell.loc[city_pop_per_cell['city'] == city].copy()
  gdf_pop_cells = gpd.GeoDataFrame(
      city_pop,
      geometry=gpd.points_from_xy(city_pop['X'], city_pop['Y']),  # vectorized, no row-wise apply
      crs="EPSG:4326",
  )

  # Sum the population of the cells that fall inside each neighbourhood hull.
  neighborhoods_city = neighborhoods[neighborhoods['neighbourhood_cleansed'].isin(city_listings['neighbourhood_cleansed'])]
  gdf_pop_cells_within_neighborhoods = gpd.sjoin(gdf_pop_cells, neighborhoods_city, predicate='within')
  population_by_neighborhood = gdf_pop_cells_within_neighborhoods.groupby('neighbourhood_cleansed')['Z'].sum().reset_index()

  # Counting needs no geometry, so the plain frame is grouped directly.
  listings_count = city_listings.groupby('neighbourhood_cleansed').size().reset_index(name='listings_count')
  results = pd.merge(population_by_neighborhood, listings_count, on='neighbourhood_cleansed', how='left').fillna(0)

  density = (results['listings_count'] * 1000) / results['Z']
  # 0/0 becomes 0 as before; a positive count over zero population would be
  # +/-inf, which is turned into NaN so it cannot distort the colour scale.
  results['listings_density_per_1000'] = density.fillna(0).replace([np.inf, -np.inf], np.nan)

  city_listings = city_listings.merge(results[['neighbourhood_cleansed', 'listings_density_per_1000']], on='neighbourhood_cleansed', how='left')

  return city_listings[['city', 'neighbourhood_cleansed', 'listings_density_per_1000']]

listings_coords = listings[['latitude', 'longitude', 'city', 'neighbourhood_cleansed']].copy()
neighborhoods = create_geojson(listings)  # also adds a temporary 'geometry' column to `listings`

listings_dens = pd.concat(
    [process_city(city, listings_coords, city_pop_per_cell, neighborhoods) for city in cities],
    ignore_index=True,
)

# The density is constant within a (city, neighbourhood) pair, so it is
# attached by key instead of relying on both frames sharing the same row order.
density_by_neighbourhood = listings_dens.drop_duplicates(subset=['city', 'neighbourhood_cleansed'])

listings = (
    listings
    # errors='ignore' keeps the cell re-runnable when the columns are absent/present
    .drop(columns=['geometry', 'listings_density_per_1000'], errors='ignore')
    .merge(density_by_neighbourhood, on=['city', 'neighbourhood_cleansed'], how='left')
)

<p>The calculated densities have some missing values. This can be due to cells near the edges of neighbourhoods that fall just outside the neighbourhood boundaries because of small misalignments, or to overlapping boundaries in dense or rural areas. Still, most neighbourhoods were successfully matched to the cells they contain, as shown in the visualization.</p>

## AirBnB Densities Visualization
In this chart, based on the selected city, we see the listings as points, focused on the specific part of the city where the listings are located. 
* Tool: In order to focus the plot on the borders of each city (defined by its listings), the x and y axes had to be generated dynamically. This is why plotly was preferred to other libraries such as Vega-Altair (which showed the points without detail due to this constraint).
* Color scale: The selected sequential color scale is reds, meaning that listings in regions with higher density are of a deeper red color. If we included the NaN values, they would appear as gray, but this would cause more noise on the plot without adding to the information. 

In [14]:
# City selector for the density scatter plot; starts on Amsterdam.
city_dropdown = widgets.Dropdown(description='City:', options=cities, value='Amsterdam')
def create_airbnb_plot(selected_city):
  """Show a scatter plot of one city's listings coloured by listing density.

  Listings without a density value are left out, and both axes are clamped to
  the city's bounding box taken from `boundaries`.
  """
  bounds = boundaries[boundaries['city'] == selected_city].iloc[0]
  lon_range = [bounds['longitude_min'], bounds['longitude_max']]
  lat_range = [bounds['latitude_min'], bounds['latitude_max']]

  in_city = listings['city'] == selected_city
  plotted = listings[in_city].dropna(subset=['listings_density_per_1000'])

  fig = px.scatter(
      plotted,
      x='longitude',
      y='latitude',
      hover_name='name',
      color='listings_density_per_1000',
      color_continuous_scale='reds',
      title=f"AirBnB Listings in {selected_city}",
      labels={'longitude': 'Longitude', 'latitude': 'Latitude'}
  )

  fig.update_xaxes(range=lon_range, title='Longitude')
  fig.update_yaxes(range=lat_range, title='Latitude')
  fig.update_layout(coloraxis_colorbar=dict(title='Density'))
  fig.update_traces(marker_size=3)
  fig.show()

def update_plot(change):
  """Redraw the dropdown and the scatter plot after the selected city changes.

  `change` is the change dictionary passed by the widget's observer; its 'new'
  entry holds the newly selected city. `clear_output` comes from
  IPython.display and has to be imported in the notebook's import cell.
  """
  clear_output(wait=True)  # wait=True defers clearing until new output is available
  display(city_dropdown)
  create_airbnb_plot(change['new'])

# Re-render through update_plot whenever the dropdown's 'value' changes.
city_dropdown.observe(update_plot, names='value')
# Initial render: show the selector, then the plot for the current selection.
display(city_dropdown)
create_airbnb_plot(city_dropdown.value)

Dropdown(description='City:', options=('Amsterdam', 'Athens', 'Barcelona', 'Berlin', 'Copenhagen', 'Dublin', '…

## **3. Activity**
### 3.1 Data preparation
The price column has missing data. The empty values are filled using the price mean for each neighbourhood.

In [15]:
# Mean price of every (city, neighbourhood) pair.
average_prices = (
    listings.groupby(['city', 'neighbourhood_cleansed'])['price']
    .mean()
    .rename('average_price')
    .reset_index()
)

# Fill missing prices with the neighbourhood mean, then drop the helper column.
listings = (
    listings.merge(average_prices, on=['city', 'neighbourhood_cleansed'], how='left')
    .assign(price=lambda frame: frame['price'].fillna(frame['average_price']))
    .drop(columns='average_price')
)

Before computing the average number of nights a listing is booked in each city from the full dataframe, we can narrow the data down by ignoring inactive listings, which do not represent the rental market. They inflate the listings count and therefore influence the averages we calculate. However, newly posted listings and listings with issues (for example a bad rating or lacking amenities) can also have `number_of_reviews_ltm = 0`, and these should not be excluded from the listings count. We therefore identify a listing as inactive when all of the following hold:
* `number_of_reviews_ltm` = 0
* `first_review` is not NaN (the listing isn't new)
* `review_scores_rating` > average `review_scores_rating` (the listing doesn't have issues)

In [16]:
# Columns that play no role in the activity / income estimates.
cols_to_exclude = ['room_type', 'host_id', 'host_name', 'calculated_host_listings_count', 'license']
listings_review_data = listings_full_data.drop(columns=cols_to_exclude)

# Mean rating over all cities, used as the "no issues" threshold.
average_review_score = listings_review_data['review_scores_rating'].mean()

# Inactive: no reviews in the last twelve months, although the listing is not
# new (it has a first review) and is rated above average.
is_inactive = (
    (listings_review_data['number_of_reviews_ltm'] == 0)
    & (listings_review_data['first_review'].notna())
    & (listings_review_data['review_scores_rating'] > average_review_score)
)
inactive_listings = listings_review_data[is_inactive]

# .copy() makes this an independent frame: later cells add columns to it, and
# writing to an uncopied slice triggers pandas' SettingWithCopyWarning.
active_listings = listings_review_data[~listings_review_data['id'].isin(inactive_listings['id'])].copy()

### 3.2 Average Nights/Booking calculation

For the data visualization (a replica of Inside Airbnb), all listings are included, i.e. it showcases the results of the first table. Each dataset gives a different result for the averages:

In [17]:
def calculate_avg_nights(listings, reviews_to_bookings=2, nights_per_booking=3):
  """Estimate the average number of nights booked per listing for each city.

  Nights booked are estimated per listing as
  number_of_reviews_ltm * reviews_to_bookings * nights_per_booking.

  Parameters
  ----------
  listings : pandas.DataFrame
      Must contain 'city' and 'number_of_reviews_ltm'. Side effect: a
      'nights_booked' column is added to (or overwritten in) this frame.
  reviews_to_bookings : int or float, default 2
      Estimated bookings per review.
  nights_per_booking : int or float, default 3
      Estimated nights per booking.
      NOTE(review): the defaults reproduce the previously hard-coded `* 2 * 3`;
      presumably a 50% review rate and a 3-night average stay -- confirm.

  Returns
  -------
  pandas.DataFrame
      Columns 'city' and 'average_nights_booked', one row per city.
  """
  listings['nights_booked'] = listings['number_of_reviews_ltm'] * reviews_to_bookings * nights_per_booking
  avg_nights_per_city = listings.groupby('city', as_index=False)['nights_booked'].mean()
  return avg_nights_per_city.rename(columns={'nights_booked': 'average_nights_booked'})

# Average nights per city for all listings and for the active subset.
# calculate_avg_nights also adds a 'nights_booked' column to the frame it is given.
avg_nights_per_city_all = calculate_avg_nights(listings_review_data) 
avg_nights_per_city_active = calculate_avg_nights(active_listings) 

# Render both result tables side by side in a flex container.
tables = f"""
<div style="display: flex;">
<div style="margin-right: 20px;">
    <h3>Average Nights Booked per City (All listings)</h3>
    {avg_nights_per_city_all.to_html(index=False, border=0)}
</div>
<div>
    <h3>Average Nights Booked per City (Active listings)</h3>
    {avg_nights_per_city_active.to_html(index=False, border=0)}
</div>
</div>
"""

display(HTML(tables))



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



city,average_nights_booked
Amsterdam,56.225075
Athens,86.318819
Barcelona,71.434863
Berlin,53.600116
Copenhagen,30.58855
Dublin,85.344777
Lisbon,87.708478
Madrid,84.971831
Rome,88.730278
Venice,117.67628

city,average_nights_booked
Amsterdam,67.254241
Athens,91.008419
Barcelona,77.393727
Berlin,65.028128
Copenhagen,35.074088
Dublin,89.383311
Lisbon,93.400326
Madrid,92.229151
Rome,95.613338
Venice,125.134424


### 3.3 Average Income calculation

In [18]:
def calculate_avg_income(listings):
  """Return the mean estimated income per listing for each city.

  Income is price * nights_booked per listing. The frame must already carry
  'city', 'price' and 'nights_booked'; an 'income' column is added to it as a
  side effect. The result has the columns 'city' and 'average_income'.
  """
  listings['income'] = listings['price'] * listings['nights_booked']
  income_by_city = listings.groupby('city', as_index=False)['income'].mean()
  return income_by_city.rename(columns={'income': 'average_income'})

# Average income per city for all listings and for the active subset.
# Both frames must already hold the 'nights_booked' column that
# calculate_avg_income multiplies with the price.
avg_income_per_city_all = calculate_avg_income(listings_review_data) 
avg_income_per_city_active = calculate_avg_income(active_listings) 

# Render both result tables side by side in a flex container.
tables = f"""
<div style="display: flex;">
<div style="margin-right: 20px;">
  <h3>Average Income per City (All listings)</h3>
  {avg_income_per_city_all.to_html(index=False, border=0)}
</div>
<div>
  <h3>Average Income per City (Active listings)</h3>
  {avg_income_per_city_active.to_html(index=False, border=0)}
</div>
</div>
"""

display(HTML(tables))



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



city,average_income
Amsterdam,16506.873062
Athens,8796.018825
Barcelona,16964.680586
Berlin,11798.788799
Copenhagen,50349.852604
Dublin,15502.200345
Lisbon,13223.19578
Madrid,12642.420068
Rome,17210.262675
Venice,26046.422968

city,average_income
Amsterdam,17525.115412
Athens,9275.853292
Barcelona,17697.984442
Berlin,12560.598117
Copenhagen,53223.184264
Dublin,16156.3932
Lisbon,13790.859865
Madrid,13073.059095
Rome,18145.824389
Venice,27121.180632


## **4. Inside AirBnB visualization**

### 4.1 Data preparation for map and subplots
We need to group the licence data into categories. If the licence column of a city is completely filled with missing values, this metric is not calculated for that city. Otherwise, NaNs fall under the category 'unlicensed'.

In [19]:
# Per-city licence breakdown: missing values count as unlicensed, the literal
# 'Exempt' as exempt and every other value as licensed. The total is computed
# in the same aggregation so it cannot get out of step with the city rows.
license_summary = listings_full_data.groupby('city').agg(
  unlicensed=('license', lambda x: x.isnull().sum()),
  exempt=('license', lambda x: (x == 'Exempt').sum()),
  licensed=('license', lambda x: ((x.notnull()) & (x != 'Exempt')).sum()),
  total_listings=('license', 'size')
).reset_index()

# Drop cities whose licence column is entirely missing. 'unlicensed' counts the
# missing values, so it is deliberately left out of the test: including it
# would keep every city that has at least one listing.
license_summary = license_summary[
  (license_summary['licensed'] != 0) |
  (license_summary['exempt'] != 0)
]

This cell contains the utilities used to produce the plots, along with the final computations of the metrics shown in the subplots.

In [20]:
def compute_center_zoom(lat_min, lat_max, lon_min, lon_max):
  """Return (center_lon, center_lat, zoom) to focus the map on a bounding box.

  The centre is the midpoint of the box; the zoom starts at 10 and decreases
  by 0.2 per degree of the larger of the two extents.
  """
  widest_extent = max(lat_max - lat_min, lon_max - lon_min)
  zoom = 10 - widest_extent * 0.2
  return (lon_min + lon_max) / 2, (lat_min + lat_max) / 2, zoom

def create_bins_labels(max_val, step):
  """Build bin edges and range labels up to `max_val` in increments of `step`.

  Returns (bins, labels): `bins` is [-1, 0] followed by step, 2*step, ... not
  exceeding max_val; `labels` is '0' followed by 'lo-hi' range strings, the
  last of which gets a trailing '+'.
  """
  bins = [-1, 0]
  bins.extend(np.arange(step, max_val + 1, step))

  labels = ['0']
  for lower in np.arange(0, max_val - step + 1, step):
    labels.append(f'{lower+1}-{lower+step}')
  labels[-1] += '+'
  return bins, labels

def create_listings_per_host_bins(max_val,step):
  """Build bin edges and single-value labels for counts that start at `step`.

  Derived from create_bins_labels: the two leading edges (-1 and 0) and the
  '0' label are removed, every label is reduced to the lower end of its
  range, and the last label gets a '+'.

  The last edge is replaced by infinity so that the '+' bin really is
  open-ended. With a finite last edge, values at or above it fall outside
  every bin when the caller cuts with right=False and are silently dropped.

  Returns (bins, labels) with len(bins) == len(labels) + 1.
  """
  bins, labels = create_bins_labels(max_val,step)
  bins = bins[2:]
  bins[-1] = np.inf  # open-ended upper bound for the '+' bin
  labels = [label.split('-')[0] for label in labels][1:-1]
  labels[-1] = labels[-1] + '+'
  return bins,labels


def get_top_hosts_by_city(selected_city):
  """Return the 20 hosts with the most listings in the selected city.

  Hosts are ranked by total listings; ties are broken by the number of entire
  homes. The result has one row per host and the display columns
  'Host Name', 'Entire home/apts', 'Private Rooms', 'Shared Rooms', 'Listings'.
  """
  display_names = {
      'host_id': 'Host Id',
      'host_name': 'Host Name',
      'calculated_host_listings_count_entire_homes': 'Entire home/apts',
      'calculated_host_listings_count_private_rooms': 'Private Rooms',
      'calculated_host_listings_count_shared_rooms': 'Shared Rooms',
      'calculated_host_listings_count': 'Listings',
  }
  in_city = listings_full_data['city'] == selected_city

  top_hosts = (
      listings_full_data.loc[in_city, list(display_names)]
      .sort_values(
          by=['calculated_host_listings_count', 'calculated_host_listings_count_entire_homes'],
          ascending=[False, False],
      )
      .drop_duplicates(subset='host_id', keep='first')  # one row per host
      .head(20)
      .rename(columns=display_names)
  )
  return top_hosts[['Host Name', 'Entire home/apts', 'Private Rooms', 'Shared Rooms', 'Listings']]

# Colour palettes: accent colours for the room types, greys for the pie chart.
color_palette = ['#FF5A5F', '#00A699', '#484848', '#FC642D']
color_palette_pie = ['#707070', '#A3A3A3', '#C7C7C7']

# Room type -> colour; the key order also defines the display order of room types.
room_type_styling = dict(zip(
  ['Entire home/apt', 'Private room', 'Shared room', 'Hotel room'],
  color_palette,
))

### 4.2 Dash App
The replica of Inside AirBnB is organized inside a Dash app. Each plot comes with a callback function that updates it according to the selected city.

In [21]:
app = dash.Dash(__name__)

# App Layout
# Left: the map (60% width). Right: a scrollable column (40% width) with the
# city selector, the headline figures and one graph per metric.
# All style dictionaries use camelCase keys, the form Dash expects.
app.layout = html.Div([
  html.H1("AirBnB Listings per city", style={'textAlign': 'center', 'fontFamily': 'Arial'}),
  html.Div([
    dcc.Graph(id='map-graph', style={'width': '60%'}),
    html.Div([
        html.P("Select a city to view its AirBnB listings:", style={'fontFamily': 'Arial', 'marginLeft': '30px'}),
        dcc.Dropdown(
            id='city-dropdown',
            options=cities,
            value='Amsterdam',
            clearable=False,
            style={'width': '70%', 'fontFamily': 'Arial', 'marginLeft': '15px', 'marginBottom': '10px'}
        ),
        html.Div([
            html.Div(id='total-listings', style={'fontFamily': 'Arial', 'fontSize': '30px', 'fontWeight': 'bold', 'marginLeft': '30px'}),
            html.P("listings", style={'fontFamily': 'Arial', 'fontSize': '20px', 'marginLeft': '10px'}),
            html.Div(id='average-income', style={'fontFamily': 'Arial', 'fontSize': '30px', 'fontWeight': 'bold', 'marginLeft': '30px'}),
            html.P("average income", style={'fontFamily': 'Arial', 'fontSize': '20px', 'marginLeft': '10px'}),
        ], style={'display': 'flex', 'alignItems': 'center', 'marginLeft': '20px'}),
        dcc.Graph(id='room-type-bar-chart'),
        dcc.Graph(id='nights-bar-chart'),
        dcc.Graph(id='license-pie-chart'),
        dcc.Graph(id='short-term-bar-chart'),
        dcc.Graph(id='listings-bar-chart'),
        dcc.Graph(id='update_top_hosts_table')
    ], style={'display': 'flex', 'flexDirection': 'column', 'overflowY': 'auto', 'width': '40%', 'marginTop': '20px'})
  ], style={'display': 'flex', 'maxHeight': '600px'})
])

# Total listings
@app.callback(
  Output('total-listings', 'children'),
  Input('city-dropdown', 'value')
)
def update_total_listings(selected_city):
  """Return the number of listings of the selected city."""
  in_city = listings_full_data['city'] == selected_city
  return len(listings_full_data[in_city])

# Average income
@app.callback(
  Output('average-income', 'children'),
  Input('city-dropdown', 'value')
)
def update_avg_income(selected_city):
  """Return the selected city's average income as a formatted string.

  Falls back to "N/A" when the city has no row in `avg_income_per_city_all`
  or its average is missing, instead of raising inside the callback.
  """
  city_income = avg_income_per_city_all.loc[
      avg_income_per_city_all['city'] == selected_city, 'average_income'
  ]
  if city_income.empty or pd.isna(city_income.iloc[0]):
    return "N/A"
  return f"${city_income.iloc[0]:,.2f}"

# Scatter mapbox 
@app.callback(
  Output('map-graph', 'figure'),
  Input('city-dropdown', 'value')
)
def update_map(selected_city):
  """Build the map of the selected city's listings, coloured by room type.

  The view is centred and zoomed on the city's bounding box from `boundaries`.
  """
  bounds = boundaries[boundaries['city'] == selected_city].iloc[0]
  center_lon, center_lat, zoom = compute_center_zoom(
      bounds['latitude_min'],
      bounds['latitude_max'],
      bounds['longitude_min'],
      bounds['longitude_max'],
  )

  # Room type drives the colour only, so it is hidden from the hover box.
  hover_fields = {
      'room_type': False,
      'neighbourhood_cleansed': True,
      'price': True,
  }

  fig = px.scatter_mapbox(
      listings[listings['city'] == selected_city],
      lat='latitude',
      lon='longitude',
      mapbox_style='carto-positron',
      center={'lat': center_lat, 'lon': center_lon},
      zoom=zoom,
      height=600,
      size_max=2,
      color='room_type',
      color_discrete_map=room_type_styling,
      hover_name="name",
      hover_data=hover_fields,
  )
  fig.update_traces(marker=dict(size=4))
  fig.update_layout(
      title=f"AirBnB Listings in {selected_city}",
      margin={"r": 0, "t": 60, "l": 0, "b": 0},
      showlegend=False,
  )
  return fig

# Room type horizontal bar chart
@app.callback(
  Output('room-type-bar-chart', 'figure'),
  Input('city-dropdown', 'value')
)
def update_bar_chart(selected_city):
  """Build the horizontal bar chart of listing counts per room type."""
  room_order = list(room_type_styling.keys())
  in_city = listings_full_data['city'] == selected_city

  room_type_summary = (
      listings_full_data[in_city]
      .groupby('room_type', observed=True)
      .size()
      .reset_index(name='count')
  )
  # An ordered categorical makes the rows sort in the palette's room-type order.
  room_type_summary['room_type'] = pd.Categorical(
      room_type_summary['room_type'], categories=room_order, ordered=True
  )
  room_type_summary = room_type_summary.sort_values('room_type')

  fig = px.bar(
      room_type_summary,
      x='count',
      y='room_type',
      orientation='h',
      title=f'Room Type Counts in {selected_city}',
      labels={'count': 'Number of Listings', 'room_type': 'Room Type'},
      text='count',
      color='room_type',
      color_discrete_map=room_type_styling,
  )
  fig.update_traces(texttemplate='%{text:.2s}', textposition='outside')
  fig.update_layout(xaxis_title='Number of Listings', yaxis_title='Room Type')
  return fig

# Activity bar chart
@app.callback(
  Output('nights-bar-chart', 'figure'),
  Input('city-dropdown', 'value')
)
def update_nights_bar_chart(selected_city):
  """Build the bar chart of listings per estimated-occupancy range.

  Occupancy is estimated as number_of_reviews_ltm * 2 * 3 and cut into the
  ranges returned by create_bins_labels(255, 30).

  Binning uses right-closed intervals so each label matches its range:
  (-1, 0] -> '0', (0, 30] -> '1-30', and so on. The last edge is replaced by
  infinity so the final '+' range keeps every listing above it; with a finite
  edge those listings fall outside all bins and disappear from the chart.
  """
  city_data = listings_review_data[listings_review_data['city'] == selected_city].copy()
  city_data['occupancy'] = city_data['number_of_reviews_ltm'] * 2 * 3

  bins, labels = create_bins_labels(255, 30)
  bins[-1] = np.inf  # open-ended upper bound for the '+' range
  city_data['occupancy_bins'] = pd.cut(city_data['occupancy'], bins=bins, labels=labels)

  # reindex keeps the label order and shows empty ranges as zero.
  bin_counts = city_data['occupancy_bins'].value_counts(sort=False).reindex(labels, fill_value=0)

  fig = px.bar(x=bin_counts.index, y=bin_counts.values, labels={'x': 'Occupancy (nights booked)', 'y': 'Listings'},
              title='Activity',
              text=bin_counts.values,
              color_discrete_sequence=['#707070'])
  fig.update_traces(texttemplate='%{text:.2s}', textposition='outside')
  fig.update_layout(title_text='Activity')
  return fig

# Licenses pie chart
@app.callback(
  Output('license-pie-chart', 'figure'),
  Input('city-dropdown', 'value')
)
def update_license_pie_chart(selected_city):
  """Build the licence pie chart, or an empty titled figure without data.

  A city whose 'license' column is entirely missing gets an empty figure with
  an explanatory title; otherwise the shares of unlicensed, exempt and
  licensed listings are read from `license_summary`.
  """
  city_licenses = listings_full_data.loc[listings_full_data['city'] == selected_city, 'license']

  if city_licenses.isna().all():
    fig = go.Figure()
    fig.update_layout(title_text=f'No available licence data for {selected_city}')
    return fig

  summary_row = license_summary[license_summary['city'] == selected_city]
  values = summary_row[['unlicensed', 'exempt', 'licensed']].values.flatten()
  pie = go.Pie(
      labels=['Unlicensed', 'Exempt', 'Licensed'],
      values=values,
      marker=dict(colors=color_palette_pie),
  )

  fig = go.Figure(data=[pie])
  fig.update_traces(textinfo='percent')
  fig.update_layout(title_text='Licenses')
  return fig

# Short-term rentals bar chart
@app.callback(
  Output('short-term-bar-chart', 'figure'),
  Input('city-dropdown', 'value')
)
def update_min_nights_bar_chart(selected_city):
  """Build the bar chart of listings per minimum-nights group."""
  in_city = listings_full_data['city'] == selected_city
  city_data = listings_full_data.loc[in_city, ['minimum_nights']].copy()

  bins, labels = create_listings_per_host_bins(16, 1)
  city_data['min_nights_group'] = pd.cut(
      city_data['minimum_nights'],
      bins=bins,
      labels=labels,
      right=False,
      include_lowest=True,
      ordered=False,
      duplicates='drop',
  )
  min_nights_counts = (
      city_data.groupby('min_nights_group', observed=True)['minimum_nights']
      .count()
      .reset_index(name='nights_counts')
  )

  fig = px.bar(
      min_nights_counts,
      x='min_nights_group',
      y='nights_counts',
      labels={'x': 'Minimum Nights', 'y': 'Listings'},
      title='Short-term Rentals',
      text='nights_counts',
      color_discrete_sequence=['#707070'],
  )
  fig.update_layout(xaxis_title='Minimum Nights', yaxis_title='Listings')
  fig.update_traces(texttemplate='%{text:.2s}', textposition='outside')
  return fig

# Listings per host bar chart
@app.callback(
  Output('listings-bar-chart', 'figure'),
  Input('city-dropdown', 'value')
)
def update_listings_bar_chart(selected_city):
  """Build the bar chart of hosts grouped by their number of listings."""
  in_city = listings_full_data['city'] == selected_city
  # One row per host: the listing count is a per-host figure repeated on each listing.
  city_data = (
      listings_full_data.loc[in_city, ['host_id', 'calculated_host_listings_count']]
      .copy()
      .drop_duplicates(subset='host_id', keep='first')
  )

  bins, labels = create_listings_per_host_bins(11,1)
  city_data['listings_group'] = pd.cut(
      city_data['calculated_host_listings_count'],
      bins=bins,
      labels=labels,
      right=False,
      include_lowest=True,
  )
  listings_counts = (
      city_data.groupby('listings_group', observed=True)['host_id']
      .count()
      .reset_index(name='host_count')
  )

  fig = px.bar(
      listings_counts,
      x='listings_group',
      y='host_count',
      labels={'listings_group': 'Listings per Host', 'host_count': 'Number of Hosts'},
      title='Listings per Host',
      text='host_count',
      color_discrete_sequence=['#707070'],
  )
  fig.update_layout(xaxis_title='Listings per Host', yaxis_title='Number of Hosts')
  fig.update_traces(texttemplate='%{text:.2s}', textposition='outside')
  return fig

#Top 20 hosts table
@app.callback(
    Output('update_top_hosts_table', 'figure'),
    Input('city-dropdown', 'value')
)
def update_top_hosts_table(selected_city):
  """Build the table figure listing the selected city's top hosts."""
  top_hosts = get_top_hosts_by_city(selected_city)
  column_names = list(top_hosts.columns)

  table = go.Table(
      header=dict(values=column_names, fill_color='#A3A3A3', align='left'),
      cells=dict(
          values=[top_hosts[name] for name in column_names],
          fill_color='#C7C7C7',
          align='left',
      ),
  )

  fig = go.Figure(data=[table])
  fig.update_layout(title_text="Top Hosts")
  return fig
  
#Browser setup 
def find_available_port(start_port=8050):
  """Return the first port at or above `start_port` with nothing listening on it.

  Each candidate is probed with a TCP connect to 127.0.0.1; a failed connect
  (non-zero result) is taken to mean the port is free.

  Raises
  ------
  RuntimeError
      If every port from `start_port` up to 65535 accepts connections, or if
      `start_port` is above 65535. The search is bounded so it ends with a
      clear error instead of probing invalid port numbers.
  """
  for port in range(start_port, 65536):
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
      if s.connect_ex(('127.0.0.1', port)) != 0:
        return port
  raise RuntimeError(f"No available port found in the range {start_port}-65535")
def open_browser(port):
  """Open the locally served app on the given port in a new browser window."""
  app_url = f"http://127.0.0.1:{port}/"
  webbrowser.open_new(app_url)

if __name__ == '__main__':
  port = find_available_port()
  # Open the browser one second later, from a timer thread, because the server
  # call below blocks.
  threading.Timer(1, open_browser, args=(port,)).start()
  # Newer Dash releases replace `run_server` with `run`; prefer `run` and fall
  # back to `run_server` only on versions that do not provide it.
  run_app = app.run if hasattr(app, 'run') else app.run_server
  run_app(debug=True, port=port, use_reloader=False)