In [1]:
import pandas as pd
import numpy as np

import plotly.graph_objs as go
import plotly.express as px

## Read in the data from listings_1.csv This file is the extract of the zipped listings file


The below functions set the stage for a Sankey chart to be created. 


## What is a Sankey Chart
## How does the data have to be setup
## Challenges when generating such a chart (same name for node)


In [2]:
def createPropertyTypeCol(rental_df):
    """This function adds a column called property_type_class to the dataframe 
    Args:
        rental_df ([type]): [The dataframe has a column called property_type]

    Returns:
        [type]: [Dataframe with a more concise list of property types]
    """

    # Property types Private room and Shared Room identified
    property_df = rental_df[["property_type"]].copy()
    property_df.loc[
        property_df["property_type"].str.contains("Private room|Room in"),
        "property_type",
    ] = "Private Room"
    property_df.loc[
        property_df["property_type"].str.contains("Shared room"), "property_type"
    ] = "Shared Room"

    # Extract the second half of all "Entire" property types to get the actual type such as house..
    property_df.loc[
        property_df["property_type"].str.contains("Entire "), "property_type"
    ] = (
        property_df.loc[
            property_df["property_type"].str.contains("Entire "), "property_type"
        ]
        .str.replace("Entire ", "")
        .str.capitalize()
    )

    # We will call Bungalows, Tiny House, Cabins as Cottages:  A house is pretty much any structure where people live, usually one or more families.
    # As per definition:
    #But traditionally, a cabin is a small house built with simple tools
    #A bungalow is a style of house or cottage
    property_df.loc[
        property_df["property_type"].str.contains("Tiny house|Bungalow|Cabin"), "property_type"
    ] = "Cottage"
        
    rental_df["property_type_class"] = property_df["property_type"]

    return rental_df


##############################################################################################################
# Clean up the dataframe
##############################################################################################################
def cleanRentalDF(filename):

    # Read the data into a dataframe
    full_df = pd.read_csv(filename)
    # select out the columns we are interested in
    rental_df = full_df[
        [
            "id",
            "price",
            "listing_url",
            "host_id",
            "host_response_rate",
            "host_response_time",
            "host_acceptance_rate",
            "review_scores_communication",
            "review_scores_location",
            "review_scores_value",
            "review_scores_checkin",
            "reviews_per_month",
            "review_scores_cleanliness",
            "license",
            "instant_bookable",
            "number_of_reviews",
            "first_review",
            "last_review",
            "neighbourhood_cleansed",
            "neighbourhood_group_cleansed",
            "latitude",
            "longitude",
            "accommodates",
            "bathrooms_text",
            "property_type",
            "has_availability",
            "availability_30",
            "availability_60",
            "availability_90",
            "availability_365",
        ]
    ].copy()

    # Make price a float column
    rental_df["price"] = (
        rental_df["price"].str.replace("$", "").str.replace(",", "").astype("float64")
    )

    # Change host response rate from string to float so that it is a continuous value
    # TBD Look for average for the host - should I look for nans in hosts that have multiple records and set Nan to 0 oly for hosts that have only one record?
    # Mean of their reponse rate for the rest?
    # How about impute?

    # Convert the response rate to float
    rental_df["host_response_rate_percent"] = (
        rental_df["host_response_rate"].str.replace("%", "").astype("float64")
    )
    rental_df["host_response_rate_percent"] = rental_df.groupby(["host_id"])[
        "host_response_rate_percent"
    ].transform(lambda x: x.fillna(x.mean()))
    # All the values that are still Nan, we do not have any info about and so fill with zero
    rental_df["host_response_rate_percent"] = rental_df[
        "host_response_rate_percent"
    ].fillna(0)
    rental_df = rental_df.drop("host_response_rate", axis="columns")

    # Change response time to one within a dict
    # Question should we set Nans to -0 or to a very high number ( highest rank is least reponsive)
    rank_response_time = {
        "within an hour": 1,
        "within a few hours": 2,
        "within a day": 3,
        "a few days or more": 4,
    }
    rental_df["host_reponse_time_rank"] = rental_df["host_response_time"].map(
        rank_response_time
    )
    rental_df["host_reponse_time_rank"] = rental_df["host_reponse_time_rank"].fillna(0)
    rental_df = rental_df.drop("host_response_time", axis="columns")

    # Use the same logic as host_reponse_rate for host_acceptance_rate
    rental_df["host_acceptance_rate_percent"] = (
        rental_df["host_acceptance_rate"].str.replace("%", "").astype("float64")
    )
    rental_df["host_acceptance_rate_percent"] = rental_df.groupby(["host_id"])[
        "host_acceptance_rate_percent"
    ].transform(lambda x: x.fillna(x.mean()))
    # All the values that are still Nan, we do not have any info about and so fill with zero
    rental_df["host_acceptance_rate_percent"] = rental_df[
        "host_acceptance_rate_percent"
    ].fillna(0)
    rental_df = rental_df.drop("host_acceptance_rate", axis="columns")

    # (‘t’ means available and ‘f’ means not available)
    # *Convert t (*true) = 1 , f (false) = 0

    availability_code_dict = {
        "t": 1,
        "f": 0,
    }

    rental_df["instant_bookable"] = rental_df["instant_bookable"].map(
        availability_code_dict
    )
    rental_df["has_availability"] = rental_df["has_availability"].map(
        availability_code_dict
    )

    # Question what must be done for dates not present?
    rental_df["first_review"] = pd.to_datetime(rental_df["first_review"])
    rental_df["last_review"] = pd.to_datetime(rental_df["last_review"])

    # Add a column for a smaller list of property types
    rental_df = createPropertyTypeCol(rental_df)

    return rental_df


################################################################################################################
# Call the cleanup function and setup a global dataframe
################################################################################################################
full_df = cleanRentalDF("data\listings_1.csv")


### Digging into the proprty types
#Sorting the various property types reveals some basic classifications we can make 

### 1) We will make a Yurt a Tent by definition

property_type_list = full_df.property_type.unique().tolist()
property_type_list.sort()
property_type_list

       
<ul>      
<li>  ['Boat',
<li>   'Camper/RV',
 <li>  'Entire apartment',
<li> 'Entire bungalow',
<li>  'Entire cabin',
<li>  'Entire condominium',
<li> 'Entire cottage',
<li> 'Entire floor',
<li> 'Entire guest suite',
<li> 'Entire guesthouse',
<li> 'Entire house',
<li> 'Entire loft',
<li> 'Entire place',
<li> 'Entire serviced apartment',
<li> 'Entire townhouse',
<li> 'Entire villa',
<li> 'Houseboat',
<li> 'Private room',
<li> 'Private room in apartment',
<li> 'Private room in bed and breakfast',
<li> 'Private room in boat',
<li> 'Private room in bungalow',
<li> 'Private room in condominium',
<li> 'Private room in cottage',
<li> 'Private room in earth house',
<li> 'Private room in guest suite',
<li> 'Private room in guesthouse',
<li> 'Private room in house',
<li> 'Private room in loft',
<li> 'Private room in serviced apartment',
<li> 'Private room in tiny house',
<li> 'Private room in townhouse',
<li> 'Private room in treehouse',
<li> 'Private room in villa',
<li> 'Room in aparthotel',
<li> 'Room in bed and breakfast',
<li> 'Room in boutique hotel',
<li> 'Room in hotel',
<li> 'Room in serviced apartment',
<li> 'Shared room',
<li> 'Shared room in apartment',
<li> 'Shared room in condominium',
<li> 'Shared room in house',
<li> 'Shared room in loft',
<li> 'Tent',
<li> 'Tiny house',
<li> 'Yurt']      
       
</ul>    

In [3]:
nbd_groups = list(full_df.neighbourhood_group_cleansed.unique())
nbd_groups.sort()
nbd_groups

['Ballard',
 'Beacon Hill',
 'Capitol Hill',
 'Cascade',
 'Central Area',
 'Delridge',
 'Downtown',
 'Interbay',
 'Lake City',
 'Magnolia',
 'Northgate',
 'Other neighborhoods',
 'Queen Anne',
 'Rainier Valley',
 'Seward Park',
 'University District',
 'West Seattle']

### When we look into the property types in the rental_df, we can decide if we want to keep the granularity or if we want to create a more braod category of choices such as 
'Private room in apartment',
 'Private room in bed and breakfast',
 'Private room in boat',
 'Private room in bungalow',
 
 The decision I made was based on the perspective of a renter who is interested in a room but is more concerened about that room being Private versus one who is not opposed to a shared room. To this user, whether the room is in an apartment or a house is not as much of a concern.  

In [4]:
# Digging into the proprty types
#Sorting the various property types reveals some basic classifications we can make 

# 1) We will make a Yurt a Tent by definition

property_type_list = full_df.property_type.unique().tolist()
property_type_list.sort()
property_type_list


['Boat',
 'Camper/RV',
 'Entire apartment',
 'Entire bungalow',
 'Entire cabin',
 'Entire condominium',
 'Entire cottage',
 'Entire floor',
 'Entire guest suite',
 'Entire guesthouse',
 'Entire house',
 'Entire loft',
 'Entire place',
 'Entire serviced apartment',
 'Entire townhouse',
 'Entire villa',
 'Houseboat',
 'Private room',
 'Private room in apartment',
 'Private room in bed and breakfast',
 'Private room in boat',
 'Private room in bungalow',
 'Private room in condominium',
 'Private room in cottage',
 'Private room in earth house',
 'Private room in guest suite',
 'Private room in guesthouse',
 'Private room in house',
 'Private room in loft',
 'Private room in serviced apartment',
 'Private room in tiny house',
 'Private room in townhouse',
 'Private room in treehouse',
 'Private room in villa',
 'Room in aparthotel',
 'Room in bed and breakfast',
 'Room in boutique hotel',
 'Room in hotel',
 'Room in serviced apartment',
 'Shared room',
 'Shared room in apartment',
 'Shared

### Sankey of neighborhood groups with house types
#### Sankey of neighborhood with house types

In [5]:
def createSeattlePropTypeBar(rental_df):
    prop_count_df = rental_df.copy()

    prop_count_df = rental_df.groupby(['neighbourhood_group_cleansed', 'property_type_class']).agg(count_list = ('property_type_class', 'count'))
    prop_count_df  = prop_count_df.reset_index()
    prop_count_df = prop_count_df.sort_values('count_list', ascending = False)



    fig = px.bar(
        prop_count_df,
        x=prop_count_df.property_type_class,
        y=prop_count_df.count_list,
        color=prop_count_df.neighbourhood_group_cleansed,
        title="Rentable Property types",
        labels={
            'neighbourhood_group_cleansed':'Neighborhood Group',
            'property_type_class' :"Property Type", 
             'count_list':'Number of listings' },
        color_discrete_sequence=px.colors.qualitative.Pastel
        )
    return fig
fig = createSeattlePropTypeBar(full_df)
fig.show()

In [6]:
def createNeighborhoodPopTypeBar(rental_df):
    prop_count_df = rental_df[rental_df['neighbourhood_group_cleansed'] == 'Queen Anne'].copy()
    prop_count_df = prop_count_df.groupby(['neighbourhood_cleansed', 'property_type_class']).agg(count_list = ('property_type_class', 'count'))
    prop_count_df  = prop_count_df.reset_index()
    prop_count_df = prop_count_df.sort_values('property_type_class', ascending = True)



    fig = px.bar(
        prop_count_df,
        y=prop_count_df.property_type_class,
        x=prop_count_df.count_list,
        color=prop_count_df.neighbourhood_cleansed,
        title="Rentable Property types",
        labels={
            'neighbourhood_cleansed':'Neighborhood',
            'property_type_class' :"Property Type", 
             'count_list':'Number of listings' },
        color_discrete_sequence=px.colors.qualitative.Pastel,
        orientation='h'
        )
    return fig

fig = createNeighborhoodPopTypeBar(full_df)
fig.show()

In [7]:
prop_count_df = full_df[full_df['neighbourhood_group_cleansed'] == 'Queen Anne'].copy()
prop_count_df = prop_count_df.groupby(['neighbourhood_cleansed', 'property_type_class']).agg(count_list = ('property_type_class', 'count'))
prop_count_df  = prop_count_df.reset_index()
prop_count_df = prop_count_df.sort_values('property_type_class', ascending = False)



In [8]:
prop_count_df [prop_count_df['property_type_class'] == 'Boat']

Unnamed: 0,neighbourhood_cleansed,property_type_class,count_list
19,North Queen Anne,Boat,1


In [9]:
# nbd = 'Queen Anne' 
# nbd = 'Other neighborhoods'
# nbd = 'Crown Hill'
# nbd =  = 'Central Area',
# nbd ='Other neighborhoods'
# nbd = 'West Seattle'
# nbd = 'Delridge'
# nbd = 'Downtown'
# nbd = 'Ballard'
# nbd = 'Beacon Hill'
# nbd = 'Seward Park',
# nbd = 'Capitol Hill'
# nbd = 'Rainier Valley'
# nbd = 'Magnolia',
nbd = 'Queen Anne'
# nbd = 'Cascade', 
# nbd = 'Lake City'
#nbd = 'University District'
#nbd =  'Northgate'
#nbd =  'Interbay'


rental_df = full_df.copy()

one_nbd_df = rental_df[rental_df['neighbourhood_group_cleansed'] == nbd]

three_proptype_df = one_nbd_df[['neighbourhood_group_cleansed', 'neighbourhood_cleansed', 'property_type_class']]




#Limit types of property to House, Private Room and Shared Room
three_proptype_df = three_proptype_df[three_proptype_df['property_type_class'].str.contains('House|Private Room|Shared Room|Condominium|Seviced apartment|Apartment|Townhouse')]
three_proptype_df = three_proptype_df[~three_proptype_df['property_type_class'].str.contains('Houseboat')]

#An extra precaution - when the Neighborhood group is the same as the neighborhood, Add a Group suffix to the Neighborhood group as in
# University District is within the Neighborhood group Univerisity district and so call the Group University district Group
# This will avoid the circular reference that will occur
change_df = rental_df[rental_df['neighbourhood_cleansed'] == rental_df['neighbourhood_group_cleansed']][['neighbourhood_group_cleansed','neighbourhood_cleansed']].copy()
change_df = change_df['neighbourhood_group_cleansed']  + ' Group'
rental_df.loc[rental_df['neighbourhood_cleansed'] == rental_df['neighbourhood_group_cleansed'],'neighbourhood_group_cleansed'] = change_df 


# This creates indexes to be used in the Sankey diagram
label_list = three_proptype_df['neighbourhood_cleansed'].unique().tolist()
label_list.sort()
label_list = [nbd] + label_list
label_list += three_proptype_df['property_type_class'].unique().tolist()
sankey_df = three_proptype_df.groupby(['neighbourhood_group_cleansed', 'neighbourhood_cleansed', 'property_type_class']).agg(count_listings = ('property_type_class','count'))
sankey_df = sankey_df.reset_index()

label_idx_dict = {}
for idx, label in enumerate(label_list):
    label_idx_dict[label] = idx
label_idx_dict 

sankey_df['nbd_idx'] = sankey_df['neighbourhood_cleansed'].map(label_idx_dict)
sankey_df['prop_idx'] = sankey_df['property_type_class'].map(label_idx_dict)
sankey_df['nbd_grp_idx'] = sankey_df['neighbourhood_group_cleansed'].map(label_idx_dict)



full_color_list = ['#f3d1dc','#f6a7c1','#fcf0cf','#fdcf76',
              '#ffabab','#89aeb2','#97f2f3','#f1e0b0',
              '#f1cdb0','#e7cfc8','#ecad8f','#c1cd97',
              '#38908f','#b2ebe0', '#ffbfa3','#e08963',
              '#9dabdd', '#e7ffac', '#bffcc6', '#877111',
              '#b57fb3', '#ffb347', '#ff6961',  '#aec6cf'   ]


# or each neighborhood node to property we set one color. for group to neighborhood we set one color 
color_list = full_color_list[:len(three_proptype_df['neighbourhood_cleansed'].unique().tolist())]
group_color = dict(zip(list(sankey_df.groupby('neighbourhood_cleansed').groups.keys()), color_list))
sankey_df['color_link'] =  sankey_df['neighbourhood_cleansed'].map(group_color)


sankey_df





Unnamed: 0,neighbourhood_group_cleansed,neighbourhood_cleansed,property_type_class,count_listings,nbd_idx,prop_idx,nbd_grp_idx,color_link
0,Queen Anne,East Queen Anne,Apartment,21,1,6,0,#f3d1dc
1,Queen Anne,East Queen Anne,Condominium,6,1,8,0,#f3d1dc
2,Queen Anne,East Queen Anne,House,10,1,5,0,#f3d1dc
3,Queen Anne,East Queen Anne,Private Room,7,1,7,0,#f3d1dc
4,Queen Anne,East Queen Anne,Townhouse,4,1,9,0,#f3d1dc
5,Queen Anne,Lower Queen Anne,Apartment,27,2,6,0,#f6a7c1
6,Queen Anne,Lower Queen Anne,Condominium,15,2,8,0,#f6a7c1
7,Queen Anne,Lower Queen Anne,House,7,2,5,0,#f6a7c1
8,Queen Anne,Lower Queen Anne,Private Room,18,2,7,0,#f6a7c1
9,Queen Anne,Lower Queen Anne,Townhouse,7,2,9,0,#f6a7c1


In [10]:
label_list

['Queen Anne',
 'East Queen Anne',
 'Lower Queen Anne',
 'North Queen Anne',
 'West Queen Anne',
 'House',
 'Apartment',
 'Private Room',
 'Condominium',
 'Townhouse']

In [11]:
#Note the summation of listing count for the neighborhood
first_level_df = sankey_df.groupby(['neighbourhood_group_cleansed', 'neighbourhood_cleansed']).agg(nbd_count_listings =('count_listings', 'sum'), nbd_idx = ('nbd_idx','max'), nbd_grp_idx = ('nbd_grp_idx','max'))
first_level_df= first_level_df.reset_index()

# Pick the next color after the ones used above from the full color list
if len(three_proptype_df['neighbourhood_cleansed'].unique().tolist()) >= len(full_color_list):
    first_level_df['color_link'] = full_color_list[-1]
else:    
    first_level_df['color_link'] = full_color_list[len(three_proptype_df['neighbourhood_cleansed'].unique().tolist())]

In [12]:
first_level_df

Unnamed: 0,neighbourhood_group_cleansed,neighbourhood_cleansed,nbd_count_listings,nbd_idx,nbd_grp_idx,color_link
0,Queen Anne,East Queen Anne,48,1,0,#ffabab
1,Queen Anne,Lower Queen Anne,74,2,0,#ffabab
2,Queen Anne,North Queen Anne,59,3,0,#ffabab
3,Queen Anne,West Queen Anne,52,4,0,#ffabab


In [13]:
#First set the source and target list from nbd_grp to nbd
source = first_level_df['nbd_grp_idx'].tolist()
source =  source+ sankey_df['nbd_idx'].tolist()

target = first_level_df['nbd_idx'].tolist()
target = target + sankey_df['prop_idx'].tolist()


#First set the vlues from first_level_df
values = first_level_df['nbd_count_listings'].tolist()
values = values + sankey_df['count_listings'].tolist()

# There are as many colors as nodes = 17 + 3
color_node = full_color_list[:len(label_list)]

# For every neighborhood we use the same color for the link
color_link =  first_level_df['color_link'].tolist() + sankey_df['color_link'].tolist()


In [14]:
# this helps us get the theme settings
import plotly.io as plt_io

# this is for simple plotting with plotly express
import plotly.express as px

# create our custom_dark theme from the plotly_dark template
plt_io.templates["custom_dark"] = plt_io.templates["plotly_dark"]

# set the paper_bgcolor and the plot_bgcolor to a new color
plt_io.templates["custom_dark"]['layout']['paper_bgcolor'] = '#30404D'
plt_io.templates["custom_dark"]['layout']['plot_bgcolor'] = '#30404D'

# you may also want to change gridline colors if you are modifying background
plt_io.templates['custom_dark']['layout']['yaxis']['gridcolor'] = '#4f687d'
plt_io.templates['custom_dark']['layout']['xaxis']['gridcolor'] = '#4f687d'

fig = go.Figure(data=[go.Sankey(
    node = dict(
      pad = 15,
      thickness = 20,
      line = dict(color = "black", width = 0.5),
      label = label_list,
      color = color_node,
      customdata=label_list,
      hovertemplate="%{customdata} has  %{value} listings<extra></extra>",  
    ),
    link = dict(
        source = source,
        target = target,
        value = values,
        color = color_link,
        hovertemplate="Link from  %{source.customdata}<br />"
                    + "to %{target.customdata}<br />has  %{value} listings<extra></extra>",
      ),
    #hovertext=steamdf['name'],
    #hoverlabel=dict(namelength=0),
    #|hovertemplate='(values}<br>Neighborhood: {source} <br>Property Type: {target}',

)])

fig.update_layout(title_text="Neighborhood and Property Type Sankey Diagram", font_size=10)
# set the template to our custom_dark template
fig.layout.template = 'custom_dark'
fig.show()

In [15]:
from datamanipulation_bar_sankey import *
from callbacks_bar_sankey import *

### Further expansion of this project can occur by creating consolidating groups of Apartments, Rooms, (Bungalows and houses and cabins) and Tents and then splitting them  

In [16]:
(
    rental_df,
    rental_geo_df,
    rental_neighborhood_df,
    rental_grp_nbd_df,
) = createSpatialData()

In [17]:
nbd_df = rental_neighborhood_df.copy()

In [18]:
nbd_df = nbd_df.rename(columns={'neighbourhood_cleansed':'neighbourhood_cleansed_count'})

In [19]:
nbd_df = nbd_df.reset_index()

In [20]:
nbd_df

Unnamed: 0,neighbourhood_cleansed,latitude,longitude,neighbourhood_group_cleansed,glow_marker_color,neighbourhood_cleansed_count,nbd_count_normalized
0,Adams,47.670429,-122.388414,Ballard,#befdb7,94,13.112392
1,Alki,47.575644,-122.407309,West Seattle,#ff3f03,61,8.357349
2,Arbor Heights,47.507985,-122.379471,West Seattle,#ff3f03,15,1.729107
3,Atlantic,47.594974,-122.304758,Central Area,#FE019A,51,6.916427
4,Belltown,47.615181,-122.343182,Downtown,#ff6ec7,350,50.000000
...,...,...,...,...,...,...,...
83,West Woodland,47.668545,-122.366168,Ballard,#befdb7,43,5.763689
84,Westlake,47.635988,-122.341188,Cascade,#a60000,24,3.025937
85,Whittier Heights,47.681627,-122.372244,Ballard,#befdb7,29,3.746398
86,Windermere,47.669335,-122.271453,Other neighborhoods,#f3cc03,11,1.152738


In [21]:
def saveNeighborhoodMapHTML(nbd):
    """
    Call this function to create the HTML of the neighborhood map
    Input : Neighborhood group 
    Returns : a filename of the folium map created and saved 
    """
    # TBD Add an all and then use All or nbd to filter the dataframe
    if nbd == "All":
        filtered_df = rental_neighborhood_df.copy()
    else:
        filtered_df = rental_neighborhood_df[
            rental_neighborhood_df["neighbourhood_group_cleansed"] == nbd
        ].copy()
        
    this_map = createRentalMap(filtered_df, False, "neighbourhood")
    filename = (
        "assets/Maps/TestNeighborhoodCountMap.html"
        if nbd == "All"
        else f"assets/Maps/TestNeighborhoodCountMap-{nbd}.html"
    )

    # Make a choropleth
    # Use the provided geojson file
    nbd_geo_file = r"data/neighbourhoods.geojson"
    filtered_df = filtered_df.rename(columns={'neighbourhood_cleansed':'neighbourhood_cleansed_count'})    
    filtered_df = filtered_df.reset_index()
    
    #Colors allowed : https://github.com/dsc/colorbrewer-python
    this_map.choropleth(
        geo_data=nbd_geo_file,
        data=filtered_df,
        columns=["neighbourhood_cleansed", "neighbourhood_cleansed_count"],
        key_on="feature.properties.neighbourhood",
        fill_color="Greys",
        fill_opacity=0.6,
        line_opacity=1,
    )

    this_map.save(filename)
    return this_map



In [22]:
saveNeighborhoodMapHTML('All')


The choropleth  method has been deprecated. Instead use the new Choropleth class, which has the same arguments. See the example notebook 'GeoJSON_and_choropleth' for how to do this.



In [23]:
filtered_df

NameError: name 'filtered_df' is not defined

In [None]:
#https://stackoverflow.com/questions/59857949/how-to-add-cluster-markers-to-choropleth-with-folium
    
import folium
nbd_geo_file = r'data/neighbourhoods.geojson'
this_map = folium.Map(
        location=[nbd_df["latitude"].mean(), nbd_df["longitude"].mean()],
        tiles="CartoDB positron",
           zoom_start=13,
    )
this_map.choropleth(
    geo_data = nbd_geo_file,
    data=nbd_df,
    columns=['neighbourhood_cleansed', 'neighbourhood_cleansed_count'],
    key_on='feature.properties.neighbourhood',
    fill_color='OrRd',
    fill_opacity= 0.2,
    line_opacity=1
)
    

folium.LayerControl().add_to(this_map)

this_map