In [1]:
import pickle
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

import geopandas as gpd
from shapely.geometry import Polygon
from shapely.ops import nearest_points

from ipyleaflet import Map, GeoJSON, WidgetControl
from ipywidgets import IntSlider, VBox, HTML, Label, Layout
from sidecar import Sidecar
import ipywidgets as widgets
import branca.colormap as cm
from branca.colormap import LinearColormap
import voila

# Birdwatching Activity in Massachusetts (2022)

## Overview
These interactive maps provide an overview of birdwatching activities across Massachusetts in 2022, using eBird data. The state is segmented into 400+ grid cells, each spanning 8km x 8km. A slider allows users to explore birding patterns in different months. Users can click on a cell to view the metric values and list of hotspots in that area.


## Methodology
### Metrics
We derive 4 metrics to measure the species diversity and popularity of birding in each area:
- Species diversity metrics:
    - Median Number of Species per Checklist (median_num_species): the typical (median) number of species recorded on a checklist in the area in a month
    - Number of Rare Species (unique_rarebirds): the count of distinct rare or uncommon species spotted in the area in a month

- Popularity metrics:
    - Total Number of Checklists (num_checklists): represents the aggregate of checklists submitted, highlighting activity levels.
    - Total Number of Unique Checklist Submitters (unique_observers): shows the diversity in observer participation.

Utilizing these metrics, we derive two scores:
- Popularity Score: A weighted sum of num_checklists (weight 0.4) and unique_observers (weight 0.6).
- Diversity Score: A weighted sum of median_num_species (weight 0.4) and the number of rare species (weight 0.6).

### Area categorization
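Each grid cell is classified, month by month, into one of five categories based on the Popularity and Diversity scores. A score counts as "high" when it is at or above that month's median across the grid cells that have checklists, and "low" otherwise: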
- Underbirded: High diversity with lower popularity.
- Heavily Birded: High popularity but lower diversity.
- Popular: Both high popularity and diversity.
- Low Interest: Low scores in both popularity and diversity.
- Not Enough Info: Areas with fewer than 3 checklists for the month.

## Interactive Maps
### Category Map

In [48]:
# Load the grid object from its pickle file. Later cells read its `geometry`
# and merge it on 'grid_id', so it is presumably a GeoDataFrame of grid cells.
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# load files that were produced by this project.
with open('final_grid8.pkl', 'rb') as file:
    final_grid = pickle.load(file)
# Not the last expression of the cell, so this preview is not displayed.
final_grid.head(2)

# Load the monthly checklist container. Later cells index it by month number
# (1-12) and iterate over `.values()`, so it is presumably a dict of DataFrames.
with open('monthly_checklists_1129.pkl', 'rb') as file:
    loaded_monthly_checklists = pickle.load(file)

In [49]:
# Define a function to categorize each grid
def categorize_grid(row, median_popular_score, median_diverse_score):
    """Assign a birding category to one grid cell.

    Parameters
    ----------
    row : mapping
        Anything indexable by 'popular_score' and 'diverse_score'
        (a DataFrame row or a plain dict).
    median_popular_score, median_diverse_score : float
        Thresholds; a score at or above its threshold counts as "high".

    Returns
    -------
    str
        'Popular' (both high), 'Heavily birded' (popular only),
        'Underbirded' (diverse only), 'Low interest' (neither), or
        'Not enough info' when a score or a threshold is missing.
    """
    popular_score = row['popular_score']
    diverse_score = row['diverse_score']

    # A NaN threshold makes every comparison False; without this guard the
    # function would fall through and return None (or mislabel the cell).
    needed = (popular_score, diverse_score, median_popular_score, median_diverse_score)
    if any(pd.isnull(value) for value in needed):
        return 'Not enough info'

    is_popular = popular_score >= median_popular_score
    is_diverse = diverse_score >= median_diverse_score
    if is_popular:
        return 'Popular' if is_diverse else 'Heavily birded'
    return 'Underbirded' if is_diverse else 'Low interest'

def _count_unique_tokens(checklist, column, out_name):
    """Count distinct comma-separated tokens of `column` per grid_id.

    Tokens are stripped of surrounding whitespace, and empty tokens (from ''
    or a trailing comma) are ignored so they are not counted as an extra
    observer or species. Returns a DataFrame with columns
    ['grid_id', out_name].
    """
    exploded = (
        checklist[['grid_id']]
        .assign(token=checklist[column].str.split(','))
        .explode('token')
        .reset_index(drop=True)  # explode repeats index labels; make them unique
    )
    cleaned = exploded['token'].str.strip()
    exploded['token'] = cleaned.where(cleaned != '')  # '' -> NaN, skipped by nunique
    return exploded.groupby('grid_id')['token'].nunique().reset_index(name=out_name)


def getMonthData(loaded_monthly_checklists, month, final_grid, min_checklists=3):
    """Build the per-grid metrics, scores and category for one month.

    Parameters
    ----------
    loaded_monthly_checklists : mapping
        Indexed by month; each value is a checklist DataFrame with the columns
        'grid_id', 'num_species', 'observer_id' and 'rare_birds' (the last two
        hold comma-separated strings).
    month : hashable
        Key into `loaded_monthly_checklists`.
    final_grid : DataFrame
        One row per grid cell; must contain 'grid_id'. Every one of its rows
        is kept in the result (right merge).
    min_checklists : int, default 3
        Grids with fewer checklists than this get null scores and the
        category 'Not enough info'.

    Returns
    -------
    DataFrame
        `final_grid` columns plus num_checklists, median_num_species,
        unique_observers, unique_rarebirds, popular_score, diverse_score and
        category. Grids without any checklist have NaN metrics.
    """
    # Score weights: 0.4 on the first metric, 0.6 on the second.
    weight_first, weight_second = 0.4, 0.6

    checklist = loaded_monthly_checklists[month]
    per_grid = checklist.groupby('grid_id')

    median_species_df = per_grid['num_species'].median().reset_index(name='median_num_species')
    total_checklist_df = per_grid.size().reset_index(name='num_checklists')
    too_few = total_checklist_df['num_checklists'] < min_checklists
    insufficient_grids = total_checklist_df.loc[too_few, 'grid_id'].unique()

    unique_observer_df = _count_unique_tokens(checklist, 'observer_id', 'unique_observers')
    unique_rarebird_df = _count_unique_tokens(checklist, 'rare_birds', 'unique_rarebirds')
    merged = (
        total_checklist_df
        .merge(median_species_df, on='grid_id')
        .merge(unique_observer_df, on='grid_id')
        .merge(unique_rarebird_df, on='grid_id')
    )

    merged['popular_score'] = weight_first*merged['num_checklists'] + weight_second*merged['unique_observers']
    merged['diverse_score'] = weight_first*merged['median_num_species'] + weight_second*merged['unique_rarebirds']
    # Medians are taken over grids that have checklists, before the right
    # merge below adds the grids without any data.
    median_popular_score = merged['popular_score'].median()
    median_diverse_score = merged['diverse_score'].median()

    merged = merged.merge(final_grid, on='grid_id', how='right')
    merged.loc[merged['grid_id'].isin(insufficient_grids), ['popular_score', 'diverse_score']] = None
    # Null scores make categorize_grid return 'Not enough info', so the
    # insufficient grids need no separate per-row membership test.
    merged['category'] = merged.apply(
        categorize_grid,
        axis=1,
        args=(median_popular_score, median_diverse_score),
    )
    return merged


# Map a category label to its fill colour
def get_color_category(category):
    """Return the hex fill colour for a grid category label.

    Labels that are not one of the five known categories get a purple
    fallback colour.
    """
    fallback = '#9e83e2'  # purple
    palette = {
        'Not enough info': '#999999',
        'Low interest': '#9fcaed',     # light blue
        'Popular': '#f57a8d',          # light red
        'Heavily birded': '#d22a1b',   # red
        'Underbirded': '#41d232',      # green
    }
    if category in palette:
        return palette[category]
    return fallback

def compute_unique_localities(loaded_monthly_checklists):
    """Collect the distinct hotspot names seen in each grid cell.

    Parameters
    ----------
    loaded_monthly_checklists : mapping
        Values are checklist DataFrames with the columns 'grid_id',
        'locality_type' and 'locality'.

    Returns
    -------
    dict
        Maps grid_id to the set of 'locality' values taken from rows whose
        'locality_type' is "H", pooled over every month. Rows with a missing
        grid_id or locality are skipped, because a null name would break the
        later '<br>'.join over these sets.
    """
    unique_localities = {}
    for monthly_data in loaded_monthly_checklists.values():
        # Filter with a boolean mask rather than testing rows one by one
        # through iterrows.
        is_hotspot = monthly_data['locality_type'] == "H"
        hotspot_rows = monthly_data.loc[is_hotspot, ['grid_id', 'locality']].dropna()
        for grid_id, locality in zip(hotspot_rows['grid_id'], hotspot_rows['locality']):
            unique_localities.setdefault(grid_id, set()).add(locality)
    return unique_localities
# Build the grid_id -> hotspot-name lookup once; the GeoJSON layer builders
# defined later read this module-level name when composing their popups.
unique_localities = compute_unique_localities(loaded_monthly_checklists)


def setup_base_map(mapTitle):
    """Create a map with an info box and a title label attached.

    The map is centred on the mean centroid of the module-level `final_grid`
    geometries. Returns `(map, info_box)`, where `info_box` is the HTML
    widget shown in the bottom-right corner of the map.
    """
    centroids = final_grid.geometry.centroid
    map_center = [centroids.y.mean(), centroids.x.mean()]
    leaflet_map = Map(center=map_center, zoom=8, layout={'height':'500px'})

    # Scrollable info box, filled in by the layers' click handlers
    info_box = HTML()
    info_box.layout = {'margin': '20px 20px 20px 20px', 'max_height': '200px', 'max_width': '300px', 'overflow': 'auto'}
    leaflet_map.add_control(WidgetControl(widget=info_box, position='bottomright', max_width=500))

    # Map title in the top-right corner
    heading = Label(mapTitle)
    leaflet_map.add_control(WidgetControl(widget=heading, position='topright', max_width=500))
    return leaflet_map, info_box

def create_slider(monthly_layers, on_slider_change):
    """Return a 1-12 month slider that calls `on_slider_change` on value changes.

    `monthly_layers` is accepted but not used here.
    """
    month_slider = IntSlider(min=1, max=12, value=1, description='Month:')
    month_slider.observe(on_slider_change, names='value')
    return month_slider


# Slider callback for a single map; also resets that map's info box
def on_slider_change(change, monthly_layers, base_map, tooltip):
    """Show only the selected month's layer and clear the tooltip.

    `change['new']` is the selected month (1-based position in
    `monthly_layers`). The selected layer is added to `base_map` the first
    time it is needed; the other layers are only flagged as not visible.
    """
    selected_month = change['new']
    for position, month_layer in enumerate(monthly_layers):
        month_layer.visible = (position + 1 == selected_month)
        if not month_layer.visible:
            continue
        # Layers are attached lazily, the first time their month is selected
        if month_layer not in base_map.layers:
            base_map.add_layer(month_layer)
    # The previous popup text belongs to the old month
    tooltip.value = ''


In [51]:
# Function to create GeoJSON layer with click events
def create_geojson_layer_category(df_month, tooltip):
    """Build the clickable category layer for one month.

    Parameters
    ----------
    df_month : DataFrame
        One row per grid cell with the columns 'grid_id', 'category',
        'num_checklists', 'unique_observers', 'unique_rarebirds',
        'median_num_species' and 'geometry'.
    tooltip : widget
        Object whose `value` attribute receives the popup HTML on click.

    Returns
    -------
    GeoJSON
        Layer coloured by category, with a click handler that writes the
        clicked cell's metrics and hotspot list into `tooltip`.

    Hotspot names are read from the module-level `unique_localities`. They
    are sorted so the popup is stable between runs (the lookup holds
    unordered sets) and HTML-escaped because they are free text interpolated
    into markup.
    """
    from html import escape  # local import: stdlib, used only for the popup text

    feature_collection = []

    def on_click(feature, **kwargs):
        properties = feature['properties']
        tooltip.value = f"""
        <style>
            .tooltip-content {{
                line-height: 1.2;
                font-size: 100%;
            }}
        </style>
        <div class="tooltip-content">
            {properties['category']}<br>
            <b># checklists:</b> {properties['num_checklists']}<br>
            <b>Median # species/checklist:</b> {properties['median_num_species']}<br>
            <b>Rare birds:</b> {properties['unique_rarebirds']}<br>
            <b># unique observers:</b> {properties['unique_observers']}<br>
            <b>Hotspots:</b><br>{properties['localities']}<br>
            Grid: {properties['grid_id']}
        </div>
        """
    for _, row in df_month.iterrows():
        grid_id = row['grid_id']
        hotspot_names = sorted(str(name) for name in unique_localities.get(grid_id, []))
        localities_list = '<br>'.join(escape(name) for name in hotspot_names)
        feature = {
            'type': 'Feature',
            'properties': {
                'category': row['category'],
                'num_checklists': row['num_checklists'],
                'unique_observers': row['unique_observers'],
                'unique_rarebirds': row['unique_rarebirds'],
                'median_num_species': row['median_num_species'],
                'localities': localities_list,
                'grid_id': grid_id
            },
            'geometry': row['geometry'].__geo_interface__
        }
        feature_collection.append(feature)

    # Define the style for each feature
    def set_style(feature):
        category = feature['properties']['category']
        return {
            'fillColor': get_color_category(category),
            'color': 'grey',  # Border color
            'weight': 0.1,
            'fillOpacity': 0.5
        }
    # Create the GeoJSON layer with the feature collection
    geojson_layer = GeoJSON(data={'type': 'FeatureCollection', 'features': feature_collection}, style_callback=set_style)
    # Attach the click event to the GeoJSON layer
    geojson_layer.on_click(on_click)
    return geojson_layer

# Base map, tooltip, title setup
base_map1, tooltip1 = setup_base_map(mapTitle = 'Area Categorization')

# Layer set up: build one category layer per month (1-12). All twelve are
# created up front; only January is attached to the map here, and
# on_slider_change attaches the others the first time they are selected.
monthly_layers1 = []
for month in range(1, 13):
    df_month = getMonthData(loaded_monthly_checklists, month, final_grid)
    month_layer = create_geojson_layer_category(df_month, tooltip1)
    if month == 1:
        month_layer.visible = True  # Only the first month is visible
        base_map1.add_layer(month_layer)  # Add only the first month's layer
    else:
        month_layer.visible = False
    monthly_layers1.append(month_layer)
    
# The lambda binds this map's layers, map and tooltip to the shared callback.
# NOTE(review): slider1 is both passed to add_control and shown in the VBox
# below -- confirm that both placements are intended.
slider1 = create_slider(monthly_layers1, lambda change: on_slider_change(change, monthly_layers1, base_map1, tooltip1))
base_map1.add_control(slider1)

# Display the map and slider using Sidecar
with Sidecar(title='Area Categorization'):
    display(VBox([slider1, base_map1]))

VBox(children=(IntSlider(value=1, description='Month:', max=12, min=1), Map(center=[42.204559492112374, -71.65…

### Individual Metric Maps

In [52]:
def get_color(value, global_min, global_max, colormap, color_scale_type='linear'):
    """Map a metric value to a colour on a [global_min, global_max] scale.

    Parameters
    ----------
    value, global_min, global_max : number
        Python or NumPy int/float scalars. Anything else, a NaN, or a
        negative number yields the "no data" grey.
    colormap : callable
        Called with the position of `value` on the scale as a fraction
        in [0, 1]; its return value is passed through unchanged.
    color_scale_type : str, default 'linear'
        'log' places values on a log(1 + x) scale; any other string is
        treated as linear.

    Returns
    -------
    The colormap's result, or '#b2b0b0' when the inputs are unusable, the
    range has zero width, the value falls outside the range, or the
    colormap raises.
    """
    no_data_color = '#b2b0b0'
    # NumPy integer scalars are not subclasses of int, so they must be listed
    # explicitly; otherwise integer-typed columns or bounds paint cells grey.
    numeric_types = (int, float, np.integer, np.floating)
    try:
        inputs = (value, global_min, global_max)
        # `x >= 0` is also False for NaN, which filters missing values.
        if not all(isinstance(x, numeric_types) and x >= 0 for x in inputs):
            return no_data_color

        if color_scale_type == 'log':
            # Inputs are non-negative here, so log(1 + x) is always defined.
            position, low, high = np.log1p(value), np.log1p(global_min), np.log1p(global_max)
        else:
            position, low, high = value, global_min, global_max

        span = high - low
        if span == 0:
            return no_data_color  # no range to place the value on
        fraction = (position - low) / span
        if not 0 <= fraction <= 1:
            return no_data_color
        return colormap(fraction)
    except Exception:
        # Deliberate best effort: one bad cell must not stop the layer from rendering.
        return no_data_color
    
def getMinMax(colName):
    """Return the (min, max) of `colName` across all twelve months.

    Each month's frame is rebuilt with getMonthData from the module-level
    `loaded_monthly_checklists` and `final_grid`; null entries are ignored.
    The running maximum starts at 0 and the running minimum at +inf.
    """
    lowest, highest = float('inf'), 0
    for month in range(1, 13):
        month_values = getMonthData(loaded_monthly_checklists, month, final_grid)[colName]
        present = month_values[month_values.notnull()]
        lowest = min(lowest, present.min())
        highest = max(highest, present.max())
    return lowest, highest


def create_on_click_function(col_name, tooltip_val, tooltip):
    """Build a click handler that writes one metric's popup into `tooltip`.

    Parameters
    ----------
    col_name : str
        Key of the metric inside each feature's 'properties'.
    tooltip_val : str
        HTML label printed in front of the metric value.
    tooltip : widget
        Object whose `value` attribute receives the popup HTML.

    Returns
    -------
    callable
        `on_click(feature, **kwargs)`. A missing metric (absent key, None,
        float NaN, or one of the strings 'nan', 'NaN', '', 'NA') is shown as
        "No checklist". For 'median_num_species' the checklist count is
        shown as well.
    """
    missing_markers = (None, 'nan', 'NaN', '', 'NA')

    def _is_missing(value):
        # A real float NaN is not equal to anything, so the membership test
        # alone would let it through and the popup would show "nan".
        if isinstance(value, float) and value != value:
            return True
        return value in missing_markers

    def on_click(feature, **kwargs):
        properties = feature['properties']
        col_value = properties.get(col_name, None)
        num_checklists = properties.get('num_checklists')
        if _is_missing(col_value):
            display_text = "No checklist<br>"
        else:
            display_text = f"{tooltip_val} {properties[col_name]}<br>"
            if col_name == 'median_num_species':
                display_text += f"<b># checklists:</b> {num_checklists}<br>"
        tooltip.value = f"""
        <style>
            .tooltip-content {{
                line-height: 1.2;
                font-size: 100%;
            }}
        </style>
        <div class="tooltip-content">
            {display_text}
            <b>Hotspots:</b><br>{properties['localities']}<br>
            Grid:{properties['grid_id']}
        </div>
        """
    return on_click
    
def feature_style(feature):
    """Return the style dict for a metric-map cell.

    The fill colour is read from the feature's own 'fillColor' property;
    border and opacity are the same for every cell.
    """
    fill = feature['properties']['fillColor']
    return dict(
        weight=0.2,
        color='grey',  # border colour
        fillColor=fill,
        fillOpacity=0.7,
    )
    
def create_geojson_layer(month, df_month, global_min, global_max, col_name, tooltip_val, tooltip, colormap, color_scale_type): 
    """Build the clickable single-metric layer for one month.

    Parameters
    ----------
    month : int
        Accepted for call-site symmetry; not used when building the layer.
    df_month : DataFrame
        One row per grid cell with 'grid_id', 'num_checklists', 'geometry'
        and the `col_name` column.
    global_min, global_max : number
        Colour-scale bounds handed to get_color.
    col_name : str
        Metric column to colour by and to show in the popup.
    tooltip_val : str
        HTML label for the metric in the popup.
    tooltip : widget
        Object whose `value` attribute receives the popup HTML on click.
    colormap, color_scale_type
        Passed through to get_color.

    Returns
    -------
    GeoJSON
        Layer styled by feature_style, with a click handler attached.

    Hotspot names are read from the module-level `unique_localities`. They
    are sorted so the popup is stable between runs (the lookup holds
    unordered sets) and HTML-escaped because they are free text interpolated
    into markup.
    """
    from html import escape  # local import: stdlib, used only for the popup text

    feature_collection = []
    for _, row in df_month.iterrows():
        grid_id = row['grid_id']
        num_checklists = row['num_checklists']
        hotspot_names = sorted(str(name) for name in unique_localities.get(grid_id, []))
        localities_list = '<br>'.join(escape(name) for name in hotspot_names)
        color = get_color(row[col_name], global_min, global_max, colormap, color_scale_type)
        feature = {
            'type': 'Feature',
            'properties': {
                col_name: row[col_name],
                'localities': localities_list,
                'grid_id':grid_id,
                'num_checklists': num_checklists,
                'fillColor': color 
            },
            'geometry': row['geometry'].__geo_interface__,
        }
        feature_collection.append(feature)

    geojson_layer = GeoJSON(data={'type': 'FeatureCollection', 'features': feature_collection},style_callback=feature_style)
    on_click_function = create_on_click_function(col_name, tooltip_val, tooltip)
    geojson_layer.on_click(on_click_function)
    return geojson_layer

def create_map(col_name, tooltip_val, mapTitle, colormap, color_scale_type):
    """Assemble one metric map with its twelve monthly layers.

    Returns `(slider, base_map, monthly_layers)`. Only the January layer is
    attached to the map and marked visible; the other layers are created
    hidden and left for the slider callback to attach.
    """
    base_map, tooltip = setup_base_map(mapTitle)
    # The scale spans the whole year so colours are comparable across months
    scale_min, scale_max = getMinMax(col_name)

    monthly_layers = []
    for month in range(1, 13):
        month_frame = getMonthData(loaded_monthly_checklists, month, final_grid)
        layer = create_geojson_layer(
            month, month_frame, scale_min, scale_max,
            col_name, tooltip_val, tooltip, colormap, color_scale_type,
        )
        is_first_month = (month == 1)
        layer.visible = is_first_month
        if is_first_month:
            base_map.add_layer(layer)
        monthly_layers.append(layer)

    def _handle_change(change):
        on_slider_change(change, monthly_layers, base_map, tooltip)

    slider = create_slider(monthly_layers, _handle_change)
    base_map.add_control(slider)
    return slider, base_map, monthly_layers

def on_shared_slider_change(change, base_mapU, base_mapD, monthly_layersU, monthly_layersD):
    """Slider callback that switches two maps to the same month.

    `change['new']` is the selected month (1-based position in each layer
    list). On each map the matching layer is flagged visible and attached if
    it is not attached yet; every other layer is flagged as not visible.
    Nothing is printed: the callback runs on every slider move.
    """
    current_month = change['new']
    map_layer_pairs = ((base_mapU, monthly_layersU), (base_mapD, monthly_layersD))
    for base_map, monthly_layers in map_layer_pairs:
        for month, layer in enumerate(monthly_layers, start=1):
            layer.visible = (month == current_month)
            if layer.visible and layer not in base_map.layers:
                base_map.add_layer(layer)


In [53]:
# Map 2: distinct rare species per grid cell (log colour scale)
col_name1, tooltip_val1, mapTitle1 = "unique_rarebirds", "<b># rare species:</b>", "Total Number of Rare Species"
colormap1 = cm.linear.PuRd_09
slider2, base_map2, monthly_layers2 = create_map(col_name1, tooltip_val1, mapTitle1, colormap1, color_scale_type='log')

# Map 3: median species per checklist, with a custom yellow-to-dark-red ramp
col_name2, tooltip_val2, mapTitle2 = "median_num_species", "<b>Median # species/checklist:</b>", "Median Number of Species per Checklist"
colormap2 = LinearColormap(
    ['#ffffcc',  # Light Yellow
     '#ffeda0',  # Pale Orange
     '#feb24c',  # Orange
     '#fd8d3c',  # Dark Orange
     '#fc4e2a',  # Red-Orange
     '#cc0000',  # Red
     '#660000'], # Dark Brown
    vmin=0, vmax=1,
    index=[0, 0.3, 0.45, 0.58, 0.68, 0.8, 0.9]  # Unevenly spaced colour stops: the first step is 0.3 wide and the last stop is 0.9, below vmax
)
slider3, base_map3, monthly_layers3 = create_map(col_name2, tooltip_val2, mapTitle2, colormap2, color_scale_type='log')

# Map 4: unique observers per grid cell (log colour scale)
col_name3, tooltip_val3, mapTitle3 = "unique_observers", "<b># Unique Observers:</b>", "Number of Unique Observers"
colormap3 = cm.linear.YlGnBu_09
slider4, base_map4, monthly_layers4 = create_map(col_name3, tooltip_val3, mapTitle3, colormap3, color_scale_type='log')

# Map 5: checklists per grid cell (log colour scale)
col_name4, tooltip_val4, mapTitle4 = "num_checklists", "<b># Checklists:</b>", "Number of Checklists" 
colormap4 = cm.linear.Purples_09
slider5, base_map5, monthly_layers5 = create_map(col_name4, tooltip_val4, mapTitle4, colormap4, color_scale_type='log')

In [54]:
def on_shared_slider_change4(change, base_map1, base_map2, base_map3, base_map4, layers1, layers2, layers3, layers4):
    """Slider callback that switches four maps to the same month.

    `layersN` belongs to `base_mapN`. On each map the layer at the selected
    1-based position becomes visible and is attached if it is not attached
    yet; all other layers are flagged as not visible.
    """
    selected_month = change['new']
    pairs = (
        (layers1, base_map1),
        (layers2, base_map2),
        (layers3, base_map3),
        (layers4, base_map4),
    )
    for layer_stack, target_map in pairs:
        for position, layer in enumerate(layer_stack):
            layer.visible = (position + 1 == selected_month)
            if not layer.visible:
                continue
            if layer not in target_map.layers:
                target_map.add_layer(layer)


# One slider drives all four metric maps
shared_slider4 = IntSlider(description='Month:', min=1, max=12, value=1)

# Attach the event handler to the slider with additional parameters for all four maps
shared_slider4.observe(lambda change: on_shared_slider_change4(change, base_map2, base_map3, base_map4, base_map5, monthly_layers2, monthly_layers3, monthly_layers4, monthly_layers5), names='value')

# NOTE(review): late import -- VBox is already imported in the first cell and
# HBox could join it there, so every import lives in one place.
from ipywidgets import HBox, VBox

# 2 x 2 grid: base_map2 and base_map4 on the top row, base_map3 and base_map5
# on the bottom row.
with Sidecar(title="Comparison 4"):
    display(VBox([
        shared_slider4, 
        HBox([base_map2, base_map4]), 
        HBox([base_map3, base_map5])
    ]))

VBox(children=(IntSlider(value=1, description='Month:', max=12, min=1), HBox(children=(Map(center=[42.20455949…

For any queries, please email: ds5500ebird@gmail.com
