<a href="https://colab.research.google.com/github/milver/Experiments/blob/main/TestBiomassStreaming.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Testing AGBD dataset streaming from HuggingFace

## Install necessary libraries

In [1]:
!pip install datasets leafmap geopandas

Collecting datasets
  Downloading datasets-2.20.0-py3-none-any.whl (547 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m547.8/547.8 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting leafmap
  Downloading leafmap-0.35.9-py2.py3-none-any.whl (1.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m11.6 MB/s[0m eta [36m0:00:00[0m
Collecting pyarrow>=15.0.0 (from datasets)
  Downloading pyarrow-16.1.0-cp310-cp310-manylinux_2_28_x86_64.whl (40.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.8/40.8 MB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m9.7 MB/s[0m eta [36m0:00:00[0m
Collecting requests>=2.32.2 (from datasets)
  Downloading requests-2.32.3-py3-none-any.whl (64 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━

## Import necessary libraries

In [5]:
from datasets import load_dataset
import leafmap
import geopandas as gpd
from shapely.geometry import Point
import ipywidgets as widgets

## Functions for the streaming map

In [10]:
# Function to filter dataset based on map bounds
def filter_dataset(bounds, dataset, max_samples=None):
    """Collect the samples of ``dataset`` that lie inside a bounding box.

    Args:
        bounds: Mapping with 'south', 'north', 'west' and 'east' keys giving
            the inclusive latitude/longitude limits of the box.
        dataset: Iterable of samples, each indexable by 'latitude',
            'longitude' and 'biomass'.
        max_samples: Optional cap on how many samples are read from
            ``dataset`` before stopping. ``None`` (the default) reads the
            whole iterable, which can take very long on a streamed dataset.

    Returns:
        dict with three parallel lists under the keys 'latitude',
        'longitude' and 'biomass', one entry per sample inside the box.

    Raises:
        KeyError: if ``bounds`` lacks one of the four keys.
        ValueError: if ``max_samples`` is negative (raised by ``islice``).
    """
    from itertools import islice

    # Look the limits up once instead of on every iteration.
    south, north = bounds['south'], bounds['north']
    west, east = bounds['west'], bounds['east']

    filtered_data = {'latitude': [], 'longitude': [], 'biomass': []}
    # islice stops after max_samples items without pulling an extra one from
    # the underlying iterator; islice(dataset, None) reads everything.
    for sample in islice(dataset, max_samples):
        lat = sample['latitude']
        lon = sample['longitude']
        if south <= lat <= north and west <= lon <= east:
            filtered_data['latitude'].append(lat)
            filtered_data['longitude'].append(lon)
            filtered_data['biomass'].append(sample['biomass'])
    return filtered_data

# Function to update the map based on current bounds
def update_map(m, dataset):
    """Add the samples inside the map's current bounds as a point layer.

    Args:
        m: Map object providing ``get_bounds()`` and ``add_gdf()``.
        dataset: Iterable of samples, passed through to ``filter_dataset``.

    Returns:
        None. The layer is added to ``m`` under the name 'Biomass Data'.
    """
    # NOTE(review): filter_dataset indexes the bounds by the keys
    # 'south'/'north'/'west'/'east' - confirm that m.get_bounds() returns a
    # mapping with those keys rather than a list or tuple.
    bounds = m.get_bounds()
    filtered_data = filter_dataset(bounds, dataset)
    geometry = [Point(xy) for xy in zip(filtered_data['longitude'], filtered_data['latitude'])]
    # Points are (longitude, latitude) pairs, so declare the CRS explicitly
    # instead of leaving the frame without one.
    # Assumes the dataset's coordinates are WGS84 degrees - TODO confirm.
    gdf = gpd.GeoDataFrame(
        {'biomass': filtered_data['biomass']},
        geometry=geometry,
        crs="EPSG:4326",
    )
    m.add_gdf(gdf, layer_name='Biomass Data', zoom_to_layer=False)

## Load the dataset

In [13]:
# Load the "train" split in streaming mode. Passing split= returns that
# split directly instead of building the dict of all splits and indexing it.
dataset = load_dataset("prs-eth/AGBD_15", split="train", streaming=True)

Downloading readme:   0%|          | 0.00/1.89k [00:00<?, ?B/s]

Resolving data files:   0%|          | 0/461 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/125 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/153 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/461 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/125 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/153 [00:00<?, ?it/s]

## Initialize and display the map

In [None]:
# Create the Leafmap map at its initial centre and zoom level
MAP_CENTER = (39.0170445, -77.5782395)
MAP_ZOOM = 13
m = leafmap.Map(center=MAP_CENTER, zoom=MAP_ZOOM)


def _on_update_clicked(_button):
    """Button callback: run update_map for the current map and dataset."""
    update_map(m, dataset)


# Hook the callback up to an "Update Map" button
update_button = widgets.Button(description="Update Map")
update_button.on_click(_on_update_clicked)

# Show the button, then the map as the cell's final expression
display(update_button)
m