In [8]:
import agrigee_lite as agl
import ee
import os

Earth Engine initialized successfully using AgriGEE.lite for academic usage (project=ee-mateuspsilva).


In [6]:
# Restart the aria2c RPC daemon. The kill and the start run sequentially (`;`): with `&`
# the killall ran in the background and could terminate the daemon being started.
# -q silences "no process found", -w waits until the old daemon has actually exited.
!killall -q -w aria2c; aria2c --daemon=true --enable-rpc -j50 -x1 -s1 --max-tries=3 --retry-wait=10 --continue=true
! # --auto-file-renaming=false

Terminated


In [15]:
# Start an aria2c daemon with the RPC interface enabled: up to 50 concurrent downloads (-j50),
# one connection per server and one split per file (-x1 -s1), 3 tries with a 10 s retry wait.
!aria2c --daemon=true --enable-rpc -j50 -x1 -s1 --max-tries=3 --retry-wait=10 --continue=true

In [1]:
import aria2p

# Client for the aria2 RPC endpoint on localhost:6800 (no RPC secret configured).
# Later cells use this module-level `aria2` object to queue and inspect downloads.
aria2 = aria2p.API(aria2p.Client(host="http://localhost", port=6800, secret=""))

In [6]:
# Status of the last entry in the download list; raises IndexError when the list is empty.
aria2.get_downloads()[-1].status

'error'

In [9]:
# The whole "WRI/GPPD/power_plants" FeatureCollection (no country filter is applied in this cell).
fc = ee.FeatureCollection("WRI/GPPD/power_plants")

# Ask Earth Engine for a CSV download URL covering every property of the collection.
download_url = fc.getDownloadURL(
    filetype="CSV",
    # selectors=["capacitymw", "fuel1"],
    filename="aaa",
)
print("URL for downloading FeatureCollection as CSV:", download_url)

URL for downloading FeatureCollection as CSV: https://earthengine-highvolume.googleapis.com/v1/projects/ee-mateuspsilva/tables/802d0af813bc5f9299fa142878374990-70ec37c81c5325a69662889d9e31ac00:getFeatures


In [17]:
# List every download the aria2 daemon currently reports (empty right after a daemon restart).
aria2.get_downloads()

[]

In [None]:
# Leftover inspection: evaluates to the Stats class object itself, nothing is computed.
aria2p.stats.Stats

In [10]:
# Queue the URL in aria2. The target directory is passed as an absolute path because it is
# resolved by the aria2 daemon; it is derived from the notebook's working directory instead
# of a machine-specific home path.
download_a = aria2.add_uris([download_url], options={"dir": os.path.abspath("aatesturl")})

In [11]:
# Status string reported for the download queued above (e.g. 'active', 'error').
download_a.status

'active'

In [None]:
# Create the scratch download directory; exist_ok keeps the cell re-runnable.
os.makedirs("aatesturl", exist_ok=True)

In [None]:
# Daemon variant: 10 concurrent downloads, up to 5 tries, give up after 5 "file not found"
# responses, and never auto-rename an output file that already exists.
!aria2c --daemon=true --enable-rpc -j10 -x1 -s1 --max-tries=5 --max-file-not-found=5 --retry-wait=10 --continue=true --auto-file-renaming=false

In [5]:
# Raw global-statistics mapping from aria2 (all values are strings).
# NOTE: `_struct` is a private attribute of the stats object.
aria2.get_stats()._struct

{'downloadSpeed': '0',
 'numActive': '0',
 'numStopped': '667',
 'numStoppedTotal': '667',
 'numWaiting': '0',
 'uploadSpeed': '0'}

In [None]:
import concurrent.futures
import getpass
import json
import logging
import logging.handlers
import pathlib
import queue
import time
from functools import partial

import ee
import geopandas as gpd
import numpy as np
import pandas as pd
import pandera as pa
from shapely import Polygon
from smart_open import open  # noqa: A004
from tqdm.std import tqdm

from agrigee_lite.ee_utils import ee_gdf_to_feature_collection, ee_get_tasks_status
from agrigee_lite.misc import (
    add_indexnum_column,
    create_gdf_hash,
    log_dict_function_call_summary,
    quadtree_clustering,
    remove_underscore_in_df,
)
from agrigee_lite.sat.abstract_satellite import AbstractSatellite
from agrigee_lite.task_manager import GEETaskManager


def build_chunk_download_urls(
    gdf,
    satellite,
    reducers: list[str] | None = None,
    subsampling_max_pixels: float = 1_000,
    chunksize: int = 100,
    max_parallel_downloads: int = 50,
) -> list[str]:

    schema = pa.DataFrameSchema({
        "geometry": pa.Column("geometry", nullable=False),
        "start_date": pa.Column(
            pa.DateTime,
            nullable=False,
        ),
        "end_date": pa.Column(
            pa.DateTime,
            nullable=False,
        ),
    })
    schema.validate(gdf, lazy=True)

    if len(gdf) == 0:
        return []

    gdf = gdf.copy()
    add_indexnum_column(gdf)
    gdf = quadtree_clustering(gdf, max_size=1000)
    hashname = create_gdf_hash(gdf)

    output_path = pathlib.Path("data/temp") / "aria2" / f"{satellite.shortName}_{hashname}_{chunksize}"
    output_path.mkdir(parents=True, exist_ok=True)

    num_chunks = (len(gdf) + chunksize - 1) // chunksize

    total_rows = len(gdf)
    pbar = tqdm(total=total_rows, desc="Building download URLs", unit="feature", smoothing=0)

    for i in range(num_chunks):

        if not (output_path / f"{i}.csv").exists():

            while (int(aria2.get_stats()._struct['numActive']) + int(aria2.get_stats()._struct['numWaiting'])) >= max_parallel_downloads:
                time.sleep(5)

            sub = gdf.iloc[i * chunksize : (i + 1) * chunksize]

            fc = ee_gdf_to_feature_collection(sub)
            ee_expression = ee.FeatureCollection(
                fc.map(
                    partial(
                        satellite.compute,
                        reducers=reducers,
                        subsampling_max_pixels=subsampling_max_pixels,
                    )
                )
            ).flatten()


            url = ee_expression.getDownloadURL(
                filetype="csv",
                selectors=[
                    "00_indexnum",
                    "01_timestamp",
                    *[numeral_band_name for _, numeral_band_name in satellite.selectedBands],
                    *[numeral_indice_name for _, _, numeral_indice_name in satellite.selectedIndices],
                    "99_validPixelsCount",
                ],
                filename=f"{i}",
            )
            aria2.add_uris([url], options={"dir": str(output_path.absolute()) + "/"})

        stopped = int(aria2.get_stats()._struct.get("numStopped", "0"))
        pbar.set_postfix(error_downloads=stopped)
        pbar.update(min(chunksize, total_rows - i * chunksize))

    pbar.close()

In [8]:
# Load the crop polygons.
gdf = gpd.read_parquet("data/mt_crops.parquet")

# Each row's window runs from 1 October of the year before `year` to 1 October of `year`.
gdf["start_date"] = pd.to_datetime(gdf["year"].map(lambda year: f"{year - 1}-10-01"))
gdf["end_date"] = pd.to_datetime(gdf["year"].map(lambda year: f"{year}-10-01"))

In [9]:
# Keep a reproducible random subset of 20,000 rows (seed 13) with a fresh 0..n-1 index.
# NOTE: this overwrites `gdf`, so re-running the cell samples from the already-sampled frame.
gdf = gdf.sample(20000, random_state=13).reset_index(drop=True)

In [10]:
# Sentinel-2 satellite definition with use_sr=False
# (presumably top-of-atmosphere instead of surface reflectance — TODO confirm).
s2_sat = agl.sat.Sentinel2(use_sr=False)

In [18]:
# Bind the result instead of discarding it: the cells below read `urls`.
urls = build_chunk_download_urls(gdf, s2_sat, chunksize=10)

Building download URLs:   0%|          | 0/20000 [00:40<?, ?feature/s]
Simplifying clusters: 100%|██████████| 32/32 [00:02<00:00, 14.74it/s]
Building download URLs: 100%|██████████| 20000/20000 [31:51<00:00, 10.46feature/s]


In [None]:
# Inspect the URL list (the output can be very long for many chunks).
urls

In [None]:
def salvar_urls_txt(urls: list[str], caminho_arquivo: str = "urls.txt") -> str:
    """
    Write a list of URLs to a text file, one per line, for use as aria2 input.

    Args:
        urls (list[str]): Download URLs; surrounding whitespace is stripped from each.
        caminho_arquivo (str): Path of the output file (overwritten if it exists).

    Returns:
        str: Path of the file that was written.
    """
    with open(caminho_arquivo, "w", encoding="utf-8") as destino:
        destino.writelines(f"{endereco.strip()}\n" for endereco in urls)
    return caminho_arquivo

In [None]:
# Write the URLs to the default "urls.txt" in the working directory.
salvar_urls_txt(urls)

In [None]:
# Fetch the same rows through the library's own downloader, forcing a re-download
# (presumably as a baseline for the aria2-based path — TODO confirm).
agl.get.multiple_sits(gdf, s2_sat, force_redownload=True)

In [None]:
# The whole "WRI/GPPD/power_plants" FeatureCollection (no country filter is applied in this cell).
fc = ee.FeatureCollection("WRI/GPPD/power_plants")

# Ask Earth Engine for a KMZ download URL restricted to two properties.
download_url = fc.getDownloadURL(
    filetype="kmz",
    selectors=["capacitymw", "fuel1"],
    filename="belgian_power_plants_sel",
)
print("URL for downloading FeatureCollection as KMZ:", download_url)

In [None]:
# Load one chunk result from a temp directory; the hash in the path identifies one specific
# input frame, so this file only exists after the matching run.
df = pd.read_parquet("data/temp/s2sr_d0d42d6ccada0d6fefe7e383eb9b58d53a971efb_10000/1.parquet")

In [None]:
# Display the loaded chunk.
df