<a href="https://colab.research.google.com/github/sca-opdir/sentinel-data/blob/main/query_florid.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Log in to Kaggle: some of the Kaggle data sources used by this notebook
# are private.
# Run this cell first so that the data-source import cell below can access
# them.

import kagglehub

kagglehub.login()


In [None]:
# Import the Kaggle data sources used by this notebook. Run this cell once;
# it can be deleted afterwards as long as nothing else relies on
# `mzufferey_sentinel_path`.
# NOTE: this notebook environment differs from Kaggle's Python environment,
# so libraries used by the notebook may be missing.

# Fetch the 'mzufferey/sentinel' dataset. The returned value is presumably the
# local location of the downloaded files — verify against the kagglehub docs.
mzufferey_sentinel_path = kagglehub.dataset_download('mzufferey/sentinel')

print('Data source import complete.')


In [None]:
import pandas as pd

import requests

import json

import os

from concurrent.futures import ThreadPoolExecutor

from queue import Queue

import threading

import csv

from datetime import datetime



# CPU count as reported by the operating system.
num_threads = os.cpu_count()

# Name of the CSV file that receives the results.
output_file = "output_results_1000.csv"

# Delete any copy of the results file already present in the Kaggle working
# directory.
kagout_file = f"/kaggle/working/{output_file}"
if os.path.exists(kagout_file):
    os.remove(kagout_file)

# Show the current local time in the cell output.
print(f"{datetime.now():%Y-%m-%d %H:%M:%S}", "\n")

2024-11-19 09:24:13 



In [None]:
# Locate and load the two input workbooks.
#
# On Kaggle the dataset is available under /kaggle/input/sentinel. In other
# environments (e.g. Colab) that directory does not exist, so fall back to the
# location returned by kagglehub.dataset_download() in the data-source import
# cell instead of failing with FileNotFoundError.
# NOTE(review): the fallback assumes the downloaded dataset uses the same file
# names as the Kaggle mount — confirm when running outside Kaggle.
#
# Alternative source kept for reference (GitHub copies of the workbooks):
#   https://github.com/sca-opdir/sentinel-data/raw/refs/heads/main/central_points_avecgeom.xlsx
#   https://github.com/sca-opdir/sentinel-data/raw/refs/heads/main/liste_esp%C3%A8ces.xlsx
# e.g. pd.read_excel(central_points_url, engine="openpyxl")
kaggle_input_dir = "/kaggle/input/sentinel"
if os.path.isdir(kaggle_input_dir):
    sentinel_data_dir = kaggle_input_dir
else:
    # Only evaluated off Kaggle; requires the data-source import cell to have run.
    sentinel_data_dir = mzufferey_sentinel_path

central_points_path = os.path.join(sentinel_data_dir, "central_points_avecgeom.xlsx")
liste_especes_path = os.path.join(sentinel_data_dir, "liste_espces.xlsx")

df_central_points = pd.read_excel(central_points_path)
df_liste_especes = pd.read_excel(liste_especes_path)

In [None]:
# Query the FlorID API for one location
def get_florID_resp(latcoord, loncoord, ntaxon, reqtaxons,
                    reqdate="2024-06-20", apiurl="https://speciesid.wsl.ch/florid",
                    timeout=60, session=None):
    """POST one query to the FlorID service and return its requested-taxa payload.

    Args:
        latcoord: Value sent as the ``lat`` field of the request.
        loncoord: Value sent as the ``lon`` field of the request.
        ntaxon: Value sent as ``num_taxon_ids``.
        reqtaxons: Value sent as ``req_taxon_ids``.
        reqdate: Value sent as ``date``.
        apiurl: URL of the endpoint to POST to.
        timeout: Timeout in seconds forwarded to the HTTP call, so that a
            stalled connection cannot block a worker forever. Pass ``None``
            to wait indefinitely (the behaviour before this parameter existed).
        session: Optional object exposing a requests-compatible ``post``
            method (for example a ``requests.Session``, to reuse connections
            across many calls). When omitted, the module-level ``requests``
            API is used.

    Returns:
        A dict with keys ``center_x`` (``loncoord``), ``center_y``
        (``latcoord``) and ``req_taxa`` (the ``requested_taxa`` entry of the
        JSON response), or ``None`` when the HTTP status code is not 200.

    Raises:
        Exceptions raised by the HTTP client (connection errors, timeouts)
        and by JSON decoding propagate to the caller, as does ``KeyError``
        when a 200 response has no ``requested_taxa`` entry.
    """
    http = requests if session is None else session

    headers = {
        "accept": "*/*",
        "Content-Type": "application/json"
    }
    payload = {
        "date": reqdate,
        "lat": latcoord,
        "lon": loncoord,
        "num_taxon_ids": ntaxon,
        "req_taxon_ids": reqtaxons
    }

    response = http.post(apiurl, headers=headers, json=payload, timeout=timeout)

    # Any non-200 answer is reported to the caller as "no result".
    if response.status_code != 200:
        return None

    response_dict = response.json()
    return {
        "center_x": loncoord,
        "center_y": latcoord,
        "req_taxa": response_dict['requested_taxa']
    }



# Process one point and push its result row onto the queue
def process_point(ipoint, output_queue):
    """Query the API for point ``ipoint`` and enqueue one result row.

    The point's ``center_y``, ``center_x`` and ``polygon_coords`` are read
    from ``df_central_points`` and the taxon ids from ``df_liste_especes``.
    When the API call yields a truthy response, a dict holding the point's
    coordinates, its polygon and one ``"<id> <name>"`` entry per returned
    taxon (valued with its ``ecological_model`` entry) is put on
    ``output_queue``. Otherwise nothing is enqueued.
    """
    lat = df_central_points.center_y[ipoint]
    lon = df_central_points.center_x[ipoint]
    polygon = df_central_points.polygon_coords[ipoint]

    reply = get_florID_resp(
        latcoord=lat,
        loncoord=lon,
        ntaxon=5,
        reqtaxons=list(df_liste_especes.taxon_id)
    )

    # No usable response: skip this point.
    if not reply:
        return

    record = {
        "center_x": lon,
        "center_y": lat,
        "polygon_coords": polygon
    }
    taxa = reply['req_taxa']
    for position, taxon_id in enumerate(taxa['id']):
        taxon_name = taxa['name'][position]
        model_value = taxa['ecological_model'][position]
        record[f"{taxon_id} {taxon_name}"] = model_value

    # Hand the finished row to the writer via the queue.
    output_queue.put(record)



# Write queued rows to a CSV file as they arrive
def writer(output_queue, output_file):
    """Drain ``output_queue`` into ``output_file`` as CSV rows.

    Blocks on the queue and writes each dict it receives until a ``None``
    item arrives, which ends the loop. The CSV header is taken from the keys
    of the first row received. The file is opened in write mode, so it is
    created (or truncated) even if no row ever arrives.
    """
    with open(output_file, mode="w", newline="", encoding="utf-8") as csvfile:
        csv_out = None  # created lazily: the columns are only known once a row arrives
        while True:
            row = output_queue.get()
            if row is None:  # end-of-stream marker
                return
            if csv_out is None:
                # First row: build the DictWriter and emit the header.
                csv_out = csv.DictWriter(csvfile, fieldnames=row.keys())
                csv_out.writeheader()
            csv_out.writerow(row)




In [None]:
# Queue carrying result rows from the worker threads to the CSV writer
output_queue = Queue()

# Start the consumer thread that writes results to the CSV file
writer_thread = threading.Thread(target=writer, args=(output_queue, output_file))
writer_thread.start()

try:
    # Run the per-point tasks in parallel.
    # range(1) only processes the first point; use
    # range(df_central_points.shape[0]) to process every point.
    with ThreadPoolExecutor(max_workers=4) as executor:
        # submit() keeps one future per point so failures can be inspected
        # afterwards; an unconsumed executor.map() silently discards them.
        point_futures = {
            ipoint: executor.submit(process_point, ipoint, output_queue)
            for ipoint in range(1)
        }
finally:
    # Always send the end-of-stream marker and wait for the writer, even if
    # the cell is interrupted; otherwise the writer thread stays blocked on
    # the queue and keeps the CSV file open.
    output_queue.put(None)
    writer_thread.join()

# Report the points whose task raised instead of dropping them silently.
for ipoint, future in point_futures.items():
    error = future.exception()
    if error is not None:
        print(f"Échec du point {ipoint}: {error!r}")

print("Traitement terminé et fichier CSV créé.")


Traitement terminé et fichier CSV créé.


In [None]:
# Print the current local time again, after the processing cell has finished.
print(datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "\n")

2024-11-19 09:37:57 

