In [37]:
# Project helper; the cell output shows it reports the former and the current working directory.
# NOTE(review): presumably it moves the working directory to the repository root — confirm in utils.
from utils import setup_env_path

setup_env_path()

# autoreload mode 2: re-import edited project modules automatically before running each cell.
%load_ext autoreload
%autoreload 2

Former working directory:  /Users/ambroisebertin/Desktop/prog/prog_abeilles/fil-rouge-pollinisateurs
Current working directory:  /Users/ambroisebertin/Desktop/prog/prog_abeilles/fil-rouge-pollinisateurs
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [77]:
import os
import warnings

import missingno as msno
import numpy as np
import pandas as pd
from tqdm import tqdm

In [39]:
# PlantNet API keys, tried one after the other by call_API when a request fails.
# Keys are secrets: they are read from the PLANTNET_API_KEYS environment variable
# (comma-separated list) instead of being committed inside the notebook.
# NOTE(review): any key that was previously committed in this notebook should be
# considered leaked and be rotated.
API_keys = [
    key.strip()
    for key in os.environ.get("PLANTNET_API_KEYS", "").split(",")
    if key.strip()
]

if not API_keys:
    # Warn instead of raising so the data-preparation cells can still be run offline.
    warnings.warn(
        "No PlantNet API key found: set the PLANTNET_API_KEYS environment "
        "variable (comma-separated keys) before calling the API."
    )

## Préparation des données

In [40]:
# Load the raw SPIPOLL export into a dataframe.
spipoll = pd.read_csv("././data/spipoll.csv", low_memory=False)

# Keep only the plant-related columns, then reduce to one row per collection:
# for every collection_id, only the first row encountered is kept.
plantes = (
    spipoll[[
        'collection_id',
        'plante_sc',
        'plante_fr',
        'plante_precision',
        'plante_inconnue',
        'plante_caractere',
        'photo_fleur',
        'photo_plante',
        'photo_feuille',
    ]]
    .drop_duplicates(subset='collection_id', keep='first')
)

plantes.shape

(75372, 9)

In [41]:
# Placeholder answers in plante_sc that are treated like a missing scientific name
# when the labelled / unlabelled split is built.
values_to_check = ["Je ne sais pas", "Plante inconnue"]

In [42]:
# A row is "unlabelled" when its scientific name is missing, is one of the
# placeholder answers in values_to_check, or is explicitly flagged as unknown.
masque_sans_nom = (
    plantes["plante_sc"].isna()
    | plantes["plante_sc"].isin(values_to_check)
    | (plantes["plante_inconnue"] == 1.0)
)

# .copy() makes both subsets independent dataframes: they are modified in place
# later on (column inserts, .iloc writes), which must not act on a slice of `plantes`.
# TODO(review): rename to plantes_unlabelled_sc for clarity?
plantes_sc_unlabelled = plantes.loc[masque_sans_nom].copy()

# Complement of the unlabelled rows: every row of plantes with a usable plante_sc.
plantes_sc_labelled = plantes.loc[~masque_sans_nom].copy()

# In plantes, set the column "group" to 1 for each row that is in plantes_sc_labelled
# (done after the split, so the two subsets do not carry the "group" column).
plantes.loc[~masque_sans_nom, 'group'] = 1

plantes_sc_labelled.shape

(63465, 9)

In [43]:
# Sanity check: the labelled and unlabelled row counts must add up to the row count of plantes
plantes_sc_labelled.shape[0] + plantes_sc_unlabelled.shape[0] == plantes.shape[0]

True

Compression de l'information étiquetée redondante :

In [44]:
# Deduplicate plantes_sc_labelled on plante_sc: only the first row encountered
# for each distinct scientific name is kept, and the result replaces the frame.
plantes_sc_labelled = plantes_sc_labelled.drop_duplicates(subset='plante_sc', keep='first')

# display the dimension of the dataframe
plantes_sc_labelled.shape

(2620, 9)

On a donc un df avec 2620 valeurs distinctes de plante_sc (plantes dont le nom est spécifié) et 11 907 collections (~1,8 % des données d'origine, spipoll.csv) dont le nom n'est pas spécifié dans plante_sc.

In [45]:
# Add three empty columns "Famille", "Genre" and "Espece" to plantes_sc_labelled and
# plantes_sc_unlabelled, between plante_sc and plante_fr (positions 2, 3 and 4).
# - The columns are created with object dtype because they later receive names (strings).
# - The membership test makes the cell safe to re-run: DataFrame.insert raises
#   when the column already exists.
for _df in (plantes_sc_labelled, plantes_sc_unlabelled):
    for _position, _colonne in enumerate(("Famille", "Genre", "Espece"), start=2):
        if _colonne not in _df.columns:
            _df.insert(_position, _colonne, np.full(len(_df), np.nan, dtype=object))

In [46]:
# Preview the first rows, including the three newly inserted (still empty) columns.
plantes_sc_labelled.head()

Unnamed: 0,collection_id,plante_sc,Famille,Genre,Espece,plante_fr,plante_precision,plante_inconnue,plante_caractere,photo_fleur,photo_plante,photo_feuille
0,1,Les Trèfles à fleurs blanches ou roses en boul...,,,,,,0.0,spontanée,https://spgp-api.65mo.fr/api/containers/spgp/d...,https://spgp-api.65mo.fr/api/containers/spgp/d...,https://spgp-api.65mo.fr/api/containers/spgp/d...
2,2,Acanthus mollis,,,,Acanthe molle,,,plantée,https://spgp-api.65mo.fr/api/containers/spgp/d...,https://spgp-api.65mo.fr/api/containers/spgp/d...,https://spgp-api.65mo.fr/api/containers/spgp/d...
5,3,"Les Orchidées à fleurs blanches, jaunes ou ver...",,,,,orchis bouc - Himantoglossum hircinum,0.0,spontanée,https://spgp-api.65mo.fr/api/containers/spgp/d...,https://spgp-api.65mo.fr/api/containers/spgp/d...,https://spgp-api.65mo.fr/api/containers/spgp/d...
6,4,Les Vipérines à fleurs bleues-violacées (des e...,,,,,Theaceae,0.0,plantée,https://spgp-api.65mo.fr/api/containers/spgp/d...,https://spgp-api.65mo.fr/api/containers/spgp/d...,https://spgp-api.65mo.fr/api/containers/spgp/d...
10,5,Les Pâquerettes (Bellis sp),,,,,,0.0,spontanée,https://spgp-api.65mo.fr/api/containers/spgp/d...,https://spgp-api.65mo.fr/api/containers/spgp/d...,https://spgp-api.65mo.fr/api/containers/spgp/d...


## Appel à l'API

In [47]:
from data_quality.plant_treatment.plantnet_api import PlantNetPredictor

def call_API(iloc, api_key_index=0):
    """Ask the PlantNet predictor to identify the flower photo of one row.

    The photo URL is read from ``plantes_sc_labelled['photo_fleur']`` at
    position ``iloc``. When the request raises, the error is printed and the
    call is retried with the next key of ``API_keys``.

    Args:
        iloc: Positional row index in ``plantes_sc_labelled``.
        api_key_index: Index in ``API_keys`` of the key to use for this attempt.

    Returns:
        ``[famille, genre, espece]`` taken from the first result of the
        prediction, or ``None`` when the request failed with every remaining key.

    Raises:
        IndexError: If ``api_key_index`` is out of range for ``API_keys``, or
            if the prediction contains no result.
        KeyError: If the prediction does not have the expected structure.
    """
    predictor = PlantNetPredictor(key=API_keys[api_key_index])

    try:
        prediction = predictor.predict(
            imageURL=plantes_sc_labelled['photo_fleur'].iloc[iloc],
            organs="auto",
            includeRelatedImages=False,
        )
    except Exception as e:
        # Every failure is handled the same way (the cause cannot be told apart here):
        # report it, then fall back on the next API key if there is one left.
        print(f"Error: {e}")
        if api_key_index + 1 < len(API_keys):
            return call_API(iloc, api_key_index + 1)
        print("You used all your API keys for today.")
        return None

    # Only the first result of the response is used.
    species = prediction["results"][0]["species"]
    famille = species["family"]["scientificNameWithoutAuthor"]
    genre = species["genus"]["scientificNameWithoutAuthor"]
    espece = species["scientificNameWithoutAuthor"]

    return [famille, genre, espece]

In [84]:
def remplir_tableau(df, num):
    """Fill the taxonomy columns of ``df`` for up to ``num`` rows using call_API.

    Filling starts at the first row whose "Famille" value is null, so that
    successive calls resume where the previous one stopped. ``df`` is
    modified in place.

    Note: ``call_API`` reads the photo URLs from the global
    ``plantes_sc_labelled`` by position, so ``df`` is expected to be that
    dataframe (or one with the same row order).

    Args:
        df: Dataframe containing the columns "Famille", "Genre" and "Espece".
        num: Maximum number of rows to fill during this call.

    Returns:
        The same ``df`` object, after the update.

    Raises:
        KeyError: If one of the three taxonomy columns is missing from ``df``.
    """
    missing = df["Famille"].isna().to_numpy()
    if not missing.any():
        print("Le dataframe est déjà rempli.")
        return df

    start_index = int(missing.argmax())  # position of the first row still empty
    # Never run past the last row: positions beyond it can only fail.
    stop_index = min(start_index + num, len(df))
    # Resolve the target columns by name rather than by hard-coded position.
    taxon_cols = [df.columns.get_loc(col) for col in ("Famille", "Genre", "Espece")]

    for i in tqdm(range(start_index, stop_index)):
        try:
            taxonomy = call_API(i)
            if taxonomy is None:
                # Every API key failed: further requests would fail the same way.
                break
            df.iloc[i, taxon_cols] = taxonomy
        except Exception as e:
            # Best effort: report the failure and go on with the next row.
            print(f"Error: {e}")

    return df

In [85]:
# Fill the next 7 rows that still have no "Famille" value. The dataframe is modified
# in place and also returned, which is why the cell displays it.
remplir_tableau(plantes_sc_labelled, 7)

  0%|          | 0/7 [00:00<?, ?it/s]

100%|██████████| 7/7 [00:21<00:00,  3.14s/it]


Unnamed: 0,collection_id,plante_sc,Famille,Genre,Espece,plante_fr,plante_precision,plante_inconnue,plante_caractere,photo_fleur,photo_plante,photo_feuille
0,1,Les Trèfles à fleurs blanches ou roses en boul...,Fabaceae,Trifolium,Trifolium pratense,,,0.0,spontanée,https://spgp-api.65mo.fr/api/containers/spgp/d...,https://spgp-api.65mo.fr/api/containers/spgp/d...,https://spgp-api.65mo.fr/api/containers/spgp/d...
2,2,Acanthus mollis,Acanthaceae,Acanthus,Acanthus spinosus,Acanthe molle,,,plantée,https://spgp-api.65mo.fr/api/containers/spgp/d...,https://spgp-api.65mo.fr/api/containers/spgp/d...,https://spgp-api.65mo.fr/api/containers/spgp/d...
5,3,"Les Orchidées à fleurs blanches, jaunes ou ver...",Orchidaceae,Himantoglossum,Himantoglossum hircinum,,orchis bouc - Himantoglossum hircinum,0.0,spontanée,https://spgp-api.65mo.fr/api/containers/spgp/d...,https://spgp-api.65mo.fr/api/containers/spgp/d...,https://spgp-api.65mo.fr/api/containers/spgp/d...
6,4,Les Vipérines à fleurs bleues-violacées (des e...,Theaceae,Camellia,Camellia sasanqua,,Theaceae,0.0,plantée,https://spgp-api.65mo.fr/api/containers/spgp/d...,https://spgp-api.65mo.fr/api/containers/spgp/d...,https://spgp-api.65mo.fr/api/containers/spgp/d...
10,5,Les Pâquerettes (Bellis sp),Asteraceae,Bellis,Bellis sylvestris,,,0.0,spontanée,https://spgp-api.65mo.fr/api/containers/spgp/d...,https://spgp-api.65mo.fr/api/containers/spgp/d...,https://spgp-api.65mo.fr/api/containers/spgp/d...
...,...,...,...,...,...,...,...,...,...,...,...,...
665235,75 468,Arbutus andrachne,,,,Arbousier de Chypre,,0.0,spontanée,https://spgp-api.65mo.fr/api/containers/spgp/d...,https://spgp-api.65mo.fr/api/containers/spgp/d...,https://spgp-api.65mo.fr/api/containers/spgp/d...
665583,75 525,Pavonia hastata,,,,Pavonie (Pavonia hastata),,,plantée,https://spgp-api.65mo.fr/api/containers/spgp/d...,https://spgp-api.65mo.fr/api/containers/spgp/d...,https://spgp-api.65mo.fr/api/containers/spgp/d...
665861,75 554,Medicago falcata,,,,Luzerne en faucille,,0.0,spontanée,https://spgp-api.65mo.fr/api/containers/spgp/d...,https://spgp-api.65mo.fr/api/containers/spgp/d...,https://spgp-api.65mo.fr/api/containers/spgp/d...
666544,75 625,Helianthus divaricatus,,,,Hélianthe à feuilles Ètalées,,,spontanée,https://spgp-api.65mo.fr/api/containers/spgp/d...,https://spgp-api.65mo.fr/api/containers/spgp/d...,https://spgp-api.65mo.fr/api/containers/spgp/d...


In [86]:
plantes_sc_labelled.head(16) # check the first rows filled by two successive runs of 7 rows

Unnamed: 0,collection_id,plante_sc,Famille,Genre,Espece,plante_fr,plante_precision,plante_inconnue,plante_caractere,photo_fleur,photo_plante,photo_feuille
0,1,Les Trèfles à fleurs blanches ou roses en boul...,Fabaceae,Trifolium,Trifolium pratense,,,0.0,spontanée,https://spgp-api.65mo.fr/api/containers/spgp/d...,https://spgp-api.65mo.fr/api/containers/spgp/d...,https://spgp-api.65mo.fr/api/containers/spgp/d...
2,2,Acanthus mollis,Acanthaceae,Acanthus,Acanthus spinosus,Acanthe molle,,,plantée,https://spgp-api.65mo.fr/api/containers/spgp/d...,https://spgp-api.65mo.fr/api/containers/spgp/d...,https://spgp-api.65mo.fr/api/containers/spgp/d...
5,3,"Les Orchidées à fleurs blanches, jaunes ou ver...",Orchidaceae,Himantoglossum,Himantoglossum hircinum,,orchis bouc - Himantoglossum hircinum,0.0,spontanée,https://spgp-api.65mo.fr/api/containers/spgp/d...,https://spgp-api.65mo.fr/api/containers/spgp/d...,https://spgp-api.65mo.fr/api/containers/spgp/d...
6,4,Les Vipérines à fleurs bleues-violacées (des e...,Theaceae,Camellia,Camellia sasanqua,,Theaceae,0.0,plantée,https://spgp-api.65mo.fr/api/containers/spgp/d...,https://spgp-api.65mo.fr/api/containers/spgp/d...,https://spgp-api.65mo.fr/api/containers/spgp/d...
10,5,Les Pâquerettes (Bellis sp),Asteraceae,Bellis,Bellis sylvestris,,,0.0,spontanée,https://spgp-api.65mo.fr/api/containers/spgp/d...,https://spgp-api.65mo.fr/api/containers/spgp/d...,https://spgp-api.65mo.fr/api/containers/spgp/d...
24,6,Les Bruyères à fleurs roses en grappes (des es...,Ericaceae,Erica,Erica x darleyensis,,Bruyère carnée (Erica carnea),0.0,plantée,https://spgp-api.65mo.fr/api/containers/spgp/d...,https://spgp-api.65mo.fr/api/containers/spgp/d...,https://spgp-api.65mo.fr/api/containers/spgp/d...
25,7,Les Ajoncs (des espèces du genre Ulex),Fabaceae,Adenocarpus,Adenocarpus foliolosus,,Ajonc (Ulex europaeus),0.0,ne se prononce pas,https://spgp-api.65mo.fr/api/containers/spgp/d...,https://spgp-api.65mo.fr/api/containers/spgp/d...,https://spgp-api.65mo.fr/api/containers/spgp/d...
41,12,Les Pissenlits (Taraxacum sp),Asteraceae,Taraxacum,Taraxacum palustre,,Pissenlit (Taraxacum officinale),0.0,spontanée,https://spgp-api.65mo.fr/api/containers/spgp/d...,https://spgp-api.65mo.fr/api/containers/spgp/d...,https://spgp-api.65mo.fr/api/containers/spgp/d...
47,15,Les Crucifères jaunes (des espèces de la famil...,Brassicaceae,Sinapis,Sinapis arvensis,,,0.0,spontanée,https://spgp-api.65mo.fr/api/containers/spgp/d...,https://spgp-api.65mo.fr/api/containers/spgp/d...,https://spgp-api.65mo.fr/api/containers/spgp/d...
54,17,Senecio jacobaea,Asteraceae,Senecio,Senecio inaequidens,Le Sénéçon jacobée,,0.0,spontanée,https://spgp-api.65mo.fr/api/containers/spgp/d...,https://spgp-api.65mo.fr/api/containers/spgp/d...,https://spgp-api.65mo.fr/api/containers/spgp/d...


In [66]:
# (only useful for testing) empties the "Famille", "Genre" and "Espece" columns of the plantes_sc_labelled dataframe
plantes_sc_labelled.iloc[:, 2:5] = np.nan

In [89]:
plantes_sc_labelled.shape # check that the dimensions of the dataframe are unchanged: OK

(2620, 12)

## Pipeline