In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import settings
import requests
import json

In [3]:
API_KEY = settings.PLANTNET_API_KEY
PROJECT = 'all'
api_endpoint = f"https://my-api.plantnet.org/v2/identify/{PROJECT}?api-key={API_KEY}"

# Identifying plants

We'll use the PlantNet API (as a starting point, TODO: investigate iNaturalist API) to identify user-uploaded images of plants. As a first step, we'll expect the user photos to contain a single plant, though eventually we want to apply image segmentation to handle whole-landscape images. The PlantNet API requires us to identify a prominent organ type (leaf, flower, bark, etc.) in the image to aid in identification; we need to experiment with how much this matters, and whether we can handle it without user input.

In [6]:
img_path = '../data/img/img1.jpg'
with open(img_path, 'rb') as img_data:
    files = [
            ('images', (img_path,img_data))
            ]
    req = requests.Request('POST', url = api_endpoint, files = files, data = {'organs': ['flower',]})
    prepared = req.prepare()
    s = requests.Session()
    response = s.send(prepared)
    result = json.loads(response.text)

KeyError: 'commonNames'

In [11]:
print(result['bestMatch'], result['results'][0]['score'], result['results'][0]['species']['commonNames'])

Pyrus calleryana Decne. 0.39538 ['Bradford Pear', 'Callery pear', 'Ornamental pear']


Let's wrap this in a function.

In [58]:
def id_plant(img_path,organ = 'leaf'):
    with open(img_path, 'rb') as img_data:
        files = [
                ('images', (img_path,img_data))
                ]
        req = requests.Request('POST', url = api_endpoint, files = files, data = {'organs': [organ,]})
        prepared = req.prepare()
        s = requests.Session()
        response = s.send(prepared)
        result = json.loads(response.text)
    return result['bestMatch'], result['results'][0]['species']['commonNames'], result['results'][0]['score']

id_plant(img_path)

('Pyrus calleryana Decne.',
 ['Bradford Pear', 'Callery pear', 'Ornamental pear'],
 0.39538)

# Comparing results to the PA Invasive species list

In [21]:
invasives = pd.read_csv('../data/pa_invasives.csv')
invasives.head(5)

Unnamed: 0,Scientific Name,Common Name,PISC Priority Score,PA Noxious Weed Rank,Invasive Assessment Score,Aquatic or Terrestrial,PLNA Economic Importance Score,DCNR Rank,EDRR
0,Wisteria sinensis,Chinese Wisteria,3.6,,57.0,Terrestrial,4.2,2,No
1,Wisteria floribunda,Japanese Wisteria,4.5,,57.0,Terrestrial,4.6,2,No
2,Vincetoxicum rossicum,Pale Swallow-Wort,4.4,B,88.0,Terrestrial,0.1,1,No
3,Vincetoxicum nigrum,Black Swallow-Wort,4.5,B,90.0,Terrestrial,0.4,1,No
4,Vinca minor,Common Periwinkle,2.2,,57.0,Terrestrial,7.6,3,No


We've identified an invasive Callery/Bradford pear up above. Let's try to match it to this list.

In [24]:
invasives.loc[invasives['Scientific Name'] == 'pyrus calleryana']

Unnamed: 0,Scientific Name,Common Name,PISC Priority Score,PA Noxious Weed Rank,Invasive Assessment Score,Aquatic or Terrestrial,PLNA Economic Importance Score,DCNR Rank,EDRR
28,pyrus calleryana,Callery Pear,5.8,B,65.0,Terrestrial,3.7,2,No


In [32]:
invasives['Scientific Name'] = invasives['Scientific Name'].str.lower()
species_id = result['bestMatch'].lower()
species_id

invasives[invasives['Scientific Name'].apply(lambda x: species_id in x or x in species_id)]

Unnamed: 0,Scientific Name,Common Name,PISC Priority Score,PA Noxious Weed Rank,Invasive Assessment Score,Aquatic or Terrestrial,PLNA Economic Importance Score,DCNR Rank,EDRR
28,pyrus calleryana,Callery Pear,5.8,B,65.0,Terrestrial,3.7,2,No


That's a bit ugly. Who knows how the names will end up being related to each other. Let's try fuzzy matching.

In [34]:
from fuzzywuzzy import fuzz

In [35]:
fuzz.ratio(species_id,'pyrus calleryana')

82

In [47]:
matches = invasives['Scientific Name'].apply(lambda x: fuzz.ratio(species_id,x)).sort_values(ascending=False)
match = invasives.iloc[list(matches.index)[0]]
match

Scientific Name                   pyrus calleryana
Common Name                           Callery Pear
PISC Priority Score                            5.8
PA Noxious Weed Rank                             B
Invasive Assessment Score                     65.0
Aquatic or Terrestrial                 Terrestrial
PLNA Economic Importance Score                 3.7
DCNR Rank                                        2
EDRR                                            No
Name: 28, dtype: object

In [46]:
list(matches.index)[0]

28

Now let's try with a non-invasive and see what the matches look like.

In [50]:
phlox_img = '../data/img/phlox.png'
species_id, common_names, score = id_plant(phlox_img, organ = 'flower')
species_id

'Phlox subulata L.'

In [51]:
matches = invasives['Scientific Name'].apply(lambda x: fuzz.ratio(species_id,x)).sort_values(ascending=False)
matches

33     47
97     44
131    41
134    40
34     39
       ..
73     12
75     12
15     11
136    11
51      7
Name: Scientific Name, Length: 148, dtype: int64

More confident than I would have thought; maybe a different metric would be better?

In [52]:
matches = invasives['Scientific Name'].apply(lambda x: fuzz.token_sort_ratio(species_id,x)).sort_values(ascending=False)
matches

139    47
34     46
131    43
113    43
33     43
       ..
135    12
48     12
116    11
45     11
57     11
Name: Scientific Name, Length: 148, dtype: int64

In [56]:
matches = invasives['Scientific Name'].apply(lambda x: fuzz.token_set_ratio(species_id,x)).sort_values(ascending=False)
list(matches.values)[0]

47

Nope. Might just have to set our threshold a bit higher, around 75?

In [57]:
def is_invasive(species_id, threshold = 75):
    matches = invasives['Scientific Name'].apply(lambda x: fuzz.ratio(species_id,x)).sort_values(ascending=False)
    confidence = list(matches.values)[0]
    if confidence >= threshold:
        return "Invasive"
    else:
        return "Not invasive" # eventually should add a "Native" identifier

is_invasive(id_plant(img_path)[0]), is_invasive(id_plant(phlox_img)[0])

('Invasive', 'Not invasive')

Great! Let's do some battery testing.

In [63]:
import os

imgs = os.listdir('../data/img')
for img in imgs:
    species_id, commonNames, score = id_plant('../data/img/'+img) #organ always set to leaf -- to what extent does this matter?
    invasive = is_invasive(species_id)
    print(f'ID: {species_id}, common names {commonNames} is {invasive}.')


ID: Pyrus calleryana Decne., common names ['Bradford Pear', 'Callery pear', 'Ornamental pear'] is Invasive.
ID: Wisteria sinensis (Sims) Sweet, common names ['Chinese Wisteria', 'Pruneau', 'پیچ گلیسین'] is Not invasive.
ID: Reynoutria japonica Houtt., common names ['Japanese knotweed', 'Impossible to kill invasive demon plant', 'Mexican-bamboo'] is Not invasive.
ID: Alliaria petiolata (M.Bieb.) Cavara & Grande, common names ['Garlic Mustard', 'Jack-By-The-Hedge', 'Hedge-Garlic'] is Not invasive.
ID: Tulipa agenensis Redouté, common names ['Tulip', 'Common tulip'] is Not invasive.
ID: Quercus robur L., common names ['Common Oak', 'English oak', 'Pedunculate oak'] is Not invasive.
ID: Phlox subulata L., common names ['Moss phlox', 'Creeping phlox', 'Moss-pink'] is Not invasive.


Oh nooooo. Numbers 2-4 are all definitely invasive. Let's take another look at the matching.

In [64]:
invasives.loc[invasives['Scientific Name'] == 'wisteria sinensis']

Unnamed: 0,Scientific Name,Common Name,PISC Priority Score,PA Noxious Weed Rank,Invasive Assessment Score,Aquatic or Terrestrial,PLNA Economic Importance Score,DCNR Rank,EDRR
0,wisteria sinensis,Chinese Wisteria,3.6,,57.0,Terrestrial,4.2,2,No


In [65]:
fuzz.ratio('Wisteria sinensis (Sims) Sweet', 'wisteria sinensis')

68

In [70]:
invasives.loc[invasives['Scientific Name'].str[0] == 'r']

Unnamed: 0,Scientific Name,Common Name,PISC Priority Score,PA Noxious Weed Rank,Invasive Assessment Score,Aquatic or Terrestrial,PLNA Economic Importance Score,DCNR Rank,EDRR
23,rubus phoenicolasius,Wineberry,2.8,,86.0,Terrestrial,0.9,2,No
24,rosa multiflora,Multiflora Rose,6.2,B,89.0,Terrestrial,,1,No
25,rhodotypos scanden,Jetbead,2.6,,69.0,Terrestrial,1.3,1,No
26,rhamnus cathartica,Common Buckthorn,5.8,B,81.0,Terrestrial,2.3,1,No


Oops, need to find a more complete noxious weeds list. Problem for next time.

In [71]:
invasives.loc[invasives['Scientific Name'] == 'alliaria petiolata']

Unnamed: 0,Scientific Name,Common Name,PISC Priority Score,PA Noxious Weed Rank,Invasive Assessment Score,Aquatic or Terrestrial,PLNA Economic Importance Score,DCNR Rank,EDRR
139,alliaria petiolata,Garlic Mustard,6.1,B,84.0,Terrestrial,0.6,1,No


In [72]:
fuzz.ratio('Alliaria petiolata (M.Bieb.) Cavara & Grande','alliaria petiolata')

55

LAME

In [74]:
imgs = os.listdir('../data/img')
for img in imgs:
    species_id, commonNames, score = id_plant('../data/img/'+img) #organ always set to leaf -- to what extent does this matter?
    invasive = is_invasive(species_id, threshold = 50)
    print(f'ID: {species_id}, common names {commonNames} is {invasive}.')


ID: Pyrus calleryana Decne., common names ['Bradford Pear', 'Callery pear', 'Ornamental pear'] is Invasive.
ID: Wisteria sinensis (Sims) Sweet, common names ['Chinese Wisteria', 'پیچ گلیسین', 'Pruneau'] is Invasive.
ID: Reynoutria japonica Houtt., common names ['Japanese knotweed', 'Impossible to kill invasive demon plant', 'Mexican-bamboo'] is Invasive.
ID: Alliaria petiolata (M.Bieb.) Cavara & Grande, common names ['Garlic Mustard', 'Jack-By-The-Hedge', 'Hedge-Garlic'] is Invasive.
ID: Tulipa agenensis Redouté, common names ['Common tulip', 'Tulip'] is Not invasive.
ID: Quercus robur L., common names ['Common Oak', 'English oak', 'Pedunculate oak'] is Not invasive.
ID: Phlox subulata L., common names ['Moss phlox', 'Creeping phlox', 'Moss-pink'] is Not invasive.


In [76]:
matches = invasives['Scientific Name'].apply(lambda x: fuzz.ratio('Reynoutria japonica',x)).sort_values(ascending=False)
matches

92     67
66     67
18     63
111    59
71     58
       ..
19     11
116    11
57     10
105     6
32      5
Name: Scientific Name, Length: 148, dtype: int64

In [77]:
invasives.iloc[92]

Scientific Name                   fallopia japonica
Common Name                       Japanese Knotweed
PISC Priority Score                             8.0
PA Noxious Weed Rank                              B
Invasive Assessment Score                      98.0
Aquatic or Terrestrial                  Terrestrial
PLNA Economic Importance Score                  0.3
DCNR Rank                                         1
EDRR                                             No
Name: 92, dtype: object

Are there two different scientific names for Japanese Knotweed? There are! Science has let me down.