Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
Add wordnet helpers and rename synsets properties
- Loading branch information
Ludwig Schubert
committed
Jan 23, 2019
1 parent
b24b223
commit 9933879
Showing
7 changed files
with
174 additions
and
27 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
# Copyright 2019 The Lucid Authors. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# ============================================================================== | ||
"""Helpers for using WordNet Synsets. | ||
When comparing different models, be aware that they may encode their predictions in | ||
different orders. Do not compare outputs of models without ensuring their outputs are | ||
in the same order! We recommend relying on WordNet's synsets to uniquely identify a | ||
label. Let's clarify these terms: | ||
## Labels ("Labrador Retriever") | ||
Labels are totally informal and vary between implementations. We aim to provide a list of | ||
model labels in the `.labels` property. These may include different labels in different | ||
orders for each model. | ||
For translating between textual labels and synsets, please use the labels and synsets | ||
collections on models. There's no other foolproof way of going from a descriptive text | ||
label to a precise synset definition. | ||
## Synset IDs ("n02099712") | ||
Synset IDs are identifiers used by the ILSVRC2012 ImageNet classification contest. | ||
We provide `id_from_synset()` to format them correctly. | ||
## Synset Names ('labrador_retriever.n.01') | ||
Synset names are a WordNet-internal concept. When you want to create a synset but don't | ||
know its precise name, we offer `imagenet_synset_from_description()` to search for a | ||
synset containing the description in its name that is also one of the synsets used for | ||
the ILSVRC2012. | ||
## Label indexes (logits[i]) | ||
When obtaining predictions from a model, they will often be provided in the form of a | ||
BATCH by NUM_CLASSES multidimensional array. In order to map those to human readable | ||
strings, please use a model's `.labels` or `.synsets` or `.synset_ids` property. We aim | ||
to provide these in the same ordering as the model was trained on. Unfortunately these | ||
may be subtly different between models. | ||
""" | ||
|
||
from cachetools.func import lru_cache | ||
|
||
import nltk | ||
nltk.download("wordnet") | ||
from nltk.corpus import wordnet as wn | ||
|
||
from lucid.misc.io import load | ||
|
||
|
||
IMAGENET_SYNSETS_PATH = "gs://modelzoo/labels/ImageNet_standard_synsets.txt" | ||
|
||
|
||
def id_from_synset(synset):
  """Format a synset as an ID string such as "n01484850".

  The ID is the synset's part-of-speech tag followed by its offset,
  zero-padded to a width of eight characters.

  Args:
    synset: object exposing `pos()` and `offset()` methods, e.g. an NLTK
      WordNet synset.

  Returns:
    The ID string.
  """
  pos_tag = synset.pos()
  padded_offset = format(synset.offset(), "08")
  return "{}{}".format(pos_tag, padded_offset)
|
||
|
||
def synset_from_id(id_str):
  """Look up the WordNet synset for an ID string such as "n01484850".

  Args:
    id_str: nine-character ID; the first character is the part-of-speech
      tag and the remaining eight characters are the numeric offset.

  Returns:
    The result of `wn.synset_from_pos_and_offset` for the parsed tag and
    offset.

  Raises:
    ValueError: if `id_str` is not exactly nine characters long, or if its
      offset part cannot be parsed as an integer.
  """
  # Validate explicitly rather than with `assert`: assertions are stripped
  # under `python -O`, which would let malformed IDs through to the lookup.
  expected_length = 1 + 8
  if len(id_str) != expected_length:
    raise ValueError(
        "Synset ID must be {} characters long (e.g. 'n01484850'), got {!r}.".format(
            expected_length, id_str
        )
    )
  pos, offset = id_str[0], int(id_str[1:])
  return wn.synset_from_pos_and_offset(pos, offset)
|
||
|
||
@lru_cache(maxsize=1)
def imagenet_synset_ids():
  """Load the synset IDs stored at `IMAGENET_SYNSETS_PATH`.

  The result is memoized by the `lru_cache` decorator, so the file is only
  loaded on the first call.

  Returns:
    Whatever `load(..., split=True)` yields for that file; presumably a list
    of ID strings, one per line (confirm against `lucid.misc.io.load`).
  """
  synset_ids = load(IMAGENET_SYNSETS_PATH, split=True)
  return synset_ids
|
||
|
||
@lru_cache(maxsize=1)
def imagenet_synsets():
  """Return the synsets for every ID from `imagenet_synset_ids()`.

  Each ID is converted with `synset_from_id`, preserving the order of the
  ID list. The resulting list is memoized by the `lru_cache` decorator.
  """
  return list(map(synset_from_id, imagenet_synset_ids()))
|
||
|
||
@lru_cache()
def imagenet_synset_from_description(search_term):
  """Find the single ImageNet synset whose name contains `search_term`.

  The term is lower-cased and its spaces are replaced with underscores, then
  matched as a substring of each synset's `name()`; for example
  "white shark" matches "great_white_shark.n.01".

  Args:
    search_term: textual description to search for.

  Returns:
    The one synset from `imagenet_synsets()` whose name contains the term.

  Raises:
    ValueError: if no synset matches, or if more than one matches (the
      message then lists the names of all matching synsets).
  """
  synsets = imagenet_synsets()
  # Normalize the term once instead of once per candidate synset.
  needle = search_term.lower().replace(' ', '_')
  candidates = [synset for synset in synsets if needle in synset.name()]

  hits = len(candidates)
  if hits == 1:
    return candidates[0]
  if hits == 0:
    message = "Could not find any imagenet synset with search term {}."
    raise ValueError(message.format(search_term))

  # Ambiguous: report every match so the caller can refine the term.
  message = "Found {} imagenet synsets with search term {}: {}."
  names = [synset.name() for synset in candidates]
  raise ValueError(message.format(hits, search_term, ", ".join(names)))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,7 +2,7 @@ | |
description-file = README.md | ||
|
||
[aliases] | ||
test = pytest -s | ||
test=pytest | ||
|
||
[flake8] | ||
ignore = E501,E731,E111 | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
import pytest | ||
|
||
import nltk | ||
|
||
nltk.download("wordnet") | ||
from nltk.corpus import wordnet as wn | ||
|
||
from lucid.modelzoo.wordnet import ( | ||
id_from_synset, | ||
synset_from_id, | ||
imagenet_synset_ids, | ||
imagenet_synsets, | ||
imagenet_synset_from_description, | ||
) | ||
|
||
|
||
@pytest.fixture()
def synset():
  """Provide the WordNet synset named "great_white_shark.n.01"."""
  great_white_shark = wn.synset("great_white_shark.n.01")
  return great_white_shark
|
||
|
||
@pytest.fixture()
def synset_id():
  """Provide the synset ID string the tests expect for the `synset` fixture."""
  expected_id = "n01484850"
  return expected_id
|
||
|
||
def test_id_from_synset(synset, synset_id):
  """`id_from_synset` turns the fixture synset into the fixture ID."""
  assert id_from_synset(synset) == synset_id
|
||
|
||
def test_synset_from_id(synset_id, synset):
  """`synset_from_id` turns the fixture ID back into the fixture synset."""
  assert synset_from_id(synset_id) == synset
|
||
|
||
def test_imagenet_synset_ids(synset_id):
  """The ID list has 1000 entries and includes the fixture ID."""
  all_ids = imagenet_synset_ids()
  assert len(all_ids) == 1000
  assert synset_id in all_ids
|
||
|
||
def test_imagenet_synsets(synset):
  """The synset list has 1000 entries and includes the fixture synset."""
  all_synsets = imagenet_synsets()
  assert len(all_synsets) == 1000
  assert synset in all_synsets
|
||
|
||
def test_imagenet_synset_from_description(synset):
  """Searching for "white shark" resolves to the fixture synset."""
  assert synset == imagenet_synset_from_description("white shark")
|
||
|
||
def test_imagenet_synset_from_description_raises(synset):
  """An ambiguous term raises a ValueError naming the matching synsets."""
  expected_message = r'.*great_white_shark.*tiger_shark.*'
  with pytest.raises(ValueError, match=expected_message):
    imagenet_synset_from_description("shark")