diff --git a/README.md b/README.md
index ad83d52e..4e552757 100644
--- a/README.md
+++ b/README.md
@@ -5,14 +5,10 @@
 A collection of tutorials for [Snorkel](http://snorkel.org).
 The Snorkel tutorials are grouped by application, with some applications having multiple associated notebooks in their directory.
 * `spam`: Is this YouTube comment spam?
 * `spouse`: Does this sentence imply that the two marked people are spouses?
-* `scene_graph`: Is object A riding object B in the image, carrying it, or neither?
+* `visual_relation`: Is object A riding object B in the image, carrying it, or neither?
 * `crowdsourcing`: Is this tweet about the weather expressing a positive, negative or neutral sentiment?
-<<<<<<< HEAD
-* `multitask` (Multi-Task Learning): A synthetic task demonstrating the native Snorkel multi-task classifier API
-=======
 * `mtl` (Multi-Task Learning): A synthetic task demonstrating the native Snorkel multi-task classifier API
 * [`drybell`](https://ai.googleblog.com/2019/03/harnessing-organizational-knowledge-for.html): Is a celebrity mentioned in this news article?
->>>>>>> Add to tox and README
 
 See the [Tutorials Index](#tutorials-index) for a listing of which tutorials demonstrate which task types, techniques, and integrations.
@@ -144,14 +140,13 @@ Here we provide an index pointing to different available tutorials by their task
 * Task
   * Text Classification (Text): `spam`, `crowdsourcing`, `drybell`
   * Relation Extraction (Text): `spouse`
-  * Visual Relationship Detection (Image): `scene_graph`
+  * Visual Relationship Detection (Image): `visual_relation`
 * Techniques
-  * Labeling with Labeling Functions (LFs): `spam`, `spouse`, `scene_graph`, `crowdsourcing`, `drybell`
+  * Labeling with Labeling Functions (LFs): `spam`, `spouse`, `visual_relation`, `crowdsourcing`
   * Augmentation with Transformation Functions (TFs): `spam`
   * Monitoring with Slicing Functions (SFs): `spam`
   * Using Crowd Worker Labels: `crowdsourcing`
-  * Multi-Task Learning (MTL): `multitask`, `scene_graph`, `spam`
-  * Large-Scale Production Pipeline: `drybell`
+  * Multi-Task Learning (MTL): `multitask`, `visual_relation`, `spam`
 * Integrations
   * TensorFlow/Keras: `spam`, `spouse`
   * Scikit-Learn: `spam`, `crowdsourcing`
diff --git a/scene_graph/.notebooks b/scene_graph/.notebooks
deleted file mode 100644
index a5cef3bd..00000000
--- a/scene_graph/.notebooks
+++ /dev/null
@@ -1 +0,0 @@
-vrd_tutorial
\ No newline at end of file
diff --git a/spam/01_spam_tutorial.ipynb b/spam/01_spam_tutorial.ipynb
index 3f2ff034..d73ab13f 100644
--- a/spam/01_spam_tutorial.ipynb
+++ b/spam/01_spam_tutorial.ipynb
@@ -4916,7 +4916,7 @@
     "If you enjoyed this tutorial and you've already checked out the Snorkel 101 Guide, check out the [`snorkel-tutorials` table of contents](https://github.com/snorkel-team/snorkel-tutorials#snorkel-tutorials) for other tutorials that you may find interesting, including demonstrations of how to use Snorkel\n",
     "\n",
     "* As part of a [hybrid crowdsourcing pipeline](https://github.com/snorkel-team/snorkel-tutorials/tree/master/crowdsourcing)\n",
-    "* For [scene-graph detection over images](https://github.com/snorkel-team/snorkel-tutorials/tree/master/scene_graph)\n",
+    "* For [visual relationship detection over images](https://github.com/snorkel-team/snorkel-tutorials/tree/master/visual_relation)\n",
     "* For [information extraction over text](https://github.com/snorkel-team/snorkel-tutorials/tree/master/spouse)\n",
     "* For [data augmentation](https://github.com/snorkel-team/snorkel-tutorials/tree/master/spam)\n",
     "\n",
diff --git a/spam/01_spam_tutorial.py b/spam/01_spam_tutorial.py
index 7ed44246..5f5c0f00 100644
--- a/spam/01_spam_tutorial.py
+++ b/spam/01_spam_tutorial.py
@@ -1006,7 +1006,7 @@ def plot_probabilities_histogram(Y):
 # If you enjoyed this tutorial and you've already checked out the Snorkel 101 Guide, check out the [`snorkel-tutorials` table of contents](https://github.com/snorkel-team/snorkel-tutorials#snorkel-tutorials) for other tutorials that you may find interesting, including demonstrations of how to use Snorkel
 #
 # * As part of a [hybrid crowdsourcing pipeline](https://github.com/snorkel-team/snorkel-tutorials/tree/master/crowdsourcing)
-# * For [scene-graph detection over images](https://github.com/snorkel-team/snorkel-tutorials/tree/master/scene_graph)
+# * For [visual relationship detection over images](https://github.com/snorkel-team/snorkel-tutorials/tree/master/visual_relation)
 # * For [information extraction over text](https://github.com/snorkel-team/snorkel-tutorials/tree/master/spouse)
 # * For [data augmentation](https://github.com/snorkel-team/snorkel-tutorials/tree/master/spam)
 #
diff --git a/tox.ini b/tox.ini
index b9399555..c24f8daa 100644
--- a/tox.ini
+++ b/tox.ini
@@ -3,6 +3,6 @@ skipsdist = true
 envlist =
     spouse,
     intro,
+    visual_relation,
     spam,
-    scene_graph,
     crowdsourcing,
@@ -19,7 +20,7 @@ deps =
    intro: -rintro/requirements.txt
    spam: -rspam/requirements.txt
    multitask: -rmultitask/requirements.txt
-    scene_graph: -rscene_graph/requirements.txt
+    visual_relation: -rvisual_relation/requirements.txt
    crowdsourcing: -rcrowdsourcing/requirements.txt
    recsys: -rrecsys/requirements.txt
    drybell: -rdrybell/requirements.txt
@@ -32,7 +33,7 @@ commands =
    intro: python {toxinidir}/scripts/build.py {posargs:test} intro
    spam: python {toxinidir}/scripts/build.py {posargs:test} spam
    multitask: python {toxinidir}/scripts/build.py {posargs:test} multitask
-    scene_graph: python {toxinidir}/scripts/build.py {posargs:test} scene_graph
+    visual_relation: python {toxinidir}/scripts/build.py {posargs:test} visual_relation
    crowdsourcing: python {toxinidir}/scripts/build.py {posargs:test} crowdsourcing
    drybell: python {toxinidir}/scripts/build.py {posargs:test} drybell
diff --git a/visual_relation/.notebooks b/visual_relation/.notebooks
new file mode 100644
index 00000000..db3c6bcd
--- /dev/null
+++ b/visual_relation/.notebooks
@@ -0,0 +1 @@
+visual_relation_tutorial
\ No newline at end of file
diff --git a/scene_graph/__init__.py b/visual_relation/__init__.py
similarity index 100%
rename from scene_graph/__init__.py
rename to visual_relation/__init__.py
diff --git a/visual_relation/download_full_data.sh b/visual_relation/download_full_data.sh
new file mode 100644
index 00000000..d1aaeb37
--- /dev/null
+++ b/visual_relation/download_full_data.sh
@@ -0,0 +1,37 @@
+# Execute from snorkel-tutorials/
+# Download data.
+
+ANNOTATIONS_URL="https://www.dropbox.com/s/bnfhm6kt9xumik8/vrd.zip"
+IMAGES_URL="http://imagenet.stanford.edu/internal/jcjohns/visual_relations/sg_dataset.zip"
+SAMPLE_IMAGES_URL="https://github.com/Prof-Lu-Cewu/Visual-Relationship-Detection.git"
+GLOVE_URL="http://nlp.stanford.edu/data/wordvecs/glove.6B.zip"
+
+if [ ! -d "visual_relation/data" ]; then
+    mkdir -p visual_relation/data
+    cd visual_relation/data
+
+    # download and unzip metadata and annotations
+    wget $ANNOTATIONS_URL
+    unzip vrd.zip
+
+    # Delete the zip files.
+    rm vrd.zip
+    cd VRD
+
+    # Download and unzip all images
+    wget $IMAGES_URL
+    unzip sg_dataset.zip
+    rm sg_dataset.zip
+    cd ../../..
+
+    mkdir -p visual_relation/data/glove
+    cd visual_relation/data/glove
+
+    wget $GLOVE_URL
+    unzip glove.6B.zip
+
+    # Delete the zip files
+    rm glove.6B.zip
+    cd ../../..
+fi
+
diff --git a/scene_graph/download_data.sh b/visual_relation/download_sample_data.sh
similarity index 54%
rename from scene_graph/download_data.sh
rename to visual_relation/download_sample_data.sh
index 315ad8fb..8f99e562 100755
--- a/scene_graph/download_data.sh
+++ b/visual_relation/download_sample_data.sh
@@ -12,20 +12,20 @@ DIRS=("glove" "VRD/sg_dataset/samples")
 # Check if at least any file is missing. If so, reload all data.
 for directory_name in "${DIRS[@]}"
 do
-    if [ ! -d "scene_graph/data/$directory_name" ]; then
+    if [ ! -d "visual_relation/data/$directory_name" ]; then
         RELOAD=true
     fi
 done
 
 ANNOTATIONS_URL="https://www.dropbox.com/s/bnfhm6kt9xumik8/vrd.zip"
-IMAGES_URL="http://imagenet.stanford.edu/internal/jcjohns/scene_graphs/sg_dataset.zip"
+IMAGES_URL="http://imagenet.stanford.edu/internal/jcjohns/visual_relations/sg_dataset.zip"
 SAMPLE_IMAGES_URL="https://github.com/Prof-Lu-Cewu/Visual-Relationship-Detection.git"
 GLOVE_URL="http://nlp.stanford.edu/data/wordvecs/glove.6B.zip"
 
-if [ ! -d "scene_graph/data" ]; then
-    if [ -d "scene_graph/data/" ]; then rm -Rf "scene_graph/data/"; fi
-    mkdir -p scene_graph/data
-    cd scene_graph/data
+if [ ! -d "visual_relation/data" ]; then
+    if [ -d "visual_relation/data/" ]; then rm -Rf "visual_relation/data/"; fi
+    mkdir -p visual_relation/data
+    cd visual_relation/data
 
     # download and unzip metadata and annotations
     wget $ANNOTATIONS_URL
@@ -35,23 +35,7 @@ if [ ! -d "scene_graph/data" ]; then
     rm vrd.zip
     cd VRD
 
-    # if [ "$TRAVIS" = "true" ]; then
-    #     # Download and unzip sample images
-    #     mkdir sg_dataset
-    #     cd sg_dataset
-    #     git clone $SAMPLE_IMAGES_URL
-    #     mv Visual-Relationship-Detection/samples ./
-    #     rm -r Visual-Relationship-Detection
-    #     cd ../..
-    # else
-    #     # Download and unzip all images
-    #     wget $IMAGES_URL
-    #     unzip sg_dataset.zip
-    #     rm sg_dataset.zip
-    #     cd ../../..
-    # fi
-
-    # Download and unzip all images
+    # Download and unzip sample images
     mkdir sg_dataset
     cd sg_dataset
     git clone $SAMPLE_IMAGES_URL
@@ -59,8 +43,8 @@ if [ ! -d "scene_graph/data" ]; then
     rm -r Visual-Relationship-Detection
     cd ../../../..
 
-    mkdir -p scene_graph/data/glove
-    cd scene_graph/data/glove
+    mkdir -p visual_relation/data/glove
+    cd visual_relation/data/glove
 
     wget $GLOVE_URL
     unzip glove.6B.zip
diff --git a/scene_graph/model.py b/visual_relation/model.py
similarity index 97%
rename from scene_graph/model.py
rename to visual_relation/model.py
index a5687b06..cad95b89 100644
--- a/scene_graph/model.py
+++ b/visual_relation/model.py
@@ -75,7 +75,7 @@ def __init__(
             "sub_category": df["subject_category"].tolist(),
         }
         Y_dict = {
-            "scene_graph_task": torch.LongTensor(df["label"].to_numpy())
+            "visual_relation_task": torch.LongTensor(df["label"].to_numpy())
         }  # change to take in the rounded train labels
 
         super(SceneGraphDataset, self).__init__(name, split, X_dict, Y_dict)
@@ -123,7 +123,7 @@ def __len__(self):
 class WordEmb(nn.Module):
     """Extract and concat word embeddings for obj and sub categories."""
 
-    def __init__(self, glove_fn="scene_graph/data/glove/glove.6B.100d.txt"):
+    def __init__(self, glove_fn="visual_relation/data/glove/glove.6B.100d.txt"):
         super(WordEmb, self).__init__()
 
         self.word_embs = pandas.read_csv(
diff --git a/scene_graph/requirements.txt b/visual_relation/requirements.txt
similarity index 100%
rename from scene_graph/requirements.txt
rename to visual_relation/requirements.txt
diff --git a/scene_graph/utils.py b/visual_relation/utils.py
similarity index 72%
rename from scene_graph/utils.py
rename to visual_relation/utils.py
index 72a2cd2b..aa2d9633 100644
--- a/scene_graph/utils.py
+++ b/visual_relation/utils.py
@@ -55,24 +55,42 @@ def vrd_to_pandas(
 
 
 # %%
-def load_vrd_data():
+def load_vrd_data(sample=False, is_travis=False):
     """Download and load Pandas DataFrame of VRD relationships.
 
     NOTE: Only loads semantic relationship examples.
     """
-    try:
-        subprocess.run(
-            ["bash", "scene_graph/download_data.sh"], check=True, stderr=subprocess.PIPE
-        )
-    except subprocess.CalledProcessError as e:
-        print(e.stderr.decode())
-        raise e
-
-    relationships_train = json.load(open("scene_graph/data/VRD/annotations_train.json"))
-    relationships_test = json.load(open("scene_graph/data/VRD/annotations_test.json"))
-    objects = json.load(open("scene_graph/data/VRD/objects.json"))
-    predicates = json.load(open("scene_graph/data/VRD/predicates.json"))
+    if sample or is_travis:
+        try:
+            subprocess.run(
+                ["bash", "visual_relation/download_sample_data.sh"],
+                check=True,
+                stderr=subprocess.PIPE,
+            )
+        except subprocess.CalledProcessError as e:
+            print(e.stderr.decode())
+            raise e
+    else:
+        try:
+            subprocess.run(
+                ["bash", "visual_relation/download_full_data.sh"],
+                check=True,
+                stderr=subprocess.PIPE,
+            )
+        except subprocess.CalledProcessError as e:
+            print(e.stderr.decode())
+            raise e
+
+    relationships_train = json.load(
+        open("visual_relation/data/VRD/annotations_train.json")
+    )
+    relationships_test = json.load(
+        open("visual_relation/data/VRD/annotations_test.json")
+    )
+
+    objects = json.load(open("visual_relation/data/VRD/objects.json"))
+    predicates = json.load(open("visual_relation/data/VRD/predicates.json"))
     semantic_predicates = [
         "carry",
         "cover",
@@ -99,9 +117,9 @@ def load_vrd_data():
     }
 
     # TODO: hack to work with small sample of data for tox
-    if os.path.isdir("scene_graph/data/VRD/sg_dataset/samples"):
+    if os.path.isdir("visual_relation/data/VRD/sg_dataset/samples"):
         # pass in list of images as keys_list
-        keys_list = os.listdir("scene_graph/data/VRD/sg_dataset/samples")
+        keys_list = os.listdir("visual_relation/data/VRD/sg_dataset/samples")
         test_df = vrd_to_pandas(
             relationships_test,
             objects,
@@ -110,7 +128,7 @@ def load_vrd_data():
             keys_list=keys_list,
         )
         return test_df, test_df, test_df
-    elif os.path.isdir("scene_graph/data/VRD/sg_dataset/sg_train_images"):
+    elif os.path.isdir("visual_relation/data/VRD/sg_dataset/sg_train_images"):
         train_df = vrd_to_pandas(
             relationships_train,
             objects,
diff --git a/scene_graph/vrd_tutorial.ipynb b/visual_relation/visual_relation_tutorial.ipynb
similarity index 84%
rename from scene_graph/vrd_tutorial.ipynb
rename to visual_relation/visual_relation_tutorial.ipynb
index 25447741..9305a4d7 100644
--- a/scene_graph/vrd_tutorial.ipynb
+++ b/visual_relation/visual_relation_tutorial.ipynb
@@ -25,7 +25,7 @@
    "source": [
     "import os\n",
     "\n",
-    "if os.path.basename(os.getcwd()) == \"scene_graph\":\n",
+    "if os.path.basename(os.getcwd()) == \"visual_relation\":\n",
     "    os.chdir(\"..\")"
    ]
   },
@@ -34,36 +34,34 @@
    "metadata": {},
    "source": [
     "### 1. Load Dataset\n",
-    "We load the VRD dataset and filter images with at least one action predicate in it, since these are more difficult to classify than geometric relationships like `above` or `next to`. We load the train, valid, and test sets as Pandas DataFrame objects with the following fields:\n",
+    "We load the VRD dataset and filter images with at least one action predicate in it, since these are more difficult to classify than geometric relationships like `above` or `next to`. We load the train, valid, and test sets as Pandas `DataFrame` objects with the following fields:\n",
     "- `label`: The relationship between the objects. 0: `RIDE`, 1: `CARRY`, 2: `OTHER` action predicates\n",
     "- `object_bbox`: coordinates of the bounding box for the object `[ymin, ymax, xmin, xmax]`\n",
     "- `object_category`: category of the object\n",
     "- `source_img`: filename for the corresponding image the relationship is in\n",
     "- `subject_bbox`: coordinates of the bounding box for the object `[ymin, ymax, xmin, xmax]`\n",
-    "- `subject_category`: category of the subject\n",
-    "\n",
-    "Note that the training DataFrame will have a labels field with all -1s. This denotes the lack of labels for that particular dataset. In this tutorial, we will assign probabilistic labels to the training set by writing labeling functions over attributes of the subject and objects!"
+    "- `subject_category`: category of the subject"
    ]
   },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "The autoreload extension is already loaded. To reload it, use:\n",
-      "  %reload_ext autoreload\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "\n",
-    "import numpy as np"
+    "import numpy as np\n"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "If you are running this notebook for the first time, it will take ~15 mins to download all the required sample data.\n",
+    "\n",
+    "The sampled version of the dataset **uses the same 26 examples across the train, dev, and test sets. This setting is meant to demonstrate how Snorkel works with this task, not to demonstrate performance.**"
+   ]
+  },
  {
@@ -82,21 +80,33 @@
    }
   ],
   "source": [
-    "from scene_graph.utils import load_vrd_data\n",
+    "from visual_relation.utils import load_vrd_data\n",
    "\n",
-    "train_df, valid_df, test_df = load_vrd_data()\n",
+    "# setting sample=False will take ~3 hours to run (downloads full VRD dataset)\n",
+    "sample = True\n",
+    "is_travis = \"TRAVIS\" in os.environ\n",
+    "train_df, valid_df, test_df = load_vrd_data(sample, is_travis)\n",
    "\n",
    "print(\"Train Relationships: \", len(train_df))\n",
    "print(\"Dev Relationships: \", len(valid_df))\n",
    "print(\"Test Relationships: \", len(test_df))"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Note that the training `DataFrame` will have a labels field with all -1s. This denotes the lack of labels for that particular dataset. In this tutorial, we will assign probabilistic labels to the training set by writing labeling functions over attributes of the subject and objects!"
+   ]
+  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Writing Labeling Functions\n",
-    "We now write labeling functions to detect what relationship exists between pairs of bounding boxes. To do so, we can encode various intuitions into the labeling functions. _Categorical_ intution: knowledge about the categories of subjects and objects usually involved in these relationships (e.g., `person` is usually the subject for predicates like `ride` and `carry`), and _spatial_ intuition: knowledge about the relative positions of the subject and objects (e.g., subject is usually higher than the object for the predicate `ride`)."
+    "We now write labeling functions to detect what relationship exists between pairs of bounding boxes. To do so, we can encode various intuitions into the labeling functions:\n",
+    "* _Categorical_ intuition: knowledge about the categories of subjects and objects usually involved in these relationships (e.g., `person` is usually the subject for predicates like `ride` and `carry`)\n",
+    "* _Spatial_ intuition: knowledge about the relative positions of the subject and objects (e.g., subject is usually higher than the object for the predicate `ride`)"
   ]
  },
  {
@@ -234,7 +244,7 @@
     "output_type": "stream",
     "text": [
      "\r",
-      "100%|██████████| 26/26 [00:00<00:00, 1988.80it/s]"
+      "100%|██████████| 26/26 [00:00<00:00, 1588.57it/s]"
     ]
    },
    {
@@ -251,7 +261,7 @@
     "output_type": "stream",
     "text": [
      "\r",
-      "100%|██████████| 26/26 [00:00<00:00, 2442.48it/s]"
+      "100%|██████████| 26/26 [00:00<00:00, 2890.55it/s]"
     ]
    },
    {
@@ -468,7 +478,7 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "We use [F1](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html) Micro average for the multiclass setting, which calculates metrics globally by counting the total true positives, false negatives and false positives."
+    "We use [F1](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html) Micro average for the multiclass setting, which calculates metrics globally across classes by counting the total true positives, false negatives and false positives."
   ]
  },
  {
@@ -513,12 +523,15 @@
   "outputs": [],
   "source": [
    "from snorkel.classification import DictDataLoader\n",
-    "from scene_graph.model import FlatConcat, SceneGraphDataset, WordEmb, init_fc\n",
+    "from visual_relation.model import FlatConcat, SceneGraphDataset, WordEmb, init_fc\n",
    "\n",
-    "# change to \"scene_graph/data/VRD/sg_dataset/sg_train_images\" for full set\n",
-    "TRAIN_DIR = \"scene_graph/data/VRD/sg_dataset/samples\"\n",
    "train_df[\"labels\"] = label_model.predict(L_train)\n",
    "\n",
+    "if sample:\n",
+    "    TRAIN_DIR = \"visual_relation/data/VRD/sg_dataset/samples\"\n",
+    "else:\n",
+    "    TRAIN_DIR = \"visual_relation/data/VRD/sg_dataset/sg_train_images\"\n",
+    "\n",
    "train_dl = DictDataLoader(\n",
    "    SceneGraphDataset(\"train_dataset\", \"train\", TRAIN_DIR, train_df),\n",
    "    batch_size=16,\n",
@@ -585,12 +598,12 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "from scene_graph.model import get_task_flow\n",
+    "from visual_relation.model import get_task_flow\n",
    "\n",
    "# define task flow through modules\n",
    "task_flow = get_task_flow()\n",
    "pred_cls_task = Task(\n",
-    "    name=\"scene_graph_task\",\n",
+    "    name=\"visual_relation_task\",\n",
    "    module_pool=module_pool,\n",
    "    task_flow=task_flow,\n",
    "    scorer=Scorer(metrics=[\"f1_micro\"]),\n",
@@ -621,10 +634,10 @@
    "name": "stderr",
    "output_type": "stream",
    "text": [
-     "/Users/braden/repos/snorkel-tutorials/scene_graph/model.py:134: FutureWarning: Method .as_matrix will be removed in a future version. Use .values instead.\n",
+     "/Users/paroma/Documents/Projects/snorkel-tutorials/visual_relation/model.py:134: FutureWarning: Method .as_matrix will be removed in a future version. Use .values instead.\n",
     "  return self.word_embs.loc[word].as_matrix()\n",
     "\r",
     "Epoch 0:: 0%| | 0/2 [00:04
-# %% +# + # %load_ext autoreload # %autoreload 2 import numpy as np -# %% -from scene_graph.utils import load_vrd_data +# - + +# If you are running this notebook for the first time, it will take ~15 mins to download all the required sample data. +# +# The sampled version of the dataset **uses the same 26 examples across the train, dev, and test sets. This setting is meant to demonstrate how Snorkel works with this task, not to demonstrate performance.** + +# + +from visual_relation.utils import load_vrd_data -train_df, valid_df, test_df = load_vrd_data() +# setting sample=False will take ~3 hours to run (downloads full VRD dataset) +sample = True +is_travis = "TRAVIS" in os.environ +train_df, valid_df, test_df = load_vrd_data(sample, is_travis) print("Train Relationships: ", len(train_df)) print("Dev Relationships: ", len(valid_df)) print("Test Relationships: ", len(test_df)) +# - + +# Note that the training `DataFrame` will have a labels field with all -1s. This denotes the lack of labels for that particular dataset. In this tutorial, we will assign probabilistic labels to the training set by writing labeling functions over attributes of the subject and objects! -# %% [markdown] # ## 2. Writing Labeling Functions -# We now write labeling functions to detect what relationship exists between pairs of bounding boxes. To do so, we can encode various intuitions into the labeling functions. _Categorical_ intution: knowledge about the categories of subjects and objects usually involved in these relationships (e.g., `person` is usually the subject for predicates like `ride` and `carry`), and _spatial_ intuition: knowledge about the relative positions of the subject and objects (e.g., subject is usually higher than the object for the predicate `ride`). +# We now write labeling functions to detect what relationship exists between pairs of bounding boxes. To do so, we can encode various intuitions into the labeling functions: +# * _Categorical_ intution: knowledge about the categories of subjects and objects usually involved in these relationships (e.g., `person` is usually the subject for predicates like `ride` and `carry`) +# * _Spatial_ intuition: knowledge about the relative positions of the subject and objects (e.g., subject is usually higher than the object for the predicate `ride`) -# %% RIDE = 0 CARRY = 1 OTHER = 2 ABSTAIN = -1 -# %% [markdown] # We begin with labeling functions that encode categorical intuition: we use knowledge about common subject-object category pairs that are common for `RIDE` and `CARRY` and also knowledge about what subjects or objects are unlikely to be involved in the two relationships. -# %% +# + from snorkel.labeling import labeling_function # Category-based LFs @@ -100,10 +124,11 @@ def LF_person(x): return ABSTAIN -# %% [markdown] +# - + # We now encode our spatial intuition, which includes measuring the distance between the bounding boxes and comparing their relative areas. -# %% +# + # Distance-based LFs @labeling_function() def LF_ydist(x): @@ -134,10 +159,11 @@ def LF_area(x): return ABSTAIN -# %% [markdown] +# - + # Note that the labeling functions have varying empirical accuracies and coverages. Due to class imbalance in our chosen relationships, labeling functions that label the `OTHER` class have higher coverage than labeling functions for `RIDE` or `CARRY`. This reflects the distribution of classes in the dataset as well. 
 
-# %%
+# +
 from snorkel.labeling import PandasLFApplier
 
 lfs = [
@@ -155,43 +181,43 @@ def LF_area(x):
 L_train = applier.apply(train_df)
 L_valid = applier.apply(valid_df)
 
-# %%
+# +
 from snorkel.labeling import LFAnalysis
 
 Y_valid = valid_df.label.values
 LFAnalysis(L_valid, lfs).lf_summary(Y_valid)
+# -
 
-# %% [markdown]
 # ## 3. Train Label Model
 # We now train a multi-class `LabelModel` to assign training labels to the unalabeled training set.
 
-# %%
+# +
 from snorkel.labeling import LabelModel
 
 label_model = LabelModel(cardinality=3, verbose=True)
 label_model.fit(L_train, seed=123, lr=0.01, log_freq=10, n_epochs=100)
+# -
 
-# %% [markdown]
-# We use [F1](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html) Micro average for the multiclass setting, which calculates metrics globally by counting the total true positives, false negatives and false positives.
+# We use [F1](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html) Micro average for the multiclass setting, which calculates metrics globally across classes by counting the total true positives, false negatives and false positives.
 
-# %%
 label_model.score(L_valid, Y_valid, metrics=["f1_micro"])
 
-# %% [markdown]
 # ## 4. Train a Classifier
 # You can then use these training labels to train any standard discriminative model, such as [an off-the-shelf ResNet](https://github.com/KaimingHe/deep-residual-networks), which should learn to generalize beyond the LF's we've developed!
 
-# %% [markdown]
 # #### Create DataLoaders for Classifier
 
-# %%
+# +
 from snorkel.classification import DictDataLoader
-from scene_graph.model import FlatConcat, SceneGraphDataset, WordEmb, init_fc
+from visual_relation.model import FlatConcat, SceneGraphDataset, WordEmb, init_fc
 
-# change to "scene_graph/data/VRD/sg_dataset/sg_train_images" for full set
-TRAIN_DIR = "scene_graph/data/VRD/sg_dataset/samples"
 train_df["labels"] = label_model.predict(L_train)
 
+if sample:
+    TRAIN_DIR = "visual_relation/data/VRD/sg_dataset/samples"
+else:
+    TRAIN_DIR = "visual_relation/data/VRD/sg_dataset/sg_train_images"
+
 train_dl = DictDataLoader(
     SceneGraphDataset("train_dataset", "train", TRAIN_DIR, train_df),
     batch_size=16,
@@ -203,11 +229,11 @@ def LF_area(x):
     batch_size=16,
     shuffle=False,
 )
+# -
 
-# %% [markdown]
 # #### Define Model Architecture
 
-# %%
+# +
 import torchvision.models as models
 import torch.nn as nn
@@ -241,32 +267,34 @@ def LF_area(x):
     }
 )
 
-# %%
-from scene_graph.model import get_task_flow
+# +
+from visual_relation.model import get_task_flow
 
 # define task flow through modules
 task_flow = get_task_flow()
 pred_cls_task = Task(
-    name="scene_graph_task",
+    name="visual_relation_task",
     module_pool=module_pool,
     task_flow=task_flow,
     scorer=Scorer(metrics=["f1_micro"]),
 )
+# -
 
-# %% [markdown]
 # ### Train and Evaluate Model
 
-# %%
+# +
 from snorkel.classification import MultitaskClassifier, Trainer
 
 model = MultitaskClassifier([pred_cls_task])
 trainer = Trainer(
-    n_epochs=1,
+    n_epochs=1,  # increase for improved performance
     lr=1e-3,
     checkpointing=True,
    checkpointer_config={"checkpoint_dir": "checkpoint"},
 )
 trainer.fit(model, [train_dl])
+# -
 
-# %%
 model.score([valid_dl])
+
+# We have successfully trained a visual relationship detection model! Using categorical and spatial intuition about how objects in a visual relationship interact with each other, we are able to assign high-quality training labels to object pairs in the VRD dataset in a multi-class classification setting.
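The spatial labeling functions touched above (`LF_ydist`, `LF_area`) appear in this diff only at their hunk edges, so their bodies are not shown. As a rough, hypothetical sketch of the spatial intuition the tutorial text describes ("subject is usually higher than the object for the predicate `ride`"), a minimal LF of this kind could look like the following. The function name, the toy data, and the exact comparison are illustrative assumptions; the `[ymin, ymax, xmin, xmax]` box layout, the label constants, and the `labeling_function`/`PandasLFApplier` API come from the tutorial itself.

```python
import pandas as pd

from snorkel.labeling import PandasLFApplier, labeling_function

# Label constants, as defined in the tutorial.
RIDE = 0
CARRY = 1
OTHER = 2
ABSTAIN = -1


@labeling_function()
def lf_subject_above_object(x):
    # Boxes are [ymin, ymax, xmin, xmax]; a smaller ymin means the box
    # sits higher in the image, which weakly suggests a RIDE relationship.
    if x.subject_bbox[0] < x.object_bbox[0]:
        return RIDE
    return ABSTAIN


# Toy usage: one pair where the subject box sits above the object box.
df = pd.DataFrame(
    {
        "subject_bbox": [[50, 200, 100, 180]],
        "object_bbox": [[150, 300, 90, 200]],
    }
)
L = PandasLFApplier([lf_subject_above_object]).apply(df)
print(L)  # [[0]] -> the LF votes RIDE for this pair
```

Like the tutorial's real LFs, this sketch returns `ABSTAIN` rather than guessing when its heuristic does not fire, which is what lets the `LabelModel` reweigh noisy, partially overlapping votes into probabilistic training labels.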