Rename scene_graph and vrd to visual_relation (#66)
paroma committed Aug 11, 2019
1 parent e91a833 commit 0bbdfc7
Showing 14 changed files with 61 additions and 62 deletions.
13 changes: 4 additions & 9 deletions README.md
@@ -5,14 +5,10 @@ A collection of tutorials for [Snorkel](http://snorkel.org).
The Snorkel tutorials are grouped by application, with some applications having multiple associated notebooks in their directory.
* `spam`: Is this YouTube comment spam?
* `spouse`: Does this sentence imply that the two marked people are spouses?
* `scene_graph`: Is object A riding object B in the image, carrying it, or neither?
* `visual_relation`: Is object A riding object B in the image, carrying it, or neither?
* `crowdsourcing`: Is this tweet about the weather expressing a positive, negative or neutral sentiment?
* `multitask` (Multi-Task Learning): A synthetic task demonstrating the native Snorkel multi-task classifier API
* [`drybell`](https://ai.googleblog.com/2019/03/harnessing-organizational-knowledge-for.html): Is a celebrity mentioned in this news article?

See the [Tutorials Index](#tutorials-index) for a listing of which tutorials demonstrate which task types, techniques, and integrations.

@@ -144,14 +140,13 @@ Here we provide an index pointing to different available tutorials by their task
* Task
* Text Classification (Text): `spam`, `crowdsourcing`, `drybell`
* Relation Extraction (Text): `spouse`
* Visual Relationship Detection (Image): `scene_graph`
* Visual Relationship Detection (Image): `visual_relation`
* Techniques
* Labeling with Labeling Functions (LFs): `spam`, `spouse`, `scene_graph`, `crowdsourcing`, `drybell`
* Labeling with Labeling Functions (LFs): `spam`, `spouse`, `visual_relation`, `crowdsourcing`
* Augmentation with Transformation Functions (TFs): `spam`
* Monitoring with Slicing Functions (SFs): `spam`
* Using Crowd Worker Labels: `crowdsourcing`
* Multi-Task Learning (MTL): `multitask`, `scene_graph`, `spam`
* Large-Scale Production Pipeline: `drybell`
* Multi-Task Learning (MTL): `multitask`, `visual_relation`, `spam`
* Integrations
* TensorFlow/Keras: `spam`, `spouse`
* Scikit-Learn: `spam`, `crowdsourcing`
1 change: 0 additions & 1 deletion scene_graph/.notebooks

This file was deleted.

2 changes: 1 addition & 1 deletion spam/01_spam_tutorial.ipynb

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion spam/01_spam_tutorial.py
@@ -1006,7 +1006,7 @@ def plot_probabilities_histogram(Y):
# If you enjoyed this tutorial and you've already checked out the Snorkel 101 Guide, check out the [`snorkel-tutorials` table of contents](https://github.com/snorkel-team/snorkel-tutorials#snorkel-tutorials) for other tutorials that you may find interesting, including demonstrations of how to use Snorkel
#
# * As part of a [hybrid crowdsourcing pipeline](https://github.com/snorkel-team/snorkel-tutorials/tree/master/crowdsourcing)
# * For [scene-graph detection over images](https://github.com/snorkel-team/snorkel-tutorials/tree/master/scene_graph)
# * For [scene-graph detection over images](https://github.com/snorkel-team/snorkel-tutorials/tree/master/visual_relation)
# * For [information extraction over text](https://github.com/snorkel-team/snorkel-tutorials/tree/master/spouse)
# * For [data augmentation](https://github.com/snorkel-team/snorkel-tutorials/tree/master/spam)
#
6 changes: 3 additions & 3 deletions tox.ini
@@ -2,7 +2,7 @@
skipsdist = true
envlist =
spouse,
scene_graph,
visual_relation,
spam,
crowdsourcing,
mtl,
@@ -17,7 +17,7 @@ deps =
spouse: -rspouse/requirements.txt
spam: -rspam/requirements.txt
multitask: -rmultitask/requirements.txt
scene_graph: -rscene_graph/requirements.txt
visual_relation: -rvisual_relation/requirements.txt
crowdsourcing: -rcrowdsourcing/requirements.txt
recsys: -rrecsys/requirements.txt
drybell: -rdrybell/requirements.txt
@@ -29,7 +29,7 @@ commands =
spouse: python {toxinidir}/scripts/build.py {posargs:test} spouse
spam: python {toxinidir}/scripts/build.py {posargs:test} spam
multitask: python {toxinidir}/scripts/build.py {posargs:test} multitask
scene_graph: python {toxinidir}/scripts/build.py {posargs:test} scene_graph
visual_relation: python {toxinidir}/scripts/build.py {posargs:test} visual_relation
crowdsourcing: python {toxinidir}/scripts/build.py {posargs:test} crowdsourcing
drybell: python {toxinidir}/scripts/build.py {posargs:test} drybell

1 change: 1 addition & 0 deletions visual_relation/.notebooks
@@ -0,0 +1 @@
visual_relation_tutorial
File renamed without changes.
@@ -2,13 +2,13 @@
# Download data,

ANNOTATIONS_URL="https://www.dropbox.com/s/bnfhm6kt9xumik8/vrd.zip"
IMAGES_URL="http://imagenet.stanford.edu/internal/jcjohns/scene_graphs/sg_dataset.zip"
IMAGES_URL="http://imagenet.stanford.edu/internal/jcjohns/visual_relations/sg_dataset.zip"
SAMPLE_IMAGES_URL="https://github.com/Prof-Lu-Cewu/Visual-Relationship-Detection.git"
GLOVE_URL="http://nlp.stanford.edu/data/wordvecs/glove.6B.zip"

if [ ! -d "scene_graph/data" ]; then
mkdir -p scene_graph/data
cd scene_graph/data
if [ ! -d "visual_relation/data" ]; then
mkdir -p visual_relation/data
cd visual_relation/data

# download and unzip metadata and annotations
wget $ANNOTATIONS_URL
@@ -24,8 +24,8 @@ if [ ! -d "scene_graph/data" ]; then
rm sg_dataset.zip
cd ../../..

mkdir -p scene_graph/data/glove
cd scene_graph/data/glove
mkdir -p visual_relation/data/glove
cd visual_relation/data/glove

wget $GLOVE_URL
unzip glove.6B.zip
@@ -12,20 +12,20 @@ DIRS=("glove" "VRD/sg_dataset/samples")
# Check if at least any file is missing. If so, reload all data.
for directory_name in "${DIRS[@]}"
do
if [ ! -d "scene_graph/data/$directory_name" ]; then
if [ ! -d "visual_relation/data/$directory_name" ]; then
RELOAD=true
fi
done

ANNOTATIONS_URL="https://www.dropbox.com/s/bnfhm6kt9xumik8/vrd.zip"
IMAGES_URL="http://imagenet.stanford.edu/internal/jcjohns/scene_graphs/sg_dataset.zip"
IMAGES_URL="http://imagenet.stanford.edu/internal/jcjohns/visual_relations/sg_dataset.zip"
SAMPLE_IMAGES_URL="https://github.com/Prof-Lu-Cewu/Visual-Relationship-Detection.git"
GLOVE_URL="http://nlp.stanford.edu/data/wordvecs/glove.6B.zip"

if [ ! -d "scene_graph/data" ]; then
if [ -d "scene_graph/data/" ]; then rm -Rf "scene_graph/data/"; fi
mkdir -p scene_graph/data
cd scene_graph/data
if [ ! -d "visual_relation/data" ]; then
if [ -d "visual_relation/data/" ]; then rm -Rf "visual_relation/data/"; fi
mkdir -p visual_relation/data
cd visual_relation/data

# download and unzip metadata and annotations
wget $ANNOTATIONS_URL
@@ -43,8 +43,8 @@ if [ ! -d "scene_graph/data" ]; then
rm -r Visual-Relationship-Detection
cd ../../../..

mkdir -p scene_graph/data/glove
cd scene_graph/data/glove
mkdir -p visual_relation/data/glove
cd visual_relation/data/glove

wget $GLOVE_URL
unzip glove.6B.zip
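
For context, these renamed scripts are what the Python helpers later in this diff invoke. A minimal sketch (mirroring the `subprocess` call in `visual_relation/utils.py` shown further down; assumes it is run from the repository root) of triggering the sample download from Python:

```python
import subprocess

# Run the renamed sample-download script and surface its stderr on failure,
# as the load_vrd_data helper in visual_relation/utils.py does below.
try:
    subprocess.run(
        ["bash", "visual_relation/download_sample_data.sh"],
        check=True,
        stderr=subprocess.PIPE,
    )
except subprocess.CalledProcessError as e:
    print(e.stderr.decode())
    raise e
```
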
4 changes: 2 additions & 2 deletions scene_graph/model.py → visual_relation/model.py
@@ -75,7 +75,7 @@ def __init__(
"sub_category": df["subject_category"].tolist(),
}
Y_dict = {
"scene_graph_task": torch.LongTensor(df["label"].to_numpy())
"visual_relation_task": torch.LongTensor(df["label"].to_numpy())
} # change to take in the rounded train labels
super(SceneGraphDataset, self).__init__(name, split, X_dict, Y_dict)

@@ -123,7 +123,7 @@ def __len__(self):
class WordEmb(nn.Module):
"""Extract and concat word embeddings for obj and sub categories."""

def __init__(self, glove_fn="scene_graph/data/glove/glove.6B.100d.txt"):
def __init__(self, glove_fn="visual_relation/data/glove/glove.6B.100d.txt"):
super(WordEmb, self).__init__()

self.word_embs = pandas.read_csv(
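
The GloVe path above points at a plain, space-separated text file (one token per row followed by its vector values). A minimal illustrative sketch of loading it with pandas, assuming that format; the repository's exact call is truncated in this diff, and the "person"/"horse" lookups are hypothetical examples:

```python
import csv

import pandas as pd
import torch

# Hypothetical loader for the 100-dimensional GloVe vectors referenced above.
glove_fn = "visual_relation/data/glove/glove.6B.100d.txt"
word_embs = pd.read_csv(
    glove_fn,
    sep=" ",
    index_col=0,             # first column is the token itself
    header=None,
    quoting=csv.QUOTE_NONE,  # GloVe files contain bare quote characters
)

# Look up and concatenate embeddings for a subject/object category pair,
# in the spirit of the WordEmb module above.
sub_vec = torch.FloatTensor(word_embs.loc["person"].values)
obj_vec = torch.FloatTensor(word_embs.loc["horse"].values)
pair_emb = torch.cat([sub_vec, obj_vec], dim=0)  # 200-dim concatenated embedding
```
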
File renamed without changes.
22 changes: 13 additions & 9 deletions scene_graph/utils.py → visual_relation/utils.py
@@ -64,7 +64,7 @@ def load_vrd_data(sample=False, is_travis=False):
if sample or is_travis:
try:
subprocess.run(
["bash", "scene_graph/download_sample_data.sh"],
["bash", "visual_relation/download_sample_data.sh"],
check=True,
stderr=subprocess.PIPE,
)
@@ -74,19 +74,23 @@
else:
try:
subprocess.run(
["bash", "scene_graph/download_full_data.sh"],
["bash", "visual_relation/download_full_data.sh"],
check=True,
stderr=subprocess.PIPE,
)
except subprocess.CalledProcessError as e:
print(e.stderr.decode())
raise e

relationships_train = json.load(open("scene_graph/data/VRD/annotations_train.json"))
relationships_test = json.load(open("scene_graph/data/VRD/annotations_test.json"))
relationships_train = json.load(
open("visual_relation/data/VRD/annotations_train.json")
)
relationships_test = json.load(
open("visual_relation/data/VRD/annotations_test.json")
)

objects = json.load(open("scene_graph/data/VRD/objects.json"))
predicates = json.load(open("scene_graph/data/VRD/predicates.json"))
objects = json.load(open("visual_relation/data/VRD/objects.json"))
predicates = json.load(open("visual_relation/data/VRD/predicates.json"))
semantic_predicates = [
"carry",
"cover",
@@ -113,9 +117,9 @@ }
}

# TODO: hack to work with small sample of data for tox
if os.path.isdir("scene_graph/data/VRD/sg_dataset/samples"):
if os.path.isdir("visual_relation/data/VRD/sg_dataset/samples"):
# pass in list of images as keys_list
keys_list = os.listdir("scene_graph/data/VRD/sg_dataset/samples")
keys_list = os.listdir("visual_relation/data/VRD/sg_dataset/samples")
test_df = vrd_to_pandas(
relationships_test,
objects,
@@ -124,7 +128,7 @@ def load_vrd_data(sample=False, is_travis=False):
keys_list=keys_list,
)
return test_df, test_df, test_df
elif os.path.isdir("scene_graph/data/VRD/sg_dataset/sg_train_images"):
elif os.path.isdir("visual_relation/data/VRD/sg_dataset/sg_train_images"):
train_df = vrd_to_pandas(
relationships_train,
objects,
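
For reference, a minimal usage sketch of the helper above, matching how the tutorial further down imports it. With `sample=True` the same small split is returned for train, valid, and test, as the sample path's `return test_df, test_df, test_df` shows; the sketch assumes it is run from the repository root with network access for the download.

```python
from visual_relation.utils import load_vrd_data

# Sketch only: sample=True downloads and loads the small sample split.
train_df, valid_df, test_df = load_vrd_data(sample=True)
print(len(train_df), "training rows")
print(train_df.head())
```
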

Some generated files are not rendered by default.

@@ -29,7 +29,7 @@
# +
import os

if os.path.basename(os.getcwd()) == "scene_graph":
if os.path.basename(os.getcwd()) == "visual_relation":
os.chdir("..")
# -

@@ -55,7 +55,7 @@
# The sampled version of the dataset **uses the same 26 examples across the train, dev, and test sets. This setting is meant to demonstrate how Snorkel works with this task, not to demonstrate performance.**

# +
from scene_graph.utils import load_vrd_data
from visual_relation.utils import load_vrd_data

# setting sample=False will take ~3 hours to run (downloads full VRD dataset)
sample = True
@@ -209,14 +209,14 @@ def LF_area(x):

# +
from snorkel.classification import DictDataLoader
from scene_graph.model import FlatConcat, SceneGraphDataset, WordEmb, init_fc
from visual_relation.model import FlatConcat, SceneGraphDataset, WordEmb, init_fc

train_df["labels"] = label_model.predict(L_train)

if sample:
TRAIN_DIR = "scene_graph/data/VRD/sg_dataset/samples"
TRAIN_DIR = "visual_relation/data/VRD/sg_dataset/samples"
else:
TRAIN_DIR = "scene_graph/data/VRD/sg_dataset/sg_train_images"
TRAIN_DIR = "visual_relation/data/VRD/sg_dataset/sg_train_images"

train_dl = DictDataLoader(
SceneGraphDataset("train_dataset", "train", TRAIN_DIR, train_df),
@@ -268,12 +268,12 @@ def LF_area(x):
)

# +
from scene_graph.model import get_task_flow
from visual_relation.model import get_task_flow

# define task flow through modules
task_flow = get_task_flow()
pred_cls_task = Task(
name="scene_graph_task",
name="visual_relation_task",
module_pool=module_pool,
task_flow=task_flow,
scorer=Scorer(metrics=["f1_micro"]),
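
A hedged sketch of how a `Task` like the renamed `visual_relation_task` is typically trained with Snorkel v0.9's `snorkel.classification` API. It assumes the `pred_cls_task` and `train_dl` objects built in the tutorial code above; the epoch count and learning rate are placeholders, not the tutorial's settings.

```python
from snorkel.classification import MultitaskClassifier, Trainer

# Assumes pred_cls_task and train_dl from the tutorial code above.
model = MultitaskClassifier([pred_cls_task])
trainer = Trainer(n_epochs=1, lr=1e-3)  # placeholder hyperparameters
trainer.fit(model, [train_dl])

# Evaluate with the task's scorer (f1_micro, per the Task definition above).
print(model.score([train_dl]))
```
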
