diff --git a/README.md b/README.md
index ad83d52e..4e552757 100644
--- a/README.md
+++ b/README.md
@@ -5,14 +5,10 @@
 A collection of tutorials for [Snorkel](http://snorkel.org).
 The Snorkel tutorials are grouped by application, with some applications having multiple associated notebooks in their directory.
 * `spam`: Is this YouTube comment spam?
 * `spouse`: Does this sentence imply that the two marked people are spouses?
-* `scene_graph`: Is object A riding object B in the image, carrying it, or neither?
+* `visual_relation`: Is object A riding object B in the image, carrying it, or neither?
 * `crowdsourcing`: Is this tweet about the weather expressing a positive, negative or neutral sentiment?
-<<<<<<< HEAD
-* `multitask` (Multi-Task Learning): A synthetic task demonstrating the native Snorkel multi-task classifier API
-=======
 * `mtl` (Multi-Task Learning): A synthetic task demonstrating the native Snorkel multi-task classifier API
 * [`drybell`](https://ai.googleblog.com/2019/03/harnessing-organizational-knowledge-for.html): Is a celebrity mentioned in this news article?
->>>>>>> Add to tox and README
 
 See the [Tutorials Index](#tutorials-index) for a listing of which tutorials demonstrate which task types, techniques, and integrations.
@@ -144,14 +140,13 @@ Here we provide an index pointing to different available tutorials by their task
 * Task
   * Text Classification (Text): `spam`, `crowdsourcing`, `drybell`
   * Relation Extraction (Text): `spouse`
-  * Visual Relationship Detection (Image): `scene_graph`
+  * Visual Relationship Detection (Image): `visual_relation`
 * Techniques
-  * Labeling with Labeling Functions (LFs): `spam`, `spouse`, `scene_graph`, `crowdsourcing`, `drybell`
+  * Labeling with Labeling Functions (LFs): `spam`, `spouse`, `visual_relation`, `crowdsourcing`
   * Augmentation with Transformation Functions (TFs): `spam`
   * Monitoring with Slicing Functions (SFs): `spam`
   * Using Crowd Worker Labels: `crowdsourcing`
-  * Multi-Task Learning (MTL): `multitask`, `scene_graph`, `spam`
-  * Large-Scale Production Pipeline: `drybell`
+  * Multi-Task Learning (MTL): `multitask`, `visual_relation`, `spam`
 * Integrations
   * TensorFlow/Keras: `spam`, `spouse`
   * Scikit-Learn: `spam`, `crowdsourcing`
diff --git a/scene_graph/.notebooks b/scene_graph/.notebooks
deleted file mode 100644
index a5cef3bd..00000000
--- a/scene_graph/.notebooks
+++ /dev/null
@@ -1 +0,0 @@
-vrd_tutorial
\ No newline at end of file
diff --git a/spam/01_spam_tutorial.ipynb b/spam/01_spam_tutorial.ipynb
index 3f2ff034..d73ab13f 100644
--- a/spam/01_spam_tutorial.ipynb
+++ b/spam/01_spam_tutorial.ipynb
@@ -4916,7 +4916,7 @@
     "If you enjoyed this tutorial and you've already checked out the Snorkel 101 Guide, check out the [`snorkel-tutorials` table of contents](https://github.com/snorkel-team/snorkel-tutorials#snorkel-tutorials) for other tutorials that you may find interesting, including demonstrations of how to use Snorkel\n",
     "\n",
     "* As part of a [hybrid crowdsourcing pipeline](https://github.com/snorkel-team/snorkel-tutorials/tree/master/crowdsourcing)\n",
-    "* For [scene-graph detection over images](https://github.com/snorkel-team/snorkel-tutorials/tree/master/scene_graph)\n",
+    "* For [visual relationship detection over images](https://github.com/snorkel-team/snorkel-tutorials/tree/master/visual_relation)\n",
     "* For [information extraction over text](https://github.com/snorkel-team/snorkel-tutorials/tree/master/spouse)\n",
     "* For [data augmentation](https://github.com/snorkel-team/snorkel-tutorials/tree/master/spam)\n",
     "\n",
diff --git a/spam/01_spam_tutorial.py b/spam/01_spam_tutorial.py
index 7ed44246..5f5c0f00 100644
--- a/spam/01_spam_tutorial.py
+++ b/spam/01_spam_tutorial.py
@@ -1006,7 +1006,7 @@ def plot_probabilities_histogram(Y):
 # If you enjoyed this tutorial and you've already checked out the Snorkel 101 Guide, check out the [`snorkel-tutorials` table of contents](https://github.com/snorkel-team/snorkel-tutorials#snorkel-tutorials) for other tutorials that you may find interesting, including demonstrations of how to use Snorkel
 #
 # * As part of a [hybrid crowdsourcing pipeline](https://github.com/snorkel-team/snorkel-tutorials/tree/master/crowdsourcing)
-# * For [scene-graph detection over images](https://github.com/snorkel-team/snorkel-tutorials/tree/master/scene_graph)
+# * For [visual relationship detection over images](https://github.com/snorkel-team/snorkel-tutorials/tree/master/visual_relation)
 # * For [information extraction over text](https://github.com/snorkel-team/snorkel-tutorials/tree/master/spouse)
 # * For [data augmentation](https://github.com/snorkel-team/snorkel-tutorials/tree/master/spam)
 #
diff --git a/tox.ini b/tox.ini
index b9399555..c24f8daa 100644
--- a/tox.ini
+++ b/tox.ini
@@ -3,6 +3,6 @@ skipsdist = true
 envlist =
     spouse,
     intro,
+    visual_relation,
     spam,
-    scene_graph,
     crowdsourcing,
@@ -19,7 +20,7 @@ deps =
    intro: -rintro/requirements.txt
    spam: -rspam/requirements.txt
    multitask: -rmultitask/requirements.txt
-    scene_graph: -rscene_graph/requirements.txt
+    visual_relation: -rvisual_relation/requirements.txt
    crowdsourcing: -rcrowdsourcing/requirements.txt
    recsys: -rrecsys/requirements.txt
    drybell: -rdrybell/requirements.txt
@@ -32,7 +33,7 @@ commands =
    intro: python {toxinidir}/scripts/build.py {posargs:test} intro
    spam: python {toxinidir}/scripts/build.py {posargs:test} spam
    multitask: python {toxinidir}/scripts/build.py {posargs:test} multitask
-    scene_graph: python {toxinidir}/scripts/build.py {posargs:test} scene_graph
+    visual_relation: python {toxinidir}/scripts/build.py {posargs:test} visual_relation
    crowdsourcing: python {toxinidir}/scripts/build.py {posargs:test} crowdsourcing
    drybell: python {toxinidir}/scripts/build.py {posargs:test} drybell
diff --git a/visual_relation/.notebooks b/visual_relation/.notebooks
new file mode 100644
index 00000000..db3c6bcd
--- /dev/null
+++ b/visual_relation/.notebooks
@@ -0,0 +1 @@
+visual_relation_tutorial
\ No newline at end of file
diff --git a/scene_graph/__init__.py b/visual_relation/__init__.py
similarity index 100%
rename from scene_graph/__init__.py
rename to visual_relation/__init__.py
diff --git a/visual_relation/download_full_data.sh b/visual_relation/download_full_data.sh
new file mode 100644
index 00000000..d1aaeb37
--- /dev/null
+++ b/visual_relation/download_full_data.sh
@@ -0,0 +1,37 @@
+# Execute from snorkel-tutorials/
+# Download data.
+
+ANNOTATIONS_URL="https://www.dropbox.com/s/bnfhm6kt9xumik8/vrd.zip"
+IMAGES_URL="http://imagenet.stanford.edu/internal/jcjohns/visual_relations/sg_dataset.zip"
+SAMPLE_IMAGES_URL="https://github.com/Prof-Lu-Cewu/Visual-Relationship-Detection.git"
+GLOVE_URL="http://nlp.stanford.edu/data/wordvecs/glove.6B.zip"
+
+if [ ! -d "visual_relation/data" ]; then
+    mkdir -p visual_relation/data
+    cd visual_relation/data
+
+    # download and unzip metadata and annotations
+    wget $ANNOTATIONS_URL
+    unzip vrd.zip
+
+    # Delete the zip files.
+    rm vrd.zip
+    cd VRD
+
+    # Download and unzip all images
+    wget $IMAGES_URL
+    unzip sg_dataset.zip
+    rm sg_dataset.zip
+    cd ../../..
+
+    mkdir -p visual_relation/data/glove
+    cd visual_relation/data/glove
+
+    wget $GLOVE_URL
+    unzip glove.6B.zip
+
+    # Delete the zip files
+    rm glove.6B.zip
+    cd ../../..
+fi
+
diff --git a/scene_graph/download_data.sh b/visual_relation/download_sample_data.sh
similarity index 54%
rename from scene_graph/download_data.sh
rename to visual_relation/download_sample_data.sh
index 315ad8fb..8f99e562 100755
--- a/scene_graph/download_data.sh
+++ b/visual_relation/download_sample_data.sh
@@ -12,20 +12,20 @@ DIRS=("glove" "VRD/sg_dataset/samples")
 # Check if at least any file is missing. If so, reload all data.
 for directory_name in "${DIRS[@]}"
 do
-    if [ ! -d "scene_graph/data/$directory_name" ]; then
+    if [ ! -d "visual_relation/data/$directory_name" ]; then
         RELOAD=true
     fi
 done
 
 ANNOTATIONS_URL="https://www.dropbox.com/s/bnfhm6kt9xumik8/vrd.zip"
-IMAGES_URL="http://imagenet.stanford.edu/internal/jcjohns/scene_graphs/sg_dataset.zip"
+IMAGES_URL="http://imagenet.stanford.edu/internal/jcjohns/visual_relations/sg_dataset.zip"
 SAMPLE_IMAGES_URL="https://github.com/Prof-Lu-Cewu/Visual-Relationship-Detection.git"
 GLOVE_URL="http://nlp.stanford.edu/data/wordvecs/glove.6B.zip"
 
-if [ ! -d "scene_graph/data" ]; then
-    if [ -d "scene_graph/data/" ]; then rm -Rf "scene_graph/data/"; fi
-    mkdir -p scene_graph/data
-    cd scene_graph/data
+if [ ! -d "visual_relation/data" ]; then
+    if [ -d "visual_relation/data/" ]; then rm -Rf "visual_relation/data/"; fi
+    mkdir -p visual_relation/data
+    cd visual_relation/data
 
     # download and unzip metadata and annotations
     wget $ANNOTATIONS_URL
@@ -35,23 +35,7 @@ if [ ! -d "scene_graph/data" ]; then
     rm vrd.zip
     cd VRD
 
-    # if [ "$TRAVIS" = "true" ]; then
-    #     # Download and unzip sample images
-    #     mkdir sg_dataset
-    #     cd sg_dataset
-    #     git clone $SAMPLE_IMAGES_URL
-    #     mv Visual-Relationship-Detection/samples ./
-    #     rm -r Visual-Relationship-Detection
-    #     cd ../..
-    # else
-    #     # Download and unzip all images
-    #     wget $IMAGES_URL
-    #     unzip sg_dataset.zip
-    #     rm sg_dataset.zip
-    #     cd ../../..
-    # fi
-
-    # Download and unzip all images
+    # Download and unzip sample images
     mkdir sg_dataset
     cd sg_dataset
     git clone $SAMPLE_IMAGES_URL
@@ -59,8 +43,8 @@ if [ ! -d "scene_graph/data" ]; then
     rm -r Visual-Relationship-Detection
     cd ../../../..
 
-    mkdir -p scene_graph/data/glove
-    cd scene_graph/data/glove
+    mkdir -p visual_relation/data/glove
+    cd visual_relation/data/glove
 
     wget $GLOVE_URL
     unzip glove.6B.zip
diff --git a/scene_graph/model.py b/visual_relation/model.py
similarity index 97%
rename from scene_graph/model.py
rename to visual_relation/model.py
index a5687b06..cad95b89 100644
--- a/scene_graph/model.py
+++ b/visual_relation/model.py
@@ -75,7 +75,7 @@ def __init__(
             "sub_category": df["subject_category"].tolist(),
         }
         Y_dict = {
-            "scene_graph_task": torch.LongTensor(df["label"].to_numpy())
+            "visual_relation_task": torch.LongTensor(df["label"].to_numpy())
         }  # change to take in the rounded train labels
 
         super(SceneGraphDataset, self).__init__(name, split, X_dict, Y_dict)
@@ -123,7 +123,7 @@ def __len__(self):
 class WordEmb(nn.Module):
     """Extract and concat word embeddings for obj and sub categories."""
 
-    def __init__(self, glove_fn="scene_graph/data/glove/glove.6B.100d.txt"):
+    def __init__(self, glove_fn="visual_relation/data/glove/glove.6B.100d.txt"):
         super(WordEmb, self).__init__()
 
         self.word_embs = pandas.read_csv(
diff --git a/scene_graph/requirements.txt b/visual_relation/requirements.txt
similarity index 100%
rename from scene_graph/requirements.txt
rename to visual_relation/requirements.txt
diff --git a/scene_graph/utils.py b/visual_relation/utils.py
similarity index 72%
rename from scene_graph/utils.py
rename to visual_relation/utils.py
index 72a2cd2b..aa2d9633 100644
--- a/scene_graph/utils.py
+++ b/visual_relation/utils.py
@@ -55,24 +55,42 @@ def vrd_to_pandas(
 
 
 # %%
-def load_vrd_data():
+def load_vrd_data(sample=False, is_travis=False):
     """Download and load Pandas DataFrame of VRD relationships.
 
     NOTE: Only loads semantic relationship examples.
     """
-    try:
-        subprocess.run(
-            ["bash", "scene_graph/download_data.sh"], check=True, stderr=subprocess.PIPE
-        )
-    except subprocess.CalledProcessError as e:
-        print(e.stderr.decode())
-        raise e
-
-    relationships_train = json.load(open("scene_graph/data/VRD/annotations_train.json"))
-    relationships_test = json.load(open("scene_graph/data/VRD/annotations_test.json"))
-    objects = json.load(open("scene_graph/data/VRD/objects.json"))
-    predicates = json.load(open("scene_graph/data/VRD/predicates.json"))
+    if sample or is_travis:
+        try:
+            subprocess.run(
+                ["bash", "visual_relation/download_sample_data.sh"],
+                check=True,
+                stderr=subprocess.PIPE,
+            )
+        except subprocess.CalledProcessError as e:
+            print(e.stderr.decode())
+            raise e
+    else:
+        try:
+            subprocess.run(
+                ["bash", "visual_relation/download_full_data.sh"],
+                check=True,
+                stderr=subprocess.PIPE,
+            )
+        except subprocess.CalledProcessError as e:
+            print(e.stderr.decode())
+            raise e
+
+    relationships_train = json.load(
+        open("visual_relation/data/VRD/annotations_train.json")
+    )
+    relationships_test = json.load(
+        open("visual_relation/data/VRD/annotations_test.json")
+    )
+
+    objects = json.load(open("visual_relation/data/VRD/objects.json"))
+    predicates = json.load(open("visual_relation/data/VRD/predicates.json"))
     semantic_predicates = [
         "carry",
         "cover",
@@ -99,9 +117,9 @@ def load_vrd_data():
     }
 
     # TODO: hack to work with small sample of data for tox
-    if os.path.isdir("scene_graph/data/VRD/sg_dataset/samples"):
+    if os.path.isdir("visual_relation/data/VRD/sg_dataset/samples"):
         # pass in list of images as keys_list
-        keys_list = os.listdir("scene_graph/data/VRD/sg_dataset/samples")
+        keys_list = os.listdir("visual_relation/data/VRD/sg_dataset/samples")
         test_df = vrd_to_pandas(
             relationships_test,
             objects,
@@ -110,7 +128,7 @@ def load_vrd_data():
             keys_list=keys_list,
         )
         return test_df, test_df, test_df
-    elif os.path.isdir("scene_graph/data/VRD/sg_dataset/sg_train_images"):
+    elif os.path.isdir("visual_relation/data/VRD/sg_dataset/sg_train_images"):
         train_df = vrd_to_pandas(
             relationships_train,
             objects,
diff --git a/scene_graph/vrd_tutorial.ipynb b/visual_relation/visual_relation_tutorial.ipynb
similarity index 84%
rename from scene_graph/vrd_tutorial.ipynb
rename to visual_relation/visual_relation_tutorial.ipynb
index 25447741..9305a4d7 100644
--- a/scene_graph/vrd_tutorial.ipynb
+++ b/visual_relation/visual_relation_tutorial.ipynb
@@ -25,7 +25,7 @@
    "source": [
     "import os\n",
     "\n",
-    "if os.path.basename(os.getcwd()) == \"scene_graph\":\n",
+    "if os.path.basename(os.getcwd()) == \"visual_relation\":\n",
     "    os.chdir(\"..\")"
    ]
   },
@@ -34,36 +34,34 @@
    "metadata": {},
    "source": [
     "### 1. Load Dataset\n",
-    "We load the VRD dataset and filter images with at least one action predicate in it, since these are more difficult to classify than geometric relationships like `above` or `next to`. We load the train, valid, and test sets as Pandas DataFrame objects with the following fields:\n",
+    "We load the VRD dataset and filter images with at least one action predicate in it, since these are more difficult to classify than geometric relationships like `above` or `next to`. We load the train, valid, and test sets as Pandas `DataFrame` objects with the following fields:\n",
     "- `label`: The relationship between the objects. 0: `RIDE`, 1: `CARRY`, 2: `OTHER` action predicates\n",
     "- `object_bbox`: coordinates of the bounding box for the object `[ymin, ymax, xmin, xmax]`\n",
     "- `object_category`: category of the object\n",
     "- `source_img`: filename for the corresponding image the relationship is in\n",
     "- `subject_bbox`: coordinates of the bounding box for the object `[ymin, ymax, xmin, xmax]`\n",
-    "- `subject_category`: category of the subject\n",
-    "\n",
-    "Note that the training DataFrame will have a labels field with all -1s. This denotes the lack of labels for that particular dataset. In this tutorial, we will assign probabilistic labels to the training set by writing labeling functions over attributes of the subject and objects!"
+    "- `subject_category`: category of the subject"
    ]
   },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "The autoreload extension is already loaded. To reload it, use:\n",
-      "  %reload_ext autoreload\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "\n",
-    "import numpy as np"
+    "import numpy as np\n"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "If you are running this notebook for the first time, it will take ~15 mins to download all the required sample data.\n",
+    "\n",
+    "The sampled version of the dataset **uses the same 26 examples across the train, dev, and test sets. This setting is meant to demonstrate how Snorkel works with this task, not to demonstrate performance.**"
+   ]
+  },
  {
@@ -82,21 +80,33 @@
    }
   ],
   "source": [
-    "from scene_graph.utils import load_vrd_data\n",
+    "from visual_relation.utils import load_vrd_data\n",
    "\n",
-    "train_df, valid_df, test_df = load_vrd_data()\n",
+    "# setting sample=False will take ~3 hours to run (downloads full VRD dataset)\n",
+    "sample = True\n",
+    "is_travis = \"TRAVIS\" in os.environ\n",
+    "train_df, valid_df, test_df = load_vrd_data(sample, is_travis)\n",
    "\n",
    "print(\"Train Relationships: \", len(train_df))\n",
    "print(\"Dev Relationships: \", len(valid_df))\n",
    "print(\"Test Relationships: \", len(test_df))"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Note that the training `DataFrame` will have a labels field with all -1s. This denotes the lack of labels for that particular dataset. In this tutorial, we will assign probabilistic labels to the training set by writing labeling functions over attributes of the subject and objects!"
+   ]
+  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Writing Labeling Functions\n",
-    "We now write labeling functions to detect what relationship exists between pairs of bounding boxes. To do so, we can encode various intuitions into the labeling functions. _Categorical_ intution: knowledge about the categories of subjects and objects usually involved in these relationships (e.g., `person` is usually the subject for predicates like `ride` and `carry`), and _spatial_ intuition: knowledge about the relative positions of the subject and objects (e.g., subject is usually higher than the object for the predicate `ride`)."
+    "We now write labeling functions to detect what relationship exists between pairs of bounding boxes. To do so, we can encode various intuitions into the labeling functions:\n",
+    "* _Categorical_ intuition: knowledge about the categories of subjects and objects usually involved in these relationships (e.g., `person` is usually the subject for predicates like `ride` and `carry`)\n",
+    "* _Spatial_ intuition: knowledge about the relative positions of the subject and objects (e.g., subject is usually higher than the object for the predicate `ride`)"
   ]
  },
  {
@@ -234,7 +244,7 @@
     "output_type": "stream",
     "text": [
      "\r",
-      "100%|██████████| 26/26 [00:00<00:00, 1988.80it/s]"
+      "100%|██████████| 26/26 [00:00<00:00, 1588.57it/s]"
     ]
    },
    {
@@ -251,7 +261,7 @@
     "output_type": "stream",
     "text": [
      "\r",
-      "100%|██████████| 26/26 [00:00<00:00, 2442.48it/s]"
+      "100%|██████████| 26/26 [00:00<00:00, 2890.55it/s]"
     ]
    },
    {
@@ -468,7 +478,7 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "We use [F1](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html) Micro average for the multiclass setting, which calculates metrics globally by counting the total true positives, false negatives and false positives."
+    "We use [F1](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html) Micro average for the multiclass setting, which calculates metrics globally across classes by counting the total true positives, false negatives and false positives."
   ]
  },
  {
@@ -513,12 +523,15 @@
   "outputs": [],
   "source": [
    "from snorkel.classification import DictDataLoader\n",
-    "from scene_graph.model import FlatConcat, SceneGraphDataset, WordEmb, init_fc\n",
+    "from visual_relation.model import FlatConcat, SceneGraphDataset, WordEmb, init_fc\n",
    "\n",
-    "# change to \"scene_graph/data/VRD/sg_dataset/sg_train_images\" for full set\n",
-    "TRAIN_DIR = \"scene_graph/data/VRD/sg_dataset/samples\"\n",
    "train_df[\"labels\"] = label_model.predict(L_train)\n",
    "\n",
+    "if sample:\n",
+    "    TRAIN_DIR = \"visual_relation/data/VRD/sg_dataset/samples\"\n",
+    "else:\n",
+    "    TRAIN_DIR = \"visual_relation/data/VRD/sg_dataset/sg_train_images\"\n",
+    "\n",
    "train_dl = DictDataLoader(\n",
    "    SceneGraphDataset(\"train_dataset\", \"train\", TRAIN_DIR, train_df),\n",
    "    batch_size=16,\n",
@@ -585,12 +598,12 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "from scene_graph.model import get_task_flow\n",
+    "from visual_relation.model import get_task_flow\n",
    "\n",
    "# define task flow through modules\n",
    "task_flow = get_task_flow()\n",
    "pred_cls_task = Task(\n",
-    "    name=\"scene_graph_task\",\n",
+    "    name=\"visual_relation_task\",\n",
    "    module_pool=module_pool,\n",
    "    task_flow=task_flow,\n",
    "    scorer=Scorer(metrics=[\"f1_micro\"]),\n",
@@ -621,10 +634,10 @@
    "name": "stderr",
    "output_type": "stream",
    "text": [
-     "/Users/braden/repos/snorkel-tutorials/scene_graph/model.py:134: FutureWarning: Method .as_matrix will be removed in a future version. Use .values instead.\n",
+     "/Users/paroma/Documents/Projects/snorkel-tutorials/visual_relation/model.py:134: FutureWarning: Method .as_matrix will be removed in a future version. Use .values instead.\n",
     "  return self.word_embs.loc[word].as_matrix()\n",
     "\r",
     "Epoch 0:: 0%| | 0/2 [00:04
-# %% +# + # %load_ext autoreload # %autoreload 2 import numpy as np -# %% -from scene_graph.utils import load_vrd_data +# - + +# If you are running this notebook for the first time, it will take ~15 mins to download all the required sample data. +# +# The sampled version of the dataset **uses the same 26 examples across the train, dev, and test sets. This setting is meant to demonstrate how Snorkel works with this task, not to demonstrate performance.** + +# + +from visual_relation.utils import load_vrd_data -train_df, valid_df, test_df = load_vrd_data() +# setting sample=False will take ~3 hours to run (downloads full VRD dataset) +sample = True +is_travis = "TRAVIS" in os.environ +train_df, valid_df, test_df = load_vrd_data(sample, is_travis) print("Train Relationships: ", len(train_df)) print("Dev Relationships: ", len(valid_df)) print("Test Relationships: ", len(test_df)) +# - + +# Note that the training `DataFrame` will have a labels field with all -1s. This denotes the lack of labels for that particular dataset. In this tutorial, we will assign probabilistic labels to the training set by writing labeling functions over attributes of the subject and objects! -# %% [markdown] # ## 2. Writing Labeling Functions -# We now write labeling functions to detect what relationship exists between pairs of bounding boxes. To do so, we can encode various intuitions into the labeling functions. _Categorical_ intution: knowledge about the categories of subjects and objects usually involved in these relationships (e.g., `person` is usually the subject for predicates like `ride` and `carry`), and _spatial_ intuition: knowledge about the relative positions of the subject and objects (e.g., subject is usually higher than the object for the predicate `ride`). +# We now write labeling functions to detect what relationship exists between pairs of bounding boxes. To do so, we can encode various intuitions into the labeling functions: +# * _Categorical_ intution: knowledge about the categories of subjects and objects usually involved in these relationships (e.g., `person` is usually the subject for predicates like `ride` and `carry`) +# * _Spatial_ intuition: knowledge about the relative positions of the subject and objects (e.g., subject is usually higher than the object for the predicate `ride`) -# %% RIDE = 0 CARRY = 1 OTHER = 2 ABSTAIN = -1 -# %% [markdown] # We begin with labeling functions that encode categorical intuition: we use knowledge about common subject-object category pairs that are common for `RIDE` and `CARRY` and also knowledge about what subjects or objects are unlikely to be involved in the two relationships. -# %% +# + from snorkel.labeling import labeling_function # Category-based LFs @@ -100,10 +124,11 @@ def LF_person(x): return ABSTAIN -# %% [markdown] +# - + # We now encode our spatial intuition, which includes measuring the distance between the bounding boxes and comparing their relative areas. -# %% +# + # Distance-based LFs @labeling_function() def LF_ydist(x): @@ -134,10 +159,11 @@ def LF_area(x): return ABSTAIN -# %% [markdown] +# - + # Note that the labeling functions have varying empirical accuracies and coverages. Due to class imbalance in our chosen relationships, labeling functions that label the `OTHER` class have higher coverage than labeling functions for `RIDE` or `CARRY`. This reflects the distribution of classes in the dataset as well. 
 
-# %%
+# +
 from snorkel.labeling import PandasLFApplier
 
 lfs = [
@@ -155,43 +181,43 @@ def LF_area(x):
 L_train = applier.apply(train_df)
 L_valid = applier.apply(valid_df)
 
-# %%
+# +
 from snorkel.labeling import LFAnalysis
 
 Y_valid = valid_df.label.values
 LFAnalysis(L_valid, lfs).lf_summary(Y_valid)
+# -
 
-# %% [markdown]
 # ## 3. Train Label Model
 # We now train a multi-class `LabelModel` to assign training labels to the unalabeled training set.
 
-# %%
+# +
 from snorkel.labeling import LabelModel
 
 label_model = LabelModel(cardinality=3, verbose=True)
 label_model.fit(L_train, seed=123, lr=0.01, log_freq=10, n_epochs=100)
+# -
 
-# %% [markdown]
-# We use [F1](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html) Micro average for the multiclass setting, which calculates metrics globally by counting the total true positives, false negatives and false positives.
+# We use [F1](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html) Micro average for the multiclass setting, which calculates metrics globally across classes by counting the total true positives, false negatives and false positives.
 
-# %%
 label_model.score(L_valid, Y_valid, metrics=["f1_micro"])
 
-# %% [markdown]
 # ## 4. Train a Classifier
 # You can then use these training labels to train any standard discriminative model, such as [an off-the-shelf ResNet](https://github.com/KaimingHe/deep-residual-networks), which should learn to generalize beyond the LF's we've developed!
 
-# %% [markdown]
 # #### Create DataLoaders for Classifier
 
-# %%
+# +
 from snorkel.classification import DictDataLoader
-from scene_graph.model import FlatConcat, SceneGraphDataset, WordEmb, init_fc
+from visual_relation.model import FlatConcat, SceneGraphDataset, WordEmb, init_fc
 
-# change to "scene_graph/data/VRD/sg_dataset/sg_train_images" for full set
-TRAIN_DIR = "scene_graph/data/VRD/sg_dataset/samples"
 train_df["labels"] = label_model.predict(L_train)
 
+if sample:
+    TRAIN_DIR = "visual_relation/data/VRD/sg_dataset/samples"
+else:
+    TRAIN_DIR = "visual_relation/data/VRD/sg_dataset/sg_train_images"
+
 train_dl = DictDataLoader(
     SceneGraphDataset("train_dataset", "train", TRAIN_DIR, train_df),
     batch_size=16,
@@ -203,11 +229,11 @@ def LF_area(x):
     batch_size=16,
     shuffle=False,
 )
+# -
 
-# %% [markdown]
 # #### Define Model Architecture
 
-# %%
+# +
 import torchvision.models as models
 import torch.nn as nn
@@ -241,32 +267,34 @@ def LF_area(x):
     }
 )
 
-# %%
-from scene_graph.model import get_task_flow
+# +
+from visual_relation.model import get_task_flow
 
 # define task flow through modules
 task_flow = get_task_flow()
 pred_cls_task = Task(
-    name="scene_graph_task",
+    name="visual_relation_task",
     module_pool=module_pool,
     task_flow=task_flow,
     scorer=Scorer(metrics=["f1_micro"]),
 )
+# -
 
-# %% [markdown]
 # ### Train and Evaluate Model
 
-# %%
+# +
 from snorkel.classification import MultitaskClassifier, Trainer
 
 model = MultitaskClassifier([pred_cls_task])
 trainer = Trainer(
-    n_epochs=1,
+    n_epochs=1,  # increase for improved performance
     lr=1e-3,
     checkpointing=True,
    checkpointer_config={"checkpoint_dir": "checkpoint"},
 )
 trainer.fit(model, [train_dl])
+# -
 
-# %%
 model.score([valid_dl])
+
+# We have successfully trained a visual relationship detection model! Using categorical and spatial intuition about how objects in a visual relationship interact with each other, we are able to assign high-quality training labels to object pairs in the VRD dataset in a multi-class classification setting.
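The spatial labeling functions touched above (`LF_ydist`, `LF_area`) appear in this diff only at their hunk edges, so their bodies are not shown. As a rough, hypothetical sketch of the spatial intuition the tutorial text describes ("subject is usually higher than the object for the predicate `ride`"), a minimal LF of this kind could look like the following. The function name, the toy data, and the exact comparison are illustrative assumptions; the `[ymin, ymax, xmin, xmax]` box layout, the label constants, and the `labeling_function`/`PandasLFApplier` API come from the tutorial itself.

```python
import pandas as pd

from snorkel.labeling import PandasLFApplier, labeling_function

# Label constants, as defined in the tutorial.
RIDE = 0
CARRY = 1
OTHER = 2
ABSTAIN = -1


@labeling_function()
def lf_subject_above_object(x):
    # Boxes are [ymin, ymax, xmin, xmax]; a smaller ymin means the box
    # sits higher in the image, which weakly suggests a RIDE relationship.
    if x.subject_bbox[0] < x.object_bbox[0]:
        return RIDE
    return ABSTAIN


# Toy usage: one pair where the subject box sits above the object box.
df = pd.DataFrame(
    {
        "subject_bbox": [[50, 200, 100, 180]],
        "object_bbox": [[150, 300, 90, 200]],
    }
)
L = PandasLFApplier([lf_subject_above_object]).apply(df)
print(L)  # [[0]] -> the LF votes RIDE for this pair
```

Like the tutorial's real LFs, this sketch returns `ABSTAIN` rather than guessing when its heuristic does not fire, which is what lets the `LabelModel` reweigh noisy, partially overlapping votes into probabilistic training labels.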