microsoft · miguelgfierro · Aug 7, 2019 · Aug 1, 2019 · Aug 2, 2019 · Aug 2, 2019
diff --git a/scenarios/entailment/entailment_multinli_bert.ipynb b/scenarios/entailment/entailment_multinli_bert.ipynb
@@ -6,10 +6,75 @@
    "source": [
     "*Copyright (c) Microsoft Corporation. All rights reserved.*  \n",
     "\n",
-    "*Licensed under the MIT License.*\n",
+    "*Licensed under the MIT License.*"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Natural Language Inference on MultiNLI Dataset using BERT"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Before You Start\n",
+    "\n",
+    "The running times shown in this notebook were measured on a Standard_NC24s_v3 Azure Deep Learning Virtual Machine with 4 NVIDIA Tesla V100 GPUs. If you want to run through the notebook quickly, you can set the **`QUICK_RUN`** flag in the cell below to **`True`** to run the notebook on a small subset of the data for a smaller number of epochs. \n",
+    "The table below provides reference running times on different machine configurations.  \n",
+    "\n",
+    "|QUICK_RUN|Machine Configurations|Running time|\n",
+    "|:---------|:----------------------|:------------|\n",
+    "|True|4 **CPU**s, 14GB memory| ~ 15 minutes|\n",
+    "|True|1 NVIDIA Tesla K80 GPU, 12GB GPU memory| ~ 5 minutes|\n",
+    "|False|1 NVIDIA Tesla K80 GPU, 12GB GPU memory| ~ 10.5 hours|\n",
+    "|False|4 NVIDIA Tesla V100 GPUs, 64GB GPU memory| ~ 2.5 hours|\n",
+    "\n",
+    "If you run into a CUDA out-of-memory error, try reducing `BATCH_SIZE` and `MAX_SEQ_LENGTH`, but note that model performance may be compromised."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "tags": [
+     "parameters"
+    ]
+   },
+   "outputs": [],
+   "source": [
+    "## Set QUICK_RUN = True to run the notebook on a small subset of data and a smaller number of epochs.\n",
+    "QUICK_RUN = False\n",
     "\n",
-    "# Natural Language Inference on MultiNLI Dataset using BERT\n",
+    "TRAIN_DATA_USED_PERCENT = 1\n",
+    "DEV_DATA_USED_PERCENT = 1\n",
+    "NUM_EPOCHS = 2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if QUICK_RUN:\n",
+    "    TRAIN_DATA_USED_PERCENT = 0.001\n",
+    "    DEV_DATA_USED_PERCENT = 0.01\n",
+    "    NUM_EPOCHS = 1\n",
     "\n",
+    "import torch\n",
+    "if torch.cuda.is_available():\n",
+    "    BATCH_SIZE = 32\n",
+    "else:\n",
+    "    BATCH_SIZE = 16"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
     "## Summary\n",
     "In this notebook, we demostrate using [BERT](https://arxiv.org/abs/1810.04805) to perform Natural Language Inference (NLI). We use the [MultiNLI](https://www.nyu.edu/projects/bowman/multinli/) dataset and the task is to classify sentence pairs into three classes: contradiction, entailment, and neutral.   \n",
     "The figure below shows how [BERT](https://arxiv.org/abs/1810.04805) classifies sentence pairs. It concatenates the tokens in each sentence pairs and separates the sentences by the [SEP] token. A [CLS] token is prepended to the token list and used as the aggregate sequence representation for the classification task.\n",
@@ -31,8 +96,6 @@
     "from sklearn.metrics import classification_report\n",
     "from sklearn.preprocessing import LabelEncoder\n",
     "\n",
-    "import torch\n",
-    "\n",
     "nlp_path = os.path.abspath('../../')\n",
     "if nlp_path not in sys.path:\n",
     "    sys.path.insert(0, nlp_path)\n",
@@ -47,8 +110,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Configurations\n",
-    "Note that the running time shown in this notebook are on a Standard_NC24s_v3 Azure Deep Learning Virtual Machine with 4 NVIDIA Tesla V100 GPUs. If you want to run through the notebook quickly, you can change the `TRAIN_DATA_USED_PERCENT` to a small number, e.g. 0.01. "
+    "## Configurations"
    ]
   },
   {
@@ -57,8 +119,6 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "TRAIN_DATA_USED_PERCENT = 1.0\n",
-    "\n",
     "# set random seeds\n",
     "RANDOM_SEED = 42\n",
     "random.seed(RANDOM_SEED)\n",
@@ -73,11 +133,6 @@
     "TO_LOWER = True\n",
     "MAX_SEQ_LENGTH = 128\n",
     "\n",
-    "# training configurations\n",
-    "NUM_GPUS = 4\n",
-    "BATCH_SIZE = 32\n",
-    "NUM_EPOCHS = 2\n",
-    "\n",
     "# optimizer configurations\n",
     "LEARNING_RATE= 5e-5\n",
     "WARMUP_PROPORTION= 0.1\n",
@@ -250,8 +305,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "train_data_used_count = round(TRAIN_DATA_USED_PERCENT * train_df.shape[0])\n",
-    "train_df = train_df.loc[:train_data_used_count]"
+    "train_df = train_df.sample(frac=TRAIN_DATA_USED_PERCENT, random_state=RANDOM_SEED).reset_index(drop=True)  # seeded so the subsample is reproducible\n",
+    "dev_df_matched = dev_df_matched.sample(frac=DEV_DATA_USED_PERCENT, random_state=RANDOM_SEED).reset_index(drop=True)\n",
+    "dev_df_mismatched = dev_df_mismatched.sample(frac=DEV_DATA_USED_PERCENT, random_state=RANDOM_SEED).reset_index(drop=True)"
    ]
   },
   {
@@ -664,7 +720,6 @@
     "                   input_mask=train_input_mask,\n",
     "                   token_type_ids=train_token_type_ids,\n",
     "                   labels=train_labels,\n",
-    "                   num_gpus=NUM_GPUS,\n",
     "                   num_epochs=NUM_EPOCHS,\n",
     "                   batch_size=BATCH_SIZE,\n",
     "                   lr=LEARNING_RATE,\n",
@@ -814,6 +869,7 @@
   }
  ],
  "metadata": {
+  "celltoolbar": "Tags",
   "kernelspec": {
    "display_name": "nlp_gpu",
    "language": "python",

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -18,7 +18,8 @@
 from utils_nlp.models.bert.common import Language
 from utils_nlp.models.bert.common import Tokenizer as BERTTokenizer
 from utils_nlp.azureml import azureml_utils
-from azureml.core.webservice import AciWebservice, Webservice
+from azureml.core.webservice import Webservice
+
 
 @pytest.fixture(scope="module")
 def notebooks():
@@ -33,23 +34,32 @@ def notebooks():
             folder_notebooks, "embeddings", "embedding_trainer.ipynb"
         ),
         "bert_qa_trainer": os.path.join(
-            folder_notebooks, "question_answering", "pretrained-BERT-SQuAD-deep-dive-aml.ipynb"
+            folder_notebooks,
+            "question_answering",
+            "pretrained-BERT-SQuAD-deep-dive-aml.ipynb",
         ),
         "similarity_automl_local": os.path.join(
-            folder_notebooks, "sentence_similarity", "automl_local_deployment_aci.ipynb"
+            folder_notebooks,
+            "sentence_similarity",
+            "automl_local_deployment_aci.ipynb",
         ),
         "bidaf_deep_dive": os.path.join(
             folder_notebooks, "question_answering", "bidaf_aml_deep_dive.ipynb"
         ),
         "bidaf_quickstart": os.path.join(
-            folder_notebooks, "question_answering", "question_answering_system_bidaf_quickstart.ipynb"
+            folder_notebooks,
+            "question_answering",
+            "question_answering_system_bidaf_quickstart.ipynb",
         ),
         "bert_encoder": os.path.join(
             folder_notebooks, "sentence_similarity", "bert_encoder.ipynb"
         ),
         "gensen_local": os.path.join(
             folder_notebooks, "sentence_similarity", "gensen_local.ipynb"
         ),
+        "entailment_multinli_bert": os.path.join(
+            folder_notebooks, "entailment", "entailment_multinli_bert.ipynb"
+        ),
         "gensen_azureml": os.path.join(
             folder_notebooks, "sentence_similarity", "gensen_aml_deep_dive.ipynb"
         ),
@@ -175,14 +185,16 @@ def ner_test_data():
 
 
 def pytest_addoption(parser):
-    parser.addoption("--subscription_id",
-                        help="Azure Subscription Id to create resources in")
-    parser.addoption("--resource_group",
-                        help="Name of the resource group")
-    parser.addoption("--workspace_name",
-                        help="Name of Azure ML Workspace")
-    parser.addoption("--workspace_region",
-                        help="Azure region to create the workspace in")
+    """Register the Azure-related command-line options with pytest."""
+    azure_options = (
+        ("--subscription_id", "Azure Subscription Id to create resources in"),
+        ("--resource_group", "Name of the resource group"),
+        ("--workspace_name", "Name of Azure ML Workspace"),
+        ("--workspace_region", "Azure region to create the workspace in"),
+    )
+    for flag, help_text in azure_options:
+        parser.addoption(flag, help=help_text)
+
 
 @pytest.fixture(scope="module")
 def subscription_id(request):
@@ -208,15 +220,15 @@ def workspace_region(request):
 def bert_english_tokenizer():
     return BERTTokenizer(language=Language.ENGLISHCASED, to_lower=False)
 
+
 @pytest.fixture(scope="module")
-def teardown_service(subscription_id,
-                     resource_group,
-                     workspace_name,
-                     workspace_region):
+def teardown_service(
+    subscription_id, resource_group, workspace_name, workspace_region
+):
 
     yield
 
-    #connect to workspace
+    # connect to workspace
     ws = azureml_utils.get_or_create_workspace(
         config_path="tests/ci",
         subscription_id=subscription_id,
@@ -225,8 +237,8 @@ def teardown_service(subscription_id,
         workspace_region=workspace_region,
     )
 
-    #connect to aci_service
+    # connect to aci_service
     aci_service = Webservice(workspace=ws, name="aci-test-service")
 
-    #delete aci_service
+    # delete aci_service
     aci_service.delete()
diff --git a/tests/integration/test_notebooks_entailment_multinli_bert.py b/tests/integration/test_notebooks_entailment_multinli_bert.py
@@ -0,0 +1,22 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+import pytest
+import papermill as pm
+from tests.notebooks_common import OUTPUT_NOTEBOOK, KERNEL_NAME
+
+
+@pytest.mark.gpu
+@pytest.mark.integration
+def test_entailment_multinli_bert(notebooks):
+    """Smoke-run the MultiNLI BERT notebook on a tiny data sample."""
+    pm.execute_notebook(
+        notebooks["entailment_multinli_bert"],
+        OUTPUT_NOTEBOOK,
+        kernel_name=KERNEL_NAME,
+        parameters=dict(
+            TRAIN_DATA_USED_PERCENT=0.001,
+            DEV_DATA_USED_PERCENT=0.01,
+            NUM_EPOCHS=1,
+        ),
+    )