
Commit
Changes for code review comments.
1. Added a compute_correlation_coefficients method to utils and separated it
from the predict method to preserve single responsibility.
2. Added tests accordingly.
3. In the notebook, added a scrapbook scrap to track predictions and
assert them in tests.
4. Added extra documentation explaining what the predict method does.
5. Minor fix to stop training at max_epoch.
AbhiramE committed Jul 25, 2019
1 parent ddf810f commit c6ab656
Showing 6 changed files with 142 additions and 49 deletions.
123 changes: 84 additions & 39 deletions scenarios/sentence_similarity/gensen_local.ipynb
@@ -58,7 +58,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 11,
"metadata": {
"pycharm": {
"name": "#%%\n"
@@ -84,16 +84,16 @@
"from utils_nlp.dataset.preprocess import to_lowercase, to_nltk_tokens\n",
"from utils_nlp.dataset import snli, preprocess\n",
"from scenarios.sentence_similarity.gensen_wrapper import GenSenClassifier\n",
"from utils_nlp.models.pretrained_embeddings.glove import download_and_extract \n",
"from utils_nlp.models.pretrained_embeddings.glove import download_and_extract\n",
"import scrapbook as sb\n",
"\n",
"\n",
"print(\"System version: {}\".format(sys.version))\n",
"BASE_DATA_PATH = '../../data'"
"print(\"System version: {}\".format(sys.version))"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 12,
"metadata": {
"tags": [
"parameters"
@@ -102,7 +102,9 @@
"outputs": [],
"source": [
"max_epoch = None\n",
"config_filepath = 'gensen_config.json'"
"config_filepath = 'gensen_config.json'\n",
"base_data_path = '../../data'\n",
"nrows = None"
]
},
{
@@ -142,7 +144,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 3,
"metadata": {
"pycharm": {
"name": "#%%\n"
@@ -327,15 +329,15 @@
"4 2267923837.jpg#2r1e entailment NaN NaN NaN NaN "
]
},
"execution_count": 4,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train = snli.load_pandas_df(BASE_DATA_PATH, file_split=\"train\")\n",
"dev = snli.load_pandas_df(BASE_DATA_PATH, file_split=\"dev\")\n",
"test = snli.load_pandas_df(BASE_DATA_PATH, file_split=\"test\")\n",
"train = snli.load_pandas_df(base_data_path, file_split=\"train\", nrows=nrows)\n",
"dev = snli.load_pandas_df(base_data_path, file_split=\"dev\", nrows=nrows)\n",
"test = snli.load_pandas_df(base_data_path, file_split=\"test\", nrows=nrows)\n",
"\n",
"train.head()"
]
@@ -351,7 +353,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 4,
"metadata": {
"pycharm": {
"name": "#%%\n"
@@ -380,7 +382,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 5,
"metadata": {
"pycharm": {
"name": "#%%\n"
@@ -490,7 +492,7 @@
"4 [two, kids, at, a, ballgame, wash, their, hand... "
]
},
"execution_count": 6,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
@@ -527,7 +529,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 14,
"metadata": {
"pycharm": {
"name": "#%%\n"
@@ -543,7 +545,7 @@
}
],
"source": [
"pretrained_embedding_path = download_and_extract(BASE_DATA_PATH)"
"pretrained_embedding_path = download_and_extract(base_data_path)"
]
},
{
@@ -555,27 +557,18 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 15,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The autoreload extension is already loaded. To reload it, use:\n",
" %reload_ext autoreload\n"
]
}
],
"outputs": [],
"source": [
"clf = GenSenClassifier(config_file = config_filepath, \n",
" pretrained_embedding_path = pretrained_embedding_path,\n",
" learning_rate = 0.0001, \n",
" cache_dir=BASE_DATA_PATH,\n",
" cache_dir=base_data_path,\n",
" max_epoch=max_epoch)"
]
},
@@ -588,13 +581,46 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 8,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/data/anaconda/envs/nlp_gpu/lib/python3.6/site-packages/torch/nn/modules/rnn.py:46: UserWarning: dropout option adds dropout after all but last recurrent layer, so non-zero dropout expects num_layers greater than 1, but got dropout=0.8 and num_layers=1\n",
" \"num_layers={}\".format(dropout, num_layers))\n",
"../../scenarios/sentence_similarity/gensen_train.py:431: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.\n",
" torch.nn.utils.clip_grad_norm(model.parameters(), 1.0)\n",
"../../utils_nlp/models/gensen/utils.py:364: UserWarning: volatile was removed and now has no effect. Use `with torch.no_grad():` instead.\n",
" Variable(torch.LongTensor(sorted_src_lens), volatile=True)\n",
"/data/anaconda/envs/nlp_gpu/lib/python3.6/site-packages/torch/nn/functional.py:1332: UserWarning: nn.functional.sigmoid is deprecated. Use torch.sigmoid instead.\n",
" warnings.warn(\"nn.functional.sigmoid is deprecated. Use torch.sigmoid instead.\")\n",
"/data/anaconda/envs/nlp_gpu/lib/python3.6/site-packages/torch/nn/functional.py:1320: UserWarning: nn.functional.tanh is deprecated. Use torch.tanh instead.\n",
" warnings.warn(\"nn.functional.tanh is deprecated. Use torch.tanh instead.\")\n",
"../../scenarios/sentence_similarity/gensen_train.py:523: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.\n",
" torch.nn.utils.clip_grad_norm(model.parameters(), 1.0)\n",
"/data/anaconda/envs/nlp_gpu/lib/python3.6/site-packages/horovod/torch/__init__.py:163: UserWarning: optimizer.step(synchronize=True) called after optimizer.synchronize(). This can cause training slowdown. You may want to consider using optimizer.step(synchronize=False) if you use optimizer.synchronize() in your code.\n",
" warnings.warn(\"optimizer.step(synchronize=True) called after \"\n",
"../../scenarios/sentence_similarity/gensen_train.py:243: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.\n",
" f.softmax(class_logits).data.cpu().numpy().argmax(axis=-1)\n",
"../../scenarios/sentence_similarity/gensen_train.py:262: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.\n",
" f.softmax(class_logits).data.cpu().numpy().argmax(axis=-1)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 1h 19min 28s, sys: 22min 1s, total: 1h 41min 30s\n",
"Wall time: 1h 41min 22s\n"
]
}
],
"source": [
"%%time\n",
"clf.fit(train, dev, test)"
@@ -604,27 +630,43 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2.3 Predict"
"### 2.3 Predict\n",
"\n",
"In the predict method we perform Pearson's Correlation computation [\\[2\\]](#References) on the outputs of the model. The predictions of the model can be further improved by hyperparameter tuning which we walk through in the other example [here](gensen_aml_deep_dive.ipynb). "
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 16,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"******** Similarity Score for sentences **************\n",
" 0 1\n",
"0 1.000000 0.966793\n",
"1 0.966793 1.000000\n"
]
}
],
"source": [
"sentences = [\n",
" 'the quick brown fox jumped over the lazy dog',\n",
" 'bright sunshiny day tomorrow.'\n",
" 'The sky is blue and beautiful',\n",
" 'Love this blue and beautiful sky!'\n",
" ]\n",
"\n",
"results = clf.predict(sentences)\n",
"print(\"******** Similarity Score for sentences **************\")\n",
"print(results)"
"print(results)\n",
"\n",
"# Record results with scrapbook for tests\n",
"sb.glue(\"results\", results.to_dict())"
]
},
{
@@ -634,16 +676,19 @@
"## References\n",
"\n",
"1. Subramanian, Sandeep and Trischler, Adam and Bengio, Yoshua and Pal, Christopher J, [*Learning general purpose distributed sentence representations via large scale multi-task learning*](https://arxiv.org/abs/1804.00079), ICLR, 2018.\n",
"3. Semantic textual similarity. url: http://nlpprogress.com/english/semantic_textual_similarity.html"
"2. Pearson's Correlation Coefficient. url: https://en.wikipedia.org/wiki/Pearson_correlation_coefficient\n",
"3. Semantic textual similarity. url: http://nlpprogress.com/english/semantic_textual_similarity.html\n",
"4. Minh-Thang Luong, Quoc V Le, Ilya Sutskever, Oriol Vinyals, and Lukasz Kaiser. [*Multi-task sequence to sequence learning*](https://arxiv.org/abs/1511.06114), 2015.\n",
"5. Bryan McCann, James Bradbury, Caiming Xiong, and Richard Socher. [*Learned in translation: Contextualized word vectors](https://arxiv.org/abs/1708.00107), 2017. "
]
}
],
"metadata": {
"celltoolbar": "Tags",
"kernelspec": {
"display_name": "Python [conda env:nlp_gpu]",
"display_name": "Python (nlp_gpu)",
"language": "python",
"name": "conda-env-nlp_gpu-py"
"name": "nlp_gpu"
},
"language_info": {
"codemirror_mode": {
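The markdown cell above says `predict` computes Pearson correlation coefficients over the model's sentence embeddings. For reference, a minimal, self-contained sketch of that computation (the vectors are made-up stand-ins for GenSen embeddings, not model outputs):

```python
# Hand-rolled Pearson correlation between two illustrative vectors,
# checked against np.corrcoef, which predict() uses under the hood.
import numpy as np

a = np.array([0.2, 0.5, 0.1, 0.9])
b = np.array([0.3, 0.4, 0.2, 0.8])

r = ((a - a.mean()) * (b - b.mean())).sum() / (
    np.sqrt(((a - a.mean()) ** 2).sum()) * np.sqrt(((b - b.mean()) ** 2).sum())
)
assert np.isclose(r, np.corrcoef(a, b)[0, 1])
print(r)  # in [-1, 1]; 1.0 means perfectly linearly related
```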
2 changes: 1 addition & 1 deletion scenarios/sentence_similarity/gensen_train.py
@@ -199,7 +199,7 @@ def evaluate(
)
if (monitor_epoch - min_val_loss_epoch) > config["training"][
"stop_patience"
] or (max_epoch is not None and monitor_epoch > max_epoch):
] or (max_epoch is not None and monitor_epoch >= max_epoch):
logging.info("Saving model ...")
# Save the name with validation loss.
torch.save(
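The one-character fix above (`>` to `>=`) is what actually stops training at `max_epoch` rather than one epoch past it. A minimal sketch of the stopping check, with names mirroring gensen_train.py (the surrounding config and training-loop plumbing is assumed):

```python
# Sketch of the early-stopping condition; stop_patience stands in for
# config["training"]["stop_patience"] in the real code.
def should_stop(monitor_epoch, min_val_loss_epoch, stop_patience, max_epoch=None):
    out_of_patience = (monitor_epoch - min_val_loss_epoch) > stop_patience
    # `>=` halts at the cap; with the old `>`, a run with max_epoch=1
    # would have trained through a second epoch before stopping.
    at_epoch_cap = max_epoch is not None and monitor_epoch >= max_epoch
    return out_of_patience or at_epoch_cap

assert should_stop(1, 0, stop_patience=10, max_epoch=1)      # stops at the cap
assert not should_stop(1, 0, stop_patience=10, max_epoch=2)  # keeps training
```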
18 changes: 9 additions & 9 deletions scenarios/sentence_similarity/gensen_wrapper.py
@@ -3,11 +3,11 @@
import json
import os

import numpy as np
import pandas as pd

from scenarios.sentence_similarity.gensen_train import train
from utils_nlp.models.gensen.create_gensen_model import create_multiseq2seq_model
from utils_nlp.eval.classification import compute_correlation_coefficients
from utils_nlp.models.gensen.create_gensen_model import (
create_multiseq2seq_model,
)
from utils_nlp.models.gensen.gensen import GenSenSingle
from utils_nlp.models.gensen.preprocess_utils import gensen_preprocess

@@ -135,13 +135,13 @@ def predict(self, sentences):
sentences(list) : List of sentences.
Returns
array: A pairwise cosine similarity for the sentences provided based on their gensen
vector representations.
pd.DataFrame: A pairwise Pearson correlation matrix for the sentences
provided, based on their gensen vector representations.
"""

# self.cache_dir = os.path.join(self.cache_dir, "clean/snli_1.0")
self._create_multiseq2seq_model()
# self._create_multiseq2seq_model()

gensen_model = GenSenSingle(
model_folder=os.path.join(
@@ -152,7 +152,7 @@
)

reps_h, reps_h_t = gensen_model.get_representation(
sentences, pool="last", return_numpy=True
sentences, pool="last", return_numpy=True, tokenize=True
)

return pd.DataFrame(np.corrcoef(reps_h_t))
return compute_correlation_coefficients(reps_h_t)
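With this change, `predict` delegates the correlation computation to the shared utility instead of calling `np.corrcoef` inline. A rough sketch of the resulting data flow (the random array is a stand-in for `reps_h_t`; the 2048-dimensional size is illustrative, not taken from the model config):

```python
import numpy as np

from utils_nlp.eval.classification import compute_correlation_coefficients

# Stand-in for the pooled sentence embeddings returned by
# GenSenSingle.get_representation (one row per input sentence).
reps_h_t = np.random.rand(2, 2048)

similarity = compute_correlation_coefficients(reps_h_t)
print(similarity)  # 2x2 DataFrame with 1.0 on the diagonal
```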
9 changes: 9 additions & 0 deletions tests/integration/test_notebooks_sentence_similarity.py
@@ -12,6 +12,7 @@

sys.path.append("../../")
ABS_TOL = 0.2
ABS_TOL_PEARSONS = 0.05


@pytest.fixture(scope="module")
@@ -96,5 +97,13 @@ def test_gensen_local(notebooks):
parameters=dict(
max_epoch=1,
config_filepath="../../scenarios/sentence_similarity/gensen_config.json",
base_data_path="../../data",
),
)

results = sb.read_notebook(OUTPUT_NOTEBOOK).scraps.data_dict["results"]
expected = {"0": {"0": 1, "1": 0.95}, "1": {"0": 0.95, "1": 1}}

for key, value in expected.items():
for k, v in value.items():
assert results[key][k] == pytest.approx(v, abs=ABS_TOL_PEARSONS)
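The assertion depends on the scrapbook round-trip: the notebook glues the similarity DataFrame as a dict, and the test reads it back out of the executed notebook. A sketch of that round-trip (the output path is illustrative):

```python
import scrapbook as sb

# Notebook side (already in the diff above):
#     sb.glue("results", results.to_dict())

# Test side: read the executed notebook and recover the scrap.
nb = sb.read_notebook("output.ipynb")
results = nb.scraps.data_dict["results"]
# Scraps are stored as JSON, which stringifies the DataFrame's integer
# labels; hence the string keys ("0", "1") in the expected dict above.
```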
16 changes: 16 additions & 0 deletions tests/unit/test_eval_classification.py
@@ -0,0 +1,16 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import numpy as np

from utils_nlp.eval.classification import compute_correlation_coefficients


def test_compute():
x = np.random.rand(2, 100)
df = compute_correlation_coefficients(x)
assert df.shape == (2, 2)

y = np.random.rand(2, 100)
df = compute_correlation_coefficients(x, y)
assert df.shape == (4, 4)
23 changes: 23 additions & 0 deletions utils_nlp/eval/classification.py
@@ -8,6 +8,9 @@
f1_score,
)

from numpy import corrcoef
import pandas as pd


def eval_classification(actual, predicted, round_decimals=4):
"""Returns common classification evaluation metrics.
@@ -32,3 +35,23 @@ def eval_classification(actual, predicted, round_decimals=4):
f1_score(actual, predicted, average=None).round(round_decimals)
),
}


def compute_correlation_coefficients(x, y=None):
"""
Compute Pearson product-moment correlation coefficients.
Args:
x: array_like
A 1-D or 2-D array containing multiple variables and observations.
Each row of `x` represents a variable, and each column a single
observation of all those variables.
y: array_like, optional
An additional set of variables and observations. `y` has the same
shape as `x`.
Returns:
pd.DataFrame: A pandas DataFrame of the correlation coefficient matrix of the variables.
"""
return pd.DataFrame(corrcoef(x, y))
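One detail worth noting about the wrapped `np.corrcoef` call: when `y` is given, its rows are stacked beneath `x`'s, so each row of either array counts as one variable. That is why the unit test's two 2×100 inputs yield a 4×4 matrix:

```python
import numpy as np

from utils_nlp.eval.classification import compute_correlation_coefficients

x = np.random.rand(2, 100)
y = np.random.rand(2, 100)

# corrcoef treats each row as a variable: x alone gives 2 variables,
# x stacked with y gives 4, matching the shapes asserted in the tests.
print(compute_correlation_coefficients(x).shape)     # (2, 2)
print(compute_correlation_coefficients(x, y).shape)  # (4, 4)
```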
