This repository has been archived by the owner on Nov 16, 2023. It is now read-only.

Notebook test for Gensen Local. #179

Merged (4 commits) on Jul 26, 2019
221 changes: 108 additions & 113 deletions scenarios/sentence_similarity/gensen_local.ipynb

Large diffs are not rendered by default.

14 changes: 12 additions & 2 deletions scenarios/sentence_similarity/gensen_train.py
@@ -134,10 +134,12 @@ def evaluate(
save_dir,
starting_time,
model_state,
max_epoch,
):
""" Function to validate the model.

Args:
max_epoch(int, optional): Limit training to the specified number of epochs.
model_state(dict): Saved model weights.
config(dict): Config object.
train_iterator(BufferedDataIterator): BufferedDataIterator object.
@@ -197,7 +199,7 @@ def evaluate(
)
if (monitor_epoch - min_val_loss_epoch) > config["training"][
"stop_patience"
]:
] or (max_epoch is not None and monitor_epoch >= max_epoch):
logging.info("Saving model ...")
# Save the name with validation loss.
torch.save(
@@ -269,10 +271,11 @@ def evaluate_nli(nli_iterator, model, batch_size, n_gpus):
logging.info("******************************************************")


def train(config, data_folder, learning_rate=0.0001):
def train(config, data_folder, learning_rate=0.0001, max_epoch=None):
""" Train the Gensen model.

Args:
max_epoch(int, optional): Limit training to the specified number of epochs.
config(dict): Loaded JSON file as a Python object.
data_folder(str): Path to the folder containing the data.
learning_rate(float): Learning rate for the model.
@@ -588,6 +591,7 @@ def train(config, data_folder, learning_rate=0.0001):
save_dir=save_dir,
starting_time=start,
model_state=model_state,
max_epoch=max_epoch,
)
if training_complete:
break
@@ -621,6 +625,12 @@ def read_config(json_file):
parser.add_argument(
"--learning_rate", type=float, default=0.0001, help="learning rate"
)
parser.add_argument(
"--max_epoch",
type=int,
default=None,
help="Limit training to specified number of epochs.",
)

args = parser.parse_args()
data_path = args.data_folder
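For context on the stopping logic extended above in gensen_train.py: training now halts either when validation loss has not improved for `stop_patience` epochs or when the new optional `max_epoch` cap is reached. A minimal sketch of that condition, with the loop state (`monitor_epoch`, `min_val_loss_epoch`) assumed from the full file:

```python
def should_stop(monitor_epoch, min_val_loss_epoch, stop_patience, max_epoch=None):
    """Sketch of the extended early-stopping test in gensen_train.py."""
    out_of_patience = (monitor_epoch - min_val_loss_epoch) > stop_patience
    hit_epoch_cap = max_epoch is not None and monitor_epoch >= max_epoch
    return out_of_patience or hit_epoch_cap

# max_epoch=1 forces a stop after the first monitored epoch, however
# large the patience, which is what lets the notebook test run quickly:
assert should_stop(monitor_epoch=1, min_val_loss_epoch=0, stop_patience=10, max_epoch=1)
```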
21 changes: 12 additions & 9 deletions scenarios/sentence_similarity/gensen_wrapper.py
@@ -3,11 +3,11 @@
import json
import os

import numpy as np
import pandas as pd

from scenarios.sentence_similarity.gensen_train import train
from utils_nlp.models.gensen.create_gensen_model import create_multiseq2seq_model
from utils_nlp.eval.classification import compute_correlation_coefficients
from utils_nlp.models.gensen.create_gensen_model import (
create_multiseq2seq_model,
)
from utils_nlp.models.gensen.gensen import GenSenSingle
from utils_nlp.models.gensen.preprocess_utils import gensen_preprocess

@@ -30,12 +30,14 @@ def __init__(
pretrained_embedding_path,
learning_rate=0.0001,
cache_dir=".",
max_epoch=None,
):
self.learning_rate = learning_rate
self.config_file = config_file
self.cache_dir = cache_dir
self.pretrained_embedding_path = pretrained_embedding_path
self.model_name = "gensen_multiseq2seq"
self.max_epoch = max_epoch

self._validate_params()

@@ -118,6 +120,7 @@ def fit(self, train_df, dev_df, test_df):
data_folder=os.path.abspath(self.cache_dir),
config=self.config,
learning_rate=self.learning_rate,
max_epoch=self.max_epoch,
)

self._create_multiseq2seq_model()
@@ -132,13 +135,13 @@ def predict(self, sentences):
sentences(list) : List of sentences.

Returns
array: A pairwise cosine similarity for the sentences provided based on their gensen
vector representations.
pd.DataFrame: Pairwise Pearson correlation coefficients for the sentences
provided, based on their GenSen vector representations.

"""

# self.cache_dir = os.path.join(self.cache_dir, "clean/snli_1.0")
self._create_multiseq2seq_model()
# self._create_multiseq2seq_model()

gensen_model = GenSenSingle(
model_folder=os.path.join(
@@ -149,7 +152,7 @@
)

reps_h, reps_h_t = gensen_model.get_representation(
sentences, pool="last", return_numpy=True
sentences, pool="last", return_numpy=True, tokenize=True
)

return pd.DataFrame(np.corrcoef(reps_h_t))
return compute_correlation_coefficients(reps_h_t)
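A usage sketch of the updated wrapper, assuming the class in gensen_wrapper.py is named `GenSenClassifier` and that a trained model sits in `cache_dir`; the paths and sentences are placeholders, not taken from the repo:

```python
from scenarios.sentence_similarity.gensen_wrapper import GenSenClassifier

# Placeholder paths for illustration only.
clf = GenSenClassifier(
    config_file="gensen_config.json",
    pretrained_embedding_path="glove.840B.300d.h5",
    learning_rate=0.0001,
    cache_dir=".",
    max_epoch=1,  # new argument: cap training at one epoch
)

# clf.fit(train_df, dev_df, test_df) must have run first, since
# predict() no longer rebuilds the multiseq2seq model itself.
sim = clf.predict(["The sky is blue.", "The sky is very blue."])

# predict() now returns a pandas DataFrame of pairwise Pearson
# correlation coefficients rather than a raw NumPy array.
print(sim.shape)  # (2, 2)
```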
7 changes: 6 additions & 1 deletion tests/conftest.py
@@ -31,7 +31,12 @@ def notebooks():
"embedding_trainer": os.path.join(
folder_notebooks, "embeddings", "embedding_trainer.ipynb"
),
"bert_encoder": os.path.join(folder_notebooks, "sentence_similarity", "bert_encoder.ipynb")
"bert_encoder": os.path.join(
folder_notebooks, "sentence_similarity", "bert_encoder.ipynb"
),
"gensen_local": os.path.join(
folder_notebooks, "sentence_similarity", "gensen_local.ipynb"
),
}
return paths

69 changes: 67 additions & 2 deletions tests/integration/test_notebooks_sentence_similarity.py
@@ -5,11 +5,14 @@
import pytest
import papermill as pm
import scrapbook as sb

from azureml.core import Experiment
from azureml.core.run import Run
from utils_nlp.azureml.azureml_utils import get_or_create_workspace
from tests.notebooks_common import OUTPUT_NOTEBOOK


sys.path.append("../../")
ABS_TOL = 0.2
ABS_TOL_PEARSONS = 0.05


@pytest.fixture(scope="module")
@@ -42,3 +45,65 @@ def test_similarity_embeddings_baseline_runs(notebooks, baseline_results):
for key, value in baseline_results.items():
assert results[key] == pytest.approx(value, abs=ABS_TOL)


@pytest.mark.notebooks
@pytest.mark.gpu
def test_similarity_senteval_local_runs(notebooks, gensen_senteval_results):
notebook_path = notebooks["senteval_local"]
pm.execute_notebook(
notebook_path,
OUTPUT_NOTEBOOK,
parameters=dict(
PATH_TO_SENTEVAL="../SentEval", PATH_TO_GENSEN="../gensen"
),
)
out = sb.read_notebook(OUTPUT_NOTEBOOK).scraps.data_dict["results"]
for key, val in gensen_senteval_results.items():
for task, result in val.items():
assert out[key][task] == result


@pytest.mark.notebooks
@pytest.mark.azureml
def test_similarity_senteval_azureml_runs(notebooks, gensen_senteval_results):
notebook_path = notebooks["senteval_azureml"]
pm.execute_notebook(
notebook_path,
OUTPUT_NOTEBOOK,
parameters=dict(
PATH_TO_SENTEVAL="../SentEval",
PATH_TO_GENSEN="../gensen",
PATH_TO_SER="utils_nlp/eval/senteval.py",
AZUREML_VERBOSE=False,
config_path="tests/ci",
),
)
result = sb.read_notebook(OUTPUT_NOTEBOOK).scraps.data_dict
ws = get_or_create_workspace(config_path="tests/ci")
experiment = Experiment(ws, name=result["experiment_name"])
run = Run(experiment, result["run_id"])
assert run.get_metrics()["STSBenchmark::pearson"] == pytest.approx(
gensen_senteval_results["pearson"]["STSBenchmark"], abs=ABS_TOL
)


@pytest.mark.notebooks
@pytest.mark.gpu
def test_gensen_local(notebooks):
notebook_path = notebooks["gensen_local"]
pm.execute_notebook(
notebook_path,
OUTPUT_NOTEBOOK,
parameters=dict(
max_epoch=1,
config_filepath="../../scenarios/sentence_similarity/gensen_config.json",
Member: Regarding the configuration you have here, https://github.com/microsoft/nlp/blob/staging/scenarios/sentence_similarity/gensen_config.json, would it be possible to add some of these parameters as default values in the code and remove the config JSON?

Contributor (author): Getting rid of the config requires some major changes to the code. @catherine667 mentioned this in her PR comments in #78 (comment). It would be difficult to restructure the code in the one week we have left on the project.

Member: Makes sense.

base_data_path="../../data",
),
)

results = sb.read_notebook(OUTPUT_NOTEBOOK).scraps.data_dict["results"]
expected = {"0": {"0": 1, "1": 0.95}, "1": {"0": 0.95, "1": 1}}

for key, value in expected.items():
for k, v in value.items():
assert results[key][k] == pytest.approx(v, abs=ABS_TOL_PEARSONS)
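On the review thread above about folding `gensen_config.json` into code defaults (deferred for lack of time): a rough sketch of what such a refactor might look like. `DEFAULT_CONFIG` and its keys are hypothetical, not taken from the repo:

```python
import json

# Hypothetical defaults; the real values live in gensen_config.json.
DEFAULT_CONFIG = {
    "training": {"stop_patience": 10, "lrate": 0.0001},
    "data": {"max_len_src": 90, "max_len_trg": 90},
}


def read_config(json_file=None):
    """Overlay an optional JSON file on in-code defaults, so the
    config file becomes optional rather than required."""
    config = {section: dict(values) for section, values in DEFAULT_CONFIG.items()}
    if json_file is not None:
        with open(json_file) as f:
            for section, values in json.load(f).items():
                config.setdefault(section, {}).update(values)
    return config
```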
16 changes: 16 additions & 0 deletions tests/unit/test_eval_classification.py
@@ -0,0 +1,16 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import numpy as np

from utils_nlp.eval.classification import compute_correlation_coefficients


def test_compute():
x = np.random.rand(2, 100)
df = compute_correlation_coefficients(x)
assert df.shape == (2, 2)

y = np.random.rand(2, 100)
df = compute_correlation_coefficients(x, y)
assert df.shape == (4, 4)
23 changes: 23 additions & 0 deletions utils_nlp/eval/classification.py
@@ -8,6 +8,9 @@
f1_score,
)

from numpy import corrcoef
import pandas as pd


def eval_classification(actual, predicted, round_decimals=4):
"""Returns common classification evaluation metrics.
@@ -32,3 +35,23 @@ def eval_classification(actual, predicted, round_decimals=4):
f1_score(actual, predicted, average=None).round(round_decimals)
),
}


def compute_correlation_coefficients(x, y=None):
"""
Compute Pearson product-moment correlation coefficients.

Args:
x: array_like
A 1-D or 2-D array containing multiple variables and observations.
Each row of `x` represents a variable, and each column a single
observation of all those variables.

y: array_like, optional
An additional set of variables and observations. `y` has the same
shape as `x`.

Returns:
pd.DataFrame: A pandas DataFrame holding the correlation coefficient matrix of the variables.
"""
return pd.DataFrame(corrcoef(x, y))
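To make the optional `y` argument concrete: `numpy.corrcoef` stacks `y` below `x`, which is why the unit test above expects a 4x4 matrix from two 2x100 inputs. A quick example under those shapes:

```python
import numpy as np

from utils_nlp.eval.classification import compute_correlation_coefficients

x = np.random.rand(2, 100)  # two variables, 100 observations each
y = np.random.rand(2, 100)  # two more variables, same shape

# corrcoef stacks y under x, yielding pairwise coefficients for all
# four variables: a symmetric 4x4 matrix with ones on the diagonal.
df = compute_correlation_coefficients(x, y)
print(df.shape)              # (4, 4)
print(float(df.iloc[0, 0]))  # 1.0
```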
2 changes: 1 addition & 1 deletion utils_nlp/models/gensen/utils.py
@@ -393,7 +393,7 @@ def __init__(
test(torch.Tensor): Testing dataset.
vocab_size(int): The size of the vocabulary.
lowercase(bool): Whether to lowercase the dataset.
vocab(list): The list of the vocabulary.
vocab(Union[bytes, str]): The vocabulary.
"""
self.seed = seed
self.train = train