# SentEval with AzureML
[SentEval](https://github.com/facebookresearch/SentEval) is a widely used benchmarking tool for evaluating general-purpose sentence embeddings. It provides a simple interface for evaluating your embeddings on up to 17 supported downstream tasks, such as sentiment classification, natural language inference, and semantic similarity.

This notebook shows how to run SentEval for [Gensen](https://github.com/Maluuba/gensen) with the AzureML SDK, where
- the model weights are on AzureML Datastore. To download the pre-trained Gensen model, run `bash download_models.sh` from the gensen/data/models directory. 
- the embeddings are on AzureML Datastore. To download the pre-trained embeddings, run `bash glove2h5.sh` from the gensen/data/embedding directory.
- the data for the SentEval transfer tasks are on AzureML Datastore. To download these datasets, run `bash get_transfer_data.bash` from the SentEval/data/downstream directory.
- evaluation runs on the AzureML Workspace GPU Compute Target (no extra provisioning/config needed).

### Global Settings

In [None]:
import os
import sys
import shutil
import scrapbook as sb

import azureml.core
from azureml.core.workspace import Workspace

from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

from azureml.core import Datastore
import azureml.data
from azureml.data.azure_storage_datastore import AzureFileDatastore

from azureml.train.dnn import PyTorch
from azureml.core.runconfig import MpiConfiguration
from azureml.core import Experiment
from azureml.widgets import RunDetails

sys.path.append("../../")
from utils_nlp.azureml.azureml_utils import get_or_create_workspace

In [None]:
# Local clones of the external dependencies; point these at your own checkouts.
PATH_TO_GENSEN = "../../../gensen"  # gensen source code
PATH_TO_SENTEVAL = "../../../SentEval"  # senteval source code

# utils_nlp modules that the remote evaluation script imports.
PATH_TO_SER = "../../utils_nlp/eval/senteval.py"
PATH_TO_AML = "../../utils_nlp/azureml/azureml_utils.py"

# Directory containing config.json with the azureml credentials.
config_path = "./.azureml"

# When True, the notebook prints workspace/compute/datastore details and
# shows upload progress.
AZUREML_VERBOSE = True

# Name of the AzureML Compute Target cluster.
cluster_name = "eval-gpu"

# Name of the AzureML experiment; also reused as the staging folder name,
# both locally and on the datastore.
experiment_name = "senteval-pytorch-gensen"

### Define the AzureML Workspace

In [None]:
# Settings handed to the utils_nlp helper. Replace the <...> placeholders
# with your own Azure details.
# NOTE(review): presumably the placeholders are only needed when no usable
# config.json exists under config_path -- confirm against azureml_utils.
workspace_settings = {
    "config_path": config_path,
    "subscription_id": "<SUBSCRIPTION_ID>",
    "resource_group": "<RESOURCE_GROUP>",
    "workspace_name": "<WORKSPACE_NAME>",
    "workspace_region": "<WORKSPACE_REGION>",
}
ws = get_or_create_workspace(**workspace_settings)

if AZUREML_VERBOSE:
    # Echo which workspace the rest of the notebook will operate on.
    for report_line in (
        "Workspace name: {}".format(ws.name),
        "Resource group: {}".format(ws.resource_group),
    ):
        print(report_line)

Attach the GPU-enabled compute target, or create a new one if it doesn't already exist.

In [None]:
# Reuse the cluster named `cluster_name` when the workspace already has one;
# the SDK signals a missing cluster with ComputeTargetException.
try:
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
except ComputeTargetException:
    print("Creating new compute target: {}".format(cluster_name))
    # Up to 4 STANDARD_NC6 nodes are requested for the new cluster.
    provisioning = AmlCompute.provisioning_configuration(
        vm_size="STANDARD_NC6",
        max_nodes=4,
    )
    compute_target = ComputeTarget.create(ws, cluster_name, provisioning)
    # Block until provisioning has finished so later cells can use the cluster.
    compute_target.wait_for_completion(show_output=True)
else:
    print("Found compute target: {}".format(cluster_name))

if AZUREML_VERBOSE:
    cluster_status = compute_target.get_status()
    print(cluster_status.serialize())

Define the datastore. Here we will use the default datastore and then upload our external dependencies. 

If your data is already on the cloud, you can register your resource on any Azure storage account as the datastore. (Currently, the Azure storage services that can be registered as datastores are Azure Blob Container, Azure File Share, Azure Data Lake, Azure Data Lake Gen2, Azure SQL Database, Azure PostgreSQL, and Databricks File System. Learn more about the Datastore module [here](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.datastore?view=azure-ml-py).)

In [None]:
# All dependencies are staged on the workspace's default datastore.
ds = ws.get_default_datastore()

if AZUREML_VERBOSE:
    datastore_report = "Default datastore: {}".format(ds.name)
    print(datastore_report)

In [None]:
# Stage every dependency of the evaluation script on the datastore, under a
# folder named after the experiment.
#
# Datastore target paths are remote paths, so they are built with "/" (the
# same way the estimator cell builds its mount paths) rather than with
# os.path.join, which would insert "\\" on Windows.

# Upload the gensen and senteval source trees. overwrite=False is kept from
# the original calls.
# NOTE(review): presumably overwrite=False makes a re-run skip files that are
# already on the datastore -- confirm against the SDK reference.
for local_dir, remote_name in (
    (PATH_TO_GENSEN, "gensen_lib"),
    (PATH_TO_SENTEVAL, "senteval_lib"),
):
    ds.upload(
        src_dir=local_dir,
        target_path="{}/{}".format(experiment_name, remote_name),
        overwrite=False,
        show_progress=AZUREML_VERBOSE,
    )

# Locate the utils_nlp package root as the deepest directory shared by the two
# helper modules. os.path.commonpath compares whole path components, whereas
# os.path.commonprefix compares character by character and can return a
# partial directory name (e.g. ".../utils_nlp/e" for "eval" vs "extra").
utils_nlp_root = os.path.commonpath([PATH_TO_SER, PATH_TO_AML])

# Upload the utils_nlp dependencies: the package __init__.py plus the two
# helper modules, each into its own sub-folder. These are always overwritten
# (overwrite=True).
for local_files, remote_name in (
    ([os.path.join(utils_nlp_root, "__init__.py")], "utils_nlp"),
    ([PATH_TO_SER], "utils_nlp/eval"),
    ([PATH_TO_AML], "utils_nlp/azureml"),
):
    ds.upload_files(
        files=local_files,
        target_path="{}/{}".format(experiment_name, remote_name),
        overwrite=True,
        show_progress=AZUREML_VERBOSE,
    )

Note that after the upload is complete, you can safely delete the dependencies from your local machine to free up some disk space.

### Create the evaluation script

In [None]:
# Local folder that will hold the generated evaluation script; it is named
# after the experiment and lives in the current working directory.
notebook_dir = os.getcwd()
src_dir = os.path.join(notebook_dir, experiment_name)

# exist_ok=True lets this cell be re-run without failing on an existing folder.
os.makedirs(src_dir, exist_ok=True)

In [None]:
%%writefile $src_dir/evaluate.py
import os
import sys
import argparse
import torch
import pandas as pd
from azureml.core.run import Run

if __name__ == "__main__":
    # Each flag carries the path of one datastore folder as seen by this script.
    cli = argparse.ArgumentParser()
    for flag in ("--ds_gensen", "--ds_senteval", "--ds_utils", "--ds_utils_azureml"):
        cli.add_argument(flag, type=str, dest=flag.lstrip("-"))
    args = cli.parse_args()

    # Import the dependencies. They are not installed packages, so each
    # folder is put on sys.path right before the import that needs it.
    sys.path.append(args.ds_gensen)
    from gensen import GenSen, GenSenSingle

    sys.path.append(args.ds_utils)
    from eval.senteval import SentEvalRunner

    sys.path.append(args.ds_utils_azureml)
    from azureml_utils import log_metrics_table

    # Transfer tasks to evaluate on.
    TRANSFER_TASKS = ["STSBenchmark", "STS12", "STS13", "STS14", "STS15", "STS16"]

    # Define the model: weights and embeddings are read from the gensen folder,
    # and the GPU is used whenever torch reports one.
    model_params = {
        "folder_path": os.path.join(args.ds_gensen, "data/models"),
        "prefix": "nli_large_bothskip",
        "pretrain": os.path.join(
            args.ds_gensen, "data/embedding/glove.840B.300d.h5"
        ),
        "cuda": torch.cuda.is_available(),
    }
    gensen = GenSenSingle(
        model_folder=model_params["folder_path"],
        filename_prefix=model_params["prefix"],
        pretrained_emb=model_params["pretrain"],
        cuda=model_params["cuda"],
    )

    # Define the SentEval runner.
    ser = SentEvalRunner(path_to_senteval=args.ds_senteval)
    ser.set_transfer_data_path(relative_path="data")
    ser.set_transfer_tasks(TRANSFER_TASKS)
    ser.set_model(gensen)
    ser.set_params({"usepytorch": True, "kfold": 10})

    def prepare(params, samples):
        """Collect the vocabulary of ``samples`` for the current task.

        Each sample is a list of tokens. Text is lower-cased for every task
        except "TREC". The vocabulary is only needed by the optional
        vocab-expansion step, which is disabled below, so nothing is returned.
        """
        vocab = set()
        for tokens in samples:
            text = " ".join(tokens)
            if params.current_task != "TREC":
                text = text.lower()
            vocab.update(text.split())

        # Special tokens are always part of the vocabulary.
        vocab.update(("<s>", "<pad>", "<unk>", "</s>"))
        # Optional vocab expansion
        # params["model"].vocab_expansion(vocab)

    def batcher(params, batch):
        """Embed one batch of tokenized sentences with the configured model.

        ``batch`` is a list of token lists. Returns the second element of
        ``get_representation``'s result, requested with ``return_numpy=True``.
        """
        # These tasks use max pooling; every other task uses the "last" strategy.
        max_tasks = ["MR", "CR", "SUBJ", "MPQA", "ImageCaptionRetrieval"]
        strategy = "max" if params.current_task in max_tasks else "last"

        sentences = [" ".join(tokens).lower() for tokens in batch]
        # NOTE(review): presumably params["model"] is the model registered via
        # ser.set_model above -- confirm in SentEvalRunner.
        _, embeddings = params["model"].get_representation(
            sentences, pool=strategy, return_numpy=True
        )
        return embeddings

    # Run SentEval
    results = ser.run(batcher, prepare)

    # Log results as scalars in AzureML
    eval_metrics = ser.log_mean(results, selected_metrics=["pearson", "spearman"])
    aml_run = Run.get_context()
    log_metrics_table(eval_metrics, aml_run, as_scalar=True)

### Create a PyTorch Estimator to submit the evaluation script to the compute target

In [None]:
# Command-line flag of evaluate.py -> datastore folder passed for that flag.
# The folders are the ones populated by the upload cell.
mount_templates = {
    "--ds_gensen": "{}/gensen_lib",
    "--ds_senteval": "{}/senteval_lib",
    "--ds_utils": "{}/utils_nlp",
    "--ds_utils_azureml": "{}/utils_nlp/azureml",
}
script_params = {
    flag: ds.path(template.format(experiment_name)).as_mount()
    for flag, template in mount_templates.items()
}

# NOTE(review): node_count=4 with MpiConfiguration launches evaluate.py once per
# node, but the script itself does not appear to split the work -- confirm that
# running it on four nodes is intended.
est = PyTorch(
    source_directory=src_dir,
    script_params=script_params,
    compute_target=compute_target,
    entry_script="evaluate.py",
    node_count=4,
    process_count_per_node=1,
    distributed_training=MpiConfiguration(),
    use_gpu=True,
    framework_version="1.0",
    # Extra packages requested for the remote environment.
    conda_packages=["scikit-learn==0.20.3", "h5py", "nltk"],
    pip_packages=["pandas"],
)

### Run Evaluation

In [None]:
# Get a handle on the experiment named `experiment_name` in the workspace and
# submit the estimator to it.
experiment = Experiment(workspace=ws, name=experiment_name)
run = experiment.submit(est)

Visualize the run via a Jupyter widget. Alternatively, block until the script has completed.

In [None]:
# Show the run in the Jupyter widget.
run_widget = RunDetails(run)
run_widget.show()
# Alternative: block until the script has completed.
# run.wait_for_completion(show_output=AZUREML_VERBOSE)

In [None]:
# Persist properties of the run so we can access the logged metrics later.
# Each (name, value) pair is recorded in the notebook with scrapbook.
run_properties = (
    ("run_id", run.get_details()["runId"]),
    ("experiment_name", experiment_name),
    ("ws_config", config_path),
)
for scrap_name, scrap_value in run_properties:
    sb.glue(scrap_name, scrap_value)

In [None]:
# Cleanup: remove the local folder that held the generated evaluate.py.
# The existence check makes the cell safe to re-run after the folder has
# already been deleted (a bare rmtree would raise FileNotFoundError).
if os.path.isdir(src_dir):
    shutil.rmtree(src_dir)