In [None]:
!pip install datasets
!pip install tensorflow-hub
!pip install sentence-transformers

In [None]:
from datasets import load_metric, load_dataset

# Load the GLUE "mrpc" metric and the matching GLUE "mrpc" dataset.
# NOTE(review): `metric` is rebound to the "stsb" metric in the next cell and
# `mrpc` is not referenced anywhere else in the visible notebook — confirm
# whether this cell is still needed.
metric = load_metric("glue", "mrpc")
mrpc = load_dataset("glue", "mrpc")

In [None]:
# Rebind `metric` to the GLUE "stsb" metric and try it on three toy
# prediction/reference pairs; as the cell's last expression, the value
# returned by `compute` is what the notebook displays.
metric = load_metric("glue", "stsb")
metric.compute(predictions=[1, 2, 3], references=[5, 2, 2])

In [None]:
from datasets import load_metric, load_dataset

# Metric and dataset for the GLUE "stsb" task. These are the objects the
# evaluation cells further down rely on: `stsb["validation"]` supplies the
# sentence pairs and labels, and `stsb_metric.compute` scores the predictions.
stsb_metric = load_metric("glue", "stsb")
stsb = load_dataset("glue", "stsb")

In [None]:
import tensorflow_hub as hub

# Universal Sentence Encoder v4, loaded from TF Hub; it is called directly on
# a string tensor inside `use_sts_benchmark`.
use_model = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")
from sentence_transformers import SentenceTransformer

# Sentence-Transformers checkpoint "stsb-distilroberta-base-v2"; its `.encode`
# method is what `roberta_sts_benchmark` uses to embed sentences.
distilroberta = SentenceTransformer("stsb-distilroberta-base-v2")

In [None]:
import tensorflow as tf
import math


def use_sts_benchmark(batch):
    """Score sentence pairs with the Universal Sentence Encoder (`use_model`).

    Args:
        batch: Object indexable by ``"sentence1"`` and ``"sentence2"``, each
            yielding the sentences for one side of the pairs.

    Returns:
        A tensor of angular similarities, ``1 - arccos(cos_sim) / pi``, which
        lies in ``[0, 1]``. There is one score per pair, assuming the encoder
        returns one embedding row per input sentence.
    """

    def _embed(sentences):
        # Unit-length rows, so the row-wise dot product below is the cosine.
        return tf.nn.l2_normalize(use_model(tf.constant(sentences)), axis=1)

    left = _embed(batch["sentence1"])
    right = _embed(batch["sentence2"])

    cosine = tf.reduce_sum(left * right, axis=1)
    # Keep values inside acos's domain; rounding can push them just past +/-1.
    cosine = tf.clip_by_value(cosine, -1.0, 1.0)
    return 1.0 - tf.acos(cosine) / math.pi

In [None]:
def roberta_sts_benchmark(batch):
    """Score sentence pairs with the `distilroberta` sentence encoder.

    Args:
        batch: Object indexable by ``"sentence1"`` and ``"sentence2"``, each
            yielding the sentences for one side of the pairs.

    Returns:
        A tensor of angular similarities, ``1 - arccos(cos_sim) / pi``, which
        lies in ``[0, 1]``. There is one score per pair, assuming ``encode``
        returns one embedding row per input sentence.
    """
    # Embed each side in turn and rescale every row to unit length, so the
    # row-wise dot product of the two sides is their cosine similarity.
    first, second = (
        tf.nn.l2_normalize(distilroberta.encode(batch[key]), axis=1)
        for key in ("sentence1", "sentence2")
    )
    dot = tf.reduce_sum(tf.multiply(first, second), axis=1)
    # Clip before acos so rounding error cannot leave the [-1, 1] domain.
    angle = tf.acos(tf.clip_by_value(dot, -1.0, 1.0))
    return 1.0 - angle / math.pi

In [None]:
# Score every pair in the STS-B validation split with each encoder.
use_results = use_sts_benchmark(batch=stsb["validation"])
distilroberta_results = roberta_sts_benchmark(batch=stsb["validation"])

In [None]:
# Collect the "label" of every validation example, in dataset order, to use
# as the reference values when scoring the predictions.
references = [
    example["label"]
    for example in stsb["validation"]
]

In [None]:
# Evaluate both encoders against the same `references`; the dict is keyed by
# the display label used for each model.
results = {
    model_name: stsb_metric.compute(predictions=scores, references=references)
    for model_name, scores in (
        ("USE", use_results),
        ("DistillRoberta", distilroberta_results),
    )
}

In [None]:
import pandas as pd

# Show the scores as a table with one column per model. Assuming each
# `compute` result is a dict of metric name -> value, the rows are the
# metric names.
pd.DataFrame(data=results)