# Deps

In [None]:
!pip install checklist-plus --upgrade



In [None]:
# Download spaCy's small English pipeline (en_core_web_sm).
# NOTE(review): presumably needed by checklist-plus; `!python` may not be the
# kernel's interpreter -- confirm it targets the same environment.
!python -m spacy download en_core_web_sm


Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m13.0 MB/s[0m  [33m0:00:01[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


# Load our Example Embedding Model

In [None]:
# prompt: import bert base embedding model from hugging face

from transformers import BertModel, BertTokenizer
import torch

# Load pre-trained model and tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')

# Example usage: encode one sentence and run a single forward pass.
text = "This is a sample sentence."
encoded_input = tokenizer(text, return_tensors='pt')
# Inference only: skip autograd bookkeeping, consistent with get_embedding.
with torch.no_grad():
    output = model(**encoded_input)

# The output object exposes `last_hidden_state` and `pooler_output` as attributes
last_hidden_states = output.last_hidden_state
pooler_output = output.pooler_output

print("Last hidden states shape:", last_hidden_states.shape)
print("Pooler output shape:", pooler_output.shape)


2025-09-12 04:19:09.952817: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-09-12 04:19:09.981678: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-09-12 04:19:09.991349: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-09-12 04:19:10.021137: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'



AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

Last hidden states shape: torch.Size([1, 8, 768])
Pooler output shape: torch.Size([1, 768])


In [None]:
# prompt: now compute cosine similarity between three texts (1 relevant pair and 1 irrelevant text)

from sklearn.metrics.pairwise import cosine_similarity
from transformers import BertModel, BertTokenizer
import torch

# Load pre-trained model and tokenizer
# NOTE(review): repeats the from_pretrained calls of the earlier example cell,
# rebinding the global `tokenizer` and `model` that get_embedding reads.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')

def get_embedding(text):
  """Embed `text` with the global `tokenizer` and `model`.

  The text is tokenized (with padding and truncation enabled), passed through
  the model without gradient tracking, and the hidden state at sequence
  position 0 (the CLS token) is used as the sentence embedding.

  Args:
    text: input passed straight to the tokenizer.

  Returns:
    NumPy array holding the position-0 hidden state for each input row
    (presumably shape (1, hidden_size) for a single string -- TODO confirm).
  """
  tokens = tokenizer(text, return_tensors='pt', padding=True, truncation=True)
  with torch.no_grad():
    hidden_states = model(**tokens).last_hidden_state
  # Keep every batch row, take only the first token, keep all hidden features
  cls_vectors = hidden_states[:, 0, :]
  return cls_vectors.numpy()

# Two sentences on the same topic plus one unrelated sentence
text1, text2, text3 = (
  "The weather is nice today.",
  "It's a beautiful day outside.",
  "The stock market crashed yesterday.",
)

# Embed the three texts, in order
embedding1, embedding2, embedding3 = map(get_embedding, (text1, text2, text3))

# Pairwise cosine similarity; the result is 2-D, so [0, 0] picks the single score
similarity_1_2 = cosine_similarity(embedding1, embedding2)[0, 0]
similarity_1_3 = cosine_similarity(embedding1, embedding3)[0, 0]
similarity_2_3 = cosine_similarity(embedding2, embedding3)[0, 0]

print(f"Cosine similarity between text1 and text2: {similarity_1_2:.4f}")
print(f"Cosine similarity between text1 and text3: {similarity_1_3:.4f}")
print(f"Cosine similarity between text2 and text3: {similarity_2_3:.4f}")

Cosine similarity between text1 and text2: 0.9522
Cosine similarity between text1 and text3: 0.8442
Cosine similarity between text2 and text3: 0.8108


# Set OPENAI API KEY

In [None]:
import os
from getpass import getpass

# Never hardcode a key in the notebook: reuse one already present in the
# environment, otherwise prompt for it without echoing it into the output.
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass('OpenAI API key: ')


In [None]:
import checklist_plus
from checklist_plus.editor import Editor
from checklist_plus.perturb import LLMPerturb

In [None]:
# LLM-backed editor, used below for template filling and paraphrasing
llm_editor = Editor(use_llm=True, model_name='gpt-4o-mini')

## Generate Examples data

In [None]:
# Fill the {mask} slot of the template, requesting 100 completions
ret = llm_editor.template(
    'The football game was very good, I especially liked {mask}',
    context="different experiences in football games",
    remove_duplicates=True,
    n_completions=100,
)
original_texts = ret.data

In [None]:
# De-duplicate while keeping generation order. list(set(...)) would reorder the
# strings differently on each kernel start (string hashing is randomized),
# making the previews and the example order irreproducible.
original_texts = list(dict.fromkeys(original_texts))
print(original_texts[:5])

["The football game was very good, I especially liked team's performance", "The football game was very good, I especially liked player's positioning", "The football game was very good, I especially liked fan's chants", "The football game was very good, I especially liked opponent's challenge", "The football game was very good, I especially liked defender's clearance"]


In [None]:
# Number of distinct generated sentences
len(original_texts)

100

## Paraphrase Example Data

In [None]:
# Request one paraphrase per original sentence, with a similar length.
# Note: this rebinds `ret`, which previously held the template result.
ret = llm_editor.paraphrase_llm(original_texts, n_paraphrases=1, length_preference='similar')

In [None]:
# Pull the generated paraphrases out of the result object
paraphrased_texts = ret.data

In [None]:
# The lists are zipped by position when the test is built, so they must line up one-to-one
assert len(paraphrased_texts) == len(original_texts)

In [None]:
# Preview the first five paraphrases
paraphrased_texts[:5]

['The soccer match was excellent, and I particularly enjoyed how well the team played.',
 'The soccer match was quite impressive; I particularly appreciated how well the players were positioned.',
 "The match was excellent, and I particularly enjoyed the supporters' chants.",
 'The match was excellent, and I particularly enjoyed the challenge posed by the opposing team.',
 'The soccer match was quite impressive, and I particularly appreciated the way the defender cleared the ball.']

## Negate Example Data

In [None]:
# Perturbation helper used to produce the LLM-generated negations
perturb = LLMPerturb()

In [None]:
# Request one negated variation per original sentence (rebinds `ret` again)
ret = perturb.add_negation_llm(original_texts, n_variations=1)



In [None]:
# Keep the first variation of every entry
# (presumably each entry of `ret` holds the variations for one sentence -- confirm).
negated_texts = [variations[0] for variations in ret]
# Guard the positional pairing used by zip() when the test is built, mirroring
# the check done for the paraphrases; zip() would otherwise truncate silently.
assert len(negated_texts) == len(original_texts), "negated_texts and original_texts are misaligned"

In [None]:
# Preview the first five negated sentences
negated_texts[:5]

["The football game was not good; I did not like the team's performance at all.",
 "The football game was not very good, and I did not particularly like the player's positioning.",
 "The football game was not very good, and I did not especially like the fans' chants.",
 "The football game was not very good; I did not especially like the opponent's challenge.",
 "The football game was not very good; I did not especially like the defender's clearance."]

# Perform Simple MFT test

In [None]:
from checklist_plus.test_types import MFT, INV, DIR
from checklist_plus.expect import Expect

In [None]:
# expect original text is more similar to the paraphrased one
def similar_paraphrase(x, pred, conf, label=None, meta=None):
    """Expectation: the prediction must select the paraphrase.

    `pred` is compared against index 0, the position of the paraphrase
    similarity in the [paraphrase, negation] pair produced by
    get_cosine_similarities. The remaining arguments are accepted but unused.

    Returns:
        The result of `pred == 0`.
    """
    paraphrase_index = 0
    return pred == paraphrase_index
# NOTE(review): Expect.single presumably applies the function to each example
# individually -- confirm against the checklist_plus documentation.
expect_fn = Expect.single(similar_paraphrase)

In [None]:
# Each test case is an (original, paraphrase, negation) triple
test = MFT(
    list(zip(original_texts, paraphrased_texts, negated_texts)),
    expect=expect_fn,
    name='Simple negation',
    capability='Negation',
    description='Very simple negations.',
)

In [None]:
import numpy as np
def get_cosine_similarities(data):
  """Score (original, paraphrase, negation) triples with the embedding model.

  Args:
    data: iterable of (original, paraphrased, negated) text triples.

  Returns:
    A tuple (predictions, similarities):
      predictions: 1-D integer array; 0 when the paraphrase is at least as
        similar to the original as the negation, 1 otherwise.
      similarities: array with one row per triple,
        [sim(original, paraphrase), sim(original, negation)].
    For empty `data`, both arrays are empty (shapes (0,) and (0, 2)).
  """
  rows = []
  for original, paraphrased, negated in data:
    # Embed the original once and compare both candidates against it
    original_embedding = get_embedding(original)
    paraphrased_embedding = get_embedding(paraphrased)
    negated_embedding = get_embedding(negated)

    sim_paraphrased = cosine_similarity(original_embedding, paraphrased_embedding)[0][0]
    sim_negated = cosine_similarity(original_embedding, negated_embedding)[0][0]
    rows.append([sim_paraphrased, sim_negated])

  if not rows:
    # np.argmax raises ValueError on an empty array; return empty results instead
    return np.empty(0, dtype=np.intp), np.empty((0, 2), dtype=np.float32)

  similarities = np.array(rows)
  return np.argmax(similarities, axis=-1), similarities

# Smoke-test the scoring function on the first five triples only
cosine_sims = get_cosine_similarities(list(zip(original_texts, paraphrased_texts, negated_texts))[:5])

In [None]:
# cosine_sims is the (predictions, similarities) pair, so [:5] keeps both elements
print(cosine_sims[:5])


(array([1, 1, 1, 1, 1]), array([[0.9101763 , 0.9135145 ],
       [0.90789115, 0.91039443],
       [0.90674376, 0.9305458 ],
       [0.8987609 , 0.933882  ],
       [0.91462123, 0.94341195]], dtype=float32))


In [None]:
# Run the test, using get_cosine_similarities as the prediction function
test.run(get_cosine_similarities)

Predicting 100 examples


In [None]:
# bert-base-uncased is not sensitive to negations
test.summary()

Test cases:      100
Fails (rate):    87 (87.0%)

Example fails:
0.9 ("The football game was very good, I especially liked player's fitness", "The soccer match was excellent; I particularly appreciated the athletes' conditioning.", "The football game was not very good; I did not especially like the player's fitness.")
----
0.9 ("The football game was very good, I especially liked midfielder's vision", "The soccer match was excellent; I particularly appreciated the midfielder's insight.", "The football game was not very good; I did not especially like the midfielder's vision.")
----
0.9 ("The football game was very good, I especially liked team's unity", "The soccer match was excellent, and I particularly appreciated the team's camaraderie.", "The football game was not very good; I did not especially like the team's unity.")
----
