In [43]:
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Load the tokenizer and the seq2seq model from one shared checkpoint name so
# the two cannot drift apart when the checkpoint is changed.
checkpoint = "google/flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

def get_score(model, tokenizer, input_ids):
    """Return the probability of "Yes" (versus "No") as the first decoded token.

    A single decoder step is run on the prompt, and the logits of the first
    token of 'Yes' and of 'No' are renormalised against each other with a
    two-way softmax. All other vocabulary entries are ignored.

    Args:
        model: Encoder-decoder model. It is called as
            ``model(input_ids, decoder_input_ids=...)``, must return an object
            with a ``.logits`` tensor indexed as (batch, step, vocab), and must
            expose ``.device``.
        tokenizer: Callable used as ``tokenizer(text, return_tensors='pt')``
            whose result has an ``.input_ids`` tensor.
        input_ids: Tensor of prompt token ids; its first dimension is the
            batch size.

    Returns:
        A 1-D tensor with one value per row of ``input_ids``: the softmax
        weight of 'Yes' over {'Yes', 'No'}. The forward pass runs under
        ``torch.no_grad()``, so the result carries no autograd graph.
    """
    # Only the first sub-token of each answer word serves as its label id.
    pos_id = int(tokenizer('Yes', return_tensors='pt').input_ids[0, 0])
    neg_id = int(tokenizer('No', return_tensors='pt').input_ids[0, 0])

    device = model.device

    # Prefer the start token the model declares; fall back to 0, the value
    # this function previously hard-coded.
    start_id = getattr(getattr(model, 'config', None), 'decoder_start_token_id', None)
    if start_id is None:
        start_id = 0
    decoder_input_ids = torch.full(
        (input_ids.size(0), 1), start_id, dtype=torch.long, device=device
    )

    # Scoring only: skip autograd bookkeeping, and keep the prompt on the same
    # device as the model and the decoder ids.
    with torch.no_grad():
        logits = model(input_ids.to(device), decoder_input_ids=decoder_input_ids).logits

    posneg_logits = torch.stack([logits[:, 0, pos_id], logits[:, 0, neg_id]], dim=1)
    return torch.nn.functional.softmax(posneg_logits, dim=1)[:, 0]

# Each premise is used twice: once with a hypothesis labelled True and once
# with a hypothesis labelled False.
climate_premise = "In his speech, the President emphasized the need for bipartisan cooperation to pass significant climate change legislation."
jobs_premise = "The latest economic report shows a significant increase in job growth, especially in the technology and healthcare sectors."
court_premise = "The Supreme Court ruled that the new voting regulations are unconstitutional, citing violations of the Fourteenth Amendment."

# (premise, hypothesis, expected label) triples; the True cases come first.
entailment_pairs = [
    (climate_premise, "The President highlighted the importance of working together across party lines to address climate change.", True),
    (jobs_premise, "Job growth has risen notably, particularly in technology and healthcare according to the recent economic report.", True),
    (court_premise, "The Supreme Court found the new voting laws unconstitutional due to conflicts with the Fourteenth Amendment.", True),
    (climate_premise, "The President announced new healthcare policies aimed at reducing drug prices.", False),
    (jobs_premise, "The economic report indicates a downturn in the technology and healthcare sectors.", False),
    (court_premise, "The Supreme Court upheld the new voting regulations as constitutional.", False),
]

# Difficult question-answer pairs.
# The three questions are shared between answers labelled True and False.
climate_question = "What did the President emphasize in his speech regarding climate change legislation?"
sectors_question = "Which sectors showed significant job growth in the latest economic report?"
ruling_question = "What was the Supreme Court's ruling on the new voting regulations?"

# (question, answer, expected label) triples.
qa_pairs = [
    (climate_question, "The President emphasized the need for bipartisan cooperation to pass significant climate change legislation.", True),
    (climate_question, "He emphasized it was not so good for you.", False),
    (sectors_question, "The technology and healthcare sectors showed significant job growth in the latest economic report.", True),
    (ruling_question, "The Supreme Court ruled that the new voting regulations are unconstitutional, citing violations of the Fourteenth Amendment.", True),
    (climate_question, "The President announced new healthcare policies aimed at reducing drug prices.", False),
    (sectors_question, "The economic report indicates a downturn in the technology and healthcare sectors.", False),
    (ruling_question, "The Supreme Court upheld the new voting regulations as constitutional.", False),
]


# Generate entailment predictions and scores.
# Every probe is a (prompt template, report line) pair. The last report line
# keeps the trailing " \n" that separates one example from the next.
entailment_probes = [
    (
        "Premise: {premise}\nHypothesis: {hypothesis}\nDoes the hypothesis follow from the premise?\nAnswer:",
        'Probability that the hypothesis follows from the premise: {score}',
    ),
    (
        "Premise: {premise}\nHypothesis: {hypothesis}\nIs the hypothesis a logical conclusion based on the premise?\nAnswer:",
        'Probability that the hypothesis is a logical conclusion based on the premise: {score}',
    ),
    (
        "Sentence1: {premise}\nSentence2: {hypothesis}\nDo these sentences discuss the same event?\nAnswer:",
        'Probability that sentence2 discusses the same event as sentence1: {score}',
    ),
    (
        "Premise: {premise}\nHypothesis: {hypothesis}\nAre the premise and hypothesis discussing the same topic?\nAnswer:",
        'Probability the premise and hypothesis are about the same topic: {score} \n',
    ),
]

for premise, hypothesis, truth in entailment_pairs:
    print(f"Premise: {premise}\nHypothesis: {hypothesis}\nTrue Entailment: {truth} \n")

    # Ask the same pair every question in turn and report the "Yes" probability.
    for prompt_template, report_line in entailment_probes:
        prompt = prompt_template.format(premise=premise, hypothesis=hypothesis)
        input_ids = tokenizer(prompt, return_tensors='pt').input_ids
        scores = get_score(model, tokenizer, input_ids)
        print(report_line.format(score=scores[0].item()))

# Generate scores for question-answer pairs.
# Two probes per pair: is it a valid question-answer pair at all, and does the
# answer correctly respond to the question. Both report lines end in " \n".
qa_probes = [
    (
        "Question: {question}\nAnswer: {answer}\nIs this a valid question-answer pair?\nAnswer:",
        'Probability that this is a valid question-answer pair: {score} \n',
    ),
    (
        "Question: {question}\nAnswer: {answer}\nDoes this answer correctly respond to the question?\nAnswer:",
        'Probability that this answer correctly responds to the question: {score} \n',
    ),
]

print("Complex and Subjective Question-Answer Pairs:")
for question, answer, truth in qa_pairs:
    print(f"Question: {question}\nAnswer: {answer}\nTrue QA Pair: {truth}")

    for prompt_template, report_line in qa_probes:
        prompt = prompt_template.format(question=question, answer=answer)
        input_ids = tokenizer(prompt, return_tensors='pt').input_ids
        scores_qa = get_score(model, tokenizer, input_ids)
        print(report_line.format(score=scores_qa[0].item()))


Premise: In his speech, the President emphasized the need for bipartisan cooperation to pass significant climate change legislation.
Hypothesis: The President highlighted the importance of working together across party lines to address climate change.
True Entailment: True 

Probability that the hypothesis follows from the premise: 0.9891353845596313
Probability that the hypothesis is a logical conclusion based on the premise: 0.9884614944458008
Probability that sentence2 discusses the same event as sentence1: 0.9778763055801392
Probability the premise and hypothesis are about the same topic: 0.9860095977783203 

Premise: The latest economic report shows a significant increase in job growth, especially in the technology and healthcare sectors.
Hypothesis: Job growth has risen notably, particularly in technology and healthcare according to the recent economic report.
True Entailment: True 

Probability that the hypothesis follows from the premise: 0.9973511695861816
Probability that the

In [41]:
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Load the tokenizer and the seq2seq model from one shared checkpoint name so
# the two cannot drift apart when the checkpoint is changed.
checkpoint = "google/flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

def get_score(model, tokenizer, input_ids):
    """Return the probability of "Yes" (versus "No") as the first decoded token.

    A single decoder step is run on the prompt, and the logits of the first
    token of 'Yes' and of 'No' are renormalised against each other with a
    two-way softmax. All other vocabulary entries are ignored.

    Args:
        model: Encoder-decoder model. It is called as
            ``model(input_ids, decoder_input_ids=...)``, must return an object
            with a ``.logits`` tensor indexed as (batch, step, vocab), and must
            expose ``.device``.
        tokenizer: Callable used as ``tokenizer(text, return_tensors='pt')``
            whose result has an ``.input_ids`` tensor.
        input_ids: Tensor of prompt token ids; its first dimension is the
            batch size.

    Returns:
        A 1-D tensor with one value per row of ``input_ids``: the softmax
        weight of 'Yes' over {'Yes', 'No'}. The forward pass runs under
        ``torch.no_grad()``, so the result carries no autograd graph.
    """
    # Only the first sub-token of each answer word serves as its label id.
    pos_id = int(tokenizer('Yes', return_tensors='pt').input_ids[0, 0])
    neg_id = int(tokenizer('No', return_tensors='pt').input_ids[0, 0])

    device = model.device

    # Prefer the start token the model declares; fall back to 0, the value
    # this function previously hard-coded.
    start_id = getattr(getattr(model, 'config', None), 'decoder_start_token_id', None)
    if start_id is None:
        start_id = 0
    decoder_input_ids = torch.full(
        (input_ids.size(0), 1), start_id, dtype=torch.long, device=device
    )

    # Scoring only: skip autograd bookkeeping, and keep the prompt on the same
    # device as the model and the decoder ids.
    with torch.no_grad():
        logits = model(input_ids.to(device), decoder_input_ids=decoder_input_ids).logits

    posneg_logits = torch.stack([logits[:, 0, pos_id], logits[:, 0, neg_id]], dim=1)
    return torch.nn.functional.softmax(posneg_logits, dim=1)[:, 0]

# Complex and subjective entailment pairs.
# Each premise is used twice: once with a hypothesis labelled True and once
# with a hypothesis labelled False.
policy_premise = "The new economic policy introduced by the government aims to stabilize the market. Experts believe it will have long-term benefits."
speech_premise = "The actor gave an emotional speech at the awards ceremony, thanking his family and fans for their support."
study_premise = "The new study suggests a strong correlation between regular exercise and mental health improvements. It also points out the benefits of outdoor activities."

# (premise, hypothesis, expected label) triples; the True cases come first.
entailment_pairs = [
    (policy_premise, "Experts think the government's new economic policy will benefit the market in the long run.", True),
    (speech_premise, "During the awards ceremony, the actor expressed gratitude towards his family and fans.", True),
    (study_premise, "The study indicates that regular exercise, especially outdoor activities, improves mental health.", True),
    (policy_premise, "The government's economic policy is expected to have short-term disadvantages.", False),
    (speech_premise, "The actor criticized his family and fans during the awards ceremony.", False),
    (study_premise, "The study concludes that indoor activities have no impact on mental health.", False),
]

# Complex and subjective question-answer pairs.
# Each question appears once with an answer labelled True and once with an
# answer labelled False.
policy_question = "What is the expected impact of the government's new economic policy?"
thanks_question = "Who did the actor thank in his speech at the awards ceremony?"
exercise_question = "What does the new study suggest about regular exercise?"

# (question, answer, expected label) triples; the True cases come first.
qa_pairs = [
    (policy_question, "Experts believe it will have long-term benefits.", True),
    (thanks_question, "He thanked his family and fans for their support.", True),
    (exercise_question, "It suggests a strong correlation between regular exercise and mental health improvements.", True),
    (policy_question, "Experts believe it will have short-term disadvantages.", False),
    (thanks_question, "He criticized his family and fans for their support.", False),
    (exercise_question, "It concludes that indoor activities have no impact on mental health.", False),
]

# Generate scores for entailment pairs.
# Every probe is a (prompt template, report line) pair. The last report line
# keeps the trailing " \n" that separates one example from the next.
entailment_probes = [
    (
        "Premise: {premise}\nHypothesis: {hypothesis}\nDoes the hypothesis follow from the premise?\nAnswer:",
        'Probability that the hypothesis follows from the premise: {score}',
    ),
    (
        "Premise: {premise}\nHypothesis: {hypothesis}\nIs the hypothesis a logical conclusion based on the premise?\nAnswer:",
        'Probability that the hypothesis is a logical conclusion based on the premise: {score}',
    ),
    (
        "Sentence1: {premise}\nSentence2: {hypothesis}\nDo these sentences discuss the same event?\nAnswer:",
        'Probability that sentence2 discusses the same event as sentence1: {score}',
    ),
    (
        "Premise: {premise}\nHypothesis: {hypothesis}\nAre the premise and hypothesis discussing the same topic?\nAnswer:",
        'Probability the premise and hypothesis are about the same topic: {score} \n',
    ),
]

print("Complex and Subjective Entailment Pairs:")
for premise, hypothesis, truth in entailment_pairs:
    print(f"Premise: {premise}\nHypothesis: {hypothesis}\nTrue Entailment: {truth} \n")

    # Ask the same pair every question in turn and report the "Yes" probability.
    for prompt_template, report_line in entailment_probes:
        prompt = prompt_template.format(premise=premise, hypothesis=hypothesis)
        input_ids = tokenizer(prompt, return_tensors='pt').input_ids
        scores = get_score(model, tokenizer, input_ids)
        print(report_line.format(score=scores[0].item()))

# Generate scores for question-answer pairs.
# Two probes per pair: is it a valid question-answer pair at all, and does the
# answer correctly respond to the question. Both report lines end in " \n".
qa_probes = [
    (
        "Question: {question}\nAnswer: {answer}\nIs this a valid question-answer pair?\nAnswer:",
        'Probability that this is a valid question-answer pair: {score} \n',
    ),
    (
        "Question: {question}\nAnswer: {answer}\nDoes this answer correctly respond to the question?\nAnswer:",
        'Probability that this answer correctly responds to the question: {score} \n',
    ),
]

print("Complex and Subjective Question-Answer Pairs:")
for question, answer, truth in qa_pairs:
    print(f"Question: {question}\nAnswer: {answer}\nTrue QA Pair: {truth}")

    for prompt_template, report_line in qa_probes:
        prompt = prompt_template.format(question=question, answer=answer)
        input_ids = tokenizer(prompt, return_tensors='pt').input_ids
        scores_qa = get_score(model, tokenizer, input_ids)
        print(report_line.format(score=scores_qa[0].item()))


Complex and Subjective Entailment Pairs:
Premise: The new economic policy introduced by the government aims to stabilize the market. Experts believe it will have long-term benefits.
Hypothesis: Experts think the government's new economic policy will benefit the market in the long run.
True Entailment: True 

Probability that the hypothesis follows from the premise: 0.9904779195785522
Probability that the hypothesis is a logical conclusion based on the premise: 0.9885774850845337
Probability that sentence2 discusses the same event as sentence1: 0.9891749620437622
Probability the premise and hypothesis are about the same topic: 0.9898779392242432 

Premise: The actor gave an emotional speech at the awards ceremony, thanking his family and fans for their support.
Hypothesis: During the awards ceremony, the actor expressed gratitude towards his family and fans.
True Entailment: True 

Probability that the hypothesis follows from the premise: 0.9933193922042847
Probability that the hypothes