In [None]:
from transformers import AutoTokenizer, AutoModelForQuestionAnswering
from sentence_transformers import SentenceTransformer, util
import torch

# Checkpoint shared by the extractive QA tokenizer and model
QA_CHECKPOINT = "bert-large-uncased-whole-word-masking-finetuned-squad"

# Passages the retriever chooses from
corpus = [
    "Transformers provides general-purpose architectures (BERT, GPT-2, RoBERTa, XLM, DistilBert, XLNet…) for Natural Language Understanding (NLU) and Natural Language Generation (NLG).",
    "Transformers has over 32+ pretrained models in 100+ languages.",
    "Transformers allows interoperability between TensorFlow 2.0 and PyTorch.",
]

# Questions to run through the retrieve-then-read pipeline
questions = [
    "How many pretrained models are available in Transformers?",
    "can we use transformers for sentiment analysis?",
    "Transformers provides interoperability between which frameworks?",
    "What is the size of the Transformers models?",
    "What is the capital of Czech Republic?",
]

# Reader: tokenizer and question-answering head loaded from the same checkpoint
tokenizer = AutoTokenizer.from_pretrained(QA_CHECKPOINT)
model = AutoModelForQuestionAnswering.from_pretrained(QA_CHECKPOINT)

# Retriever: Sentence-BERT encoder used for dense passage retrieval
retriever = SentenceTransformer("all-MiniLM-L6-v2")

# Embed every passage once up front so each question only needs one extra encode call
corpus_embeddings = retriever.encode(corpus, convert_to_tensor=True)

# Minimum cosine similarity for the best passage to be treated as relevant.
SIMILARITY_THRESHOLD = 0.3

# For each question: retrieve the closest passage, then extract an answer span from it.
for question in questions:
    # Embed the question with the same retriever that produced corpus_embeddings
    question_embedding = retriever.encode(question, convert_to_tensor=True)

    # Cosine similarity between the question and every passage (first row of the result)
    similarities = util.pytorch_cos_sim(question_embedding, corpus_embeddings)[0]

    # Pick the best-scoring passage; convert to plain Python values so they can be
    # used directly for list indexing, comparison and printing.
    most_relevant_idx = int(torch.argmax(similarities))
    best_similarity = similarities[most_relevant_idx].item()
    passage = corpus[most_relevant_idx]

    # Print the similarity value for debugging
    print(f"Cosine Similarity for Question: '{question}' with Passage: '{passage}' is {best_similarity}")

    # Below the threshold the retrieved passage is considered irrelevant: skip the reader.
    if best_similarity < SIMILARITY_THRESHOLD:
        print(f"Question: {question}")
        print("Sorry, I couldn't find a relevant passage.\n")
        continue

    # Tokenize the (question, passage) pair. Only the passage is truncated, so an
    # over-long passage cannot push the input past the model's maximum length.
    inputs = tokenizer(
        question,
        passage,
        add_special_tokens=True,
        truncation="only_second",
        return_tensors="pt",
    )
    input_ids = inputs["input_ids"][0].tolist()

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model(**inputs)

    # Highest-scoring start token and (exclusive) end position of the answer span.
    start_index = int(torch.argmax(outputs.start_logits))
    end_index = int(torch.argmax(outputs.end_logits)) + 1  # +1 turns the inclusive end token into a slice bound

    # An end position at or before the start yields an empty span, i.e. no usable answer.
    if end_index <= start_index:
        print(f"Question: {question}")
        print("Sorry, I couldn't find the answer.\n")
    else:
        # Map the selected token ids back to text
        answer_tokens = tokenizer.convert_ids_to_tokens(input_ids[start_index:end_index])
        answer = tokenizer.convert_tokens_to_string(answer_tokens)

        # Output the question, retrieved passage, and predicted answer
        print(f"Question: {question}")
        print(f"Retrieved Passage: {passage}")
        print(f"Answer: {answer}\n")


Some weights of the model checkpoint at bert-large-uncased-whole-word-masking-finetuned-squad were not used when initializing BertForQuestionAnswering: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Cosine Similarity for Question: 'How many pretrained models are available in Transformers?' with Passage: 'Transformers has over 32+ pretrained models in 100+ languages.' is 0.7704507112503052
Question: How many pretrained models are available in Transformers?
Retrieved Passage: Transformers has over 32+ pretrained models in 100+ languages.
Answer: over 32

Cosine Similarity for Question: 'can we use transformers for sentiment analysis?' with Passage: 'Transformers provides general-purpose architectures (BERT, GPT-2, RoBERTa, XLM, DistilBert, XLNet…) for Natural Language Understanding (NLU) and Natural Language Generation (NLG).' is 0.5176990628242493
Question: can we use transformers for sentiment analysis?
Retrieved Passage: Transformers provides general-purpose architectures (BERT, GPT-2, RoBERTa, XLM, DistilBert, XLNet…) for Natural Language Understanding (NLU) and Natural Language Generation (NLG).
Answer: transformers provides general - purpose architectures ( bert, gpt - 2, robe