In [None]:
from transformers import BertForQuestionAnswering, BertTokenizer
import torch
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np


In [None]:
# Checkpoint name of the SQuAD-fine-tuned BERT-large model; the tokenizer and
# the question-answering model are both loaded from this same name.
model_name = "bert-large-uncased-whole-word-masking-finetuned-squad"
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForQuestionAnswering.from_pretrained(model_name)

In [None]:
# Example question and the context passage the answer is extracted from.
question = "When was the first DVD released?"
text = (
    "The first DVD was released on March 24th, 1997, "
    "and quickly gained popularity worldwide."
)


In [None]:
# Tokenize the (question, context) pair. Calling the tokenizer directly is the
# supported replacement for the deprecated `encode_plus`; it returns the same
# input ids and segment ids (token_type_ids) as PyTorch tensors.
encodings = tokenizer(question, text, return_tensors="pt")
input_ids = encodings["input_ids"]
token_type_ids = encodings["token_type_ids"]

# Get tokens (string form of every id, including [CLS]/[SEP])
tokens = tokenizer.convert_ids_to_tokens(input_ids[0])

# Pass through model. This is inference only, so disable gradient tracking
# to avoid building an autograd graph.
with torch.no_grad():
    outputs = model(input_ids, token_type_ids=token_type_ids)

# Get start and end indexes (highest-scoring start / end positions)
start_idx = torch.argmax(outputs.start_logits)
end_idx = torch.argmax(outputs.end_logits)

# Get answer. convert_tokens_to_string merges "##" word pieces back into whole
# words, so `answer` matches what is printed. The two argmaxes are independent:
# if end_idx < start_idx the slice is empty and the answer is "".
answer = tokenizer.convert_tokens_to_string(tokens[start_idx:end_idx + 1])
print("Predicted Answer:", answer)


In [None]:
# Bar chart of the raw start logits per token (unnormalized scores, not
# probabilities).
start_scores = outputs.start_logits.detach().numpy().flatten()
end_scores = outputs.end_logits.detach().numpy().flatten()
# Suffix each token with its position, which keeps labels unique even when
# the same token occurs more than once.
token_labels = [f"{tok}_{pos}" for pos, tok in enumerate(tokens)]

fig, ax = plt.subplots(figsize=(15, 5))
sns.barplot(x=token_labels, y=start_scores, ax=ax)
ax.tick_params(axis="x", labelrotation=90)
ax.set_title("Start Token Scores")
ax.grid(axis="y")
plt.show()


In [None]:
# Bar chart of the raw end logits per token, labelled by token and position.
fig, ax = plt.subplots(figsize=(15, 5))
sns.barplot(x=token_labels, y=end_scores, ax=ax)
ax.tick_params(axis="x", labelrotation=90)
ax.set_title("End Token Scores")
ax.grid(axis="y")
plt.show()

In [None]:
def FAQ_bot(question, context=None):
    """Answer ``question`` by extracting a span from ``context`` with the QA model.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        question: The question to answer, as a string.
        context: Passage to extract the answer from. When omitted, the
            built-in Sunset Motors dealership description is used.

    Returns:
        The extracted answer text, or an apology message when the question is
        empty or the model does not select a valid span inside the context.
    """
    fallback = "Sorry, I couldn't find the answer. Please ask another question."
    if context is None:
        context = """Sunset Motors opened in 2005 and is located in Crestwood.
    The dealership covers ten acres and sells Ford, Toyota, Honda, Chevrolet, and BMW cars."""

    # Nothing to look up for a missing, non-string or blank question.
    if not isinstance(question, str) or not question.strip():
        return fallback

    # The tokenizer builds input ids and segment ids (token_type_ids) in one
    # call; only the context is truncated if the pair is too long for the model.
    encodings = tokenizer(
        question, context, return_tensors="pt", truncation="only_second"
    )
    input_ids = encodings["input_ids"]
    id_list = input_ids[0].tolist()
    tokens = tokenizer.convert_ids_to_tokens(id_list)
    # The first [SEP] closes the question segment; the context starts after it.
    sep_index = id_list.index(tokenizer.sep_token_id)

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model(input_ids, token_type_ids=encodings["token_type_ids"])
    start_idx = int(torch.argmax(outputs.start_logits))
    end_idx = int(torch.argmax(outputs.end_logits))

    # Accept the span only if it is ordered and lies inside the context,
    # i.e. after the question's [SEP] and before the final [SEP].
    last_idx = len(tokens) - 1
    if sep_index < start_idx <= end_idx < last_idx:
        # Merges "##" word pieces back into whole words ("crest ##wood" ->
        # "crestwood") instead of leaving a space between the pieces.
        return tokenizer.convert_tokens_to_string(tokens[start_idx:end_idx + 1])
    return fallback

In [None]:
# Run the FAQ bot on a few sample questions and print each Q/A pair.
sample_questions = (
    "Where is the dealership located?",
    "What make of cars are available?",
    "How large is the dealership?",
)
for sample_question in sample_questions:
    print("Q:", sample_question)
    print("A:", FAQ_bot(sample_question))

In [None]:
from transformers import RobertaTokenizer, RobertaForQuestionAnswering

# "roberta-base" is the pretrained language model only. Loading it into
# RobertaForQuestionAnswering leaves the span-prediction head (qa_outputs)
# randomly initialized, so its answers would be noise without fine-tuning.
# Load a SQuAD-fine-tuned RoBERTa checkpoint instead, in line with the
# SQuAD-tuned BERT and DistilBERT checkpoints used elsewhere in this notebook.
# NOTE(review): revert to "roberta-base" if the intent is to fine-tune here.
roberta_model_name = "deepset/roberta-base-squad2"
roberta_tokenizer = RobertaTokenizer.from_pretrained(roberta_model_name)
roberta_model = RobertaForQuestionAnswering.from_pretrained(roberta_model_name)


In [None]:
from transformers import DistilBertTokenizer, DistilBertForQuestionAnswering

# DistilBERT question-answering checkpoint; the tokenizer and the model are
# both loaded from the same checkpoint name.
distil_model_name = "distilbert-base-uncased-distilled-squad"
distil_tokenizer = DistilBertTokenizer.from_pretrained(distil_model_name)
distil_model = DistilBertForQuestionAnswering.from_pretrained(distil_model_name)
