In [2]:
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer

In [5]:
# valhalla/t5-small-qa-qg-hl

# Load the tokenizer and model
model_name = "valhalla/t5-small-qa-qg-hl"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

# Define the input text
question = "Who was the leader of the expedition?"
context = "The expedition was led by Captain James Cook, a British explorer. provide answer in one line"

# The model expects a specific input format for question answering
input_text = f"question: {question} context: {context}"

# Tokenize the input text
input_ids = tokenizer(input_text, return_tensors="pt").input_ids

# Generate the answer
# The `max_length` and `num_beams` parameters can be tuned for better results.
outputs = model.generate(input_ids, max_length=50, num_beams=4, early_stopping=True)

# Decode the generated tokens back into a string
answer = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(f"Input: {input_text}")
print(f"Generated Answer: {answer}")
# Expected output: 'Titan'

# You can use a similar approach for question generation, but the input format is different.
# It requires highlighting the answer span with '<hl>' tokens.

Input: question: Who was the leader of the expedition? context: The expedition was led by Captain James Cook, a British explorer. provide answer in one line
Generated Answer: Captain James Cook


In [8]:
# iarfmoose/t5-base-question-generator
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer

# Load model and tokenizer
model_name = "iarfmoose/t5-base-question-generator"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

# Define the answer and context
answer = "The Eiffel Tower"
context = "The Eiffel Tower is a wrought-iron lattice tower on the Champ de Mars in Paris, France."

# The model requires a specific input format
input_text = f"<answer> {answer} <context> {context}"

# Tokenize the input text
input_ids = tokenizer.encode(input_text, return_tensors="pt")

# Generate the question.
# The `generate` method is key for text-to-text models.
output_ids = model.generate(
    input_ids, 
    max_length=50, 
    num_beams=4,
    early_stopping=True
)

# Decode the generated tokens
generated_question = tokenizer.decode(output_ids[0], skip_special_tokens=True)

print(f"Input: {input_text}")
print(f"Generated Question: {generated_question}")

Input: <answer> The Eiffel Tower <context> The Eiffel Tower is a wrought-iron lattice tower on the Champ de Mars in Paris, France.
Generated Question: The Eiffel Tower is a wrought-iron lattice tower on the Champ de Mars in Paris, France.


In [9]:
# deepset/roberta-base-squad2
import torch
from transformers import AutoModelForQuestionAnswering, AutoTokenizer

# Load model and tokenizer
model_name = "deepset/roberta-base-squad2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForQuestionAnswering.from_pretrained(model_name)

# Define the question and context
question = "Who was the leader of the expedition?"
context = "The expedition was led by Captain James Cook, a British explorer."

# Tokenize the input. We combine the question and context with special tokens.
inputs = tokenizer(question, context, return_tensors="pt")

# Get the model's output
with torch.no_grad():
    outputs = model(**inputs)

# The model's output contains logits for the start and end of the answer span
answer_start_scores = outputs.start_logits
answer_end_scores = outputs.end_logits

# Find the index with the highest score for both start and end
answer_start = torch.argmax(answer_start_scores)
answer_end = torch.argmax(answer_end_scores) + 1  # Add 1 to include the end token

# Get the tokens for the answer span
input_ids = inputs["input_ids"].squeeze(0)
answer_tokens = input_ids[answer_start:answer_end]

# Decode the tokens back to a string
answer = tokenizer.decode(answer_tokens)

print(f"Question: {question}")
print(f"Context: {context}")
print(f"Predicted Answer: {answer}")

tokenizer_config.json:   0%|          | 0.00/79.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/772 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/496M [00:00<?, ?B/s]

Question: Who was the leader of the expedition?
Context: The expedition was led by Captain James Cook, a British explorer.
Predicted Answer:  Captain James Cook


In [10]:
# google/flan-t5-small / flan-t5-base
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer

# Load model and tokenizer
model_name = "google/flan-t5-small"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

# Define the task and input text. The instruction is part of the input.
input_text = "Summarize the following: The sun is the star at the center of the Solar System. It is a nearly perfect ball of hot plasma, heated to incandescence by nuclear fusion reactions in its core."

# Tokenize the input
input_ids = tokenizer(input_text, return_tensors="pt").input_ids

# Generate the output
output_ids = model.generate(input_ids)

# Decode the generated text
summary = tokenizer.decode(output_ids[0], skip_special_tokens=True)

print(f"Input: {input_text}")
print(f"Generated Summary: {summary}")

tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/308M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Input: Summarize the following: The sun is the star at the center of the Solar System. It is a nearly perfect ball of hot plasma, heated to incandescence by nuclear fusion reactions in its core.
Generated Summary: Find the sun. Find the helium. Find the helium. Find the heli


In [11]:
# distilbert-base-uncased-distilled-squad
import torch
from transformers import DistilBertForQuestionAnswering, DistilBertTokenizer

# Load model and tokenizer
model_name = "distilbert-base-uncased-distilled-squad"
tokenizer = DistilBertTokenizer.from_pretrained(model_name)
model = DistilBertForQuestionAnswering.from_pretrained(model_name)

# Define the question and context
question = "Where does the name 'squad' come from?"
context = "The Stanford Question Answering Dataset (SQuAD) is a reading comprehension dataset, consisting of questions posed by crowdworkers on a set of Wikipedia articles."

# Tokenize the inputs. DistilBERT also uses special tokens to separate question and context.
inputs = tokenizer(question, context, return_tensors="pt")

# Get the model's output
with torch.no_grad():
    outputs = model(**inputs)

# The model returns start and end logits
answer_start_scores = outputs.start_logits
answer_end_scores = outputs.end_logits

# Find the indices with the highest scores
answer_start = torch.argmax(answer_start_scores)
answer_end = torch.argmax(answer_end_scores) + 1  # Add 1 to include the end token

# Extract the answer tokens and decode
input_ids = inputs["input_ids"].squeeze(0)
answer_tokens = input_ids[answer_start:answer_end]
answer = tokenizer.decode(answer_tokens)

print(f"Question: {question}")
print(f"Context: {context}")
print(f"Predicted Answer: {answer}")

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/451 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/265M [00:00<?, ?B/s]

Question: Where does the name 'squad' come from?
Context: The Stanford Question Answering Dataset (SQuAD) is a reading comprehension dataset, consisting of questions posed by crowdworkers on a set of Wikipedia articles.
Predicted Answer: stanford question answering dataset
