In [None]:
! pip install transformers safetensors sentencepiece

Looking in indexes: http://mirrors.aliyun.com/pypi/simple


In [6]:
from transformers import T5ForConditionalGeneration, T5Tokenizer
from safetensors.torch import load_model
import torch

# base
# model_path = 'base/checkpoint-6111'
# add explain
# model_path = 't5-QCE-A/checkpoint-630'
# add explain freeze most parameterer
# model_path = 't5-QCE-A-freeze/checkpoint-630'
# two-stage 
# model_path = 't5-two-stage-freeze-2/checkpoint-630'
tokenizer = T5Tokenizer.from_pretrained(model_path)

model = T5ForConditionalGeneration.from_pretrained(model_path)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


T5ForConditionalGeneration(
  (shared): Embedding(32128, 512)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 512)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
              (relative_attention_bias): Embedding(32, 8)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseActDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Drop

In [7]:
from transformers.data.processors.squad import SquadV2Processor

# this processor loads the SQuAD2.0 dev set examples
processor = SquadV2Processor()
examples = processor.get_dev_examples("./", filename="../MISC/Dataset/dev-v2.0.json")
print(len(examples))

# generate some maps to help us identify examples of interest
qid_to_example_index = {example.qas_id: i for i, example in enumerate(examples)}
qid_to_has_answer = {example.qas_id: bool(example.answers) for example in examples}
answer_qids = [qas_id for qas_id, has_answer in qid_to_has_answer.items() if has_answer]
no_answer_qids = [qas_id for qas_id, has_answer in qid_to_has_answer.items() if not has_answer]


100%|██████████| 35/35 [00:04<00:00,  8.02it/s]


11873


In [8]:
def get_prediction(qid):
    # given a question id (qas_id or qid), load the example, get the model outputs and generate an answer
    question = examples[qid_to_example_index[qid]].question_text
    context = examples[qid_to_example_index[qid]].context_text

    input_text = f"question: {question} context: {context}"
    input_ids = tokenizer(input_text, return_tensors='pt').input_ids.to(device)
    # inputs_ids = tokenizer.encode(question, context, return_tensors='pt')

    outputs = model.generate(input_ids)
    predicted_answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
    
    return predicted_answer

In [9]:

# these functions are heavily influenced by the HF squad_metrics.py script
def normalize_text(s):
    """Removing articles and punctuation, and standardizing whitespace are all typical text processing steps."""
    import string, re

    def remove_articles(text):
        regex = re.compile(r"\b(a|an|the)\b", re.UNICODE)
        return re.sub(regex, " ", text)

    def white_space_fix(text):
        return " ".join(text.split())

    def remove_punc(text):
        exclude = set(string.punctuation)
        return "".join(ch for ch in text if ch not in exclude)

    def lower(text):
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s))))

def compute_exact_match(prediction, truth):
    return int(normalize_text(prediction) == normalize_text(truth))

def compute_f1(prediction, truth):
    pred_tokens = normalize_text(prediction).split()
    truth_tokens = normalize_text(truth).split()
    
    # if either the prediction or the truth is no-answer then f1 = 1 if they agree, 0 otherwise
    if len(pred_tokens) == 0 or len(truth_tokens) == 0:
        return int(pred_tokens == truth_tokens)
    
    common_tokens = set(pred_tokens) & set(truth_tokens)
    
    # if there are no common tokens then f1 = 0
    if len(common_tokens) == 0:
        return 0
    
    prec = len(common_tokens) / len(pred_tokens)
    rec = len(common_tokens) / len(truth_tokens)
    
    return 2 * (prec * rec) / (prec + rec)

def get_gold_answers(example):
    """helper function that retrieves all possible true answers from a squad2.0 example"""
    
    gold_answers = [answer["text"] for answer in example.answers if answer["text"]]

    # if gold_answers doesn't exist it's because this is a negative example - 
    # the only correct answer is an empty string
    if not gold_answers:
        gold_answers = [""]
        
    return gold_answers

In [10]:
from tqdm import tqdm

def calculate_metrics(qids):
    total_em = 0
    total_f1 = 0
    
    for qid in tqdm(qids, desc="Evaluating"):
        predicted_answer = get_prediction(qid)
        gold_answers = examples[qid_to_example_index[qid]].answers
        
        em_score = max([int(predicted_answer == gold_answer["text"]) for gold_answer in gold_answers]) if gold_answers else int(predicted_answer == "")
        f1_score_temp = max([compute_f1(gold_answer["text"], predicted_answer) for gold_answer in gold_answers]) if gold_answers else 0
        
        total_em += em_score
        total_f1 += f1_score_temp
    
    avg_em = total_em / len(qids)
    avg_f1 = total_f1 / len(qids)
    
    return avg_em, avg_f1

# Calculate EM and F1 scores for samples answering the question
em_with_answers, f1_with_answers = calculate_metrics(answer_qids)
print(f"Exact Match (with answers): {em_with_answers:.4f}")
print(f"F1 Score (with answers): {f1_with_answers:.4f}")





Evaluating:  27%|██▋       | 1588/5928 [00:41<02:35, 27.92it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (640 > 512). Running this sequence through the model will result in indexing errors
Evaluating: 100%|██████████| 5928/5928 [02:49<00:00, 34.88it/s]

Exact Match (with answers): 0.7213
F1 Score (with answers): 0.8351





In [11]:
import torch


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Define problem and context
context = "The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a region in France. They were descended from Norse (\"Norman\" comes from \"Norseman\") raiders and pirates from Denmark, Iceland and Norway who, under their leader Rollo, agreed to swear fealty to King Charles III of West Francia. Through generations of assimilation and mixing with the native Frankish and Roman-Gaulish populations, their descendants would gradually merge with the Carolingian-based cultures of West Francia. The distinct cultural and ethnic identity of the Normans emerged initially in the first half of the 10th century, and it continued to evolve over the succeeding centuries."

question = "What century did the Normans first gain their separate identity?"
truth = "10th"
# Encode the input text using a tokenizer and make sure the input data is also on the correct device
input_text = f"question: {question} context: {context}"
input_ids = tokenizer(input_text, return_tensors='pt').input_ids.to(device)

# Generate answer
outputs = model.generate(input_ids, max_length=40)

# Convert the output token IDs to text
answer = tokenizer.decode(outputs[0], skip_special_tokens=True)

print("Generated Answer:", answer)
print("truth:", truth)

Generated Answer: 10th
truth: 10th


In [12]:
question = "Who was the Norse leader?"
truth = "Rollo"

input_text = f"question: {question} context: {context}"
input_ids = tokenizer(input_text, return_tensors='pt').input_ids.to(device)


outputs = model.generate(input_ids, max_length=40)


answer = tokenizer.decode(outputs[0], skip_special_tokens=True)

print("Generated Answer:", answer)
print("truth:", truth)

Generated Answer: Rollo
truth: Rollo
