In [1]:
from datasets import load_dataset
from transformers import T5ForConditionalGeneration, T5TokenizerFast
import evaluate

In [2]:
# Load the wiki math articles dataset; the repr shown below lists its
# train / valid / rag splits and their columns.
ds = load_dataset("noor-zalouk/wiki-math-articles")
ds

DatasetDict({
    train: Dataset({
        features: ['title', 'sub_title', 'text', 'category'],
        num_rows: 61954
    })
    valid: Dataset({
        features: ['title', 'sub_title', 'text', 'category'],
        num_rows: 6884
    })
    rag: Dataset({
        features: ['title', 'sub_title', 'text', 'category'],
        num_rows: 143309
    })
})

In [3]:
def query(row):
    """Build the ``EXPLAIN`` prompt for one row and store it under ``row["query"]``.

    A missing (``None`` or empty) ``title`` or ``sub_title`` is rendered as an
    empty string. Each field is normalised independently, so a row where both
    are missing no longer produces the literal text ``"None"`` in the prompt.
    The prompt layout is otherwise unchanged: ``"EXPLAIN {sub_title} {title}"``,
    including the separating spaces when a field is empty.

    Args:
        row: Mapping with ``"title"`` and ``"sub_title"`` entries.

    Returns:
        The same ``row`` object, with the ``"query"`` key set.
    """
    # `or ""` covers both None and "" without coupling the two fields together.
    title = row["title"] or ""
    sub_title = row["sub_title"] or ""

    row["query"] = f"EXPLAIN {sub_title} {title}"

    return row

def context(row):
    """Extend the row's prompt with its text, separated by a single space.

    Overwrites ``row['query']`` and returns the same row object.
    """
    prompt_parts = (row['query'], row['text'])
    row['query'] = ' '.join(prompt_parts)
    return row

In [4]:
# Add the "EXPLAIN ..." prompt column to every split.
ds = ds.map(query)
# Only the rag split gets the row's text appended to its prompt.
ds['rag'] = ds['rag'].map(context)
ds

DatasetDict({
    train: Dataset({
        features: ['title', 'sub_title', 'text', 'category', 'query'],
        num_rows: 61954
    })
    valid: Dataset({
        features: ['title', 'sub_title', 'text', 'category', 'query'],
        num_rows: 6884
    })
    rag: Dataset({
        features: ['title', 'sub_title', 'text', 'category', 'query'],
        num_rows: 143309
    })
})

In [5]:
def generate(row):
    """Generate text for one row's prompt and store it under ``row['generation']``.

    Reads the module-level ``tokenizer`` and ``model``; both must be bound
    before this function is called.

    Args:
        row: Mapping with a ``'query'`` string holding the prompt.

    Returns:
        The same ``row`` object, with ``'generation'`` set to the decoded first
        returned sequence (special tokens removed).
    """
    # One prompt is encoded per call, so there is no second sequence to align
    # with: padding to the tokenizer's maximum length only made the encoder
    # process pad tokens. Truncation is kept so over-long prompts still fit.
    encoded = tokenizer(row['query'], return_tensors="pt", truncation=True)
    # Keep the inputs on the same device as the model (a no-op when both are on CPU).
    encoded = encoded.to(model.device)
    generated_ids = model.generate(
        encoded.input_ids,
        # Pass the tokenizer's mask explicitly instead of leaving it to be inferred.
        attention_mask=encoded.attention_mask,
        num_beams=4,
        repetition_penalty=2.0,
        no_repeat_ngram_size=3,
        max_length=250,
        early_stopping=True
    )
    row['generation'] = tokenizer.decode(generated_ids[0], skip_special_tokens=True)

    return row

In [6]:
# ROUGE metric object; its compute() is called once per split in the evaluation loop.
rouge = evaluate.load('rouge')
# Accumulates the scores of each evaluated checkpoint, keyed by the number
# that appears in the checkpoint directory name.
cps = {}

In [7]:
# Score each checkpoint with ROUGE on a fixed sample of every split.
# The seeded shuffle does not depend on the checkpoint, so it is computed once
# here instead of once per checkpoint; all checkpoints see the same rows.
ds_shuffle = ds.shuffle(seed=42)
sample_size = 2000

for cp in [2907, 3876]:
    model_path = f"./t5_explain_runs/exp4/checkpoint-{cp}"
    # `generate` reads `tokenizer` and `model` as globals, so they are rebound
    # here for the checkpoint being evaluated.
    tokenizer = T5TokenizerFast.from_pretrained(model_path)
    model = T5ForConditionalGeneration.from_pretrained(model_path)

    scores = {}
    for split in ['train', 'valid', 'rag']:
        # Cap the sample at the split size so select() cannot index past the end.
        n_rows = min(sample_size, len(ds_shuffle[split]))
        ds_sample = ds_shuffle[split].select(range(n_rows))
        ds_sample = ds_sample.map(generate)

        # The row's text is the reference the generation is compared against.
        result = rouge.compute(
            predictions=ds_sample['generation'],
            references=ds_sample['text'],
            use_stemmer=True
        )

        # float() accepts NumPy scalars and plain Python floats alike, and yields
        # plain floats that serialise cleanly.
        scores[split] = {rn: float(result[rn]) for rn in ["rouge1", "rouge2", "rougeL", "rougeLsum"]}

    cps[cp] = scores
    

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

In [8]:
# Display the nested scores: checkpoint -> split -> ROUGE variant -> value.
cps

{2907: {'train': {'rouge1': 0.14091725018622864,
   'rouge2': 0.03986693116928433,
   'rougeL': 0.11316536970084391,
   'rougeLsum': 0.11647618593571739},
  'valid': {'rouge1': 0.14161441870935584,
   'rouge2': 0.040939198120793246,
   'rougeL': 0.11341310919600264,
   'rougeLsum': 0.11674422290029361},
  'rag': {'rouge1': 0.5135945011638156,
   'rouge2': 0.4648456297342026,
   'rougeL': 0.4495653746956766,
   'rougeLsum': 0.46546503414429885}},
 3876: {'train': {'rouge1': 0.14396557070718213,
   'rouge2': 0.04016723243424131,
   'rougeL': 0.11534701228296274,
   'rougeLsum': 0.1186913046350499},
  'valid': {'rouge1': 0.1443174891695106,
   'rouge2': 0.04128512011147161,
   'rougeL': 0.11493799608819302,
   'rougeLsum': 0.1187569628365387},
  'rag': {'rouge1': 0.5165961514511581,
   'rouge2': 0.4669226389590829,
   'rougeL': 0.4501568353616199,
   'rougeLsum': 0.46667859615805585}}}

In [11]:
import pickle

# Serialise the per-checkpoint score dict and write it to cps.pkl.
payload = pickle.dumps(cps)
with open('cps.pkl', 'wb') as out_file:
    out_file.write(payload)