# Questions Generator

In [4]:
!pip install -r requirements.txt

Looking in indexes: https://download.pytorch.org/whl/cu121


In [6]:
import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Seq2SeqTrainingArguments, Seq2SeqTrainer
# import BartForConditionalGeneration
from transformers import BartTokenizer, BartForConditionalGeneration

# make sure to include cuda
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

model = BartForConditionalGeneration.from_pretrained('facebook/bart-base')
tokenizer = AutoTokenizer.from_pretrained('facebook/bart-base')

cpu


In [11]:
data = load_dataset("allenai/sciq")
train_data = data['train']
eval_data = data['test']
test_data = data['validation']

In [4]:
max_input = 512
max_target = 128
batch_size = 2

In [24]:
# dataset has:
# question, distractor3, distractor1, distractor2, correct_answer, support
def pre_process_data(data):
    # tokenize the data
    inputs = tokenizer(data['support'], padding="max_length", truncation=True, max_length=max_input, return_tensors="pt")
    targets = tokenizer(data['question'], padding="max_length", truncation=True, max_length=max_target, return_tensors="pt")
    return {"input_ids": inputs.input_ids, "attention_mask": inputs.attention_mask, "labels": targets.input_ids}

train_data = train_data.map(pre_process_data, batched=True).shuffle(seed=42).select(range(1000))
eval_data = eval_data.map(pre_process_data, batched=True).shuffle(seed=42).select(range(100))
test_data = test_data.map(pre_process_data, batched=True).shuffle(seed=42).select(range(100))

Map:   0%|          | 0/11679 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

In [5]:
# empty memory
torch.cuda.empty_cache()

In [6]:
# TODO: add versioning

model.to(device)
args = Seq2SeqTrainingArguments(
    output_dir="./results",
    evaluation_strategy='epoch',
    learning_rate=2e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size= batch_size,
    gradient_accumulation_steps=2,
    weight_decay=0.01,
    save_total_limit=2,
    num_train_epochs=32,
    predict_with_generate=True,
    eval_accumulation_steps=32,
    fp16=True #available only with CUDA
)


trainer = Seq2SeqTrainer(
    model, 
    args,
    train_dataset=train_data,
    eval_dataset=eval_data,
    tokenizer=tokenizer,
)

trainer.train()
# lets save the model
OUT_DIR = "sciq"
model.save_pretrained(OUT_DIR)
tokenizer.save_pretrained(OUT_DIR)


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
  attn_output = torch.nn.functional.scaled_dot_product_attention(
                                                  
  3%|▎         | 250/8000 [01:21<42:56,  3.01it/s]

{'eval_loss': 0.23404675722122192, 'eval_runtime': 2.0107, 'eval_samples_per_second': 49.734, 'eval_steps_per_second': 24.867, 'epoch': 1.0}


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}


{'loss': 1.4821, 'grad_norm': 1.7026487588882446, 'learning_rate': 1.876e-05, 'epoch': 2.0}


                                                  
  6%|▋         | 500/8000 [02:49<40:26,  3.09it/s]

{'eval_loss': 0.21688079833984375, 'eval_runtime': 1.9602, 'eval_samples_per_second': 51.016, 'eval_steps_per_second': 25.508, 'epoch': 2.0}


                                                    
  9%|▉         | 750/8000 [04:10<36:20,  3.33it/s]

{'eval_loss': 0.21881717443466187, 'eval_runtime': 1.7749, 'eval_samples_per_second': 56.34, 'eval_steps_per_second': 28.17, 'epoch': 3.0}


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}


{'loss': 0.1481, 'grad_norm': 1.3198992013931274, 'learning_rate': 1.751e-05, 'epoch': 4.0}


                                                   
 12%|█▎        | 1000/8000 [05:32<37:08,  3.14it/s]

{'eval_loss': 0.22326040267944336, 'eval_runtime': 1.7272, 'eval_samples_per_second': 57.898, 'eval_steps_per_second': 28.949, 'epoch': 4.0}


                                                     
 16%|█▌        | 1250/8000 [06:52<33:52,  3.32it/s]

{'eval_loss': 0.23713943362236023, 'eval_runtime': 1.7609, 'eval_samples_per_second': 56.79, 'eval_steps_per_second': 28.395, 'epoch': 5.0}


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}


{'loss': 0.1033, 'grad_norm': 0.8450888991355896, 'learning_rate': 1.626e-05, 'epoch': 6.0}


                                                   
 19%|█▉        | 1500/8000 [08:13<30:17,  3.58it/s]

{'eval_loss': 0.24260342121124268, 'eval_runtime': 1.6307, 'eval_samples_per_second': 61.322, 'eval_steps_per_second': 30.661, 'epoch': 6.0}


                                                     
 22%|██▏       | 1750/8000 [09:27<29:08,  3.57it/s]

{'eval_loss': 0.2525208294391632, 'eval_runtime': 1.6318, 'eval_samples_per_second': 61.283, 'eval_steps_per_second': 30.641, 'epoch': 7.0}


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}


{'loss': 0.0738, 'grad_norm': 1.6719452142715454, 'learning_rate': 1.501e-05, 'epoch': 8.0}


                                                   
 25%|██▌       | 2000/8000 [10:42<27:47,  3.60it/s]

{'eval_loss': 0.26093629002571106, 'eval_runtime': 1.6162, 'eval_samples_per_second': 61.874, 'eval_steps_per_second': 30.937, 'epoch': 8.0}


                                                     
 28%|██▊       | 2250/8000 [11:58<29:43,  3.22it/s]

{'eval_loss': 0.27157336473464966, 'eval_runtime': 1.7333, 'eval_samples_per_second': 57.695, 'eval_steps_per_second': 28.847, 'epoch': 9.0}


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}


{'loss': 0.0522, 'grad_norm': 0.9570529460906982, 'learning_rate': 1.37625e-05, 'epoch': 10.0}


                                                   
 31%|███▏      | 2500/8000 [13:22<30:34,  3.00it/s]

{'eval_loss': 0.2749168872833252, 'eval_runtime': 2.1786, 'eval_samples_per_second': 45.9, 'eval_steps_per_second': 22.95, 'epoch': 10.0}


                                                     
 34%|███▍      | 2750/8000 [14:44<27:15,  3.21it/s]

{'eval_loss': 0.2830430269241333, 'eval_runtime': 1.7495, 'eval_samples_per_second': 57.158, 'eval_steps_per_second': 28.579, 'epoch': 11.0}


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}


{'loss': 0.0384, 'grad_norm': 1.583770513534546, 'learning_rate': 1.2512500000000001e-05, 'epoch': 12.0}


                                                   
 38%|███▊      | 3000/8000 [16:08<27:24,  3.04it/s]

{'eval_loss': 0.2863128185272217, 'eval_runtime': 1.7679, 'eval_samples_per_second': 56.563, 'eval_steps_per_second': 28.282, 'epoch': 12.0}


                                                     
 41%|████      | 3250/8000 [17:29<25:53,  3.06it/s]

{'eval_loss': 0.30117201805114746, 'eval_runtime': 1.9748, 'eval_samples_per_second': 50.638, 'eval_steps_per_second': 25.319, 'epoch': 13.0}


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}


{'loss': 0.0285, 'grad_norm': 1.838121771812439, 'learning_rate': 1.1262500000000001e-05, 'epoch': 14.0}


                                                   
 44%|████▍     | 3500/8000 [18:54<23:06,  3.25it/s]

{'eval_loss': 0.2988032102584839, 'eval_runtime': 1.9138, 'eval_samples_per_second': 52.252, 'eval_steps_per_second': 26.126, 'epoch': 14.0}


                                                     
 47%|████▋     | 3750/8000 [20:12<19:45,  3.59it/s]

{'eval_loss': 0.3071799874305725, 'eval_runtime': 1.6178, 'eval_samples_per_second': 61.811, 'eval_steps_per_second': 30.905, 'epoch': 15.0}


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}


{'loss': 0.0222, 'grad_norm': 1.8917053937911987, 'learning_rate': 1.0012500000000001e-05, 'epoch': 16.0}


                                                   
 50%|█████     | 4000/8000 [21:28<18:50,  3.54it/s]

{'eval_loss': 0.31092241406440735, 'eval_runtime': 1.6871, 'eval_samples_per_second': 59.274, 'eval_steps_per_second': 29.637, 'epoch': 16.0}


                                                     
 53%|█████▎    | 4250/8000 [22:40<17:02,  3.67it/s]

{'eval_loss': 0.31307610869407654, 'eval_runtime': 1.6017, 'eval_samples_per_second': 62.432, 'eval_steps_per_second': 31.216, 'epoch': 17.0}


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}


{'loss': 0.0177, 'grad_norm': 0.8407136797904968, 'learning_rate': 8.762500000000001e-06, 'epoch': 18.0}


                                                   
 56%|█████▋    | 4500/8000 [23:55<16:38,  3.51it/s]

{'eval_loss': 0.3142528831958771, 'eval_runtime': 1.5696, 'eval_samples_per_second': 63.712, 'eval_steps_per_second': 31.856, 'epoch': 18.0}


                                                     
 59%|█████▉    | 4750/8000 [25:07<15:42,  3.45it/s]

{'eval_loss': 0.31991955637931824, 'eval_runtime': 1.6328, 'eval_samples_per_second': 61.245, 'eval_steps_per_second': 30.622, 'epoch': 19.0}


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}


{'loss': 0.015, 'grad_norm': 0.9104005694389343, 'learning_rate': 7.5125000000000005e-06, 'epoch': 20.0}


                                                   
 62%|██████▎   | 5000/8000 [26:22<15:31,  3.22it/s]

{'eval_loss': 0.31863442063331604, 'eval_runtime': 1.9455, 'eval_samples_per_second': 51.402, 'eval_steps_per_second': 25.701, 'epoch': 20.0}


                                                     
 66%|██████▌   | 5250/8000 [27:36<12:49,  3.57it/s]

{'eval_loss': 0.3220687508583069, 'eval_runtime': 1.6168, 'eval_samples_per_second': 61.85, 'eval_steps_per_second': 30.925, 'epoch': 21.0}


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}


{'loss': 0.0135, 'grad_norm': 1.3722989559173584, 'learning_rate': 6.262500000000001e-06, 'epoch': 22.0}


                                                   
 69%|██████▉   | 5500/8000 [28:50<11:42,  3.56it/s]

{'eval_loss': 0.3214161992073059, 'eval_runtime': 1.6299, 'eval_samples_per_second': 61.354, 'eval_steps_per_second': 30.677, 'epoch': 22.0}


                                                   
 72%|███████▏  | 5750/8000 [30:03<10:21,  3.62it/s]

{'eval_loss': 0.32765263319015503, 'eval_runtime': 1.6153, 'eval_samples_per_second': 61.907, 'eval_steps_per_second': 30.954, 'epoch': 23.0}


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}


{'loss': 0.0121, 'grad_norm': 0.8747310638427734, 'learning_rate': 5.0125e-06, 'epoch': 24.0}


                                                   
 75%|███████▌  | 6000/8000 [31:19<09:15,  3.60it/s]

{'eval_loss': 0.32277077436447144, 'eval_runtime': 1.696, 'eval_samples_per_second': 58.963, 'eval_steps_per_second': 29.481, 'epoch': 24.0}


                                                   
 78%|███████▊  | 6250/8000 [32:31<08:22,  3.49it/s]

{'eval_loss': 0.3261013925075531, 'eval_runtime': 1.6974, 'eval_samples_per_second': 58.915, 'eval_steps_per_second': 29.457, 'epoch': 25.0}


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}


{'loss': 0.0107, 'grad_norm': 0.3925653398036957, 'learning_rate': 3.7625e-06, 'epoch': 26.0}


                                                   
 81%|████████▏ | 6500/8000 [33:50<08:23,  2.98it/s]

{'eval_loss': 0.33099183440208435, 'eval_runtime': 1.8394, 'eval_samples_per_second': 54.365, 'eval_steps_per_second': 27.182, 'epoch': 26.0}


                                                   
 84%|████████▍ | 6750/8000 [35:15<06:57,  2.99it/s]

{'eval_loss': 0.3315889835357666, 'eval_runtime': 1.9, 'eval_samples_per_second': 52.632, 'eval_steps_per_second': 26.316, 'epoch': 27.0}


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}


{'loss': 0.0101, 'grad_norm': 0.5510970950126648, 'learning_rate': 2.5125e-06, 'epoch': 28.0}


                                                   
 88%|████████▊ | 7000/8000 [36:43<05:32,  3.01it/s]

{'eval_loss': 0.33281752467155457, 'eval_runtime': 1.8233, 'eval_samples_per_second': 54.847, 'eval_steps_per_second': 27.423, 'epoch': 28.0}


                                                   
 91%|█████████ | 7250/8000 [38:07<04:04,  3.07it/s]

{'eval_loss': 0.3364467918872833, 'eval_runtime': 1.8872, 'eval_samples_per_second': 52.987, 'eval_steps_per_second': 26.494, 'epoch': 29.0}


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}


{'loss': 0.0091, 'grad_norm': 0.24454258382320404, 'learning_rate': 1.2625000000000002e-06, 'epoch': 30.0}


                                                   
 94%|█████████▍| 7500/8000 [39:35<02:46,  3.00it/s]

{'eval_loss': 0.33470889925956726, 'eval_runtime': 1.8545, 'eval_samples_per_second': 53.924, 'eval_steps_per_second': 26.962, 'epoch': 30.0}


                                                   
 97%|█████████▋| 7750/8000 [40:59<01:23,  3.01it/s]

{'eval_loss': 0.3345467746257782, 'eval_runtime': 1.8695, 'eval_samples_per_second': 53.49, 'eval_steps_per_second': 26.745, 'epoch': 31.0}


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}


{'loss': 0.0095, 'grad_norm': 0.2064138799905777, 'learning_rate': 1.5000000000000002e-08, 'epoch': 32.0}


                                                   
100%|██████████| 8000/8000 [42:28<00:00,  3.14it/s]
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}


{'eval_loss': 0.3348207175731659, 'eval_runtime': 1.9055, 'eval_samples_per_second': 52.48, 'eval_steps_per_second': 26.24, 'epoch': 32.0}
{'train_runtime': 2548.7872, 'train_samples_per_second': 12.555, 'train_steps_per_second': 3.139, 'train_loss': 0.12788135969638825, 'epoch': 32.0}


('sciq_model\\tokenizer_config.json',
 'sciq_model\\special_tokens_map.json',
 'sciq_model\\vocab.json',
 'sciq_model\\merges.txt',
 'sciq_model\\added_tokens.json',
 'sciq_model\\tokenizer.json')

In [19]:
MODEL_FOLDER = "sciq"
model = BartForConditionalGeneration.from_pretrained(f"./{MODEL_FOLDER}")
tokenizer = BartTokenizer.from_pretrained(f"./{MODEL_FOLDER}")
# put them both on the same device
_ = model.to(device)

In [21]:
# now lets test it with an input
input_text = "Milk has a white color."
inputs = tokenizer(input_text, padding="max_length", truncation=True, max_length=max_input, return_tensors="pt")
inputs = {k: inputs[k].to(device) for k in inputs}

result = model.generate(**inputs)
output = tokenizer.decode(result[0], skip_special_tokens=True)
print(output)


What color is milk?


In [8]:
model = BartForConditionalGeneration.from_pretrained('nlp-group-6/sciq-question-generator', token="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")

model.to(device)
args = Seq2SeqTrainingArguments(
    output_dir="./results",
    evaluation_strategy='epoch',
    learning_rate=2e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size= batch_size,
    gradient_accumulation_steps=2,
    weight_decay=0.01,
    save_total_limit=2,
    num_train_epochs=32,
    predict_with_generate=True,
    eval_accumulation_steps=32
    # fp16=True #available only with CUDA
)


trainer = Seq2SeqTrainer(
    model, 
    args,
    train_dataset=train_data,
    eval_dataset=eval_data,
    tokenizer=tokenizer,
)

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


In [68]:
from nltk.translate.bleu_score import sentence_bleu

def compute_bleu_score(targets, predictions):
    total_bleu_score = 0
    for target_sentence, prediction in zip(targets, predictions):
        predicted_sentence = tokenizer.decode(prediction, skip_special_tokens=True)
        bleu_score = sentence_bleu(target_sentence, predicted_sentence)
        print(bleu_score, target_sentence, predicted_sentence)
        total_bleu_score += bleu_score
    total_bleu_score /= len(targets)
    print(total_bleu_score)
    
predictions = trainer.predict(test_data)
compute_bleu_score(test_data, predictions[0])


KeyboardInterrupt: 

In [69]:
compute_bleu_score(test_data['question'], predictions[0])

1.2000064705012679e-231 A habitat’s features are determined mainly by abiotic factors such as? What is the physical environment in which a species lives and to which it is adapted?
1.2217404365441333e-231 What part of the body does caffeine stimulate? Some psychoactive drugs, such as caffeine, stimulate the central nervous system. they may
1.3067701985851573e-231 An extensive property is a property that depends on the amount of what in a sample? What is a property that depends on the amount of matter in a sample?
1.2882297539194154e-231 Electron capture occurs when an inner shell electron combines with a proton and is converted into what? What does air passing between the vehicles flow in a narrower channel cause?
1.362137122503591e-231 What is an area in a body of water where nothing grows because there is too little oxygen known as? What does rain dissolves fertilizer in the soil?
1.2383665771889249e-231 Usually done on computers, what do you call sets of equations that take into acc