# Correct Answer Generator (Experimental)

In [None]:
! pip install -r requirements.txt

In [1]:
import torch
from datasets import load_dataset
from transformers import Seq2SeqTrainingArguments, Seq2SeqTrainer
from transformers import BartTokenizer, BartForConditionalGeneration

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

# Pretrained BART-base tokenizer and conditional-generation model.
tokenizer = BartTokenizer.from_pretrained("facebook/bart-base")
model = BartForConditionalGeneration.from_pretrained("facebook/bart-base")

  from .autonotebook import tqdm as notebook_tqdm


cuda


In [None]:
# Load the SciQ dataset and keep handles to the splits used below.
data = load_dataset("allenai/sciq")
train_data, val_data = data["train"], data["validation"]

In [None]:
# Sequence-length limits (in tokens).
# NOTE(review): neither value is referenced in the visible cells;
# pre_process_data uses its own limits (384 / 128) — confirm which is intended.
max_input, max_target = 512, 128

# Per-device batch size passed to the training arguments.
batch_size = 2

In [None]:
# SciQ rows provide these columns:
# question, distractor3, distractor1, distractor2, correct_answer, support
def pre_process_data(data, max_source_length=384, max_target_length=128):
    """Tokenize SciQ rows into seq2seq training features.

    The question and its supporting passage are encoded together as a
    sentence pair; only the passage is truncated when the pair is too long.
    The correct answer is encoded as the target sequence.

    Args:
        data: One row, or a batch of rows (as passed by
            ``Dataset.map(..., batched=True)``), with the keys ``question``,
            ``support`` and ``correct_answer``.
        max_source_length: Padded/truncated length of the encoded
            question + support pair.
        max_target_length: Padded/truncated length of the encoded answer.

    Returns:
        A dict of plain Python lists:

        * ``input_ids`` / ``attention_mask`` / ``labels`` -- the names the
          model's ``forward`` accepts. Padding positions in ``labels`` are
          set to -100 so that they are ignored by the loss.
        * ``source_ids`` / ``source_mask`` / ``target_ids`` -- the key names
          this function originally returned, kept for backward
          compatibility (``target_ids`` still contains the raw pad ids).
    """
    sources = tokenizer(
        data['question'],
        data['support'],
        max_length=max_source_length,
        padding="max_length",
        truncation="only_second",
    )
    targets = tokenizer(
        data['correct_answer'],
        max_length=max_target_length,
        padding="max_length",
        truncation=True,
    )

    pad_id = tokenizer.pad_token_id

    def _mask_padding(token_ids):
        # Replace pad ids with -100, the index ignored by the loss.
        return [-100 if token_id == pad_id else token_id for token_id in token_ids]

    # No tensors and no squeeze(): returning lists keeps the batch dimension
    # intact even when a mapped batch contains a single row.
    if isinstance(data['question'], str):
        labels = _mask_padding(targets["input_ids"])
    else:
        labels = [_mask_padding(row) for row in targets["input_ids"]]

    return {
        "input_ids": sources["input_ids"],
        "attention_mask": sources["attention_mask"],
        "labels": labels,
        # Legacy key names (not consumed by the model).
        "source_ids": sources["input_ids"],
        "source_mask": sources["attention_mask"],
        "target_ids": targets["input_ids"],
    }


    
# Draw the fixed-seed subsets first, then tokenize only those rows. The
# selected rows are the same as when mapping the full split first, but the
# tokenizer no longer has to process rows that are thrown away afterwards.
train_data = train_data.shuffle(seed=42).select(range(1000)).map(pre_process_data, batched=True)
val_data = val_data.shuffle(seed=42).select(range(100)).map(pre_process_data, batched=True)

In [None]:
import os

# The CUDA caching allocator is configured through the *environment variable*
# PYTORCH_CUDA_ALLOC_CONF; assigning a Python variable of that name has no
# effect. setdefault() keeps any value already exported by the user.
# NOTE(review): "expandable_segments:True" is assumed to be the intended
# setting. It needs a recent PyTorch 2.x release and must be in place before
# the first CUDA allocation — confirm.
PYTORCH_CUDA_ALLOC_CONF = os.environ.setdefault(
    "PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True"
)

# Release cached GPU memory that is currently unused.
torch.cuda.empty_cache()

In [None]:
# Fine-tune the model on the prepared SciQ subsets.
model.to(device)
args = Seq2SeqTrainingArguments(
    output_dir="./results_option_generation",
    evaluation_strategy='epoch',
    learning_rate=2e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    # Two accumulation steps double the effective train batch size.
    gradient_accumulation_steps=2,
    weight_decay=0.01,
    save_total_limit=2,
    num_train_epochs=32,
    predict_with_generate=True,
    eval_accumulation_steps=32,
    # Mixed precision is only available with CUDA; enabling it
    # unconditionally makes the run fail on a CPU-only machine.
    fp16=torch.cuda.is_available(),
)

trainer = Seq2SeqTrainer(
    model,
    args,
    train_dataset=train_data,
    eval_dataset=val_data,
    tokenizer=tokenizer,
)

trainer.train()


In [4]:
# Directory the fine-tuned model and tokenizer are written to and later
# reloaded from.
# NOTE (from the original author): a variant "with context" also exists.
OUT_DIR = 'sciq_correct_answer_generator'

In [None]:
# Persist the model weights/config and the tokenizer files side by side so
# the directory can be reloaded with from_pretrained().
model.save_pretrained(save_directory=OUT_DIR)
tokenizer.save_pretrained(save_directory=OUT_DIR)

In [5]:
from transformers import BartTokenizer, BartForConditionalGeneration

# Restore the saved tokenizer and model from OUT_DIR, placing the model on
# the device chosen at the top of the notebook.
tokenizer = BartTokenizer.from_pretrained(f"./{OUT_DIR}")
model = BartForConditionalGeneration.from_pretrained(f"./{OUT_DIR}").to(device)

In [8]:
# Smoke test: generate the answer for one question/support pair.
input_text = "What amazing machines smash particles that are smaller than atoms into each other head-on?"
support = "Scientists have built machines called particle accelerators. These amazing tools smash particles that are smaller than atoms into each other head-on. This creates new particles. Scientists use particle accelerators to learn about nuclear fusion in stars. They can also learn about how atoms came together in the early universe. Two well-known accelerators are SLAC, in California, and CERN, in Switzerland."

# Encode the pair the same way as at training time: only the support passage
# is truncated, so an over-long passage cannot exceed the input length.
encoded = tokenizer(
    input_text,
    support,
    max_length=384,
    truncation="only_second",
    return_tensors="pt",
).to(device)
input_ids = encoded["input_ids"]

# Pass the attention mask explicitly instead of letting generate() infer it.
output = model.generate(
    input_ids,
    attention_mask=encoded["attention_mask"],
    max_length=128,
    early_stopping=True,
)
outputs = [tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in output]
print(outputs[0])


particle accelerators
