<h1> Question Generation </h1>


*   This notebook requires the SQuAD dataset, or a QA dataset formatted like SQuAD.
*   We will use the translations of SQuAD1.1 or SQuAD2.0 that we did using the notebooks "NLLB translation squad 1.ipynb" or "NLLB translation squad 2.ipynb"
*   The final translations using notebooks "NLLB translation squad 1.ipynb" and "NLLB translation squad 2.ipynb" are "squad1_translated_final_aligned.json" and "squad2_translated_final_aligned.json"



In [None]:
!pip install transformers=="4.25.1" sentencepiece=="0.1.97" utoken=="0.1.8" nltk=="3.8.1" datasets=="2.8.0" torch=="1.13.1+cu116" numpy=="1.21.6" tqdm=="4.64.1" --quiet

In [None]:
# import os
# os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
# os.environ["CUDA_VISIBLE_DEVICES"]="0"

In [None]:
import random
import numpy as np
import torch
def set_seed(seed):
  """Seed the Python, NumPy and PyTorch random number generators with ``seed``.

  When CUDA is available, the generators of all GPUs are seeded as well.
  """
  for seeder in (random.seed, np.random.seed, torch.manual_seed):
    seeder(seed)
  if not torch.cuda.is_available():
    return
  torch.cuda.manual_seed_all(seed)

set_seed(42)

In [None]:
import json
from pathlib import Path
import torch
from torch.utils.data import DataLoader
import time

## ***Step 1:*** Format and split the translated QA dataset

In [None]:
import re
import string
from collections import Counter
from utoken import utokenize
from utoken import detokenize
# utoken tokenizer / detokenizer configured with the 'ben' language code.
# `tok` is used by the repetition heuristics defined in this cell; `detok` is not
# referenced by any of the cells shown in this notebook.
tok = utokenize.Tokenizer(lang_code='ben')
detok = detokenize.Detokenizer(lang_code='ben')

def repeating_prob(context):
    """Score how periodically the most frequent token recurs in ``context``.

    ASCII punctuation is stripped, the text is tokenized with the module-level
    ``tok`` and the most frequent token is selected (the first one encountered
    wins on ties). Every occurrence of that token is then located in the
    original string and the distances between consecutive occurrences are
    tallied.

    Args:
        context (str): text to inspect.

    Returns:
        int: number of times the most common distance occurs, or 0 when the
        token occurs fewer than twice or the text yields no tokens. Callers
        compare the value against a threshold to drop looping translations.
    """
    stripped = context.translate(str.maketrans('', '', string.punctuation))
    # Drop empty strings produced by consecutive spaces: an empty pattern would
    # match at every position and make any text look maximally repetitive.
    tokens = [t for t in tok.utokenize_string(stripped).split(" ") if t]
    if not tokens:
        return 0
    rep_token = Counter(tokens).most_common(1)[0][0]
    # The token is literal text, not a pattern, so regex metacharacters are escaped.
    starts = [match.start() for match in re.finditer(re.escape(rep_token), context)]
    gaps = [later - earlier for earlier, later in zip(starts, starts[1:])]
    if not gaps:
        return 0
    return max(Counter(gaps).values())


def repeating_char_prob(context):
    """Return the highest count of any single character within one token of ``context``.

    ASCII punctuation is stripped and the text is tokenized with the
    module-level ``tok``; for each token the count of its most frequent
    character is taken, and the maximum over all tokens is returned.

    Args:
        context (str): text to inspect.

    Returns:
        int: the largest per-token character count, or 0 when the text yields
        no non-empty token (e.g. an empty string).
    """
    stripped = context.translate(str.maketrans('', '', string.punctuation))
    tokens = tok.utokenize_string(stripped).split(" ")
    # Empty tokens have no characters; skipping them avoids indexing into an
    # empty most_common() result, and default=0 covers the all-empty case.
    return max(
        (Counter(token).most_common(1)[0][1] for token in tokens if token),
        default=0,
    )

In [None]:
# Training split: QA pairs from the first 400 articles of the translated SQuAD train file.
path = Path('./squad1_translated_final_aligned.json')

# json.load accepts a binary file object, so no explicit encoding is passed.
with open(path, 'rb') as f:
    squad_dict = json.load(f)

texts = []
queries = []
answers = []

# Slicing (instead of indexing with range(400)) avoids an IndexError on files with fewer articles.
for group in squad_dict['data'][:400]:
    for passage in group['paragraphs']:
        context_list = passage['bangla_context_list']
        for qa in passage['qas']:
            # Questions without any answer (as in SQuAD 2.0) cannot yield a (context, answer, question) triple.
            if not qa.get('answers'):
                continue
            answer = qa['answers'][0]  # only the first reference answer is used
            question = qa['q_tran'].strip()
            # Drop questions flagged by the repetition heuristics.
            if repeating_prob(question)>3 or repeating_char_prob(question)>10:
                continue
            # Entry of bangla_context_list selected by the answer's index_c_tran_with_ans field.
            context = context_list[answer['index_c_tran_with_ans']]
            if repeating_prob(context)>3 or repeating_char_prob(context)>10:
                continue
            # Keep the pair only when the alignment score is high enough and the
            # translated answer occurs verbatim in the chosen context.
            if answer['align_score'] >= 0.5 and answer['a_tran'] in context:
                texts.append(context)
                queries.append(question)
                answers.append(answer['a_tran'])


train_texts, train_queries, train_answers = texts, queries, answers

In [None]:
# Validation split: QA pairs from articles 400-441 of the translated SQuAD train file.
path = Path('./squad1_translated_final_aligned.json')

# json.load accepts a binary file object, so no explicit encoding is passed.
with open(path, 'rb') as f:
    squad_dict = json.load(f)

texts = []
queries = []
answers = []

# Slicing (instead of indexing with range(400, 442)) avoids an IndexError on files with fewer articles.
for group in squad_dict['data'][400:442]:
    for passage in group['paragraphs']:
        context_list = passage['bangla_context_list']
        for qa in passage['qas']:
            # Questions without any answer (as in SQuAD 2.0) cannot yield a (context, answer, question) triple.
            if not qa.get('answers'):
                continue
            answer = qa['answers'][0]  # only the first reference answer is used
            question = qa['q_tran'].strip()
            # Drop questions flagged by the repetition heuristics.
            if repeating_prob(question)>3 or repeating_char_prob(question)>10:
                continue
            # Entry of bangla_context_list selected by the answer's index_c_tran_with_ans field.
            context = context_list[answer['index_c_tran_with_ans']]
            if repeating_prob(context)>3 or repeating_char_prob(context)>10:
                continue
            # Keep the pair only when the alignment score is high enough and the
            # translated answer occurs verbatim in the chosen context.
            if answer['align_score'] >= 0.5 and answer['a_tran'] in context:
                texts.append(context)
                queries.append(question)
                answers.append(answer['a_tran'])


val_texts, val_queries, val_answers = texts, queries, answers

In [None]:
# Test split: every article of the translated SQuAD dev file; question ids are kept alongside.
path = Path('./squad1_dev_translated_final_aligned.json')

# json.load accepts a binary file object, so no explicit encoding is passed.
with open(path, 'rb') as f:
    squad_dict = json.load(f)

q_ids = []
texts = []
queries = []
answers = []

for group in squad_dict['data']:
    for passage in group['paragraphs']:
        context_list = passage['bangla_context_list']
        for qa in passage['qas']:
            # Questions without any answer (as in SQuAD 2.0) cannot yield a (context, answer, question) triple.
            if not qa.get('answers'):
                continue
            answer = qa['answers'][0]  # only the first reference answer is used
            qid = qa['id']
            question = qa['q_tran'].strip()
            # Drop questions flagged by the repetition heuristics.
            if repeating_prob(question)>3 or repeating_char_prob(question)>10:
                continue
            # Entry of bangla_context_list selected by the answer's index_c_tran_with_ans field.
            context = context_list[answer['index_c_tran_with_ans']]
            if repeating_prob(context)>3 or repeating_char_prob(context)>10:
                continue
            # Keep the pair only when the alignment score is high enough and the
            # translated answer occurs verbatim in the chosen context.
            if answer['align_score'] >= 0.5 and answer['a_tran'] in context:
                texts.append(context)
                queries.append(question)
                answers.append(answer['a_tran'])
                q_ids.append(qid)


test_texts, test_queries, test_answers = texts, queries, answers

## ***Step 2:*** Check the data

In [None]:
# Sanity check: the three parallel training lists should report the same length.
print(len(train_texts))
print(len(train_queries))
print(len(train_answers))

71423
71423
71423


In [None]:
# Spot-check one training example (index chosen arbitrarily).
x=100
print("Passage: ",train_texts[x])  
print("Query: ",train_queries[x])
print("Answer: ",train_answers[x])

Passage:  ক্যাথলিকরা দলটির চারপাশে জড়ো হয়ে রেডিওতে খেলা শুনত, বিশেষ করে যখন এটি আমেরিকার প্রোটেস্ট্যান্ট প্রতিষ্ঠানের প্রতীক যে স্কুলগুলিকে আঘাত করেছিল  হার্ভার্ড, ইয়েল, প্রিন্সটন এবং আর্মি।
Query:  ক্যাথলিকরা নটরডামের সাথে যুক্ত, কোন ধর্মীয় গোষ্ঠীকে মানুষ মনে করে যে ইয়েল প্রতিনিধিত্ব করে?
Answer:  প্রোটেস্ট্যান্ট প্রতিষ্ঠানের


The validation split contains 7344 passages, queries and answers, as the counts printed below show.

In [None]:
# Sanity check: the three parallel validation lists should report the same length.
print(len(val_texts))
print(len(val_queries))
print(len(val_answers))

7344
7344
7344


In [None]:
# Spot-check the first validation example.
print("Passage: ",val_texts[0])  
print("Query: ",val_queries[0])
print("Answer: ",val_answers[0])

Passage:  এটি উত্তর-পশ্চিমে আর্মেনিয়া, প্রকৃতপক্ষে নাগোরনো-কারাবাখ প্রজাতন্ত্র এবং আজারবাইজান; উত্তরে কাশ্মীর এবং রাশিয়া কাস্পিয়ান সাগর জুড়ে; উত্তর-পূর্বে তুর্কমেনিস্তান; পূর্ব দিকে আফগানিস্তান এবং পাকিস্তান; দক্ষিণে পারস্য উপসাগর এবং ওমান উপসাগর; এবং পশ্চিমে তুরস্ক এবং ইরাক।
Query:  ইরানের উত্তর-পশ্চিমে ইরানের সাথে কোন দেশ সীমান্তবর্তী?
Answer:  আর্মেনিয়া


In [None]:
# train_texts = train_texts[:10000]
# train_queries = train_queries[:10000]
# train_answers = train_answers[:10000]

# val_texts = val_texts[:1000]
# val_queries = val_queries[:1000]
# val_answers = val_answers[:1000]

In [None]:
# print(len(train_texts))
# print(len(train_queries))
# print(len(train_answers))

In [None]:
# print(len(val_texts))
# print(len(val_queries))
# print(len(val_answers))

## ***Step 3:*** Format according to input of train data

In [None]:
# Source format fed to the model: "<context> <sep> <answer>".
for i in range(len(train_texts)):
    answer_suffix = " <sep> " + train_answers[i]
    # train_texts is updated in place; the guard keeps this cell idempotent so that
    # re-running it does not append the answer a second time.
    if not train_texts[i].endswith(answer_suffix):
        train_texts[i] = train_texts[i] + answer_suffix

In [None]:
# Source format fed to the model: "<context> <sep> <answer>".
for i in range(len(val_texts)):
    answer_suffix = " <sep> " + val_answers[i]
    # val_texts is updated in place; the guard keeps this cell idempotent so that
    # re-running it does not append the answer a second time.
    if not val_texts[i].endswith(answer_suffix):
        val_texts[i] = val_texts[i] + answer_suffix

In [None]:
# Column-oriented dicts: source_text is "<context> <sep> <answer>", target_text is the question.
train_dataset = {'source_text': train_texts, 'target_text': train_queries}
val_dataset = {'source_text': val_texts, 'target_text': val_queries}

In [None]:
from datasets import Dataset
# NOTE(review): this rebinds train_dataset / val_dataset from plain dicts to Dataset objects,
# so the cell expects the dict-building cell to have run immediately before it.
train_dataset = Dataset.from_dict(train_dataset)
val_dataset = Dataset.from_dict(val_dataset)

## ***Step 4:*** Tokenize passages and queries

In [None]:
class DataProcessor:
    """Convert ``source_text`` / ``target_text`` pairs into fixed-length token-id features.

    Args:
        tokenizer: object exposing ``batch_encode_plus`` and ``pad_token_id``
            (a Hugging Face tokenizer in this notebook).
        max_source_length (int): source sequences are padded/truncated to this length.
        max_target_length (int): target sequences are padded/truncated to this length.
        ignore_pad_token_for_loss (bool): when True (default), padding positions in
            ``target_ids`` are replaced by -100, the label value that the
            Transformers seq2seq loss ignores. ``target_ids`` is used directly as
            ``labels`` during training, so leaving pad ids in place would make the
            loss average over padding. Pass False to keep the raw pad ids
            (e.g. if ``target_ids`` must be decoded back to text).
    """

    # Label value skipped by the cross-entropy loss of Transformers seq2seq models.
    LABEL_IGNORE_INDEX = -100

    def __init__(self, tokenizer, max_source_length=512, max_target_length=32,
                 ignore_pad_token_for_loss=True):
        self.tokenizer = tokenizer
        self.max_source_length = max_source_length
        self.max_target_length = max_target_length
        self.ignore_pad_token_for_loss = ignore_pad_token_for_loss
        self.sep_token = "<sep>"

    def process(self, dataset):
        """Append EOS markers to every example, then tokenize the dataset in batches.

        ``dataset`` must provide ``map`` (a ``datasets.Dataset`` in this notebook);
        the mapped dataset with the added feature columns is returned.
        """
        dataset = dataset.map(self._add_eos_examples)
        dataset = dataset.map(self._convert_to_features, batched=True)

        return dataset

    def _add_eos_examples(self, example):
        """Append the literal `` </s>`` marker to the source and target text of one example."""
        example['source_text'] = example['source_text'] + " </s>"
        example['target_text'] = example['target_text'] + " </s>"
        return example

    def _convert_to_features(self, example_batch):
        """Tokenize a batch of examples.

        Returns:
            dict with ``source_ids`` and ``attention_mask`` (from the source texts)
            and ``target_ids`` (from the target texts, with padding masked as
            -100 when ``ignore_pad_token_for_loss`` is set).
        """
        # padding='max_length' already pads to max_length; the deprecated
        # pad_to_max_length flag was redundant and has been dropped.
        source_encoding = self.tokenizer.batch_encode_plus(
            example_batch['source_text'],
            max_length=self.max_source_length,
            padding='max_length',
            truncation=True,
        )
        target_encoding = self.tokenizer.batch_encode_plus(
            example_batch['target_text'],
            max_length=self.max_target_length,
            padding='max_length',
            truncation=True,
        )

        target_ids = target_encoding['input_ids']
        if self.ignore_pad_token_for_loss:
            pad_id = self.tokenizer.pad_token_id
            target_ids = [
                [self.LABEL_IGNORE_INDEX if token_id == pad_id else token_id for token_id in ids]
                for ids in target_ids
            ]

        encodings = {
            'source_ids': source_encoding['input_ids'],
            'target_ids': target_ids,
            'attention_mask': source_encoding['attention_mask'],
        }

        return encodings

In [None]:
from transformers import T5Tokenizer, T5ForConditionalGeneration
# Load the tokenizer published with csebuetnlp/banglat5 and register "<sep>",
# the marker placed between context and answer in the source texts, as an extra token.
tokenizer = T5Tokenizer.from_pretrained("csebuetnlp/banglat5")
tokenizer.add_tokens(['<sep>'])

In [None]:
# Sources are padded/truncated to 512 tokens and target questions to 64
# (overriding the class default of 32 for targets).
processor = DataProcessor(
        tokenizer,
        max_source_length=512,
        max_target_length=64
    )

# Adds the source_ids / target_ids / attention_mask columns to both splits.
train_dataset_final = processor.process(train_dataset)
val_dataset_final = processor.process(val_dataset)

In [None]:
# Expose only the three tokenized columns, as torch tensors, when the datasets are indexed.
columns = ["source_ids", "target_ids", "attention_mask"]
train_dataset_final.set_format(type='torch', columns=columns)
val_dataset_final.set_format(type='torch', columns=columns)

## ***Step 5:*** Training

In [None]:
# Hyper-parameters and paths for fine-tuning the question generator.
# Only part of these keys is read by the code in this notebook (train() and the
# model-loading cell); others such as data_dir, doc_stride or threads are not
# referenced in the cells shown.
question_generator_trainer_args=dict(
    debug=False,
    model_name="csebuetnlp/banglat5",
    model_path="csebuetnlp/banglat5",  # weights to load; train() also checks it for optimizer/scheduler state
    output_dir="./QGmodel",  # checkpoints and TensorBoard logs are written below this directory
    data_dir="./QGdata",
    train_file=None,
    config_name="csebuetnlp/banglat5",
    tokenizer_name="csebuetnlp/banglat5",
    cache_dir="./QGdata",
    max_seq_length=512,
    doc_stride=128,
    max_query_length=64,
    max_ans_length=30,
    do_lower_case=False,
    train_batch_size=16,
    dev_batch_size=16,  # NOTE(review): evaluate() builds its own loader with batch_size=8
    dynamic_batching=False,
    learning_rate=2e-4,
    gradient_accumulation_steps=1,
    weight_decay=0.0,
    max_grad_norm=1.0,  # gradient-norm clipping threshold
    num_train_epochs=3,
    max_steps=-1,  # <= 0: the number of steps is derived from num_train_epochs
    warmup_steps=0,
    verbose_logging=False,
    train_logging_steps=2000,
    dev_logging_steps=2000,
    evaluate_during_training=True,
    save_steps=2000,
    no_cuda=False,
    overwrite_output_dir=True,
    overwrite_cache=False,
    seed=42,
    threads=1,
)

from types import SimpleNamespace

# train() reads its settings as attributes (args.learning_rate, ...), hence the namespace wrapper.
qg_trainer_args = SimpleNamespace(**question_generator_trainer_args)

In [None]:
import logging as logger
import os
from tqdm.notebook import tqdm, trange
from functools import partial

import torch
from torch.utils.data import TensorDataset, SequentialSampler, DataLoader, RandomSampler

from torch.optim import AdamW
from transformers import (
    T5Config,
    T5Tokenizer,
    T5ForConditionalGeneration,
    get_linear_schedule_with_warmup,
)

from torch.utils.tensorboard import SummaryWriter


logger.basicConfig(level=logger.INFO)



def train(args, train_dataset, dev_dataset, model, tokenizer):
    """Fine-tune ``model`` on ``train_dataset`` with periodic logging, evaluation and checkpoints.

    Args:
        args: namespace of settings. Reads ``no_cuda``, ``output_dir``, ``model_path``,
            ``train_batch_size``, ``max_steps``, ``num_train_epochs``,
            ``gradient_accumulation_steps``, ``weight_decay``, ``learning_rate``,
            ``warmup_steps``, ``max_grad_norm``, ``train_logging_steps``,
            ``dev_logging_steps``, ``evaluate_during_training`` and ``save_steps``.
            ``args.device`` is set here, and ``args.num_train_epochs`` is overwritten
            when ``max_steps > 0``.
        train_dataset: dataset whose batches provide ``source_ids``, ``attention_mask``
            and ``target_ids`` tensors.
        dev_dataset: dataset handed to ``evaluate`` to compute the dev loss.
        model: called as ``model(input_ids=..., attention_mask=..., labels=...)`` and
            expected to return an object with a ``loss`` attribute; must support
            ``save_pretrained``.
        tokenizer: saved next to every model checkpoint.

    Returns:
        None. Checkpoints are written to ``<output_dir>/checkpoint-<step>`` and
        TensorBoard scalars to ``<output_dir>/TB_writer``.
    """
    args.device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")

    model.to(args.device)

    train_dataloader = DataLoader(
        train_dataset, sampler=RandomSampler(train_dataset), batch_size=args.train_batch_size
    )
    # No dev dataloader is built here: evaluate() creates its own from dev_dataset.

    # Number of optimizer updates per epoch.
    steps_per_epoch = len(train_dataloader) // args.gradient_accumulation_steps
    # Divisor guarded against datasets smaller than one accumulation window.
    safe_steps_per_epoch = max(1, steps_per_epoch)

    if args.max_steps > 0:
        t_total = args.max_steps
        args.num_train_epochs = args.max_steps // safe_steps_per_epoch + 1
    else:
        t_total = steps_per_epoch * args.num_train_epochs

    # Prepare optimizer and schedule (linear warmup and decay)
    # NOTE(review): T5 appears to name its normalization layers `layer_norm`, so the
    # "LayerNorm.weight" pattern may not match them; confirm before using weight_decay > 0.
    no_decay = ["bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
        {
            "params": [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
            "weight_decay": args.weight_decay,
        },
        {"params": [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], "weight_decay": 0.0},
    ]
    optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=args.warmup_steps, num_training_steps=t_total
    )

    # Check if saved optimizer or scheduler states exist
    optimizer_state_path = os.path.join(args.model_path, "optimizer.pt")
    scheduler_state_path = os.path.join(args.model_path, "scheduler.pt")
    if os.path.isfile(optimizer_state_path) and os.path.isfile(scheduler_state_path):
        # Load in optimizer and scheduler states
        optimizer.load_state_dict(torch.load(optimizer_state_path))
        scheduler.load_state_dict(torch.load(scheduler_state_path))

    # Train!
    logger.info("***** Running training *****")
    logger.info("  Num examples = %d", len(train_dataset))
    logger.info("  Num Epochs = %d", args.num_train_epochs)
    logger.info("  Batch size = %d", args.train_batch_size)
    logger.info("  Gradient Accumulation steps = %d", args.gradient_accumulation_steps)
    logger.info("  Total optimization steps = %d", t_total)

    global_step = 1
    epochs_trained = 0
    steps_trained_in_current_epoch = 0

    # Check if continuing training from a checkpoint
    if os.path.exists(args.model_path):
        try:
            # A path such as ".../checkpoint-2000" carries the global step after the last "-".
            checkpoint_suffix = args.model_path.split("-")[-1].split("/")[0]
            global_step = int(checkpoint_suffix)
            epochs_trained = global_step // safe_steps_per_epoch
            steps_trained_in_current_epoch = global_step % safe_steps_per_epoch

            logger.info("  Continuing training from checkpoint, will skip to saved global_step")
            logger.info("  Continuing training from epoch %d", epochs_trained)
            logger.info("  Continuing training from global step %d", global_step)
            logger.info("  Will skip the first %d steps in the first epoch", steps_trained_in_current_epoch)
        except ValueError:
            # The suffix is not a number, so this is not a checkpoint directory.
            logger.info("  Starting fine-tuning.")

    model.train()
    model.zero_grad()
    train_iterator = trange(epochs_trained, int(args.num_train_epochs), desc="Epoch")

    tb_writer = SummaryWriter(os.path.join(args.output_dir, 'TB_writer'))
    # Running sum of the (accumulation-scaled) losses since the last train-loss log.
    loss_cum = None
    try:
        for _ in train_iterator:

            epoch_iterator = tqdm(train_dataloader, desc="Iteration", smoothing=0.05)
            for step, batch in enumerate(epoch_iterator):

                # Skip past any already trained steps if resuming training
                if steps_trained_in_current_epoch > 0:
                    steps_trained_in_current_epoch -= 1
                    continue

                inputs = {
                    "input_ids": (batch['source_ids']).to(args.device),
                    "attention_mask": (batch['attention_mask']).to(args.device),
                    "labels": (batch['target_ids']).to(args.device)
                }

                outputs = model(**inputs)
                loss = outputs.loss

                if args.gradient_accumulation_steps > 1:
                    loss = loss / args.gradient_accumulation_steps

                loss.backward()
                if loss_cum is None:
                    loss_cum = loss.detach()
                else:
                    loss_cum += loss.detach()

                if (step + 1) % args.gradient_accumulation_steps == 0:

                    torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)

                    optimizer.step()
                    scheduler.step()  # Update learning rate schedule
                    model.zero_grad()
                    global_step += 1

                    # Log train metrics. The "> 0" test comes first so that a disabled
                    # interval (0) never reaches the modulo.
                    if args.train_logging_steps > 0 and global_step % args.train_logging_steps == 0:
                        train_loss = loss_cum.item() / args.train_logging_steps
                        logger.info("train_loss %lf global step %d", train_loss, global_step)
                        tb_writer.add_scalar('train_loss', train_loss, global_step)

                        loss_cum = None
                    # Log dev metrics
                    if args.dev_logging_steps > 0 and global_step % args.dev_logging_steps == 0 and args.evaluate_during_training:
                        dev_loss = evaluate(args, dev_dataset, model)
                        logger.info("dev_loss %lf %d", dev_loss, global_step)
                        tb_writer.add_scalar("dev_loss", dev_loss, global_step)
                        tb_writer.add_scalar("lr", scheduler.get_last_lr()[0], global_step)

                    # Save model checkpoint
                    if args.save_steps > 0 and global_step % args.save_steps == 0:
                        output_dir = os.path.join(args.output_dir, "checkpoint-{}".format(global_step))
                        logger.info("Saving model checkpoint to %s", output_dir)

                        model.save_pretrained(output_dir)
                        tokenizer.save_pretrained(output_dir)

                        torch.save(args, os.path.join(output_dir, "training_args.bin"))
                        torch.save(optimizer.state_dict(), os.path.join(output_dir, "optimizer.pt"))
                        torch.save(scheduler.state_dict(), os.path.join(output_dir, "scheduler.pt"))

                if args.max_steps > 0 and global_step > args.max_steps:
                    epoch_iterator.close()
                    break
            if args.max_steps > 0 and global_step > args.max_steps:
                train_iterator.close()
                break
    finally:
        # Close the writer exactly once, after the last epoch, on early stop and on error.
        tb_writer.close()


def evaluate(args, dev_dataset, model):
    """Return the mean of the per-batch losses of ``model`` over ``dev_dataset``.

    The dataset is read sequentially in batches of 8 on ``args.device``. The model
    is switched to eval mode for the pass and put back into train mode before
    returning.
    """
    loader = DataLoader(dev_dataset, sampler=SequentialSampler(dev_dataset), batch_size=8)

    model.eval()
    total_loss = None
    batches_seen = 0
    for batch in tqdm(loader, desc="Evaluation", smoothing=0.05):
        batches_seen += 1

        source_ids = batch['source_ids'].to(args.device)
        source_mask = batch['attention_mask'].to(args.device)
        target_ids = batch['target_ids'].to(args.device)

        # No gradients are needed for the dev loss.
        with torch.no_grad():
            batch_loss = model(
                input_ids=source_ids, attention_mask=source_mask, labels=target_ids
            ).loss
            if total_loss is not None:
                total_loss += batch_loss
            else:
                total_loss = batch_loss

    model.train()

    return total_loss.item() / batches_seen

In [None]:
from transformers import T5Config, T5ForConditionalGeneration
# Load the pretrained config, tokenizer and model named in qg_trainer_args.
# config_name / tokenizer_name take precedence; model_path is the fallback when they are empty.
config = T5Config.from_pretrained(
    qg_trainer_args.config_name if qg_trainer_args.config_name else qg_trainer_args.model_path,
    # cache_dir=qg_trainer_args.cache_dir if qg_trainer_args.cache_dir else None,
)
tokenizer = T5Tokenizer.from_pretrained(
    qg_trainer_args.tokenizer_name if qg_trainer_args.tokenizer_name else qg_trainer_args.model_path,
    do_lower_case=qg_trainer_args.do_lower_case
    # cache_dir=qg_trainer_args.cache_dir if qg_trainer_args.cache_dir else None,
)
# Register the "<sep>" marker used in the source texts as an extra token.
tokenizer.add_tokens(['<sep>'])
model = T5ForConditionalGeneration.from_pretrained(
    qg_trainer_args.model_path,
    # Load from TensorFlow weights only when the path names a ".ckpt" file.
    from_tf=bool(".ckpt" in qg_trainer_args.model_path),
    config=config
    # cache_dir=qg_trainer_args.cache_dir if qg_trainer_args.cache_dir else None,
)
# Resize the embedding matrix to the tokenizer's new length so the added token has a row.
model.resize_token_embeddings(len(tokenizer))

Embedding(32101, 768)

In [None]:
train(qg_trainer_args, train_dataset_final, val_dataset_final, model, tokenizer)

INFO:root:***** Running training *****
INFO:root:  Num examples = 71423
INFO:root:  Num Epochs = 3
INFO:root:  Batch size = 16
INFO:root:  Gradient Accumulation steps = 1
INFO:root:  Total optimization steps = 13392


Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

Iteration:   0%|          | 0/4464 [00:00<?, ?it/s]

INFO:root:train_loss 3.036305 global step 2000


Evaluation:   0%|          | 0/918 [00:00<?, ?it/s]

INFO:root:dev_loss 0.460082 2000
INFO:root:Saving model checkpoint to ./QGmodel/checkpoint-2000
INFO:root:train_loss 0.526687 global step 4000


Evaluation:   0%|          | 0/918 [00:00<?, ?it/s]

INFO:root:dev_loss 0.432929 4000
INFO:root:Saving model checkpoint to ./QGmodel/checkpoint-4000


Iteration:   0%|          | 0/4464 [00:00<?, ?it/s]

INFO:root:train_loss 0.482775 global step 6000


Evaluation:   0%|          | 0/918 [00:00<?, ?it/s]

INFO:root:dev_loss 0.426978 6000
INFO:root:Saving model checkpoint to ./QGmodel/checkpoint-6000
INFO:root:train_loss 0.468506 global step 8000


Evaluation:   0%|          | 0/918 [00:00<?, ?it/s]

INFO:root:dev_loss 0.421188 8000
INFO:root:Saving model checkpoint to ./QGmodel/checkpoint-8000


Iteration:   0%|          | 0/4464 [00:00<?, ?it/s]

INFO:root:train_loss 0.449008 global step 10000


Evaluation:   0%|          | 0/918 [00:00<?, ?it/s]

INFO:root:dev_loss 0.418560 10000
INFO:root:Saving model checkpoint to ./QGmodel/checkpoint-10000
INFO:root:train_loss 0.437592 global step 12000


Evaluation:   0%|          | 0/918 [00:00<?, ?it/s]

INFO:root:dev_loss 0.416925 12000
INFO:root:Saving model checkpoint to ./QGmodel/checkpoint-12000


In [None]:
train_texts[9]

'১৯৮৭ সালে, যখন কিছু ছাত্র বিশ্বাস করেছিলেন যে দ্য ওভারভারভার একটি রক্ষণশীল পজিশন প্রদর্শন করতে শুরু করেছিল, একটি লিবারাল পত্রিকা, কমন সান্সেস প্রকাশিত হয়েছিল। <sep> ১৯৮৭'

In [None]:
# Generate five candidate questions for one training passage with beam search.
# Fall back to the CPU when no GPU is present instead of failing on a hard-coded "cuda".
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# train() and evaluate() leave the model in training mode; eval() switches dropout off
# so that decoding is deterministic.
model.eval()
text = train_texts[245]
print(text)
input_ids = tokenizer(text, return_tensors="pt").input_ids
outputs = model.generate(input_ids.to(device),
                        max_length=50,
                        num_beams=5,
                        no_repeat_ngram_size=2,
                        num_return_sequences=5,
                        early_stopping=True)
for output in outputs:
    print(tokenizer.decode(output, skip_special_tokens=True))

টেলিভিশন স্টেশন, এনডিটিভি, ২০০২ সালে একটি শো থেকে ২০০৬ সালের সেপ্টেম্বরের মধ্যে মূল প্রোগ্রামিং সহ একটি পূর্ণ ২৪ ঘন্টা চ্যানেলে পরিণত হয়েছিল। <sep> এনডিটিভি
কোন টেলিভিশন স্টেশন ২০০৬ সালের সেপ্টেম্বরে একটি সম্পূর্ণ ২৪ ঘন্টা প্রোগ্রামিং সহ একটি মূল চ্যানেলে পরিণত হয়েছিল?
কোন টেলিভিশন স্টেশন ২০০৬ সালের সেপ্টেম্বরে পূর্ণ ২৪ ঘন্টা সম্প্রচার শুরু করে?
কোন টেলিভিশন স্টেশন ২০০৬ সালের সেপ্টেম্বরে পূর্ণ ২৪ ঘন্টা সম্প্রচার করে?
কোন টেলিভিশন স্টেশন ২০০৬ সালের সেপ্টেম্বরে একটি সম্পূর্ণ ২৪ ঘন্টা প্রোগ্রামিং সহ একটি মূল চ্যানেল হয়ে ওঠে?
কোন টেলিভিশন স্টেশন ২০০৬ সালের সেপ্টেম্বরে পূর্ণ ২৪ ঘন্টা সম্প্রচার করেছিল?


In [None]:
qg_trainer_args.output_dir

'./QGmodel'

In [None]:
# Persist the final model weights, the tokenizer and the training arguments to output_dir.
model.save_pretrained(qg_trainer_args.output_dir)
tokenizer.save_pretrained(qg_trainer_args.output_dir)
torch.save(qg_trainer_args, os.path.join(qg_trainer_args.output_dir, "training_args.bin"))