In [1]:
from transformers import AutoTokenizer
import random
import torch

# Load the GPT-2 tokenizer from the local 'tokenizer/gpt2' directory.
tokenizer = AutoTokenizer.from_pretrained('tokenizer/gpt2')
# Use token id 0 for padding; the repr printed by this cell reports it as
# pad_token '!'.
tokenizer.pad_token_id = 0

# Bare last expression so the notebook displays the tokenizer configuration.
tokenizer

  from .autonotebook import tqdm as notebook_tqdm
Using sep_token, but it is not set yet.
Using cls_token, but it is not set yet.
Using mask_token, but it is not set yet.


GPT2TokenizerFast(name_or_path='tokenizer/gpt2', vocab_size=50257, model_max_length=1024, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<|endoftext|>', 'eos_token': '<|endoftext|>', 'unk_token': '<|endoftext|>', 'pad_token': '!'}, clean_up_tokenization_spaces=True),  added_tokens_decoder={
	50256: AddedToken("<|endoftext|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
}

In [2]:
from datasets import load_from_disk

# Read the dataset saved under the local 'dataset/' directory and keep only
# its 'train' split.
dataset = load_from_disk('dataset/b-mc2/sql-create-context')['train']


def f(data):
    """Build the model prompt for one record.

    Args:
        data: Mapping with 'context', 'question' and 'answer' entries.

    Returns:
        A dict whose 'question' entry is the combined
        "context:... question:... answer:" prompt text and whose 'answer'
        entry is the record's answer, passed through unchanged.
    """
    template = 'context:%s question:%s answer:'
    fields = (data['context'], data['question'])
    return {'question': template % fields, 'answer': data['answer']}


# Apply the prompt builder to every row; f returns the 'question' and
# 'answer' values, and the 'context' column is removed afterwards.
dataset = dataset.map(f, remove_columns=['context'])


def f(data):
    """Decide whether a record's token counts are within the accepted windows.

    Both texts are encoded with the notebook-level ``tokenizer``. A record
    passes when its 'question' has between 25 and 65 tokens and its 'answer'
    has between 10 and 35 tokens (bounds inclusive).

    Args:
        data: Mapping with 'question' and 'answer' text entries.

    Returns:
        True if both lengths are inside their windows, otherwise False.
    """
    n_prompt = len(tokenizer.encode(data['question']))
    n_answer = len(tokenizer.encode(data['answer']))

    prompt_ok = 25 <= n_prompt <= 65
    answer_ok = 10 <= n_answer <= 35
    return prompt_ok and answer_ok


# Keep only the rows for which the token-count check in f returns True.
dataset = dataset.filter(f)


def f(data):
    """Re-key one record into the prompt/chosen/rejected layout.

    Args:
        data: Mapping with 'question' and 'answer' entries.

    Returns:
        A dict with 'prompt' set to the question, 'chosen' set to the answer
        and 'rejected' set to the empty string.
    """
    record = {'prompt': data['question']}
    record['chosen'] = data['answer']
    record['rejected'] = ''
    return record


# Produce the 'prompt'/'chosen'/'rejected' columns and drop the old
# 'question'/'answer' columns.
dataset = dataset.map(f, remove_columns=['question', 'answer'])
# Hold out 200 rows as the test split.
# NOTE(review): no seed is passed, so the split presumably differs between
# runs — confirm and pin a seed if reproducibility matters.
dataset = dataset.train_test_split(test_size=200)

# Display the resulting splits together with the first training example.
dataset, dataset['train'][0]

(DatasetDict({
     train: Dataset({
         features: ['prompt', 'chosen', 'rejected'],
         num_rows: 71745
     })
     test: Dataset({
         features: ['prompt', 'chosen', 'rejected'],
         num_rows: 200
     })
 }),
 {'prompt': 'context:CREATE TABLE TV_series (SHARE INTEGER) question:What is minimum and maximum share of TV series? answer:',
  'chosen': 'SELECT MAX(SHARE), MIN(SHARE) FROM TV_series',
  'rejected': ''})

In [3]:
from transformers import AutoModelForCausalLM

# Two separate instances loaded from the same local checkpoint and moved to
# the GPU: model_dpo is passed first to DPOTrainer and is the model that
# generate() decodes with; model_dpo_ref is passed as the trainer's second
# positional argument.
model_dpo = AutoModelForCausalLM.from_pretrained('model/gpt2').to('cuda')
model_dpo_ref = AutoModelForCausalLM.from_pretrained('model/gpt2').to('cuda')

In [4]:
import torch
import random


@torch.no_grad()
def generate(input_ids, max_new_tokens=35, model=None, eos_token_id=None):
    """Extend a single prompt token by token until EOS or a length cap.

    The model is switched to eval mode for the duration of the call and its
    previous mode is restored afterwards, so calling this in the middle of
    training (e.g. from a trainer callback) does not decode with dropout
    enabled and does not leave the model in a different mode.

    Args:
        input_ids: Integer tensor of shape (1, prompt_len) with the prompt
            token ids. Only row 0 is inspected, so the batch size must be 1.
        max_new_tokens: Maximum number of tokens to append. Defaults to 35.
        model: Causal language model to decode with. It is called as
            ``model(input_ids=...)`` and its output is indexed with
            ``['logits']``. Defaults to the notebook-level ``model_dpo``.
        eos_token_id: Token id that stops generation once produced.
            Defaults to ``tokenizer.eos_token_id``.

    Returns:
        Tensor of shape (1, prompt_len + n_generated) containing the prompt
        followed by the generated tokens, on the same device as ``input_ids``.
    """
    if model is None:
        model = model_dpo
    if eos_token_id is None:
        eos_token_id = tokenizer.eos_token_id

    # Remember the current mode so it can be restored even if decoding fails.
    was_training = model.training
    model.eval()
    try:
        for _ in range(max_new_tokens):
            out = model(input_ids=input_ids)
            # With k=1 only the highest-scoring token survives, so the
            # weighted draw below always returns that token (greedy decoding).
            topk = out['logits'][0, -1].topk(1)

            values = topk.values.softmax(0).tolist()
            indices = topk.indices.tolist()
            next_word = random.choices(indices, weights=values)

            # Build the (1, 1) token tensor on the prompt's device instead of
            # assuming CUDA.
            next_word = torch.tensor([next_word],
                                     dtype=torch.long,
                                     device=input_ids.device)
            input_ids = torch.cat([input_ids, next_word], dim=1)

            if input_ids[0, -1].item() == eos_token_id:
                break
    finally:
        model.train(was_training)

    return input_ids


# Try generate() on one hand-written prompt before any DPO training is run.
# The name input_ids first holds the prompt text and is then rebound to the
# encoded token tensor.
input_ids = 'context:CREATE TABLE department (num_employees INTEGER, ranking INTEGER) question:What is the average number of employees of the departments whose rank is between 10 and 15? answer:'
input_ids = tokenizer.encode(input_ids, return_tensors='pt').to('cuda')

out = generate(input_ids)

# Decode row 0 of the result (prompt plus continuation) for display.
tokenizer.decode(out[0])

'context:CREATE TABLE department (num_employees INTEGER, ranking INTEGER) question:What is the average number of employees of the departments whose rank is between 10 and 15? answer:10\n\nThe answer is:\n\nThe answer is:\n\nThe answer is:\n\nThe answer is:\n\nThe answer is:\n\nThe answer'

In [5]:
from transformers import TrainingArguments, TrainerCallback
from trl import DPOTrainer
import random

# Trainer configuration: 2000 optimisation steps with 16 examples per device,
# RMSprop at a learning rate of 1e-5, and evaluation, checkpoint saving and
# external reporting all switched off.
args = TrainingArguments(per_device_train_batch_size=16,
                         max_steps=2000,
                         learning_rate=1e-5,
                         evaluation_strategy='no',
                         optim='rmsprop',
                         report_to='none',
                         save_strategy='no',
                         output_dir='output_dir')


class MyCallback(TrainerCallback):
    """Trainer callback that periodically prints a sample generation.

    Every ``every_n_steps`` steps it draws a random example from
    ``dataset['test']``, lets ``generate`` continue its prompt and prints the
    decoded result followed by the reference answer.
    """

    def __init__(self, every_n_steps=100):
        """Store the reporting interval.

        Args:
            every_n_steps: Print a sample whenever the global step is a
                multiple of this value. Defaults to 100.
        """
        super().__init__()
        self.every_n_steps = every_n_steps

    def on_step_end(self, args, state, control, **kwargs):
        """Print a sample generation when the global step hits the interval.

        Args:
            args: Training arguments supplied by the trainer (unused).
            state: Trainer state; only ``state.global_step`` is read.
            control: Trainer control object (unused, not modified).
            **kwargs: Extra objects supplied by the trainer (unused).
        """
        if state.global_step % self.every_n_steps != 0:
            return

        print(state.global_step)

        data = random.choice(dataset['test'])
        # Place the prompt on the policy model's device rather than assuming
        # CUDA is the right target.
        input_ids = tokenizer.encode(
            data['prompt'], return_tensors='pt').to(model_dpo.device)

        out = generate(input_ids)

        print(tokenizer.decode(out[0]))
        print('=================')
        print(data['chosen'])
        print('=================')


# DPO setup: model_dpo is the first positional argument and model_dpo_ref the
# second (the reference model in trl's DPOTrainer signature — confirm against
# the installed trl version). MyCallback prints sample generations during
# training.
# NOTE(review): all three length limits are 100, while the earlier filter
# kept prompts of at most 65 tokens and answers of at most 35, so presumably
# nothing is truncated — confirm.
trainer = DPOTrainer(model_dpo,
                     model_dpo_ref,
                     args=args,
                     beta=0.1,
                     train_dataset=dataset['train'],
                     tokenizer=tokenizer,
                     max_length=100,
                     max_target_length=100,
                     max_prompt_length=100,
                     callbacks=[MyCallback()])

# Run the training loop; the cell displays the returned training summary.
trainer.train()

Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss
500,0.0018
1000,0.0004
1500,0.0005
2000,0.0003


100
context:CREATE TABLE table_25330991_3 (james_e_holmes VARCHAR, reidsville VARCHAR) question:Name the james e. holmes for erselle young answer:SELECT name FROM table_25330991_3 WHERE reidsville = "young"<|endoftext|>
SELECT james_e_holmes FROM table_25330991_3 WHERE reidsville = "Erselle Young"
200
context:CREATE TABLE table_name_8 (weekly_rank VARCHAR, official_ratings__millions_ VARCHAR, show VARCHAR) question:Which Weekly Rank for a Live Final Show has an Official Ratings (millions) greater than 5.2? answer:SELECT weekly_rank FROM table_name_8 WHERE official_ratings__millions_ = 5.2 AND show = "Live Final Show" AND show = "
SELECT weekly_rank FROM table_name_8 WHERE official_ratings__millions_ > 5.2 AND show = "live final"
300
context:CREATE TABLE table_11173827_1 (english_title VARCHAR, finale VARCHAR, peak VARCHAR) question:What is the english title that has finale as 33 and peak as 42? answer:SELECT title FROM table_11173827_1 WHERE finale = 33 AND peak = 42<|endoftext|>
SELEC

TrainOutput(global_step=2000, training_loss=0.0007831706330180168, metrics={'train_runtime': 1160.3168, 'train_samples_per_second': 27.579, 'train_steps_per_second': 1.724, 'total_flos': 0.0, 'train_loss': 0.0007831706330180168, 'epoch': 0.45})