In [None]:
%pip install datasets trl peft bitsandbytes sentencepiece

In [3]:
import os

from huggingface_hub import login

# Never paste an access token into the notebook: it ends up in version control
# and in shared copies. Read it from the HF_TOKEN environment variable instead.
# When the variable is unset, `None` is passed and `login` falls back to its
# own interactive prompt.
login(token=os.environ.get("HF_TOKEN"))

  from .autonotebook import tqdm as notebook_tqdm


The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


### IMPORTS

In [7]:
import os
import gc
import torch

import transformers
from transformers import AutoModelForCausalLM, TrainingArguments, BitsAndBytesConfig, AutoTokenizer, AutoModelForSeq2SeqLM
from datasets import load_dataset
from trl import DPOTrainer
import bitsandbytes as bnb

model_name = "sdevare/qgelm"

### PREPARE DATA

In [8]:
def preprocess_data(data):
    """Reshape one raw ranking record into prompt / chosen / rejected fields.

    The prompt is the record's ``event_text`` and ``question`` joined by
    ``" <SEP> "``. The ``accepted`` field is exposed as ``chosen`` and the
    ``rejected`` field is passed through under the same name.
    """
    prompt_text = data["event_text"] + " <SEP> " + data["question"]

    formatted = {"prompt": prompt_text}
    formatted["chosen"] = data["accepted"]
    formatted["rejected"] = data["rejected"]
    return formatted

# Load dataset
# Requesting split="train" returns the training Dataset directly instead of
# building a DatasetDict and indexing into it afterwards.
dataset = load_dataset("sdevare/qgelm-ranks", split="train")

# Save columns
# The raw column names are dropped once every record has been reformatted.
original_columns = dataset.column_names

# Tokenizer
# NOTE(review): the same tokenizer is loaded again in the model-setup cell, and
# it is not used by the mapping below. The load log reports a T5 tokenizer,
# which presumably ships its own pad token -- confirm that replacing it with
# the EOS token is intended.
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

# Format dataset
# Each record becomes {"prompt", "chosen", "rejected"}; all raw columns are removed.
dataset = dataset.map(
    preprocess_data,
    remove_columns=original_columns,
)

Downloading data: 100%|██████████| 26.1M/26.1M [00:00<00:00, 28.9MB/s]
Generating train split: 100%|██████████| 9463/9463 [00:00<00:00, 25212.42 examples/s]
You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Map: 100%|██████████| 9463/9463 [00:01<00:00, 9013.83 examples/s]


In [9]:
dataset[0]

{'rejected': "''it like an exercise",
 'prompt': 'early-morning traffic jams beginning to develop around spring training camps<TUP> baseball organizations in florida and in arizona<TUP> a new group appeared on the scene<TUP> they nt meeting with players<TUP> they instead interviewing nonuniformed members of teams<TUP> the lawyers nt added to the clubhouse congestion<TUP> ariz. scottsdale on friday<TUP> players met with major league baseball s security traveling team and with representatives of the baseball assistance team<TUP> the meetings scheduled for 8:30<TUP> jim martin executive director of the assistance team<TUP> the same two groups at the diamondbacks  camp<TUP> security arriving to set up their material<TUP> another meeting added in this year<TUP> marvin miller his first spring training as the union executive director in 1967<TUP> don fehr on his 30th tour<TUP> kevin hallinan baseball senior vice president for security<TUP> fehr and his aides provide information the players wa

### SETUP MODELS

In [12]:
# Policy model: the network that is handed to the trainer as `model`.
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
# Disable the generation cache on the model being trained.
# NOTE(review): presumably because gradient checkpointing is enabled in the
# training arguments -- confirm.
model.config.use_cache = False

# Reference model: a second, independent copy of the same checkpoint.
ref_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Tokenizer for the checkpoint, padding with the end-of-sequence token.
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


### TRAINING ARGS

In [19]:
# Hyper-parameters for the DPO run, grouped by concern.
training_args = TrainingArguments(
    # Output location
    output_dir="qgelm_dpo",
    # Batching: 8 samples per device x 4 accumulation steps
    # = 32 samples per optimizer step on each device.
    per_device_train_batch_size=8,
    gradient_accumulation_steps=4,
    gradient_checkpointing=True,
    # Optimisation
    optim="adamw_torch",
    learning_rate=5e-5,
    num_train_epochs=3,
    # Logging: report the loss at every step.
    logging_steps=1,
)


### TRAINER

In [22]:
# Assemble the DPO trainer: policy model, reference model, preference data
# and the tokenizer, with beta set to 0.3.
dpo_trainer = DPOTrainer(
    model=model,
    ref_model=ref_model,
    args=training_args,
    beta=0.3,
    train_dataset=dataset,
    tokenizer=tokenizer,
)

Map: 100%|██████████| 9463/9463 [00:09<00:00, 1037.48 examples/s]
Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


In [23]:
# Fine-tune model with DPO
dpo_trainer.train()

Step,Training Loss
1,0.1358
2,0.1909
3,0.2027
4,0.2991
5,0.2448
6,0.2346
7,0.254
8,0.252
9,0.5503
10,0.3102




TrainOutput(global_step=885, training_loss=0.2960309810052484, metrics={'train_runtime': 779.9776, 'train_samples_per_second': 36.397, 'train_steps_per_second': 1.135, 'total_flos': 0.0, 'train_loss': 0.2960309810052484, 'epoch': 2.992392223161454})

In [25]:
# Write the trained weights and the tokenizer files into the same directory.
save_dir = "qgelm_dpo"
dpo_trainer.model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)

('qgelm_dpo/tokenizer_config.json',
 'qgelm_dpo/special_tokens_map.json',
 'qgelm_dpo/spiece.model',
 'qgelm_dpo/added_tokens.json',
 'qgelm_dpo/tokenizer.json')

In [31]:
# Do not embed an access token in the notebook. Take it from the HF_TOKEN
# environment variable; when it is unset, `None` is passed so the upload uses
# the credentials stored by the earlier `login` call.
hub_token = os.environ.get("HF_TOKEN")

model.push_to_hub("sdevare/qgelm_dpo", use_temp_dir=False, token=hub_token)
tokenizer.push_to_hub("sdevare/qgelm_dpo", use_temp_dir=False, token=hub_token)

model.safetensors: 100%|██████████| 892M/892M [00:21<00:00, 41.5MB/s] 
spiece.model: 100%|██████████| 792k/792k [00:00<00:00, 5.43MB/s]


CommitInfo(commit_url='https://huggingface.co/sdevare/qgelm_dpo/commit/dfdf09a43c62aeba49b9654440364623e3771b51', commit_message='Upload tokenizer', commit_description='', oid='dfdf09a43c62aeba49b9654440364623e3771b51', pr_url=None, pr_revision=None, pr_num=None)

### INFERENCE

In [4]:
# High-level generation helpers: one pipeline per checkpoint so the base and
# the DPO-tuned model can be compared on identical prompts.
from transformers import pipeline

generation_task = "text2text-generation"
pipe = pipeline(task=generation_task, model="sdevare/qgelm")
pipe_dpo = pipeline(task=generation_task, model="sdevare/qgelm_dpo")

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [34]:
pipe("earthquake struck the city <SEP> what did the police do next?")



[{'generated_text': 'the police searched the area for signs of tenseness'}]

In [5]:
# Evaluation prompts as (seed event, question 1, question 2) triples.
# Each seed is paired with both questions when comparing the two models.
prompts = [
("food company neglected regulations", "what did the company do next?", "what happened to the company next?"),
("produced products contaminated pathogen", "what did the company do next?", "what happened to the people next?"),
("people exposed to the contamination", "what did the people do next?", "what happened to the people next?"),
("the health department received reports about the food plant", "what did the health department do next?", "what happened to the food plant next?"),
("researchers studied the origin of the disease", "what did the researchers do next?", "what happened to the researchers next?"),
("people infected with the disease", "what did the people do next?", "what happened to the infected people next?"),
("attacker accessed systems through vulnerability", "what did the attacker do next?", "what happened to the attackers next?"),
("the hacker accessed sensitive data", "what did the hacker do next?", "what happened to the hackers next?"),
("the cyberattack disrupted services", "what did the company do next?", "what happened to the hackers next?"),
("the kidnapper ambushed the target", "what did the kidnapper do next?", "what happened to the target next?"),
("the kidnapper abducted the target", "what did the kidnapper do next?", "what happened to the target next?"),
("the kidnapper held the target in the hideout", "what did the kidnapper do next?", "what happened to the target next?"),
("the kidnapper demanded ransom to release the person", "what did the kidnapper do next?", "what happened to the person next?"),
("the country closed its borders", "what did the country do next?", "what happened to the country next?"),
("country imposed sanctions", "what did the country do next?", "what happened to the country next?"),
("country declared war", "what did the country do next?", "what happened to the country next?"),
("country launched missile strikes", "what did the government do next?", "what happened to the government next?"),
("country agreed to sign the accord", "what did the government do next?", "what happened to the country next?"),
("people joined the terrorist organization", "what did the people do next?", "what happened to the terrorist organization next?"),
("the attacker constructed the explosive device", "what did the attacker do next?", "what happened to the city next?"),
("the attacker detonated the explosive device", "what did the attacker do next?", "what happened after the detonation?"),
("scientists warned public about the disaster", "what did the scientists do next?", "what happened to the public next?"),
("earthquake struck the city", "what did the people do next?", "what happened to the city next?"),
("the heavy flooding disrupted the services", "what did the authorities do next?", "what happened to the services next?"),
("people evacuated from buildings", "what did the people do next?", "what happened to the people next?"),
("earthquake damaged buildings", "what did the authorities do next?", "what happened to the police next?"),
("the attacker started shooting at the crowd", "what did the attacker do next?", "what happened to the crowd next?"),
("police initiated the lockdown after the shooting", "what did the police do next?", "what happened to the shooter next?"),
("police evacuated the surrounding buildings after the shooting", "what did the police do next?", "what happened to the shooter next?"),
("police located the shooter", "what did the police do next?", "what happened to the shooter next?"),
("police investigated the shooting incident", "what did the police do next?", "what happened to the shooter next?"),
("the company went bankrupt", "what did the company do next?", "what happened to the CEO next?"),
("the agency investigated the finances", "what did the agency do next?", "what happened to the accountant next?"),
("the company audited the financial books", "what did the company do next?", "what happened to the accountant next?"),
("the company suspected of embezzlement", "what did the authorities do next?", "what happened to the company next?")
]


In [22]:
import pandas as pd

In [23]:
df = pd.DataFrame(prompts, columns=['seed', 'q1', 'q2'])

In [24]:
df.head()

Unnamed: 0,seed,q1,q2
0,food company neglected regulations,what did the company do next?,what happened to the company next?
1,produced products contaminated pathogen,what did the company do next?,what happened to the people next?
2,people exposed to the contamination,what did the people do next?,what happened to the people next?
3,the health department received reports about t...,what did the health department do next?,what happened to the food plant next?
4,researchers studied the origin of the disease,what did the researchers do next?,what happened to the researchers next?


In [26]:
def runner(data, question, max_new_tokens=200):
    """Generate an answer to one question with both the base and DPO pipelines.

    Args:
        data: Mapping-like row (for example a DataFrame row) holding a
            ``'seed'`` entry and an entry named by ``question``.
        question: Key of the question text to pair with the seed.
        max_new_tokens: Generation budget forwarded to both pipelines.
            Defaults to 200, the value that was previously hard-coded.

    Returns:
        A ``(base_answer, dpo_answer)`` tuple: the ``'generated_text'`` of the
        first result returned by ``pipe`` and by ``pipe_dpo`` respectively.
    """
    # Same "<seed> <SEP> <question>" layout used for the training prompts.
    input_prompt = data['seed'] + " <SEP> " + data[question]

    # Both models receive the identical prompt and budget so the outputs are comparable.
    qgelm = pipe(input_prompt, max_new_tokens=max_new_tokens)
    qgelm_dpo = pipe_dpo(input_prompt, max_new_tokens=max_new_tokens)
    return (qgelm[0]['generated_text'], qgelm_dpo[0]['generated_text'])

# For each question column, run every row through both models and store the
# two answer streams in their own columns (answer<N>_qgelm / answer<N>_qgelm_dpo).
for answer_idx, question_col in enumerate(("q1", "q2"), start=1):
    base_answers, dpo_answers = zip(*df.apply(runner, question=question_col, axis=1))
    df[f"answer{answer_idx}_qgelm"] = base_answers
    df[f"answer{answer_idx}_qgelm_dpo"] = dpo_answers

In [27]:
df.head()

Unnamed: 0,seed,q1,q2,answer1_qgelm,answer1_qgelm_dpo,answer2_qgelm,answer2_qgelm_dpo
0,food company neglected regulations,what did the company do next?,what happened to the company next?,the company a spokesman for the f.d.a.,the company also failed to comply with the reg...,"the company 's chief executive , dr. edward j....",the company to impose new regulations
1,produced products contaminated pathogen,what did the company do next?,what happened to the people next?,the company a subsidiary of a chinese company,the company also announced plans to expand its...,the contaminated products contaminated the liv...,the agency 's decision to investigate the deat...
2,people exposed to the contamination,what did the people do next?,what happened to the people next?,they re not going to a job,they re exposed to the contamination,the people exposed to the contamination,the agency urged people to take precautions
3,the health department received reports about t...,what did the health department do next?,what happened to the food plant next?,the department received a letter from a spokesman,the department received reports,the plant contaminated with a bacterium,the food plant to tested for h.i.v.
4,researchers studied the origin of the disease,what did the researchers do next?,what happened to the researchers next?,they found no evidence,they found no evidence linking the disease to ...,the researchers contacted by the f.d.a.,the researchers to conduct a retrospective stu...


In [28]:
df.to_csv('output.csv')