In [1]:
from accelerate.utils import BnbQuantizationConfig
from accelerate import Accelerator, notebook_launcher
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, \
                        get_cosine_schedule_with_warmup, set_seed
import transformers
import optimum

from datasets import load_dataset,Dataset
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, \
                        get_cosine_schedule_with_warmup, set_seed
from peft import LoraConfig, TaskType, get_peft_model
from accelerate import Accelerator, notebook_launcher
from accelerate.utils import set_seed
import logging
from torch.utils.data import DataLoader
from torch.optim import AdamW, SGD
from tqdm.notebook import tqdm
import torch
from torch.nn.utils.rnn import pad_sequence
import glob
from collections import OrderedDict
import re

import os

In [2]:
import datetime
start_time = datetime.datetime.now()

In [3]:
def truncate_txt(text, length):
    text_list = text.split()
    
    if len(text_list) <= length:
        return text
    
    return " ".join(text_list[:length])


def gen_prompt(og_text, rewritten_text):
    
    # Truncate the texts to first 200 words for now
    # As we are having memory issues on Mixtral8x7b
    og_text = truncate_txt(og_text, 170)
    rewritten_text = truncate_txt(rewritten_text, 170)
    
    return f"""    
    
    You are given 2 essays, the Rewritten text was created from the Original text using the google Gemma model.Analyzing the changes in style, theme, etc., please come up with a prompt that must have been used to guide the transformation from the original to the rewritten text. Start directly with the prompt.
    
    Original Text:
    \"""{og_text}\"""
    
    Rewritten Text:
    \"""{rewritten_text}\"""
    
    Prompt:
    """.strip()

In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split

filename1 = "./input/gemma-rewrite-nbroad/nbroad-v1.csv"
filename2 = "./input/gemma-rewrite-nbroad/nbroad-v2.csv"
df = pd.concat([pd.read_csv(filename1), pd.read_csv(filename2)]).reset_index(drop=True)

df['reverse_prompt'] = df.apply(lambda x: gen_prompt(x.original_text, x.rewritten_text), axis=1)
df = df.iloc[:6]

data = Dataset.from_pandas(df[['reverse_prompt', 'rewrite_prompt']],split='train')

In [5]:
# MODEL_PATH = "/kaggle/input/gemma/transformers/7b-it/2"
# MODEL_PATH = "distilbert/distilroberta-base"
MODEL_PATH = "distilbert/distilgpt2"
# MODEL_PATH = "gpt2"
# MODEL_PATH = 'mistralai/Mistral-7B-Instruct-v0.2'
# MODEL_PATH = "google/gemma-2b-it"
# MODEL_PATH = "/kaggle/input/mistral/pytorch/7b-instruct-v0.1-hf/1"
# MODEL_PATH = "/kaggle/input/mixtral/pytorch/8x7b-instruct-v0.1-hf/1"
# MODEL_PATH = "/kaggle/input/llama-2/pytorch/7b-chat-hf/1"
# MODEL_PATH = "/kaggle/input/llama-2/pytorch/13b-chat-hf/1"

tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
def tokenize_samples(samples):
    inputs = tokenizer(samples["reverse_prompt"], max_length=512, truncation=True)
    targets = tokenizer(samples["rewrite_prompt"], max_length=512, truncation=True)
    return {'input_ids': inputs['input_ids'], 'attention_mask': inputs['attention_mask'], 'labels': targets['input_ids']}
    
data = data.map(tokenize_samples, batched=True)

Map:   0%|          | 0/6 [00:00<?, ? examples/s]

In [6]:
# tokenizer.model_max_length = 512

In [7]:
def collate_fn(batch):
    inputs = [torch.tensor(b['input_ids']) for b in batch]
    labels = [torch.tensor(b['labels']) for b in batch]

    max_length = max(len(input_) for input_ in inputs)

    input_ids = pad_sequence(inputs, batch_first=True, padding_value=tokenizer.pad_token_id)

    padded_labels = []
    for label in labels:
        padded_label = torch.full((max_length,), fill_value=-100, dtype=torch.long)
        padded_label[:len(label)] = label
        padded_labels.append(padded_label)
    padded_labels = torch.stack(padded_labels)

    attention_mask = torch.ones(input_ids.shape, dtype=torch.long)
    attention_mask[input_ids == tokenizer.pad_token_id] = 0

    return {'input_ids': input_ids, 'attention_mask': attention_mask, 'labels': padded_labels}


In [8]:
print(collate_fn)

<function collate_fn at 0x733b20b4bf40>


In [9]:
# import torch
# from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, AutoConfig


# # Found a good blog to catch me up fast!
# # https://huggingface.co/blog/4bit-transformers-bitsandbytes
# # https://huggingface.co/docs/transformers/v4.38.1/en/quantization#compute-data-type
# quantization_config = BitsAndBytesConfig(
#     load_in_4bit = True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_compute_dtype=torch.bfloat16,
#     bnb_4bit_use_double_quant=True,
# )



# model = AutoModelForCausalLM.from_pretrained(
#     MODEL_PATH,
#     device_map = "auto",
#     trust_remote_code = True,
#     quantization_config=quantization_config,
# )

# # model = model.to_bettertransformer()
# model = accelerator.prepare(model)

In [10]:
def del_past_models(save_path, file_exten='pth'):
    """
    Remove all of the past models
    
    You can change the file_extension if you save the models with other file_extension
    """
    past_models = glob.glob(os.path.join(save_path, '*.' + file_exten))
    for past_model in past_models:
        os.remove(past_model)
        logger.info(f'Remove model {past_model}!')
        
def save_checkpoint(path, model, optim, sched, epoch, iters):
    lr = optim.param_groups[0]['lr']
    # model_state = model.state_dict()
    model_state = OrderedDict((name, param) for name, param in model.named_parameters() \
                    if param.requires_grad)
    
    checkpoint_path = os.path.join(path, f'checkpoint_{iters + 1}.pth')
    new_checkpoint_path = os.path.join(path, f'checkpoint.pth')
    logger.info(f"Model of epoch {epoch} saved at checkpoint_{iters + 1}.pth, lr={lr:.3e}")
    
    torch.save({
        'model': model_state,
        'optimizer': optim.state_dict(),
        'scheduler': sched.state_dict(), 
        'epoch': epoch
    }, checkpoint_path)
    
    torch.save({
        'model': model_state,
        'optimizer': optim.state_dict(),
        'scheduler': sched.state_dict(), 
        'epoch': epoch
    }, new_checkpoint_path)

    
def load_checkpoint(checkpoint_path, model, optim=None, sched=None):
    checkpoint = torch.load(checkpoint_path)
    logger.info(f"Model of epoch {checkpoint['epoch']} is loaded")
    
    model.load_state_dict(checkpoint['model'], strict=False)
    if optim is not None and sched is not None:
        optim.load_state_dict(checkpoint['optimizer'])
        sched.load_state_dict(checkpoint['scheduler'])
        return model, optim, sched, checkpoint['epoch']
    else:
        return model, checkpoint['epoch']

In [11]:
def train_epoch(epoch, model, accelerator, 
                train_dataloader, checkpointing_steps, 
                optimizer, lr_scheduler, save_path):
    global overall_step
    model.train()
    epoch_loss = []
    pbar = tqdm(train_dataloader)
    grad = torch.tensor(0.0)
    
    output_dir = f"step_{overall_step}.pth"
    unwrapped_model = accelerator.unwrap_model(model)
    save_checkpoint(save_path, unwrapped_model, optimizer, lr_scheduler, 
                   epoch, overall_step)
    
    for step, batch in enumerate(pbar):
        if (datetime.datetime.now() - start_time) > datetime.timedelta(hours=6):
            break
        with accelerator.accumulate(model):
            # Gradient accumulation
            # with accelerator.autocast():
            outputs = model(**batch)
            loss = outputs.loss
            accelerator.backward(loss)
            if accelerator.sync_gradients:
                grad = accelerator.clip_grad_norm_(parameters=model.parameters(), 
                                                   max_norm=2.0)
            optimizer.step()
            lr_scheduler.step()
            optimizer.zero_grad()

        overall_step += 1
        lr = optimizer.param_groups[0]['lr']
        
        pbar.set_description(
            f'Epoch {epoch}: loss = {loss.item(): .3f}, grad = {grad.item(): .3f}, lr = {lr: .3e}')
        
        with torch.no_grad():
            avg_loss = accelerator.gather(loss.repeat(len(batch))).mean()
        epoch_loss.append(avg_loss.item() / accelerator.gradient_accumulation_steps)
        
        accelerator.wait_for_everyone()
        if overall_step % checkpointing_steps == 0:
            logger.info(
                f'Epoch {epoch}: loss = {loss.item(): .3f}, grad = {grad.item(): .3f}, lr = {lr: .3e}')
            if accelerator.is_local_main_process:
                # Clear all of the current models
                del_past_models(save_path)

                output_dir = f"step_{overall_step}.pth"
                unwrapped_model = accelerator.unwrap_model(model)
                save_checkpoint(save_path, unwrapped_model, optimizer, lr_scheduler, 
                               epoch, overall_step)
    
    # Just log the loss of the main process
    if len(epoch_loss) > 0:
        logger.info(f'Epoch {epoch}: loss = {sum(epoch_loss) / (len(epoch_loss)+1e-10): .3f}, lr = {lr: .3e}')
    
            
def main(batch_size: int, num_epochs: int, lr: float, grad_accumulation_steps: int, 
         checkpointing_steps: int, save_path: str, ckpt_path: str,
         num_warmup_steps: int=0, r: int=4, lora_alpha: int=32, lora_dropout: float=0.1):
    set_seed(1234)
    
    accelerator = Accelerator(gradient_accumulation_steps=grad_accumulation_steps)
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True, 
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_use_double_quant=True,
    )
    
    model = AutoModelForCausalLM.from_pretrained(MODEL_PATH, 
                                                 quantization_config=quantization_config, 
                                                 torch_dtype=torch.float16)
    model.save_pretrained(save_path)
    tokenizer.save_pretrained(save_path)
    
    
    accelerator.print(model)
    
    # Instantiate dataloaders. (We do not split the test data)
    train_dataloader = DataLoader(
        data, shuffle=True, collate_fn=collate_fn, batch_size=batch_size
    )
    
    peft_config = LoraConfig(task_type=TaskType.CAUSAL_LM, 
                             inference_mode=False, r=r, 
                             lora_alpha=lora_alpha, lora_dropout=lora_dropout,
                             target_modules=
                            #  ["q_proj", "o_proj", "k_proj", "v_proj", "gate_proj", "up_proj", "down_proj"],
                            ['c_attn', 'c_proj', 'c_fc'],
                            )
    model = get_peft_model(model, peft_config)
    
    if accelerator.is_local_main_process:
        model.print_trainable_parameters()
    
    lr = lr * accelerator.num_processes * accelerator.gradient_accumulation_steps
    
    optimizer = AdamW(params=model.parameters(), lr=lr)
    
    total_steps = len(train_dataloader) * num_epochs * \
            accelerator.num_processes * accelerator.gradient_accumulation_steps
    
    # Instantiate scheduler
    lr_scheduler = get_cosine_schedule_with_warmup(
        optimizer=optimizer,
        num_warmup_steps=num_warmup_steps,
        # to ensure the lr will not become zero at the end of training
        # (You can adjust this param)
        num_training_steps=total_steps * 1.1,
    )
    
    current_epochs = 0
    global overall_step
    overall_step = 0
    
    # Load checkpoint
    print(ckpt_path)
    if os.path.isfile(ckpt_path):
        print('loading')
        model, optimizer, lr_scheduler, current_epochs = \
                    load_checkpoint(ckpt_path, model, optimizer, lr_scheduler)
        overall_step = int(re.search('(\d)+', ckpt_path).group())
        logger.info(
            f'Checkpoint {ckpt_path} loaded at epoch {current_epochs}, the training will resume from epoch {current_epochs + 1}!')
        current_epochs += 1
    
    
    if current_epochs >= num_epochs:
        raise ValueError('The num_epochs should be larger than the saved epochs!!')
    
    # Prepare everything
    # There is no specific order to remember, 
    # we just need to unpack the objects in the same order we gave them to the prepare method.
    model, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(
        model, optimizer, train_dataloader, lr_scheduler
    )
    
    
    logger.info('*********************** Start training! **************************')
    
    for epoch in range(current_epochs, num_epochs):
        train_epoch(epoch, model, accelerator, train_dataloader, 
                    checkpointing_steps, optimizer, lr_scheduler, save_path)
        
    # Save the final model
    accelerator.wait_for_everyone()
    if accelerator.is_local_main_process:
        output_dir = f"step_{overall_step}.pth"
        unwrapped_model = accelerator.unwrap_model(model)
        save_checkpoint(save_path, unwrapped_model, optimizer, lr_scheduler, 
                       epoch, overall_step)

In [12]:
if tokenizer.pad_token_id is None:
    tokenizer.pad_token = tokenizer.eos_token
print(tokenizer.pad_token_id)

50256


In [13]:
train_dataloader = DataLoader(
    data, shuffle=True, collate_fn=collate_fn, batch_size=2
)

In [14]:
ckpt_path = './working/trained_models/%s/checkpoint.pth' % MODEL_PATH

In [15]:
lora_target_modules_dict = {
  'gpt2': ['c_attn'],
  'distilbert/distilgpt2': ['c_attn'],
  'distilbert/distilroberta-base': ['query', 'key', 'value'],
}
import json
os.makedirs('./settings', exist_ok=True)
json.dump(lora_target_modules_dict, open('./settings/lora_target_modules.json', 'w'))

In [16]:
quantization_config = BitsAndBytesConfig(
  # load_in_4bit=True, 
  # bnb_4bit_quant_type="nf4",
  # bnb_4bit_compute_dtype=torch.bfloat16,
  # bnb_4bit_use_double_quant=True,
)

model = AutoModelForCausalLM.from_pretrained(MODEL_PATH, 
                                            quantization_config=quantization_config, 
                                            torch_dtype=torch.float16)

peft_config = LoraConfig(task_type=TaskType.CAUSAL_LM, 
                          inference_mode=False, r=64, 
                          lora_alpha=64, lora_dropout=0.1,
                          target_modules=
                         lora_target_modules_dict.get(MODEL_PATH, ["q_proj", "o_proj", "k_proj", "v_proj", "gate_proj", "up_proj", "down_proj"]),
                        )
model = get_peft_model(model, peft_config)

logger=logging.getLogger(__name__)

# model, _ = load_checkpoint(ckpt_path, model)

`low_cpu_mem_usage` was None, now set to True since model is quantized.


In [17]:
batch = list(iter(train_dataloader))[1]

model.eval()
model.to('cuda')
with torch.no_grad():
  output = model(**batch)



In [18]:
output.loss

tensor(11.8203, dtype=torch.float16)

In [19]:
prompt = tokenizer.decode(batch['input_ids'][0])
print(prompt)

You are given 2 essays, the Rewritten text was created from the Original text using the google Gemma model.Analyzing the changes in style, theme, etc., please come up with a prompt that must have been used to guide the transformation from the original to the rewritten text. Start directly with the prompt.
    
    Original Text:
    """I can hardly read the letter, because the hand holding it is shaking so bad. My hand. There's nothing wrong with me, I'm just nervous. I'm actually a pretty normal guy, painfully normal. But that's all about to change. *Dear Mr. Thompson, we are quite happy to inform you that we have successfully received and processed your requests, as well as your official test results. It is with great pleasure that I can relay to you, on behalf of The United States Department Of Supernatural Distribution, that you are officially licensed to generation and application of banishment, disappearance, and-* The sturdy stationery and pompous print fade from view. Everythin

In [20]:
from transformers import pipeline, Conversation
prompt = tokenizer.decode(batch['input_ids'][0])

pp = pipeline(task='conversational', model=model, tokenizer=tokenizer)

pp(Conversation(prompt))


No chat template is defined for this tokenizer - using the default template for the GPT2TokenizerFast class. If the default is not appropriate for your model, please set `tokenizer.chat_template` to an appropriate template. See https://huggingface.co/docs/transformers/main/chat_templating for more information.

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Conversation id: c28e9313-3e3c-4235-819d-a84cf4c66104
user: You are given 2 essays, the Rewritten text was created from the Original text using the google Gemma model.Analyzing the changes in style, theme, etc., please come up with a prompt that must have been used to guide the transformation from the original to the rewritten text. Start directly with the prompt.
    
    Original Text:
    """I can hardly read the letter, because the hand holding it is shaking so bad. My hand. There's nothing wrong with me, I'm just nervous. I'm actually a pretty normal guy, painfully normal. But that's all about to change. *Dear Mr. Thompson, we are quite happy to inform you that we have successfully received and processed your requests, as well as your official test results. It is with great pleasure that I can relay to you, on behalf of The United States Department Of Supernatural Distribution, that you are officially licensed to generation and application of banishment, disappearance, and-* The s

In [28]:
prompt = tokenizer.decode(batch['input_ids'][0])
messages = [
    {
        "role": "user",
        "content": prompt
    }
]
encoded_input = tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True).to('cuda:0')
with torch.no_grad():
    encoded_output = model.generate(encoded_input, max_new_tokens=128, do_sample=True, pad_token_id=tokenizer.eos_token_id, num_beams=2, top_k=20)
    decoded_output = tokenizer.decode(encoded_output[0], skip_special_tokens=True).replace(prompt, '').replace("[INST]", "").replace("[/INST]", "").strip()

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


In [29]:
tokenizer.decode(encoded_input[0])

'You are given 2 essays, the Rewritten text was created from the Original text using the google Gemma model.Analyzing the changes in style, theme, etc., please come up with a prompt that must have been used to guide the transformation from the original to the rewritten text. Start directly with the prompt.\n    \n    Original Text:\n    """I can hardly read the letter, because the hand holding it is shaking so bad. My hand. There\'s nothing wrong with me, I\'m just nervous. I\'m actually a pretty normal guy, painfully normal. But that\'s all about to change. *Dear Mr. Thompson, we are quite happy to inform you that we have successfully received and processed your requests, as well as your official test results. It is with great pleasure that I can relay to you, on behalf of The United States Department Of Supernatural Distribution, that you are officially licensed to generation and application of banishment, disappearance, and-* The sturdy stationery and pompous print fade from view. E

In [30]:
decoded_output

'A new version of the original version of the book is available on Amazon.com/Amazon.com/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon/Amazon'

: 

In [24]:

tokenizer.decode(encoded_input[0])

'You are given 2 essays, the Rewritten text was created from the Original text using the google Gemma model.Analyzing the changes in style, theme, etc., please come up with a prompt that must have been used to guide the transformation from the original to the rewritten text. Start directly with the prompt.\n    \n    Original Text:\n    """I can hardly read the letter, because the hand holding it is shaking so bad. My hand. There\'s nothing wrong with me, I\'m just nervous. I\'m actually a pretty normal guy, painfully normal. But that\'s all about to change. *Dear Mr. Thompson, we are quite happy to inform you that we have successfully received and processed your requests, as well as your official test results. It is with great pleasure that I can relay to you, on behalf of The United States Department Of Supernatural Distribution, that you are officially licensed to generation and application of banishment, disappearance, and-* The sturdy stationery and pompous print fade from view. E

In [25]:
tokenizer.all_special_tokens_extended

['<|endoftext|>']

In [26]:
tokenizer.decode(encoded_output[0]).replace(prompt, '')

'ured up the magic of the word "banishment, disappearance, and transmigration" – powers hitherto unimaginable. The words "banishment, disappearance, transmigration, and transmigration" echoed in my mind like a siren\'s song, promising me the power to manipulate reality itself. The paper burned my eyes, and my nose twitched in anticipation of the intoxicating scent of power that I was about to wield. I read the code, my voice cracking with emotion, and the air crackled with anticipation. The paper burned my eyes, and my nose twitched in anticipation of the intoxicating scent of power that I was about to'

In [27]:
import time
time.sleep(100)

KeyboardInterrupt: 

In [None]:
try:
  del model
except: pass
torch.cuda.empty_cache()

In [None]:
import logging
import os
os.makedirs('./logs/', exist_ok=True)
logging.basicConfig(
  level=logging.INFO,
  filename='./logs/log.txt', filemode='a',
  datefmt='%H:%M:%S',
  format='%(asctime)s - %(levelname)s - %(message)s',
)

logger = logging.getLogger(__name__)
# Stream handler, logging to the stream
logger.info("abc")

In [None]:
import os

batch_size = 3
grad_accumulation_steps = 64
num_epochs = 3
lr = 5e-5
checkpointing_steps = 500
save_path = os.path.join('./working/trained_models/', MODEL_PATH)

# If ckpt_path is a real path (os.path.isfile(ckpt_path) is True),
# then the checkpoint will be loaded
ckpt_path = os.path.join(save_path, 'checkpoint.pth')

args = (batch_size, num_epochs, lr, grad_accumulation_steps, 
        checkpointing_steps, save_path, ckpt_path)

if not os.path.exists(save_path):
    os.makedirs(save_path, exist_ok=True)
# notebook_launcher(main, args, num_processes=1)
main(*args)

`low_cpu_mem_usage` was None, now set to True since model is quantized.


GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Linear4bit(in_features=768, out_features=2304, bias=True)
          (c_proj): Linear4bit(in_features=768, out_features=768, bias=True)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Linear4bit(in_features=768, out_features=3072, bias=True)
          (c_proj): Linear4bit(in_features=3072, out_features=768, bias=True)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affin

  0%|          | 0/2 [00:00<?, ?it/s]

[2024-03-09 19:24:21,431] [INFO] [real_accelerator.py:191:get_accelerator] Setting ds_accelerator to cuda (auto detect)


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
tokenizer.save_pretrained(save_path)

('./working/trained_models/gpt2/tokenizer_config.json',
 './working/trained_models/gpt2/special_tokens_map.json',
 './working/trained_models/gpt2/vocab.json',
 './working/trained_models/gpt2/merges.txt',
 './working/trained_models/gpt2/added_tokens.json',
 './working/trained_models/gpt2/tokenizer.json')

In [None]:
try:
    del model, optimizer, train_dataloader, lr_scheduler
except Exception:
    pass

In [None]:
torch.cuda.empty_cache()

In [None]:
import time
time.sleep(1000)

# Sub

In [None]:
import pandas as pd
from tqdm import tqdm

DEBUG = True

TEST_DF_FILE = './input/llm-prompt-recovery/test.csv'
SUB_DF_FILE = './input/llm-prompt-recovery/sample_submission.csv'
NROWS = None if DEBUG else None

if DEBUG:
    TEST_DF_FILE = './input/gemma-rewrite-nbroad/nbroad-v1.csv'
    SUB_DF_FILE = TEST_DF_FILE

tdf = pd.read_csv(TEST_DF_FILE, nrows=NROWS, usecols=['id', 'original_text', 'rewritten_text'])
sub = pd.read_csv(SUB_DF_FILE, nrows=NROWS, usecols=['id', 'rewrite_prompt'])

In [None]:
tdf = tdf.iloc[:2]
sub = sub.iloc[:2]

In [None]:
tdf

Unnamed: 0,id,original_text,rewritten_text
0,LNpAovroGe,"This quilt, that my mother made, \n \n Still m...","The softest brown and brightest blue quilt, cr..."
1,nnuxwwThWi,It's the job of our agency to keep track of th...,The agency's responsibility is to track and co...


In [None]:
sub

Unnamed: 0,id,rewrite_prompt
0,LNpAovroGe,Regency Romance: Model the text on a Regency r...
1,nnuxwwThWi,Write like Ernest Hemingway: Focus on Hemingwa...


In [None]:
import gc
import re

device = 'cuda'
tdf['id'] = sub['id'].copy()

pbar = tqdm(total=tdf.shape[0])

it = iter(tdf.iterrows())
idx, row = next(it, (None, None))

# https://www.kaggle.com/competitions/llm-prompt-recovery/discussion/481116
DEFAULT_TEXT = 'Please improve the following text, maintaining the original meaning but altering the tone, diction, and stylistic elements to match the new style.Enhance the clarity, elegance, and impact of the following text by adopting the writing style of , ensuring the core message remains intact while transforming the tone, word choice, and stylistic features to align with the specified style.' 

res = []
model = None
while idx is not None:
    
    if (datetime.datetime.now() - start_time) > datetime.timedelta(hours=8, minutes=30):
        res.append([row["id"], DEFAULT_TEXT])
        idx, row = next(it, (None, None))
        pbar.update(1)
        continue
        
    torch.cuda.empty_cache()
    gc.collect()
        
    try:
        if model is None:
            ckpt_path = './working/trained_models/%s/checkpoint.pth' % MODEL_PATH
            quantization_config = BitsAndBytesConfig(
                load_in_4bit=True, 
                bnb_4bit_quant_type="nf4",
                bnb_4bit_compute_dtype=torch.bfloat16,
                bnb_4bit_use_double_quant=True,
            )
            model = AutoModelForCausalLM.from_pretrained(MODEL_PATH, 
                                                         quantization_config=quantization_config, 
                                                         torch_dtype=torch.float16)
            checkpoint = torch.load(ckpt_path)

            model.load_state_dict(checkpoint['model'], strict=False)
            print(f"Model of epoch {checkpoint['epoch']} is loaded")
        
        
        model.eval()    
            
            
        messages = [
            {
                "role": "user",
                "content": gen_prompt(row["original_text"], row["rewritten_text"])
            }
        ]
        encoded_input = tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True).to(device)
        
        with torch.no_grad():
            encoded_output = model.generate(encoded_input, max_new_tokens=50, do_sample=True, pad_token_id=tokenizer.eos_token_id)
        
        decoded_output = tokenizer.batch_decode(encoded_output, skip_special_tokens=True)[0]
        decoded_output = result = re.sub(r"[\s\S]*\[\/INST\]", '', decoded_output, 1)
                
        res.append([row["id"], decoded_output])
                            
    except Exception as e:
        print(f"ERROR: {e}")
        res.append([row["id"], DEFAULT_TEXT])
        
    finally:
        idx, row = next(it, (None, None))
        pbar.update(1)

        
pbar.close()

  0%|          | 0/2 [00:00<?, ?it/s]`low_cpu_mem_usage` was None, now set to True since model is quantized.

No chat template is defined for this tokenizer - using the default template for the GPT2TokenizerFast class. If the default is not appropriate for your model, please set `tokenizer.chat_template` to an appropriate template. See https://huggingface.co/docs/transformers/main/chat_templating for more information.

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Model of epoch 2 is loaded


 50%|█████     | 1/2 [00:02<00:02,  2.59s/it]A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
100%|██████████| 2/2 [00:03<00:00,  1.69s/it]


In [None]:
# sub["rewrite_prompt"] = tdf['rewrite_prompt'].copy()
# sub.to_csv("submission.csv", index=False)
sub = pd.DataFrame(res, columns=['id', 'rewrite_prompt'])

sub.to_csv("sample_submission.csv", index=False)
sub.to_csv("submission.csv", index=False)

In [None]:
sub

In [None]:
res

[['LNpAovroGe',
  'Original Text:\n    """This quilt, that my mother made, \n \n Still makes me think to this day. \n \n It\'s softest brown, and brightest blue, \n \n The curved stitch here, reads `` made it May\'\'. \n \n It\'s hard to see, but believe me it\'s true, \n \n That\'s not just a cloth but a piece of shirt. \n \n You can see a logo here, and right there, \n \n And a signature over there, someone named `` Bert\'\'. \n \n This is my favorite part, a piece from a stuffed bear. \n \n I think it was my mother\'s favorite too, \n \n She always said so at least. \n \n Something from when she was two, \n \n Given by her grandad for Thanksgiving feast. \n \n My dad added this, a little button pin, \n \n Something from his mother, for being a scout. \n \n Apparently she went to a store and fished in a bin, \n \n Until night that day, to teach him what love was about. \n \n I\'m sorry you had to see this, \n \n but their funeral was delayed. \n \n \n \n\n"""\n    \n    Rewritten Tex