In [35]:
!pip install -q bitsandbytes transformers trl peft huggingface_hub

In [3]:
!pip -q install accelerate

In [4]:
import inspect
import os
from pprint import pprint

import pandas as pd
import torch
from datasets import load_dataset, Dataset, DatasetDict
from huggingface_hub import login
from peft import LoraConfig, PeftModel
from transformers import (AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments)
from transformers.trainer_utils import get_last_checkpoint
from trl import SFTTrainer

# Hub identifier of the base checkpoint that gets fine-tuned.
MODEL_NAME = "meta-llama/Llama-2-7b-hf"

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [5]:
# Do not hard-code the Hugging Face access token in the notebook (it would be
# saved and shared with the file). Read it from the HF_TOKEN environment
# variable instead; when the variable is unset, token=None makes login() fall
# back to its interactive prompt.
login(token=os.environ.get("HF_TOKEN"))

In [6]:
# Load the "Cynaptics/persona-chat" dataset through datasets.load_dataset.
# Later cells read its "train" split.
dataset = load_dataset("Cynaptics/persona-chat")

README.md:   0%|          | 0.00/1.34k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/11.8M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/20000 [00:00<?, ? examples/s]

In [7]:
# Hold out 5% of the "train" split for evaluation; the fixed seed keeps the
# shuffle (and therefore the partition) reproducible.
# NOTE: `dataset` is overwritten below, so re-running this cell would split the
# already-reduced train split a second time.
train_test_split = dataset["train"].train_test_split(
    test_size=0.05,
    shuffle=True,
    seed=42,
)

dataset = DatasetDict(
    {split_name: train_test_split[split_name] for split_name in ("train", "test")}
)
dataset

DatasetDict({
    train: Dataset({
        features: ['conv_id', 'persona_b', 'dialogue', 'reference', '__index_level_0__'],
        num_rows: 19000
    })
    test: Dataset({
        features: ['conv_id', 'persona_b', 'dialogue', 'reference', '__index_level_0__'],
        num_rows: 1000
    })
})

In [8]:
def make_data(data_point):
    """Render one record as a single prompt/response training string.

    The record must provide ``persona_b`` and ``dialogue`` (iterables of
    strings) and ``reference`` (a string). The result has three sections:
    a persona header followed by the persona sentences joined with spaces,
    an instruction followed by the dialogue turns joined with newlines, and
    an output header followed by the reference reply.

    The reference reply is followed immediately (no separator) by the
    ``### End`` marker line and the literal ``</s>`` token.

    Returns:
        dict: ``{"text": <rendered string>}``.
    """
    persona_header = "### Person B has the following Persona information.\n"
    # Note: the first two instruction lines end with a space before the newline.
    instruction = (
        "### Instruct: Person A and Person B are now having a conversation. \n"
        "Following the conversation below, write a response that Person B would say based on the above Persona information. \n"
        "Please carefully consider the flow and context of the conversation below, and use Person B's Persona information appropriately to generate a response that you think is the most appropriate reply for Person B.\n"
    )
    output_header = "### Output:\n"
    closing = "### End\n" + "</s>"

    persona_text = " ".join(data_point["persona_b"])
    dialogue_text = "\n".join(data_point["dialogue"])

    pieces = [
        persona_header, persona_text, "\n\n",
        instruction, dialogue_text, "\n\n",
        output_header, data_point["reference"], closing,
    ]
    return {"text": "".join(pieces)}

In [9]:
# Add the rendered "text" column produced by make_data to both splits.
for split_name in ("train", "test"):
    dataset[split_name] = dataset[split_name].map(make_data)

Map:   0%|          | 0/19000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

In [10]:
def create_model_and_tokenizer():
    """Load the 4-bit quantized base model and its tokenizer.

    The model named by ``MODEL_NAME`` is loaded with an NF4 4-bit
    bitsandbytes configuration (float16 compute dtype) and placed
    automatically across the available devices. The tokenizer reuses the
    end-of-sequence token for padding and pads on the right.

    Returns:
        tuple: ``(model, tokenizer)``.
    """
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
    )

    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        use_safetensors=True,
        quantization_config=bnb_config,
        # NOTE(review): trust_remote_code allows code shipped with the
        # checkpoint to run locally; confirm it is actually needed here.
        trust_remote_code=True,
        device_map="auto",
    )

    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    # The tokenizer gets the EOS token as its padding token.
    tokenizer.pad_token = tokenizer.eos_token
    # Fixed: the attribute is `padding_side`; the previous `padding_size`
    # assignment only created an unused attribute, so right-padding was
    # never actually configured.
    tokenizer.padding_side = "right"

    return model, tokenizer

In [11]:
# Build the quantized model and its tokenizer; both are module-level names
# used by the cells below.
model, tokenizer = create_model_and_tokenizer()

config.json:   0%|          | 0.00/609 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/776 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

In [None]:
# Display the quantization settings recorded on the loaded model's config.
model.config.quantization_config.to_dict()

In [None]:
# LoRA hyperparameters: adapter rank, scaling factor and adapter dropout.
lora_r = 16
lora_alpha = 32
lora_dropout = 0.05

# Adapter configuration for causal-language-model fine-tuning; bias terms are
# left untouched (bias="none").
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    bias="none",
)

In [None]:
# Directory passed to TrainingArguments as output_dir.
OUTPUT_DIR = "/kaggle/working/trained"


In [16]:
import os
import shutil

# Where the earlier artifacts are read from and where they are copied to.
source_dir = "/kaggle/input/trained10"
destination_dir = "/kaggle/working/"

# Make sure the target directory exists, then merge the whole source tree into
# it (dirs_exist_ok=True lets the copy proceed when the target already exists).
os.makedirs(os.path.dirname(destination_dir), exist_ok=True)
shutil.copytree(source_dir, destination_dir, dirs_exist_ok=True)

print(f"Entire folder copied from {source_dir} to {destination_dir}")


Entire folder copied from /kaggle/input/trained10 to /kaggle/working/


In [19]:
training_arguments = TrainingArguments(
    # Output, logging and checkpointing
    output_dir=OUTPUT_DIR,
    report_to="tensorboard",
    logging_steps=1,
    save_strategy="epoch",
    save_safetensors=True,
    # Batching: 2 samples per device, accumulated over 4 steps
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    group_by_length=True,
    # Optimisation schedule
    optim="paged_adamw_32bit",
    learning_rate=1e-4,
    lr_scheduler_type="linear",
    warmup_ratio=0.05,
    max_grad_norm=0.4,
    num_train_epochs=4,
    fp16=True,
    # Evaluation every 2000 steps
    eval_strategy="steps",
    eval_steps=2000,
    # Reproducibility
    seed=42,
)

In [20]:
# Hand the tokenizer configured by create_model_and_tokenizer() to the trainer.
# Previously it was never passed, so the pad-token / padding-side settings made
# on `tokenizer` had no effect on training.
# The keyword was renamed from `tokenizer` to `processing_class` in newer TRL
# releases, so use whichever name the installed SFTTrainer accepts.
tokenizer_kwarg = (
    "processing_class"
    if "processing_class" in inspect.signature(SFTTrainer.__init__).parameters
    else "tokenizer"
)

trainer = SFTTrainer(
    model=model,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    peft_config=peft_config,
    args=training_arguments,
    **{tokenizer_kwarg: tokenizer},
)

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

In [None]:
# Resume from the newest checkpoint found in OUTPUT_DIR when there is one
# (e.g. checkpoints copied in from a previous session); otherwise start a
# fresh run. A bare trainer.train() always restarts from step 0 and ignores
# any saved checkpoint.
# NOTE(review): this assumes the copied checkpoints end up under OUTPUT_DIR —
# confirm the layout of the copied folder.
last_checkpoint = get_last_checkpoint(OUTPUT_DIR) if os.path.isdir(OUTPUT_DIR) else None
trainer.train(resume_from_checkpoint=last_checkpoint)

  torch.load(os.path.join(checkpoint, OPTIMIZER_NAME), map_location=map_location)
  checkpoint_rng_state = torch.load(rng_file)


Step,Training Loss,Validation Loss


In [None]:
# Persist the trained model through the trainer. No path is given, so the
# trainer's configured output location is used (presumably OUTPUT_DIR — confirm).
trainer.save_model()