## Dataset preparation

In [1]:
## loading env variables
%env HF_HOME=/hpcwork/ba214121/.cache/huggingface
%env TRANFORMERS_CACHE=/hpcwork/ba214121/.cache/huggingface
%env HF_DATASETS_CACHE=/hpcwork/ba214121/.cache/huggingface


!echo $HF_DATASETS_CACHE

env: HF_HOME=/hpcwork/ba214121/.cache/huggingface
env: TRANFORMERS_CACHE=/hpcwork/ba214121/.cache/huggingface
env: HF_DATASETS_CACHE=/hpcwork/ba214121/.cache/huggingface
/hpcwork/ba214121/.cache/huggingface


In [2]:
!lm-eval tasks

zsh:1: command not found: lm-eval


In [2]:
!huggingface-cli whoami

ruggsea
[1morgs: [0m zero-gpu-explorers,mii-community


In [3]:
## getting dataset from HF
!pip install --upgrade -q datasets transformers huggingface_hub trl accelerate bitsandbytes peft wandb

# 
import os

from datasets import load_dataset

phil_enc_dataset= load_dataset("ruggsea/stanford-encyclopedia-of-philosophy_instruct")



In [4]:
!echo $HF_DATASETS_CACHE

/work/ba214121/.cache/huggingface


In [5]:
phil_enc_dataset["train"]

Dataset({
    features: ['question', 'answer'],
    num_rows: 11904
})

In [6]:
phil_enc_dataset["train"][0]

{'question': 'What is the nature of abductive reasoning, and how does it differ from deductive and inductive reasoning?',
 'answer': 'You happen to know that Tim and Harry have recently had a terrible row that ended their friendship. Now someone tells you that she just saw Tim and Harry jogging together. The best explanation for this that you can think of is that they made up. You conclude that they are friends again.\n One morning you enter the kitchen to find a plate and cup on the table, with breadcrumbs and a pat of butter on it, and surrounded by a jar of jam, a pack of sugar, and an empty carton of milk. You conclude that one of your house-mates got up at night to make him- or herself a midnight snack and was too tired to clear the table. This, you think, best explains the scene you are facing. To be sure, it might be that someone burgled the house and took the time to have a bite while on the job, or a house-mate might have arranged the things on the table without having a midni

In [7]:
## putting the dataset in the right format
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from trl import setup_chat_format
import torch 

# System turn prepended to every conversation.
# NOTE(review): "not to obscure" is probably meant to be "not too obscure"; the text is left
# untouched here because it is part of the training data.
system_prompt = (
    "You are an expert and informative yet accessible Philosophy university professor. "
    "Students will pose you philosophical questions, answer them in a correct and rigorous but not to obscure way."
)


def turn_dataset_into_chat(sample):
    """Convert one question/answer row into a system/user/assistant conversation.

    Args:
        sample: mapping with a "question" and an "answer" entry.

    Returns:
        A dict with a single "messages" key holding three turns, in order:
        the system prompt, the question (user) and the answer (assistant).
    """
    turns = (
        ("system", system_prompt),
        ("user", sample["question"]),
        ("assistant", sample["answer"]),
    )
    return {"messages": [{"role": speaker, "content": text} for speaker, text in turns]}

# Base checkpoint that gets fine-tuned.
model_id="meta-llama/Meta-Llama-3-8B"

# Replace the raw question/answer columns with a single `messages` column.
# `column_names` is the explicit list of names that `remove_columns` expects
# (the Features mapping only worked because iterating it yields its keys).
chat_dataset=phil_enc_dataset["train"].map(
    turn_dataset_into_chat,
    remove_columns=phil_enc_dataset["train"].column_names,
)

## model loading

# 4-bit NF4 quantization (QLoRA-style). `load_in_4bit=True` is the switch that
# enables 4-bit loading; the `bnb_4bit_*` options only configure it.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)



# Load the base checkpoint, letting accelerate place it on the available devices.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    quantization_config=bnb_config
)

# Load the tokenizer of the base checkpoint, then let TRL's `setup_chat_format`
# hand back a model/tokenizer pair prepared for chat-style templating.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model, tokenizer = setup_chat_format(model, tokenizer)


def apply_template(sample):
    """Render a sample's `messages` into one untokenized string stored under "text"."""
    rendered = tokenizer.apply_chat_template(sample["messages"], tokenize=False)
    return {"text": rendered}


# Adds the rendered `text` column next to the existing `messages` column.
chat_dataset = chat_dataset.map(apply_template)



Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [8]:
chat_dataset[0]

{'messages': [{'content': 'You are an expert and informative yet accessible Philosophy university professor. Students will pose you philosophical questions, answer them in a correct and rigorous but not to obscure way.',
   'role': 'system'},
  {'content': 'What is the nature of abductive reasoning, and how does it differ from deductive and inductive reasoning?',
   'role': 'user'},
  {'content': 'You happen to know that Tim and Harry have recently had a terrible row that ended their friendship. Now someone tells you that she just saw Tim and Harry jogging together. The best explanation for this that you can think of is that they made up. You conclude that they are friends again.\n One morning you enter the kitchen to find a plate and cup on the table, with breadcrumbs and a pat of butter on it, and surrounded by a jar of jam, a pack of sugar, and an empty carton of milk. You conclude that one of your house-mates got up at night to make him- or herself a midnight snack and was too tire

In [9]:
chat_dataset

Dataset({
    features: ['messages', 'text'],
    num_rows: 11904
})

In [10]:
# Index the row first: `chat_dataset["messages"]` reads the whole column just to show one entry.
chat_dataset[0]["messages"]

[{'content': 'You are an expert and informative yet accessible Philosophy university professor. Students will pose you philosophical questions, answer them in a correct and rigorous but not to obscure way.',
  'role': 'system'},
 {'content': 'What is the nature of abductive reasoning, and how does it differ from deductive and inductive reasoning?',
  'role': 'user'},
 {'content': 'You happen to know that Tim and Harry have recently had a terrible row that ended their friendship. Now someone tells you that she just saw Tim and Harry jogging together. The best explanation for this that you can think of is that they made up. You conclude that they are friends again.\n One morning you enter the kitchen to find a plate and cup on the table, with breadcrumbs and a pat of butter on it, and surrounded by a jar of jam, a pack of sugar, and an empty carton of milk. You conclude that one of your house-mates got up at night to make him- or herself a midnight snack and was too tired to clear the ta

In [11]:
# 80/20 split with a fixed seed so the same rows land in train/test on every run.
# NOTE: this rebinds `chat_dataset` from the full dataset to the split result, so the cell is
# not re-runnable on its own; re-run the preparation cell first.
chat_dataset=chat_dataset.train_test_split(test_size=0.20, seed=42)

In [12]:
print(chat_dataset["train"][345]["messages"])

[{'content': 'You are an expert and informative yet accessible Philosophy university professor. Students will pose you philosophical questions, answer them in a correct and rigorous but not to obscure way.', 'role': 'system'}, {'content': "What are the implications of applying Kuhn's and Lakatos' schema to the structure of economics, and how does this comparison shed light on the global strategy and drawbacks of mainstream theoretical models in economics?", 'role': 'user'}, {'content': '\n In the wake of the work of  Kuhn   and Lakatos , philosophers are much more aware of and interested in the larger theoretical structures that unify and guide research within particular research traditions. Since many theoretical projects or approaches in economics are systematically unified, they pose questions about what guides research, and many economists have applied the work of Kuhn or Lakatos in the attempt to shed light on the overall structure of economics . Whether these applications have be

In [13]:
chat_dataset["test"][213]["text"]

'<|im_start|>system\nYou are an expert and informative yet accessible Philosophy university professor. Students will pose you philosophical questions, answer them in a correct and rigorous but not to obscure way.<|im_end|>\n<|im_start|>user\nWhat is the mechanism by which a competent language-user is able to decode noise-utterances and derive language-based knowledge, according to the Nyāya philosophical tradition?<|im_end|>\n<|im_start|>assistant\n\n\n Just as perception and inference are described, in the pramāṇa system, as knowledge-yielding faculties, so too is language. This leads the Nyāya to formulate a description of the mechanism by which a competent language-user is able to decode noise-utterances and derive language-based knowledge. The description The Manual of Reason gives of this ‘language processing mechanism’ is as follows (TS 67, 68). The input to the process is the hearer’s auditory perception of a spoken utterance qua uninterpreted noise, and this is identified as th

## Finetuning 

In [14]:
# checking
tokenizer.chat_template

"{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}"

In [15]:
tokenizer.eos_token

'<|im_end|>'

In [16]:
model.config.eos_token_id

128257

In [17]:
# Decode the model's configured EOS id (rather than a hard-coded number) to check it maps to the tokenizer's EOS token.
tokenizer.decode([model.config.eos_token_id])

'<|im_end|>'

In [18]:
from peft import LoraConfig
from trl import setup_chat_format

# LoRA adapter settings (values follow the QLoRA paper & Sebastian Raschka's experiment).
# NOTE(review): the tokenizer step warns that newly added special tokens need trained
# embeddings; this config sets no `modules_to_save`, so confirm whether the embedding /
# output layers should also be trained.
peft_config = LoraConfig(
        task_type="CAUSAL_LM",
        target_modules="all-linear",  # adapt every linear layer
        r=256,                        # adapter rank
        lora_alpha=128,               # scaling numerator
        lora_dropout=0.05,
        bias="none",                  # bias terms are not trained
)

In [19]:
from transformers import TrainingArguments

# Hyper-parameters for the fine-tuning run; handed to the SFTTrainer as `args`.
args = TrainingArguments(
    output_dir="Llama3-stanford-encyclopedia-philosophy-QA", # local directory for checkpoints and the saved model
    num_train_epochs=3,                     # number of training epochs
    per_device_train_batch_size=8,          # batch size per device during training
    gradient_accumulation_steps=4,          # micro-batches accumulated before each optimizer update
    gradient_checkpointing=True,            # use gradient checkpointing to save memory
    optim="adamw_torch_fused",              # use fused adamw optimizer
    logging_steps=5,                      # log every 5 steps
    save_strategy="epoch",                  # save checkpoint every epoch
    learning_rate=2e-4,                     # learning rate, based on QLoRA paper
    bf16=True,                              # use bfloat16 precision
    max_grad_norm=0.3,                      # max gradient norm based on QLoRA paper
    warmup_ratio=0.03,                      # warmup ratio based on QLoRA paper
    lr_scheduler_type="constant",           # constant learning rate; NOTE(review): the plain "constant" schedule likely ignores warmup_ratio ("constant_with_warmup" applies it) - confirm intent
    push_to_hub=False,                       # do not push to the hub during training
    report_to="wandb",                # report metrics to Weights & Biases
    eval_steps=15,                          # evaluate every 15 steps
    evaluation_strategy="steps"             # run evaluation on a step schedule (see eval_steps)
)

In [20]:
import os

# Process-wide switches read by the tokenizers library and by Weights & Biases.
os.environ.update(
    {
        "TOKENIZERS_PARALLELISM": "false",
        "WANDB_NOTEBOOK_NAME": "finetuning_llama.ipynb",
        "WANDB_PROJECT": "LLama3-phil",
        "WANDB_LOG_MODEL": "checkpoint",
    }
)


In [21]:
from trl import SFTTrainer

# Maximum sequence length for the model and for packing the dataset.
max_seq_length = 4096

# The `text` column already contains the fully templated conversation, so the trainer
# is told not to add special tokens or an extra separator of its own.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    peft_config=peft_config,
    args=args,
    train_dataset=chat_dataset["train"],
    eval_dataset=chat_dataset["test"],
    dataset_text_field="text",
    packing=True,
    max_seq_length=max_seq_length,
    dataset_kwargs=dict(
        add_special_tokens=False,   # the template already carries the special tokens
        append_concat_token=False,  # no additional separator token between samples
    ),
)

Generating train split: 0 examples [00:00, ? examples/s]

Generating train split: 0 examples [00:00, ? examples/s]

Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


In [22]:
# Start training. The TrainingArguments set push_to_hub=False, so nothing is uploaded
# to the hub from this cell; checkpoints are saved once per epoch (save_strategy="epoch").
trainer.train()

# Save the final model; this line is only reached if training finishes without interruption.
trainer.save_model()

[34m[1mwandb[0m: Currently logged in as: [33mruggsea[0m ([33mruggero[0m). Use [1m`wandb login --relogin`[0m to force relogin


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Step,Training Loss,Validation Loss
15,2.0187,1.995621


KeyboardInterrupt: 

In [29]:
import gc

from peft import PeftModel

# Flush memory: drop the training objects (if this kernel still holds them) so the
# memory they pin can be reclaimed before the base model is loaded again.
# `pop` rather than `del` keeps the cell runnable on a fresh kernel where they do not exist.
for _stale_name in ("trainer", "model"):
    globals().pop(_stale_name, None)
gc.collect()
torch.cuda.empty_cache()

base_model="meta-llama/Meta-Llama-3-8B"
new_model="ruggsea/Llama3-stanford-encyclopedia-philosophy-QA"


# Local directory the trainer wrote the LoRA adapter to.
adapter="./Llama3-stanford-encyclopedia-philosophy-QA/"
tokenizer = AutoTokenizer.from_pretrained(base_model)
# Reload the base checkpoint unquantized so the adapter weights can be merged into it.
# NOTE(review): training ran in bfloat16 while the merge uses float16 - confirm this is intended.
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map="auto",
)
# Apply the same chat-format setup as during training so model and tokenizer match the adapter.
model, tokenizer = setup_chat_format(model, tokenizer)

# Merge adapter with base model
model = PeftModel.from_pretrained(model, adapter)
model = model.merge_and_unload()

# Upload the merged weights and the tokenizer to the hub repository.
model.push_to_hub(new_model, use_temp_dir=False)
tokenizer.push_to_hub(new_model, use_temp_dir=False)

Error in callback <bound method _WandbInit._resume_backend of <wandb.sdk.wandb_init._WandbInit object at 0x153619f03eb0>> (for pre_run_cell), with arguments args (<ExecutionInfo object at 15335012e160, raw_cell="import gc

from peft import PeftModel

# Flush mem.." store_history=True silent=False shell_futures=True cell_id=9125381e-c54c-493d-b589-38245e1b4d14>,),kwargs {}:


BrokenPipeError: [Errno 32] Broken pipe

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Upload 4 LFS files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/ruggsea/Llama3-stanford-encyclopedia-philosophy-QA/commit/4146ec5a97cadc56a4e306490cbe9aa824012bdd', commit_message='Upload tokenizer', commit_description='', oid='4146ec5a97cadc56a4e306490cbe9aa824012bdd', pr_url=None, pr_revision=None, pr_num=None)

Error in callback <bound method _WandbInit._pause_backend of <wandb.sdk.wandb_init._WandbInit object at 0x153619f03eb0>> (for post_run_cell), with arguments args (<ExecutionResult object at 15335012edc0, execution_count=29 error_before_exec=None error_in_exec=None info=<ExecutionInfo object at 15335012e160, raw_cell="import gc

from peft import PeftModel

# Flush mem.." store_history=True silent=False shell_futures=True cell_id=9125381e-c54c-493d-b589-38245e1b4d14> result=CommitInfo(commit_url='https://huggingface.co/ruggsea/Llama3-stanford-encyclopedia-philosophy-QA/commit/4146ec5a97cadc56a4e306490cbe9aa824012bdd', commit_message='Upload tokenizer', commit_description='', oid='4146ec5a97cadc56a4e306490cbe9aa824012bdd', pr_url=None, pr_revision=None, pr_num=None)>,),kwargs {}:


BrokenPipeError: [Errno 32] Broken pipe

In [None]:
## merging adapter and pushing it to the hub if not already
#!python merge_peft.py --base_model=meta-llama/Meta-Llama-3-8B --peft_model=./Llama3-stanford-encyclopedia-philosophy-QA --hub_id=Llama3-stanford-encyclopedia-philosophy-QA