In [1]:
import os

# Process-wide environment for this notebook: both cache variables point at the
# same directory, and the W&B notebook name labels the run.
# These are set ahead of the transformers / wandb imports that follow,
# presumably so the libraries see them when they are first imported.
os.environ.update({
    'TRANSFORMERS_CACHE': '/home/smoeding2/caches/',
    'XDG_CACHE_HOME': '/home/smoeding2/caches/',
    'WANDB_NOTEBOOK_NAME': 'trained_on_chunk',
})
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig,HfArgumentParser,TrainingArguments,pipeline, logging
import bitsandbytes
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training, get_peft_model
import os, torch, wandb
import datasets
from trl import SFTTrainer



In [2]:
# Identifier handed to `from_pretrained` for both the model and the tokenizer.
# NOTE(review): despite the name, this looks like a local checkpoint directory
# produced by an earlier training run rather than a hub model id — confirm.
base_model = "saved_models/trained_on_chunk"

In [3]:
# Load the checkpoint referenced by `base_model` (Mistral 7B according to the
# original note) with 4-bit bitsandbytes quantization.
# Only `load_in_4bit` is set: quant type, compute dtype and double quantization
# are not passed, so the library defaults apply.
bnb_config = BitsAndBytesConfig(load_in_4bit=True)

# NOTE(review): `trust_remote_code=True` lets the checkpoint supply its own
# Python code; keep it only for checkpoints you trust.
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True,
)

# Training runs with gradient checkpointing, which is incompatible with the
# KV cache: leaving it on makes the library print
# "`use_cache=True` is incompatible with gradient checkpointing" and switch it
# off anyway. Disable it explicitly here; the notebook's last cell sets
# `use_cache = True` again for inference.
model.config.use_cache = False

# Tokenizer comes from the same checkpoint as the model.
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)

# Reuse EOS as the padding token, pad on the right, and have the tokenizer
# append EOS to every encoded sequence.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'right'
tokenizer.add_eos_token = True

# Cell output: confirm both special-token flags.
(tokenizer.add_bos_token, tokenizer.add_eos_token)

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

  return self.fget.__get__(instance, owner)()


(True, True)

In [4]:
# Prepare the quantized model for k-bit training, then wrap it with LoRA adapters.
# Note: this cell rebinds `model`, so re-running it wraps the model again.
model = prepare_model_for_kbit_training(model)

# `target_modules` is deliberately not passed, so PEFT's own choice for this
# architecture is used.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=64,
    lora_alpha=16,
    lora_dropout=0.4,
    bias="none",
)

model = get_peft_model(model, peft_config)

In [5]:
# Trainer hyper-parameters, grouped by concern.
training_arguments = TrainingArguments(
    # Run length: one epoch; max_steps=-1 places no explicit step cap.
    num_train_epochs=1,
    max_steps=-1,
    # Batching: 10 sequences per device x 10 accumulation steps per optimizer step.
    per_device_train_batch_size=10,
    gradient_accumulation_steps=10,
    gradient_checkpointing=True,
    bf16=False,
    # Optimization schedule.
    learning_rate=2.0e-5,
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,
    # Logging: every step, including the first, reported to Weights & Biases.
    report_to="wandb",
    logging_steps=1,
    logging_first_step=True,
    # Checkpointing.
    output_dir="./results",
    save_steps=1000,
    save_total_limit=10,
    # No evaluation during this run.
    do_eval=False,
    evaluation_strategy="no",
)

In [6]:
import datasets

# Read the instruction-tuning corpus with the plain "text" loader; the summary
# printed below shows a single `text` feature.
ds = datasets.load_dataset(
    "text",
    data_dir="../data/instruct_dataset",
    split="train",
)

# Cell output: dataset summary (features and row count).
ds



Dataset({
    features: ['text'],
    num_rows: 12467
})

In [33]:
# Spot-check a single raw training record (index 4) to see the prompt/answer layout.
ds[4]

{'text': 'Du bist ein hilfreicher Assistent, der alle Fragen so korrekt wie möglich beantwortet. <BENUTZER>: Es wird eine Frage gestellt, die zwischen den TAGS BEGINFRAGE und ENDFRAGE steht. Die Antwortmöglichkeiten stehen jeweils zwischen den Tags BEGINANTWORT und ENDANTWORT. BEGINFALL <INSERTFALL> ENDFALL BEGINFRAGE Welche Veränderung gegenüber dem Zustand vor der Schwangerschaft tritt im mütterlichen Organismus als typische Anpassung an eine bestehende Schwangerschaft auf? ENDFRAGE BEGINANTWORT  verminderte glomeruläre Filtrationsrate ENDANTWORT BEGINANTWORT  verminderte renale Glucoseausscheidung ENDANTWORT BEGINANTWORT  verminderter arterieller CO2-Partialdruck ENDANTWORT BEGINANTWORT  vermindertes Blutvolumen ENDANTWORT BEGINANTWORT  vermindertes Herzzeitvolumen in Ruhe ENDANTWORT. Beantworte die Frage, indem du die korrekte Antwortmöglichkeit wiedergibst. <ASSISTENT>: BEGINANTWORT 2) verminderter arterieller CO2-Partialdruck ENDANTWORT'}

In [8]:
# Build the supervised fine-tuning trainer.
trainer = SFTTrainer(
    # What to train and how.
    model=model,
    peft_config=peft_config,
    args=training_arguments,
    # Data: the raw `text` column, packed into sequences of up to 1024 tokens.
    train_dataset=ds,
    dataset_text_field="text",
    tokenizer=tokenizer,
    packing=True,
    max_seq_length=1024,
)

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


In [None]:
# Start fine-tuning with the trainer built above; metrics go to W&B (report_to="wandb").
trainer.train()

[34m[1mwandb[0m: Currently logged in as: [33mjmettner[0m ([33mgmllm[0m). Use [1m`wandb login --relogin`[0m to force relogin


You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


Epoch,Training Loss,Validation Loss


In [11]:
# Save the result under a new directory, separate from the `base_model` path it was loaded from.
# NOTE(review): the model is PEFT-wrapped, so this presumably writes only the
# LoRA adapter weights — confirm before loading it elsewhere.
trainer.save_model("saved_models/trained_on_chunk_instruct")

In [None]:

# Training is done: turn the KV cache back on for generation and close the W&B run.
model.config.use_cache = True
wandb.finish()

# Switch to evaluation mode; as the last expression this also displays the model.
model.eval()