In [None]:
!pip install -U datasets accelerate peft trl bitsandbytes wandb --progress-bar off

In [None]:
!pip install transformers==4.38.0

In [2]:
import gc
import os
from trl import SFTTrainer
import torch
import wandb
from datasets import load_dataset
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
)

# Model
base_model = "dicta-il/dictalm2.0-instruct"
new_model = "dictalm2.0-instruct-fine-tuned"

# Set torch dtype and attention implementation
if torch.cuda.get_device_capability()[0] >= 8:
    !pip install -qqq flash-attn
    torch_dtype = torch.bfloat16
    attn_implementation = "flash_attention_2"
else:
    torch_dtype = torch.float16
    attn_implementation = "eager"

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Quantization settings for QLoRA: 4-bit NF4 weights with double quantization;
# the compute dtype follows the `torch_dtype` selected for this GPU.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch_dtype,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    load_in_4bit=True,
)

# LoRA adapter settings covering the attention and MLP projection layers.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    bias="none",
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=[
        "up_proj",
        "down_proj",
        "gate_proj",
        "k_proj",
        "q_proj",
        "v_proj",
        "o_proj",
    ],
)

# Tokenizer of the base checkpoint.
tokenizer = AutoTokenizer.from_pretrained(base_model)

# Base model, loaded with the quantization config above and the attention
# implementation picked for this GPU, then prepared for k-bit training.
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    attn_implementation=attn_implementation,
    device_map="auto",
    quantization_config=bnb_config,
)
model = prepare_model_for_kbit_training(model)

Loading checkpoint shards: 100%|██████████| 3/3 [00:05<00:00,  1.86s/it]


In [4]:
# Pad on the right-hand side and reuse the end-of-sequence token as the
# padding token.
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

In [5]:
from datasets import load_dataset

# Load the raw training text from output.txt through the "text" loader.
dataset = load_dataset('text', data_files='output.txt', split="train")
# Hold out 10% of the examples as a test split. The seed is fixed so that the
# same train/test partition is produced on every run (Restart & Run All).
dataset = dataset.train_test_split(test_size=0.1, seed=42)

In [None]:
# LoRA settings used for this training run. These are passed to the trainer
# below; the `peft_config` object defined in an earlier cell is not used here.
lora_args = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=8,
    bias="none",
    task_type="CAUSAL_LM"
)

# Trainer hyper-parameters. Checkpoints are written to ./results_modified
# every 20 steps and the loss is logged every 25 steps.
train_params = TrainingArguments(
        output_dir="./results_modified",
        num_train_epochs=1,
        per_device_train_batch_size=8,
        gradient_accumulation_steps=2,
        optim="paged_adamw_32bit",
        save_steps=20,
        logging_steps=25,
        learning_rate=2e-4,
        weight_decay=0.01,
        # Mixed-precision mode follows the dtype selected for this GPU instead
        # of being hardcoded to fp16, so it agrees with the quantization
        # compute dtype (bfloat16 on capability >= 8 devices, float16 otherwise).
        fp16=(torch_dtype == torch.float16),
        bf16=(torch_dtype == torch.bfloat16),
        max_grad_norm=1,
        max_steps=-1,  # -1: the run length is governed by num_train_epochs
        # NOTE(review): warmup_ratio is set, but the scheduler is "constant";
        # presumably "constant_with_warmup" is needed for the warmup to take
        # effect -- confirm against the transformers docs before changing.
        warmup_ratio=0.03,
        group_by_length=True,
        lr_scheduler_type="constant",
)

# Supervised fine-tuning on the "text" column of the dataset.
# NOTE(review): an eval_dataset is supplied but no evaluation schedule is
# configured in `train_params` -- verify whether evaluation is expected to run.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=train_params,
    train_dataset=dataset['train'],
    eval_dataset=dataset['test'],
    dataset_text_field='text',
    # Reuse the config defined above rather than duplicating it inline.
    peft_config=lora_args,
)
trainer.train()
# Save the trained adapter under the `new_model` directory.
trainer.save_model(new_model)

Map: 100%|██████████| 43502/43502 [00:04<00:00, 9152.78 examples/s] 
Map: 100%|██████████| 4834/4834 [00:00<00:00, 8834.21 examples/s] 
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 4.14.336, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mronigoldsmid[0m ([33mronigold[0m). Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss
25,1.0982
50,1.5228
75,1.0328
100,1.4218
125,1.0601
150,1.2898
175,0.9694


Checkpoint destination directory ./results_modified/checkpoint-20 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results_modified/checkpoint-40 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results_modified/checkpoint-60 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results_modified/checkpoint-80 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results_modified/checkpoint-100 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results_modified/checkpoint-120 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results_modified/checkpoint-140 already exist

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import torch
import gc

# Clearing up memory.
# The garbage collector cannot reclaim the quantized training model while the
# notebook still holds references to it, so drop those names first. `pop` with
# a default keeps the cell safe to re-run when the names are already gone.
for _stale_name in ("trainer", "model"):
    globals().pop(_stale_name, None)
gc.collect()
torch.cuda.empty_cache()

# Reload the tokenizer and the base model, this time without quantization and
# in float16, so the adapter can be attached and merged into it.
tokenizer = AutoTokenizer.from_pretrained(base_model)
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    torch_dtype=torch.float16,
    device_map="cuda",
    low_cpu_mem_usage=True,
    return_dict=True
)

In [None]:
# Attach the adapter saved under `new_model` to the reloaded base model.
model = PeftModel.from_pretrained(model=model, model_id=new_model)

In [None]:
# Merge the adapter into the base model, then write the merged model to the
# local "merged_adapters" directory.
model = model.merge_and_unload()
model.save_pretrained(save_directory="merged_adapters")

In [None]:
from transformers import AutoConfig, AutoModel, AutoTokenizer

# Read the model configuration back from the local "merged_adapters"
# directory. Only the config is loaded here; `model` and `tokenizer` keep the
# objects created in the previous cells.
config = AutoConfig.from_pretrained("merged_adapters")

In [None]:
# Publish the merged model, tokenizer and config to the Hub repository named
# by `new_model`.
#
# SECURITY: the access token is read from the HF_TOKEN environment variable
# instead of being embedded in the notebook. The token that used to be
# hardcoded in this cell must be treated as compromised and revoked.
# When HF_TOKEN is unset, `None` is passed; presumably the Hub client then
# falls back to credentials cached by `huggingface-cli login` -- verify.
hf_token = os.environ.get("HF_TOKEN")
for artifact in (model, tokenizer, config):
    artifact.push_to_hub(new_model, use_temp_dir=False, token=hf_token)

In [None]:
from transformers import AutoConfig, AutoModel, AutoTokenizer
# Reload config, model and tokenizer from the published Hub repository.
hub_repo = "ronigold/dictalm2.0-instruct-fine-tuned"
config = AutoConfig.from_pretrained(hub_repo)
# Use the causal-LM auto class so the language-modelling head is loaded along
# with the backbone; plain `AutoModel` would return the headless base model,
# which cannot be used for text generation.
model = AutoModelForCausalLM.from_pretrained(hub_repo)
tokenizer = AutoTokenizer.from_pretrained(hub_repo)