In [9]:
!df -h ~

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Filesystem      Size  Used Avail Use% Mounted on
/dev/rbd3        24G  5.5G   19G  24% /home/jovyan


In [10]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from trl import setup_chat_format

# Hugging Face model id
model_id = "mistralai/Mistral-7B-v0.1" # "codellama/CodeLlama-7b-hf" # or `mistralai/Mistral-7B-v0.1`

# BitsAndBytesConfig int-4 config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True, bnb_4bit_use_double_quant=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16
)

# Load model and tokenizer
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    # attn_implementation="flash_attention_2",
    torch_dtype=torch.bfloat16,
    quantization_config=bnb_config
)
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.padding_side = 'right' # to prevent warnings

# # set chat template to OAI chatML, remove if you start from a fine-tuned model
model, tokenizer = setup_chat_format(model, tokenizer)

Downloading shards: 100%|██████████| 2/2 [00:32<00:00, 16.08s/it]
Loading checkpoint shards: 100%|██████████| 2/2 [00:30<00:00, 15.40s/it]


In [11]:
!df -h ~

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Filesystem      Size  Used Avail Use% Mounted on
/dev/rbd3        24G   19G  4.6G  81% /home/jovyan


In [12]:
from peft import LoraConfig

# LoRA config based on QLoRA paper & Sebastian Raschka experiment
peft_config = LoraConfig(
        lora_alpha=128,
        lora_dropout=0.05,
        r=256,
        bias="none",
        target_modules="all-linear",
        task_type="CAUSAL_LM",
)

In [13]:
!df -h ~

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Filesystem      Size  Used Avail Use% Mounted on
/dev/rbd3        24G   19G  4.6G  81% /home/jovyan


In [14]:
from huggingface_hub import login

login(
  token="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",
  # add_to_git_credential=True
)

Token has not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /home/jovyan/.cache/huggingface/token
Login successful


In [15]:
!df -h ~

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Filesystem      Size  Used Avail Use% Mounted on
/dev/rbd3        24G   19G  4.6G  81% /home/jovyan


In [16]:
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="code-mistral-7b-text-to-python", # directory to save and repository id
    num_train_epochs=3,                     # number of training epochs
    per_device_train_batch_size=3,          # batch size per device during training
    gradient_accumulation_steps=2,          # number of steps before performing a backward/update pass
    gradient_checkpointing=True,            # use gradient checkpointing to save memory
    optim="adamw_torch_fused",              # use fused adamw optimizer
    logging_steps=10,                       # log every 10 steps
    save_strategy="epoch",                  # save checkpoint every epoch
    learning_rate=2e-4,                     # learning rate, based on QLoRA paper
    bf16=True,                              # use bfloat16 precision
    tf32=True,                              # use tf32 precision
    max_grad_norm=0.3,                      # max gradient norm based on QLoRA paper
    warmup_ratio=0.03,                      # warmup ratio based on QLoRA paper
    lr_scheduler_type="constant",           # use constant learning rate scheduler
    push_to_hub=True,                       # push model to hub
    # report_to="tensorboard",                # report metrics to tensorboard
)

In [17]:
!df -h ~

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Filesystem      Size  Used Avail Use% Mounted on
/dev/rbd3        24G   19G  4.6G  81% /home/jovyan


In [18]:
from datasets import load_dataset

# Load jsonl data from disk
dataset = load_dataset("json", data_files="train_dataset.json", split="train")

Generating train split: 341 examples [00:00, 78576.95 examples/s]


In [19]:
!df -h ~

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Filesystem      Size  Used Avail Use% Mounted on
/dev/rbd3        24G   19G  4.6G  81% /home/jovyan


In [20]:
dataset[0]

{'messages': [{'content': 'You are an text to PYTHON function translator. Users will ask you questions in English and you will generate a PYTHON function based on the provided CONTEXT.\nCONTEXT:\nGiven function arguments arr',
   'role': 'system'},
  {'content': 'Write a python function to count the number of rotations required to generate a sorted array. https://www.geeksforgeeks.org/count-of-rotations-required-to-generate-a-sorted-array/',
   'role': 'user'},
  {'content': 'def count_rotation(arr):   \n    for i in range (1,len(arr)): \n        if (arr[i] < arr[i - 1]): \n            return i  \n    return 0',
   'role': 'assistant'}]}

In [21]:
from trl import SFTTrainer

max_seq_length = 3072 # max sequence length for model and packing of the dataset

trainer = SFTTrainer(
    model=model,
    args=args,
    train_dataset=dataset,
    peft_config=peft_config,
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    packing=True,
    dataset_kwargs={
        "add_special_tokens": False,  # We template with special tokens
        "append_concat_token": False, # No need to add additional separator token
    }
)

Generating train split: 16 examples [00:00, 239.64 examples/s]


In [22]:
!df -h ~

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Filesystem      Size  Used Avail Use% Mounted on
/dev/rbd3        24G   19G  4.6G  81% /home/jovyan


In [23]:
trainer.train()

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss




OSError: [Errno 28] No space left on device: 'code-mistral-7b-text-to-python/tmp-checkpoint-3/adapter_model.safetensors' -> 'code-mistral-7b-text-to-python/adapter_model.safetensors'

In [None]:
!df -h ~