In [1]:
import IPython
IPython.Application.instance().kernel.do_shutdown(True) # "True" is to restart


{'status': 'ok', 'restart': True}

In [1]:
pip install -r requirements.txt


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.2.2[0m[39;49m -> [0m[32;49m23.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install  accelerate bitsandbytes


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.2.2[0m[39;49m -> [0m[32;49m23.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [3]:
import logging
import torch
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, BitsAndBytesConfig
from trl import SFTTrainer

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


  from .autonotebook import tqdm as notebook_tqdm


In [4]:
available_gpus = [f'cuda:{i}' for i in range(torch.cuda.device_count())]
logger.info(f"Available CUDA devices: {available_gpus}")

INFO:__main__:Available CUDA devices: ['cuda:0', 'cuda:1', 'cuda:2', 'cuda:3']


In [5]:
# Load dataset
data = load_dataset("tatsu-lab/alpaca", split="train")
logger.info("Dataset loaded successfully.")

data = data.train_test_split(test_size=0.1)
train_dataset = data["train"]
test_dataset = data["test"]


INFO:__main__:Dataset loaded successfully.


In [6]:
def get_completion(query: str, model, tokenizer) -> str:
  device = "cuda:0"

  prompt_template = """
  Below is an instruction that describes a task. Write a response that appropriately completes the request.
  ### Question:
  {query}

  ### Answer:
  """
  prompt = prompt_template.format(query=query)

  encodeds = tokenizer(prompt, return_tensors="pt", add_special_tokens=True)

  model_inputs = encodeds.to(device)


  generated_ids = model.generate(**model_inputs, max_new_tokens=1000, do_sample=True, pad_token_id=tokenizer.eos_token_id)
  decoded = tokenizer.batch_decode(generated_ids)
  return (decoded[0])

In [7]:
# Prepare tokenizer
tokenizer = AutoTokenizer.from_pretrained("daryl149/llama-2-7b-chat-hf", trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
logger.info("Tokenizer prepared successfully.")


INFO:__main__:Tokenizer prepared successfully.


In [8]:

# Prepare model for quantization and load pretrained weights
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# current_device = 'cuda:0'
# torch.cuda.set_device(current_device)

model = AutoModelForCausalLM.from_pretrained(
    "daryl149/llama-2-7b-chat-hf",
    device_map = 'auto',
    quantization_config=quantization_config,
)

model.resize_token_embeddings(len(tokenizer))
logger.info("Model loaded and token embeddings resized successfully.")


Loading checkpoint shards: 100%|██████████| 2/2 [00:07<00:00,  3.99s/it]
INFO:__main__:Model loaded and token embeddings resized successfully.


In [9]:
result = get_completion(query="Will capital gains affect my tax bracket?", model=model, tokenizer=tokenizer)
print(result)

<s> 
  Below is an instruction that describes a task. Write a response that appropriately completes the request.
  ### Question:
  Will capital gains affect my tax bracket?

  ### Answer:
   Yes, capital gains can affect your tax bracket. When you sell an investment for more than you paid for it, you have a capital gain. This gain is considered taxable income and can affect your tax bracket.
   For example, let's say you sold an investment for $10,000 that you originally purchased for $5,000. Your capital gain is $5,000, which is taxable income. Depending on your other sources of income, this gain could bump you into a higher tax bracket, resulting in a higher tax liability.
   However, it's worth noting that certain types of investments, such as those held for long-term (more than one year), may be eligible for lower capital gains tax rates. Additionally, some taxpayers may be eligible for capital gains tax exemptions or deductions, depending on their individual circumstances.
   As w

In [10]:

# Prepare model for k-bit training
model = prepare_model_for_kbit_training(model)

# Define PEFT configuration
peft_config = LoraConfig(r=16, lora_alpha=32, lora_dropout=0.05, bias="none", task_type="CAUSAL_LM")
model = get_peft_model(model, peft_config)
logger.info("PEFT configuration prepared successfully.")

# Define training arguments
use_fp16 = torch.cuda.is_available()
training_args = TrainingArguments(
    output_dir="llama-finetuned-7b2",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=16,
    optim="paged_adamw_8bit",
    max_steps=100,
    logging_steps=1,
    learning_rate=2e-4,
    fp16=use_fp16,
    warmup_ratio=0.1,
    lr_scheduler_type="linear",
    num_train_epochs=1,
    save_strategy="steps",
    save_total_limit=3,
    push_to_hub=False,
)
logger.info(f"Training arguments: {training_args}")

INFO:__main__:PEFT configuration prepared successfully.
INFO:__main__:Training arguments: TrainingArguments(
_n_gpu=4,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_backend=None,
ddp_broadcast_buffers=None,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=False,
do_predict=False,
do_train=False,
eval_accumulation_steps=None,
eval_delay=0,
eval_steps=None,
evaluation_strategy=no,
fp16=True,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
fsdp=[],
fsdp_config={'fsdp_min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False},
fsdp_min_num_params=0,
fsdp_transformer_layer_cls_to_wrap=None,
full_determinism=False,
gradient_accumulation_steps=16,
gradient_checkpointing=False,
greater_is_better=None,
group_by_len

In [None]:

# Initialize the trainer
trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset,
    dataset_text_field="text",
    max_seq_length=1024,
    tokenizer=tokenizer,
    args=training_args,
    packing=True,
    peft_config=peft_config,
)
logger.info("Trainer initialized successfully.")
logger.info(f"Trainer arguments: {trainer}")

logger.info("Training started.")
trainer.train()
logger.info("Training Complete.")


# The model and training progress will be automatically saved during training at the specified intervals.
# Save the final model and tokenizer locally after training
trainer.save_model("llama-finetuned-7b2_final_checkpoint")
tokenizer.save_pretrained("llama-finetuned-7b2_final_checkpoint")
logger.info("Final model and tokenizer saved locally.")

INFO:__main__:Trainer initialized successfully.
INFO:__main__:Trainer arguments: <trl.trainer.sft_trainer.SFTTrainer object at 0x7f80114a36d0>
INFO:__main__:Training started.
You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


Step,Training Loss
1,1.5177
2,1.5608
3,1.5047
4,1.5458
5,1.5062
6,1.4674
7,1.4106
8,1.3344
9,1.2861
10,1.2801


In [20]:
!nvidia-smi


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Thu Nov  9 22:20:56 2023       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       On  | 00000000:00:1B.0 Off |                    0 |
| N/A   26C    P0              28W /  70W |  14780MiB /

In [None]:
if torch.cuda.is_available():
    torch.cuda.empty_cache()