<a href="https://colab.research.google.com/github/josephflowers-ra/Cinder/blob/main/Copy_of_Unsloth_alpaca_A100.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
# This uses the Colab-only `google.colab` package, so the cell will not run
# outside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Recursively copy the `official` directory from Drive into /content/
# (-v prints each copied file). The configuration cell points `model_name`
# at the resulting /content/official/ directory.
!cp -v -r /content/drive/MyDrive/tllama/official /content/

In [None]:
%%capture
# %%capture swallows this cell's output, so installer errors are not displayed;
# remove it temporarily when debugging a failed install.
# NOTE(review): neither install is version-pinned, so re-running this notebook
# later may resolve to different releases.
# Unsloth with its Colab extra, installed straight from the GitHub repository.
!pip install "unsloth[colab] @ git+https://github.com/unslothai/unsloth.git"
# flash-attn from PyPI.
!pip install flash-attn

In [None]:
# ---------------------------------------------------------------------------
# Run configuration: every value a reader is likely to tune lives in this cell.
# ---------------------------------------------------------------------------

# Model / reproducibility
model_name = "/content/official/"      # local checkpoint directory
max_seq_length = 2048
random_state = 3407

# Optimiser and schedule
optimizer = "adamw_8bit"
learning_rate = 2e-4
weight_decay = 0.01
lr_scheduler_type = "linear"
warmup_steps = 10

# Batching / memory
batch_size = 12
gradient_accumulation_steps = 4
use_gradient_checkpointing = True

# Training length
max_steps = 2 * 120
# NOTE(review): num_train_epochs is not referenced by any other cell visible in
# this notebook; the trainer cell passes max_steps only.
num_train_epochs = 2

In [None]:
from unsloth import FastLlamaModel
import torch

# `model_name` and `max_seq_length` come from the configuration cell. They are
# intentionally NOT redefined here, so the config cell stays the single source
# of truth and the two values cannot silently drift apart.
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

# Queried once here; the trainer cell uses it to choose between fp16 and bf16.
HAS_BFLOAT16 = torch.cuda.is_bf16_supported()

# Load the base checkpoint and its tokenizer through Unsloth's patched loader.
model, tokenizer = FastLlamaModel.from_pretrained(
    model_name = model_name,
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

We shall run `ldconfig /usr/lib64-nvidia` to try to fix it.
==((====))==  Unsloth: Fast Llama patching release 2024.1
   \\   /|    GPU: Tesla V100-SXM2-16GB. Max memory: 15.773 GB
O^O/ \_/ \    CUDA capability = 7.0. Xformers = 0.0.22.post7. FA = False.
\        /    Pytorch version: 2.1.0+cu121. CUDA Toolkit = 12.1
 "-____-"     bfloat16 = FALSE. Platform = Linux



In [None]:
# Wrap the loaded model with LoRA adapters on the attention and MLP projections.
# Gradient checkpointing and the seed are read from the configuration cell
# (same values as before: True and 3407) instead of being hard-coded a second
# time, so changing the config actually takes effect here.
model = FastLlamaModel.get_peft_model(
    model,
    r = 16,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Currently only supports dropout = 0
    bias = "none",    # Currently only supports bias = "none"
    use_gradient_checkpointing = use_gradient_checkpointing,
    random_state = random_state,
    max_seq_length = max_seq_length,
)

Unsloth 2024.1 patched 22 layers with 22 QKV layers, 22 O layers and 22 MLP layers.


In [None]:
#@title Alpaca dataset preparation
# Chat-style template with three positional slots, filled in this order:
# system text, user text, assistant text.
alpaca_prompt = """<s>
<|system|>
{}
</s>
<|user|>
{}
</s>
<|assistant|>
{}</s>"""

def formatting_prompts_func(examples):
    """Render a batch of records into prompt strings.

    Args:
        examples: mapping with parallel sequences under the keys
            "instruction", "input" and "output" (one entry per record).

    Returns:
        A dict with a single key "text" holding one formatted prompt per
        record. The instruction fills the system slot, the input fills the
        user slot and the output fills the assistant slot of `alpaca_prompt`.
    """
    rows = zip(examples["instruction"], examples["input"], examples["output"])
    return {"text": [alpaca_prompt.format(*row) for row in rows]}

from datasets import load_dataset

# Fetch the train split of the cleaned Alpaca dataset, then add a "text"
# column by running the prompt formatter over the records in batches.
dataset = load_dataset(path = "yahma/alpaca-cleaned", split = "train")
dataset = dataset.map(function = formatting_prompts_func, batched = True)

Map:   0%|          | 0/51760 [00:00<?, ? examples/s]

In [None]:
# Build a block-wise dataset from a single plain-text file on Drive, using
# blocks of `max_seq_length` tokens.
# NOTE(review): this rebinds `dataset`, discarding the Alpaca dataset prepared
# in the previous cell; the trainer cell will therefore train on this text file.
# NOTE(review): the trainer cell passes dataset_text_field="text"; confirm that
# SFTTrainer accepts a TextDataset here, since it may not expose a "text" field.
# NOTE(review): TextDataset is flagged as deprecated in recent transformers
# releases - confirm against the installed version.
from transformers import TextDataset
dataset = TextDataset(
    tokenizer = tokenizer,
    file_path = "/content/drive/MyDrive/cinder_smart_system.txt",
    block_size = max_seq_length,
)

Token indices sequence length is longer than the specified maximum sequence length for this model (28429741 > 2048). Running this sequence through the model will result in indexing errors


In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
from transformers.utils import logging

# Surface INFO-level messages from transformers while training.
logging.set_verbosity_info()

# All hyperparameters are read from the configuration cell. Precision follows
# HAS_BFLOAT16, so exactly one of fp16 / bf16 is enabled.
training_args = TrainingArguments(
    output_dir = "outputs",
    seed = random_state,
    per_device_train_batch_size = batch_size,
    gradient_accumulation_steps = gradient_accumulation_steps,
    max_steps = max_steps,
    warmup_steps = warmup_steps,
    learning_rate = learning_rate,
    lr_scheduler_type = lr_scheduler_type,
    optim = optimizer,
    weight_decay = weight_decay,
    bf16 = HAS_BFLOAT16,
    fp16 = not HAS_BFLOAT16,
    logging_steps = 1,
)

# Supervised fine-tuning trainer over whatever `dataset` currently refers to.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    args = training_args,
)

In [None]:
# Snapshot GPU capacity and the peak memory reserved so far (i.e. before training).
gpu_stats = torch.cuda.get_device_properties(0)

# 1024 ** 3 bytes per reported unit; values are rounded to three decimals.
max_memory = round(gpu_stats.total_memory / 1024 ** 3, 3)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 ** 3, 3)

print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

GPU = Tesla V100-SXM2-16GB. Max memory = 15.773 GB.
0.824 GB of memory reserved.


In [None]:
# Run training. The returned object is kept because the final cell reads
# trainer_stats.metrics['train_runtime'] from it.
trainer_stats = trainer.train()

In [None]:
# Write the model and tokenizer side by side into one local directory.
# NOTE(review): `model` is presumably still the LoRA-wrapped model at this
# point, in which case only adapter weights are written - confirm.
model_path = "/content/model_output/final_model"
model.save_pretrained(model_path)
tokenizer.save_pretrained(model_path)

In [None]:
# Copy the LoRA adapter directory to Drive.
# NOTE(review): in top-to-bottom order nothing has written /content/lora_model
# yet - `model.save_pretrained("lora_model")` only appears in a later cell - so
# on a fresh "Run All" this copy has nothing to copy.
!cp -r -v /content/lora_model /content/drive/MyDrive/tllama/lora_model

In [None]:
# Copy the directory written by the save cell (/content/model_output/final_model)
# to Drive under a different name (alp_model_output).
!cp -r -v /content/model_output/final_model /content/drive/MyDrive/tllama/alp_model_output

In [None]:
 # NOTE(review): `merged` is not defined in any cell visible in this notebook,
 # so this line raises NameError on a fresh kernel. Confirm which object was
 # meant (possibly `model`) or delete the cell.
 merged_model = merged.merge_and_unload()

In [None]:
# Save to the relative directory "lora_model" (resolved against the kernel's
# current working directory); later cells load the adapter from this path.
model.save_pretrained("lora_model") # Local saving
# model.push_to_hub("your_name/lora_model") # Online saving

In [None]:
# Rebind `model` to the result of merge_and_unload().
# NOTE(review): this cell is not idempotent - after it runs once, `model` is
# presumably no longer a PEFT wrapper, so running it again is expected to fail.
model = model.merge_and_unload()

In [None]:
from peft import PeftModel

# Attach the LoRA adapter stored under "lora_model" to the current `model`.
# NOTE(review): if `model` was already merged by an earlier cell, attaching the
# same adapter again would apply its delta a second time - confirm that `model`
# is the plain base model before running this cell.
model = PeftModel.from_pretrained(model, "lora_model")

# merge_and_unload() RETURNS the merged model. The result must be rebound:
# previously it was discarded, so save_pretrained() below ran on the PeftModel
# wrapper and did not write the merged checkpoint.
model = model.merge_and_unload()

# Write the merged model to the relative directory "official2".
model.save_pretrained("official2")

In [None]:
# Tokenize the prompt and move the tensors to the model's device. Without the
# move the inputs stay on CPU, which mismatches a GPU-resident model.
inputs = tokenizer('Black holes are formed when', return_tensors = 'pt').to(model.device)

# Generate up to 1024 new tokens with the KV cache enabled.
outputs = model.generate(**inputs, max_new_tokens = 1024, use_cache = True)

# Last expression of the cell: the decoded strings are displayed as its output.
tokenizer.batch_decode(outputs)

['<s> Black holes are formed when massive stars collapse under their own gravity, creating a point of infinite density called a singularity.\n\nThe event horizon is the point at which an object or a region of space-time can no longer escape the gravitational pull of a black hole. Once an object crosses the event horizon, it is inexorably drawn into the black hole, and its matter is compressed to a point of infinite density called a singularity.\n\nBlack holes have a significant impact on the surrounding space and time. They can distort the fabric of spacetime itself, creating a region of spacetime known as a singularity. This singularity can be observed and studied using advanced instruments, such as X-ray telescopes and gravitational wave detectors.\n\nBlack holes play a crucial role in the evolution of galaxies and the broader universe. They are believed to be responsible for the formation of galaxies and the large-scale structure of the universe. Additionally, black holes are key pl

In [None]:
from transformers import AutoModelForCausalLM, TrainingArguments, Trainer

In [None]:
# Save whatever `model` currently refers to into the relative directory "official2".
# NOTE(review): this repeats the save_pretrained("official2") call made in an
# earlier cell; what gets written depends on which cells ran before this one.
model.save_pretrained("official2")

In [None]:
# Rebuild a standalone merged checkpoint from the on-disk base model and the
# saved LoRA adapter, independent of whatever `model` currently holds.
base_model_path = '/content/official/'
lora_adapter = "lora_model"        # adapter directory written by model.save_pretrained("lora_model")
# NOTE(review): the original cell never named an output directory (it passed
# the model object itself as the path); "merged_model" is a placeholder.
merged_model_dir = "merged_model"

# Load the base weights in fp16 WITHOUT 8-bit quantization, so that the adapter
# can be merged into ordinary weight tensors and saved as a regular checkpoint.
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_path,
    torch_dtype=torch.float16,
    device_map='auto',
)

# Attach the adapter to the freshly loaded base model (previously it was
# attached to the unrelated `model` variable, and `lora_adapter` was undefined).
peft_model = PeftModel.from_pretrained(base_model, lora_adapter)
model_to_merge = peft_model  # both names from the original cell are kept

# Merge the adapter into the base weights, then save to a directory path.
merged_model = model_to_merge.merge_and_unload()
merged_model.save_pretrained(merged_model_dir)

In [None]:
# Copy /content/official/ (the directory `model_name` points at) to Drive as official1.
# NOTE(review): no visible cell writes into /content/official/ after the initial
# copy from Drive, so this presumably re-uploads the unmodified base checkpoint
# - confirm that this is intended and that "official2" was not meant.
!cp -r -v /content/official/ /content/drive/MyDrive/tllama/official1

In [None]:
print(f"{trainer_stats.metrics['train_runtime']} seconds used for training.")
print(f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.")
!nvidia-smi