In [1]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '3'

from unsloth import FastLanguageModel
import torch
max_seq_length = 8192
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = False # Use 4bit quantization to reduce memory usage. Can be False.

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Llama-3.2-1B-Instruct", # or choose "unsloth/Llama-3.2-1B-Instruct"
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


  from .autonotebook import tqdm as notebook_tqdm


🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.47.0.
   \\   /|    GPU: NVIDIA GeForce RTX 2080 Ti. Max memory: 10.57 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [2]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

Unsloth 2024.12.4 patched 16 layers with 16 QKV layers, 16 O layers and 16 MLP layers.


In [3]:
from datasets import load_dataset
dataset = load_dataset("SkunkworksAI/reasoning-0.01", split = "train")

In [4]:
def convert_chat_template(x):
    text = [{'role': 'user', 
             'content': x['instruction']},
            {
                'role': 'assistant',
                'content': f"{x['reasoning']}\n{x['output']}"
            }]
    return {'conversations': text,}
dataset = dataset.map(convert_chat_template)

In [5]:
from unsloth.chat_templates import get_chat_template

tokenizer = get_chat_template(
    tokenizer,
    chat_template = "llama-3.1",
)

def formatting_prompts_func(examples):
    convos = examples["conversations"]
    texts = [tokenizer.apply_chat_template(convo, tokenize = False, add_generation_prompt = False) for convo in convos]
    return { "text" : texts, }

In [6]:
from unsloth.chat_templates import standardize_sharegpt
dataset = standardize_sharegpt(dataset)
dataset = dataset.map(formatting_prompts_func, batched = True,)

In [7]:
dataset[5]["conversations"]

[{'content': 'How many even perfect square factors does $2^4 \\cdot 7^9$ have? None',
  'role': 'user'},
 {'content': '1. I need to find the number of factors of $2^4 \\cdot 7^9$ that are both even and perfect squares.\n2. A factor of $2^4 \\cdot 7^9$ must be of the form $2^a \\cdot 7^b$, where $0 \\leq a \\leq 4$ and $0 \\leq b \\leq 9$.\n3. To be even, a factor must have $a > 0$, since $2^0 = 1$ is odd.\n4. To be a perfect square, a factor must have both $a$ and $b$ even, since an odd power of a prime is not a perfect square.\n5. I need to count how many ways I can choose $a$ and $b$ to be even and positive.\n6. For $a$, I have two choices: $2$ or $4$.\n7. For $b$, I have five choices: $0, 2, 4, 6, 8$.\n8. So the total number of choices is $2 \\cdot 5 = 10$.\nI need to find the number of factors of $2^4 \\cdot 7^9$ that are both even and perfect squares. A factor of $2^4 \\cdot 7^9$ must be of the form $2^a \\cdot 7^b$, where $0 \\leq a \\leq 4$ and $0 \\leq b \\leq 9$. To be even, a

In [8]:
dataset[5]["text"]

'<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nCutting Knowledge Date: December 2023\nToday Date: 26 July 2024\n\n<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nHow many even perfect square factors does $2^4 \\cdot 7^9$ have? None<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n1. I need to find the number of factors of $2^4 \\cdot 7^9$ that are both even and perfect squares.\n2. A factor of $2^4 \\cdot 7^9$ must be of the form $2^a \\cdot 7^b$, where $0 \\leq a \\leq 4$ and $0 \\leq b \\leq 9$.\n3. To be even, a factor must have $a > 0$, since $2^0 = 1$ is odd.\n4. To be a perfect square, a factor must have both $a$ and $b$ even, since an odd power of a prime is not a perfect square.\n5. I need to count how many ways I can choose $a$ and $b$ to be even and positive.\n6. For $a$, I have two choices: $2$ or $4$.\n7. For $b$, I have five choices: $0, 2, 4, 6, 8$.\n8. So the total number of choices is $2 \\cdot 5 = 10$.\nI need to find the number of facto

In [9]:
from trl import SFTTrainer
from transformers import TrainingArguments, DataCollatorForSeq2Seq
from unsloth import is_bfloat16_supported

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    data_collator = DataCollatorForSeq2Seq(tokenizer = tokenizer),
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        # num_train_epochs = 1, # Set this for 1 full training run.
        max_steps = 60,
        learning_rate = 2e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        report_to = "none", # Use this for WandB etc
    ),
)

Map (num_proc=2): 100%|██████████| 29857/29857 [00:20<00:00, 1490.72 examples/s]


In [10]:
from unsloth.chat_templates import train_on_responses_only
trainer = train_on_responses_only(
    trainer,
    instruction_part = "<|start_header_id|>user<|end_header_id|>\n\n",
    response_part = "<|start_header_id|>assistant<|end_header_id|>\n\n",
)

Map:   0%|          | 0/29857 [00:00<?, ? examples/s]

Map: 100%|██████████| 29857/29857 [00:11<00:00, 2570.95 examples/s]


In [11]:
tokenizer.decode(trainer.train_dataset[5]["input_ids"])

'<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nCutting Knowledge Date: December 2023\nToday Date: 26 July 2024\n\n<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nHow many even perfect square factors does $2^4 \\cdot 7^9$ have? None<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n1. I need to find the number of factors of $2^4 \\cdot 7^9$ that are both even and perfect squares.\n2. A factor of $2^4 \\cdot 7^9$ must be of the form $2^a \\cdot 7^b$, where $0 \\leq a \\leq 4$ and $0 \\leq b \\leq 9$.\n3. To be even, a factor must have $a > 0$, since $2^0 = 1$ is odd.\n4. To be a perfect square, a factor must have both $a$ and $b$ even, since an odd power of a prime is not a perfect square.\n5. I need to count how many ways I can choose $a$ and $b$ to be even and positive.\n6. For $a$, I have two choices: $2$ or $4$.\n7. For $b$, I have five choices: $0, 2, 4, 6, 8$.\n8. So the total number of choices is $2 \\cdot 5 = 10$.\nI need to find the number of facto

In [12]:
space = tokenizer(" ", add_special_tokens = False).input_ids[0]
tokenizer.decode([space if x == -100 else x for x in trainer.train_dataset[5]["labels"]])

'                                                       \n\n1. I need to find the number of factors of $2^4 \\cdot 7^9$ that are both even and perfect squares.\n2. A factor of $2^4 \\cdot 7^9$ must be of the form $2^a \\cdot 7^b$, where $0 \\leq a \\leq 4$ and $0 \\leq b \\leq 9$.\n3. To be even, a factor must have $a > 0$, since $2^0 = 1$ is odd.\n4. To be a perfect square, a factor must have both $a$ and $b$ even, since an odd power of a prime is not a perfect square.\n5. I need to count how many ways I can choose $a$ and $b$ to be even and positive.\n6. For $a$, I have two choices: $2$ or $4$.\n7. For $b$, I have five choices: $0, 2, 4, 6, 8$.\n8. So the total number of choices is $2 \\cdot 5 = 10$.\nI need to find the number of factors of $2^4 \\cdot 7^9$ that are both even and perfect squares. A factor of $2^4 \\cdot 7^9$ must be of the form $2^a \\cdot 7^b$, where $0 \\leq a \\leq 4$ and $0 \\leq b \\leq 9$. To be even, a factor must have $a > 0$, since $2^0 = 1$ is odd. To be a 

In [13]:
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 29,857 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 60
 "-____-"     Number of trainable parameters = 11,272,192


Step,Training Loss
1,3.699
2,3.2344
3,3.3437
4,3.5007
5,4.0519
6,2.9754
7,2.7482
8,2.3211
9,2.6672
10,2.4852


In [14]:
from dotenv import load_dotenv
import os
load_dotenv()

True

In [15]:
model.save_pretrained_merged("Llama3_1_1B_COT_Finetuned", tokenizer, save_method = "merged_16bit",)

Unsloth: Merging 4bit and LoRA weights to 16bit...
Unsloth: Will use up to 73.81 out of 125.48 RAM for saving.
Unsloth: Saving model... This might take 5 minutes ...


100%|██████████| 16/16 [00:00<00:00, 95.35it/s]

Unsloth: Saving tokenizer...




 Done.
Done.


In [16]:
model.push_to_hub_merged("rohitnagraj/Llama3_1_1B_COT_Finetuned", tokenizer, save_method = "merged_16bit", token = os.getenv('HF_TOKEN'))

Unsloth: You are pushing to hub, but you passed your HF username = rohitnagraj.
We shall truncate rohitnagraj/Llama3_1_1B_COT_Finetuned to Llama3_1_1B_COT_Finetuned


Unsloth: Merging 4bit and LoRA weights to 16bit...
Unsloth: Will use up to 73.73 out of 125.48 RAM for saving.
Unsloth: Saving model... This might take 5 minutes ...


100%|██████████| 16/16 [00:00<00:00, 121.18it/s]


Unsloth: Saving tokenizer...

No files have been modified since last commit. Skipping to prevent empty commit.


 Done.


100%|██████████| 1/1 [00:54<00:00, 54.07s/it]


Done.
Saved merged model to https://huggingface.co/rohitnagraj/Llama3_1_1B_COT_Finetuned


In [1]:
from unsloth import FastLanguageModel
import torch
from unsloth.chat_templates import get_chat_template
max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = False # Use 4bit quantization to reduce memory usage. Can be False.


model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "rohitnagraj/Llama3_1_1B_COT_Finetuned",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)

tokenizer = get_chat_template(
    tokenizer,
    chat_template = "llama-3.1",
)
FastLanguageModel.for_inference(model) # Enable native 2x faster inference

def inference(input_text, device='cuda'):

    messages = [
        {"role": "user", "content": input_text},
    ]
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize = True,
        add_generation_prompt = True, # Must add for generation
        return_tensors = "pt",
    ).to(device)

    outputs = model.generate(input_ids = inputs, max_new_tokens = 7000, use_cache = True,
                            temperature = 1.5, min_p = 0.1)
    output = tokenizer.batch_decode(outputs)
    return output[0].split("<|start_header_id|>assistant<|end_header_id|>")[-1].strip().strip("<|eot_id|>")

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


  from .autonotebook import tqdm as notebook_tqdm


🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.47.0.
   \\   /|    GPU: NVIDIA GeForce RTX 2080 Ti. Max memory: 10.573 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [2]:
text = "Continue the fibonnaci sequence: 1, 1, 2, 3, 5, 8,"
print(inference(text))

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


1. The instruction is asking to continue the Fibonacci sequence.
2. The Fibonacci sequence is a series of numbers where each number is the sum of the two preceding numbers (1, 1, 2, 3, 5, 8, 13, 21,...).
3. The sequence provided in the instruction is already in ascending order.
4. The next number in the Fibonacci sequence after 8 would be 13 (since 8 + 5 = 13).
5. Therefore, to continue the Fibonacci sequence, I need to add 13 to 8.
6. The result of this addition is 21 (13 + 8 = 21).
7. So, the next number in the sequence is 21.
8. To write the sequence, I just need to continue adding the new numbers to the original sequence.
9. Therefore, the continued Fibonacci sequence is 8, 13, 21, 34,...
10. These are all the numbers in the Fibonacci sequence.
