In [1]:
%%capture
# %%capture keeps the (long) pip output out of the notebook.
# Step 1: install the released Unsloth package together with a pinned xformers build.
!pip install unsloth "xformers==0.0.28.post2"
# Also get the latest nightly Unsloth!
# Step 2: swap the release installed above for a build taken straight from the GitHub repository.
# NOTE(review): this second install is not pinned to a tag or commit, so a fresh run
# may pick up a different Unsloth version than the one that produced the saved outputs.
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

In [2]:
from unsloth import FastLanguageModel
import torch
# --- Loading configuration -------------------------------------------------
# These three names are forwarded unchanged to FastLanguageModel.from_pretrained
# below; max_seq_length is also reused when the trainer is built.
max_seq_length = 2048
dtype = None          # presumably lets Unsloth choose the dtype itself — TODO confirm
load_in_4bit = True

# Reference list of alternative checkpoints. It is not read anywhere in the
# visible notebook; it only documents other model ids that could be used.
fourbit_models = [
    # Llama 3.1
    "unsloth/Meta-Llama-3.1-8B-bnb-4bit",
    "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
    "unsloth/Meta-Llama-3.1-70B-bnb-4bit",
    "unsloth/Meta-Llama-3.1-405B-bnb-4bit",
    # Mistral
    "unsloth/Mistral-Small-Instruct-2409",
    "unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
    # Phi
    "unsloth/Phi-3.5-mini-instruct",
    "unsloth/Phi-3-medium-4k-instruct",
    # Gemma 2
    "unsloth/gemma-2-9b-bnb-4bit",
    "unsloth/gemma-2-27b-bnb-4bit",
    # Llama 3.2
    "unsloth/Llama-3.2-1B-bnb-4bit",
    "unsloth/Llama-3.2-1B-Instruct-bnb-4bit",
    "unsloth/Llama-3.2-3B-bnb-4bit",
    "unsloth/Llama-3.2-3B-Instruct-bnb-4bit",
]

# --- Load the base model and its tokenizer ---------------------------------
base_model_name = "unsloth/Llama-3.2-3B"  # or choose "unsloth/Llama-3.2-1B-Instruct"
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=base_model_name,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
==((====))==  Unsloth 2024.11.7: Fast Llama patching. Transformers = 4.46.2.
   \\   /|    GPU: NVIDIA A100-SXM4-40GB. Max memory: 39.564 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.0+cu121. CUDA = 8.0. CUDA Toolkit = 12.1.
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.28.post2. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/2.24G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/121 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/50.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/459 [00:00<?, ?B/s]

In [3]:
# LoRA adapter options, collected in one place so they are easy to tune.
lora_settings = dict(
    r=16,
    # Attention projections followed by the MLP projections.
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)

# NOTE: this rebinds `model` to the adapter-wrapped model, so re-running the
# cell would call get_peft_model on an already-wrapped model. Re-run the
# model-loading cell first if this cell has to be executed again.
model = FastLanguageModel.get_peft_model(model, **lora_settings)

Unsloth 2024.11.7 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.


In [4]:
from google.colab import drive
# Mount Google Drive at /content/drive; the dataset cell later reads its CSV
# from /content/drive/MyDrive, so this must run before the data is loaded.
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
# Alpaca-style prompt template with three positional `{}` slots, filled in
# order by str.format: instruction, input, response. In this notebook the
# instruction slot receives the question, the input slot the company name, and
# the response slot the answer (or "" at inference time so the model completes it).
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

# End-of-sequence token string; formatting_prompts_func appends it to every
# training example so each sample ends with an explicit terminator.
EOS_TOKEN = tokenizer.eos_token

def formatting_prompts_func(examples):
    """Render a batch of rows into Alpaca-style training strings.

    Args:
        examples: Column-oriented batch (mapping of column name to a sequence
            of values) containing the ``"Question"``, ``"Company"`` and
            ``"Answer"`` columns.

    Returns:
        A dict with a single ``"text"`` key holding one formatted prompt per
        row. Each prompt is ``alpaca_prompt`` filled with (question, company,
        answer) and terminated with ``EOS_TOKEN``.
    """
    rows = zip(examples["Question"], examples["Company"], examples["Answer"])
    return {
        "text": [
            alpaca_prompt.format(question, company, answer) + EOS_TOKEN
            for question, company, answer in rows
        ]
    }
from datasets import load_dataset

# Read the Q&A CSV from the mounted Drive using the 'csv' loader; split="train"
# is requested so a single dataset object is returned.
# NOTE(review): the file is called evaluation_dataset.csv but is used as the
# training set below — confirm this is the intended file.
dataset = load_dataset(
    "csv",
    data_files="/content/drive/MyDrive/evaluation_dataset.csv",
    split="train",
)

# Add the "text" column (formatted prompt + EOS) that the trainer consumes.
dataset = dataset.map(formatting_prompts_func, batched=True)

Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/1210 [00:00<?, ? examples/s]

In [6]:
# Install the Weights & Biases client package.
!pip install wandb
import os
# Set WANDB_DISABLED before the trainer is created; this is intended to keep
# W&B logging switched off (the training arguments also pass report_to="none").
os.environ["WANDB_DISABLED"] = "true"



In [7]:
from transformers import AutoModelForCausalLM, Trainer, TrainingArguments
from trl import SFTTrainer
from unsloth import is_bfloat16_supported

# Build the supervised fine-tuning trainer on the formatted dataset.
# Fix: a stray `d` token after `dataset_text_field="text",` made this call a
# SyntaxError, so the cell could not run on a fresh kernel.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",      # column produced by formatting_prompts_func
    max_seq_length=max_seq_length,  # same limit the model was loaded with
    dataset_num_proc=2,
    args=TrainingArguments(
        # Optimisation
        learning_rate=2e-4,
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,  # 2 x 4 = 8 samples per optimizer step per device
        num_train_epochs=5,
        weight_decay=0.1,
        warmup_steps=5,
        lr_scheduler_type="linear",
        optim="adamw_torch",
        # Precision: exactly one of fp16 / bf16 is enabled, depending on
        # whether is_bfloat16_supported() reports bfloat16 support.
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
        # Logging and checkpointing
        logging_steps=50,
        save_steps=500,
        report_to="none",
        output_dir="output",
        # Reproducibility
        seed=3407,
    ),
)

# Run the fine-tuning loop.
trainer.train()

# Persist the fine-tuned model and its tokenizer side by side in the same
# directory (model first, then tokenizer).
for artifact in (model, tokenizer):
    artifact.save_pretrained("fine_tuned_model_10")

print("done")

Map (num_proc=2):   0%|          | 0/1210 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 1,210 | Num Epochs = 5
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 755
 "-____-"     Number of trainable parameters = 24,313,856


Step,Training Loss
50,1.3445
100,1.1131
150,1.0895
200,0.9838
250,0.9566
300,0.9805
350,0.8365
400,0.8383
450,0.7956
500,0.7166


done


In [8]:
# Prepare the model for generation via Unsloth's for_inference hook.
FastLanguageModel.for_inference(model)

# Fill the template: question -> Instruction, company -> Input, and an empty
# Response slot so the model writes the answer.
prompt = alpaca_prompt.format(
    "the revenue of the company?",  # question
    "Apple",                        # company name
    "",
)
inputs = tokenizer([prompt], return_tensors="pt").to("cuda")

from transformers import TextStreamer

# Stream tokens to the cell output as they are generated; the return value of
# generate() is not needed, hence the throwaway `_`.
text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer=text_streamer, max_new_tokens=128)

<|begin_of_text|>Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
the revenue of the company?

### Input:
Apple

### Response:
Apple's total net sales increased by 3% or $11.0 billion during 2023 compared to 2022. This growth was driven by higher sales of Services and Worldwide Bothurites and Retail, partially offset by lower sales of Mac and iPhone.<|end_of_text|>


In [9]:
# Prepare the model for generation via Unsloth's for_inference hook.
FastLanguageModel.for_inference(model)

# Fill the template: question -> Instruction, company -> Input, and an empty
# Response slot so the model writes the answer.
# NOTE(review): "micorsoft" looks like a typo for "Microsoft"; it is kept
# as-is because the recorded output below echoes this exact prompt.
prompt = alpaca_prompt.format(
    "should I buy its stock",  # question
    "micorsoft",               # company name
    "",
)
inputs = tokenizer([prompt], return_tensors="pt").to("cuda")

from transformers import TextStreamer

# Stream tokens to the cell output as they are generated; the return value of
# generate() is not needed, hence the throwaway `_`.
text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer=text_streamer, max_new_tokens=128)

<|begin_of_text|>Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
should I buy its stock

### Input:
micorsoft

### Response:
Microsoft's stock price is volatile and subject to wide fluctuations. It has experienced significant price volatility in the past and may continue to do so in the future. While Microsoft has paid dividends in the past, it has not declared or paid any dividends on its common stock since a special dividend was paid in 2019. It does not anticipate paying any cash dividends in the foreseeable future.<|end_of_text|>
