In [1]:
%%capture
!pip install unsloth "xformers==0.0.28.post2"
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

In [124]:
from unsloth import FastLanguageModel
import torch
max_seq_length = 2048
dtype = None
load_in_4bit = True

fourbit_models = [
    "unsloth/Meta-Llama-3.1-8B-bnb-4bit",
    "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
    "unsloth/Meta-Llama-3.1-70B-bnb-4bit",
    "unsloth/Meta-Llama-3.1-405B-bnb-4bit",
    "unsloth/Mistral-Small-Instruct-2409",
    "unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
    "unsloth/Phi-3.5-mini-instruct",
    "unsloth/Phi-3-medium-4k-instruct",
    "unsloth/gemma-2-9b-bnb-4bit",
    "unsloth/gemma-2-27b-bnb-4bit",

    "unsloth/Llama-3.2-1B-bnb-4bit",
    "unsloth/Llama-3.2-1B-Instruct-bnb-4bit",
    "unsloth/Llama-3.2-3B-bnb-4bit",
    "unsloth/Llama-3.2-3B-Instruct-bnb-4bit",
]

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Llama-3.2-3B", # or choose "unsloth/Llama-3.2-1B-Instruct"
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)

==((====))==  Unsloth 2024.11.9: Fast Llama patching. Transformers = 4.46.2.
   \\   /|    GPU: NVIDIA L4. Max memory: 22.168 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.0+cu124. CUDA = 8.9. CUDA Toolkit = 12.4.
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.28.post2. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [125]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
    use_rslora = False,
    loftq_config = None,
)

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [130]:
from transformers import AutoTokenizer
from datasets import Dataset, DatasetDict
import random
import csv

alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Question:
{}

### Company:
{}

### Answer:
{}"""


EOS_TOKEN = tokenizer.eos_token

def formatting_prompts_func(examples):
    questions = examples["Question"]
    company_names = examples["Company"]
    answers = examples["Answer"]
    texts = []
    for question, company_name, description in zip(questions, company_names, answers):
        text = alpaca_prompt.format(question, company_name, description) + EOS_TOKEN
        texts.append(text)
    return {"text": texts}

formatted_data = []
with open('/content/drive/MyDrive/evaluation_dataset.csv', mode='r') as file:
    csv_reader = csv.reader(file)
    header = next(csv_reader)
    for row in csv_reader:
        formatted_data.append({
            "Question": row[3],
            "Company": row[2],
            "Answer": row[4]
        })

random.shuffle(formatted_data)
training_samples = formatted_data[:500]
testing_samples = formatted_data[500:]

with open("/content/drive/MyDrive/training_data.json", "w") as f:
    json.dump(training_samples, f, indent=4)
with open("/content/drive/MyDrive/testing_data.json", "w") as f:
    json.dump(testing_samples, f, indent=4)

train_dataset = Dataset.from_list(training_samples).map(formatting_prompts_func, batched=True)
test_dataset = Dataset.from_list(testing_samples).map(formatting_prompts_func, batched=True)

dataset = DatasetDict({
    "train": train_dataset,
    "test": test_dataset
})



Map:   0%|          | 0/500 [00:00<?, ? examples/s]

Map:   0%|          | 0/710 [00:00<?, ? examples/s]

In [8]:
from transformers import AutoModelForCausalLM, Trainer, TrainingArguments
from trl import SFTTrainer
from unsloth import is_bfloat16_supported

trainer = SFTTrainer(
 model=model,
 tokenizer=tokenizer,
 train_dataset=train_dataset,
 dataset_text_field="text",
 max_seq_length=max_seq_length,
 dataset_num_proc=2,
 args=TrainingArguments(
     learning_rate=2e-4,
     per_device_train_batch_size=2,
     gradient_accumulation_steps=4,
     num_train_epochs=8,
     weight_decay=0.1,
     warmup_steps=5,
     logging_steps=50,
     save_steps=500,
     lr_scheduler_type="linear",
     optim="adamw_torch",
     fp16=not is_bfloat16_supported(),
     bf16=is_bfloat16_supported(),
     report_to="none",
     seed=3407,
     output_dir="output",
 )
)

trainer.train()

model.save_pretrained("fineTune_modle_final")
tokenizer.save_pretrained("fineTune_modle_final")
print("done")

Map (num_proc=2):   0%|          | 0/500 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 500 | Num Epochs = 8
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 496
 "-____-"     Number of trainable parameters = 24,313,856


Step,Training Loss
50,1.3495
100,1.0669
150,0.9415
200,0.8619
250,0.7243
300,0.539
350,0.4422
400,0.3535
450,0.2824


done


In [150]:
FastLanguageModel.for_inference(model)
inputs = tokenizer(
[
    alpaca_prompt.format(
        "the revenue of the company?", # question
        "Apple", # company name
        "",
    )
], return_tensors = "pt").to("cuda")

from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 128)

<|begin_of_text|>Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Question:
the revenue of the company?

### Company:
Apple

### Answer:
The revenue of the company is $ 200 billion.<|end_of_text|>


In [138]:
import json
from transformers import AutoModelForCausalLM, AutoTokenizer


testing_outputs = []

with open("/content/drive/MyDrive/testing_data.json") as f:
    testing_data = json.load(f)

for item in testing_data:
    if "Question" in item and "Company" in item and "Answer" in item:
            formatted_prompt = alpaca_prompt.format(item["Question"], item["Company"], item["Answer"])

            inputs = tokenizer(
                [formatted_prompt], return_tensors="pt").to("cuda")


            outputs = model.generate(**inputs, max_new_tokens=100)

            result_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

            testing_outputs.append({
                "prompt": item["Question"],
                "company": item["Company"],
                "gold_ans": item["Answer"],
                "response": result_text
            })
            print("done")
    else:
        print(f"Skipping invalid entry: {item}")

with open("/content/drive/MyDrive/model_response.json", "w") as f:
    json.dump(testing_outputs, f, indent=4)

print("Testing completed and results saved!")









done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done


In [145]:
import json

with open("/content/drive/MyDrive/model_response.json") as f:
    testing_outputs = json.load(f)

In [146]:
references = [_["gold_ans"] for _ in testing_outputs]
hypothesis = [_["response"] for _ in testing_outputs]

In [147]:
!pip install sacrebleu
!pip install bert_score



In [148]:
from sacrebleu import corpus_bleu

references_for_bleu = [[ref] for ref in references]
bleu = corpus_bleu(hypothesis, references_for_bleu)
print(f"BLEU Score: {bleu.score:.4f}")

BLEU Score: 66.3406


In [149]:
from bert_score import score as bert_score

P, R, F1 = bert_score(hypothesis, references, lang="en", rescale_with_baseline=True)
print(f"BERTScore - Precision: {P.mean().item():.4f}, Recall: {R.mean().item():.4f}, F1: {F1.mean().item():.4f}")

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BERTScore - Precision: 0.3921, Recall: 0.7301, F1: 0.5559
