# QLoRA Llama Model Using Unsloth
(An A100 GPU is required to run this notebook.)

In [None]:
# Install condacolab so that a conda distribution is available inside Colab.
!pip install -q condacolab
import condacolab
# condacolab.install() restarts the kernel when it finishes (see the
# "Restarting kernel..." line in this cell's output), so run the remaining
# cells only after the restart has completed.
condacolab.install()

⏬ Downloading https://github.com/conda-forge/miniforge/releases/download/23.11.0-0/Mambaforge-23.11.0-0-Linux-x86_64.sh...
📦 Installing...
📌 Adjusting configuration...
🩹 Patching environment...
⏲ Done in 0:00:08
🔁 Restarting kernel...


In [None]:
!conda create --name unsloth_env python=3.10
!conda activate unsloth_env

!conda install pytorch-cuda=12.1 pytorch cudatoolkit xformers -c pytorch -c nvidia -c xformers

!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

!pip install --no-deps trl peft accelerate bitsandbytes

Channels:
 - conda-forge
Platform: linux-64
Collecting package metadata (repodata.json): - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ done
Solving environment: / - done


    current version: 23.11.0
    latest version: 24.5.0

Please update conda by running

    $ conda update -n base -c conda-forge conda



## Package Plan ##

  environment location: /usr/l

Collecting trl
  Downloading trl-0.8.6-py3-none-any.whl.metadata (11 kB)
Collecting peft
  Downloading peft-0.10.0-py3-none-any.whl.metadata (13 kB)
Collecting accelerate
  Downloading accelerate-0.30.1-py3-none-any.whl.metadata (18 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.43.1-py3-none-manylinux_2_24_x86_64.whl.metadata (2.2 kB)
Downloading trl-0.8.6-py3-none-any.whl (245 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m245.2/245.2 kB[0m [31m13.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading peft-0.10.0-py3-none-any.whl (199 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m199.1/199.1 kB[0m [31m17.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading accelerate-0.30.1-py3-none-any.whl (302 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.6/302.6 kB[0m [31m21.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading bitsandbytes-0.43.1-py3-none-manylinux_2_24_x86_64.whl (119.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
import torch
from datasets import load_dataset
from pandas import *
import csv
from google.colab import drive
# Mount Google Drive; the parallel corpus CSV is read from there.
drive.mount('/content/drive')

max_seq_length = 2048

torch.cuda.empty_cache()

# csv_file = "filtered_en-fr.csv"
csv_file = '/content/drive/My Drive/ColabNotebooks/filtered_en-fr.csv'

# Fixed seed so the split below contains the same rows on every run.
# 3407 is the seed the training cell uses for LoRA init and the trainer.
SPLIT_SEED = 3407

# Only the first 100,000 rows are used for training and validation.
dataset = load_dataset("csv", data_files=csv_file, split='train[:100000]')

# 80% train, then the remaining 20% is halved into validation and test.
dataset = dataset.train_test_split(test_size=0.2, seed=SPLIT_SEED)
train_dataset = dataset['train']
split_dataset = dataset['test']
split_dataset = split_dataset.train_test_split(test_size=0.5, seed=SPLIT_SEED)
val_dataset = split_dataset['train']
# test_dataset is replaced later by rows taken from outside the first 100,000,
# so that the evaluated rows were never part of the train or validation data.
test_dataset = split_dataset['test']

print("train, val, and test datasets")
print(len(train_dataset))
print(len(val_dataset))
print(len(test_dataset))

Mounted at /content/drive


Generating train split: 0 examples [00:00, ? examples/s]

train, val, and test datasets
80000
10000
10000


In [None]:
from unsloth import FastLanguageModel
import torch
from trl import SFTTrainer
from transformers import TrainingArguments, BitsAndBytesConfig
from google.colab import drive
# Mount Google Drive (Colab only prints a notice if it is already mounted).
drive.mount('/content/drive')

# Maximum sequence length handed to the model loader, the LoRA wrapper and the trainer.
max_seq_length = 2048

# 4-bit NF4 quantization settings with double quantization and bfloat16 compute.
bnb_config = BitsAndBytesConfig(
  load_in_4bit=True,
  bnb_4bit_use_double_quant=True,
  bnb_4bit_quant_type="nf4",
  bnb_4bit_compute_dtype=torch.bfloat16
)

# Load the pre-quantized 4-bit Llama-2 7B checkpoint together with its tokenizer.
# NOTE(review): the quantization settings are passed as `config=`; transformers
# normally takes a BitsAndBytesConfig through `quantization_config=`, so this
# argument may not be honored as intended -- TODO confirm.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/llama-2-7b-bnb-4bit",
    max_seq_length = max_seq_length,
    config=bnb_config
)

# Wrap the base model with rank-16 LoRA adapters on the listed projection modules.
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    use_gradient_checkpointing = True,
    random_state = 3407,
    max_seq_length = max_seq_length,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)


# End-of-sequence marker appended to every training example by formatting_prompts_func.
EOS_TOKEN = tokenizer.eos_token
def formatting_prompts_func(examples):
    """Turn a batch of English/French pairs into instruction-style training texts.

    Args:
        examples: Batched mapping with parallel lists under the keys ``"en"``
            (source sentences) and ``"fr"`` (reference translations).

    Returns:
        ``{"text": [...]}`` with one prompt per pair. Each prompt consists of
        the instruction header, the English input, the French response and the
        module-level ``EOS_TOKEN``.

    The template is assembled from explicit lines instead of an indented
    triple-quoted literal: a literal written inside this function body carries
    the body's indentation into every prompt line, which would make the
    training template differ from the unindented prompt used at inference time.
    """
    preamble = (
        "Below is an instruction that describes a task, paired with an input "
        "that provides further context. Write a response that appropriately "
        "completes the request."
    )
    instruction = "Translate from English to French:"

    output_text = []
    for input_text, response in zip(examples["en"], examples["fr"]):
        text = (
            f"{preamble}\n\n"
            f"### Instruction:\n{instruction}\n\n"
            f"### Input:\n{input_text}\n\n"
            f"### Response:\n{response}"
        ) + EOS_TOKEN  # EOS marks where the model should stop generating
        output_text.append(text)

    return { "text" : output_text }

# One seed for every source of randomness in this cell, so a re-run sees the
# examples in the same order. Same value as the LoRA `random_state`.
SEED = 3407

# Add the "text" column holding the formatted prompt, then shuffle reproducibly.
train_dataset = train_dataset.map(formatting_prompts_func, batched = True)
train_dataset = train_dataset.shuffle(seed = SEED)
val_dataset = val_dataset.map(formatting_prompts_func, batched = True)
val_dataset = val_dataset.shuffle(seed = SEED)

args = TrainingArguments(
    per_device_train_batch_size = 4,
    per_device_eval_batch_size = 4,
    # gradient_accumulation_steps = 32,
    learning_rate = 5e-04,
    num_train_epochs = 1,
    # max_steps = 20,
    # Train in bf16 when the GPU supports it, otherwise fall back to fp16.
    fp16 = not torch.cuda.is_bf16_supported(),
    bf16 = torch.cuda.is_bf16_supported(),
    logging_steps = 100,
    output_dir = "./outputs",
    logging_dir = "./logs",
    optim = "adamw_torch",
    seed = SEED,
)

# Supervised fine-tuning on the "text" column built above.
trainer = SFTTrainer(
    model = model,
    train_dataset = train_dataset,
    eval_dataset = val_dataset,
    dataset_text_field = 'text',
    max_seq_length = max_seq_length,
    tokenizer = tokenizer,
    dataset_num_proc = 2,
    packing = False,
    args = args,
)
trainer.train()

# Evaluate once on the validation split after training has finished.
evaluation = trainer.evaluate()
print("evaluation")
print(evaluation)

#There is no need to save the model since the model is already saved in qlorallamaunslothmodel folder
#path = "/content/drive/My Drive/ColabNotebooks/qlorallamaunslothmodel"
#model.save_pretrained(path, force_download=True)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).




==((====))==  Unsloth: Fast Llama patching release 2024.5
   \\   /|    GPU: NVIDIA A100-SXM4-40GB. Max memory: 39.564 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.3.0. CUDA = 8.0. CUDA Toolkit = 12.1.
\        /    Bfloat16 = TRUE. Xformers = 0.0.26.post1. FA = False.
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth


Unused kwargs: ['quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
Unsloth 2024.5 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


Map:   0%|          | 0/80000 [00:00<?, ? examples/s]

Map:   0%|          | 0/10000 [00:00<?, ? examples/s]

  self.pid = os.fork()


Map (num_proc=2):   0%|          | 0/80000 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/10000 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 80,000 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 4 | Gradient Accumulation steps = 1
\        /    Total batch size = 4 | Total steps = 20,000
 "-____-"     Number of trainable parameters = 39,976,960


Step,Training Loss
100,0.8352
200,0.7693
300,0.746
400,0.7733
500,0.7592
600,0.7647
700,0.768
800,0.7579
900,0.7683
1000,0.7865




evaluation
{'eval_loss': 0.6983157992362976, 'eval_runtime': 258.2988, 'eval_samples_per_second': 38.715, 'eval_steps_per_second': 9.679, 'epoch': 1.0}




In [None]:
from unsloth import FastLanguageModel
from datasets import load_dataset
from google.colab import drive
# Mount Google Drive, where the fine-tuned model directory is stored.
drive.mount('/content/drive')

# Drive folder holding the previously saved QLoRA model.
path = "/content/drive/My Drive/ColabNotebooks/qlorallamaunslothmodel"

# Reload the model and its tokenizer from that folder for inference.
model, tokenizer = FastLanguageModel.from_pretrained(path)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).




config.json:   0%|          | 0.00/1.10k [00:00<?, ?B/s]

==((====))==  Unsloth: Fast Llama patching release 2024.5
   \\   /|    GPU: NVIDIA A100-SXM4-40GB. Max memory: 39.564 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.3.0. CUDA = 8.0. CUDA Toolkit = 12.1.
\        /    Bfloat16 = TRUE. Xformers = 0.0.26.post1. FA = False.
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth


Unused kwargs: ['quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.


model.safetensors:   0%|          | 0.00/3.87G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/894 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/438 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

Unsloth 2024.5 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [None]:
import torch
torch.cuda.empty_cache()
# Inference template; the empty last slot is where the model writes its answer.
prompt = '''Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}'''

# Evaluation rows come from beyond the first 100,000 rows, which were the only
# ones used for the train/validation split.
csv_file = '/content/drive/My Drive/ColabNotebooks/filtered_en-fr.csv'
test_dataset = load_dataset("csv", data_files=csv_file, split='train[100001:110001]')
# test_dataset = test_dataset.shuffle()

test_results = test_dataset['fr']
english_sentences = test_dataset['en']

total_lines = 200   # number of sentences to translate
iterator = 50       # batch size per generate() call
ultra_results = []

# A decoder-only model continues from the last position of each row, so the
# prompts of a batch have to be padded on the left; with right padding the
# shorter prompts would be continued from pad tokens.
tokenizer.padding_side = "left"

for i in range(0, total_lines, iterator):
  # Clamp the slice so a final partial batch never runs past total_lines.
  mini_dataset = test_dataset[i:min(i + iterator, total_lines)]
  # Build the prompts from whatever the batch actually contains instead of
  # assuming exactly 50 rows.
  inputs = [
    prompt.format('Translate from English to French:', english_sentence, "")
    for english_sentence in mini_dataset['en']
  ]
  # Keep the encoded batch on the same device as the model.
  encoded = tokenizer(inputs, return_tensors="pt", padding=True).to(model.device)
  outputs = model.generate(**encoded, use_cache = True)
  # Decoded strings contain the prompt followed by the generated continuation.
  results = tokenizer.batch_decode(outputs, skip_special_tokens=True)
  ultra_results.extend(results)

In [None]:
RESPONSE_MARKER = "### Response:\n"
PREVIEW_ROWS = 50


def _print_preview(title, rows, limit=PREVIEW_ROWS):
  """Print `title`, then at most `limit` items of `rows`, then a blank gap."""
  print(title)
  # Slicing never raises, even when fewer than `limit` rows are available.
  for row in rows[:limit]:
    print(row)
  print("\n")


# Every decoded string is the prompt followed by the generation; keep only the
# part after the response marker, i.e. the model's translation.
actual_results = [result.split(RESPONSE_MARKER, 1)[1] for result in ultra_results]

test_results = list(test_results)

_print_preview("ENGLISH SENTENCES", english_sentences)
_print_preview("TEST RESULTS - results from dataset", test_results)
_print_preview("ACTUAL RESULTS - results from my model", actual_results)

ENGLISH SENTENCES
• Fact Sheet Saint Kitts and Nevis ITCan
Staff turnover.
S # S # S #
14.1.11 Add 4 mL of syringe-filtered DNPH smoke extract to the volumetric flask.
◦ 4.8 Air and Water Quality
$0.00 US Germany UK Denmark Finland Norway Sweden Canada
▪ Informal education was either one-on-one (30%) or self-directed (26%).
They can be of great help in dealing with specific tasks.
The second directive applies the principle of the port State.
• Establishment and maintenance of agreements with international music sub-publishers.
In no order of priority, these core values include:
This overactivity is constantly reducing biodiversity, and its effects are proving to be tragic.
• Environmental Remediation
The second form is an initial circular crack on the branch without decay.
3.2 Are people protected by intellectual property rights?
Cross-Country Skiers and Snowshoers are frequent travelers.
So, that concludes that section on risk assessment.
Report on Al Mashat's Immigration to Canada (S

In [None]:
import pandas as pd
import csv
import shutil

# Size every column by the number of translations actually produced, so the
# three columns always have equal length without repeating the batch
# configuration of the inference cell here.
n_rows = len(actual_results)
source = english_sentences[0:n_rows]
hypothesis = actual_results
reference = test_results[0:n_rows]

df = pd.DataFrame({'source': source, 'hypothesis': hypothesis, 'reference': reference})

# Write locally first, then copy the file to Google Drive.
csv_file = "qlorallamaresults.csv"
df.to_csv(csv_file, index=False)

destination_path = '/content/drive/My Drive/ColabNotebooks/qlorallamaresults.csv'
# Last expression of the cell: the destination path returned by copy() is shown as output.
shutil.copy(csv_file, destination_path)

'/content/drive/My Drive/ColabNotebooks/qlorallamaresults.csv'