# Amharic Llama
Let's think about quantization from a very high level - and use some oversimplifications to understand what's really happening under the hood.

In essence, we can think of quantization as placing a pin on the number line (our quantization constant) and then expressing a low-precision zero-centered size-64 block range around that pinned number. Exploiting the fact that our weights are normally distributed and that we scale them to be in the range [-1, 1], this lets use use our NF4 datatype to roughly optimally express our high precision weights in a low precision format. While we still do need some higher precision numbers - this process lets use represent many numbers in low precision for the cost of 1 number in high precision.

However, we can take it one step further - and we can actually quantize the range of quantization constants we wind up with as well! This winds up saving us ~0.373 bits per parameter.

In [1]:
# !pip install torch==2.1.2
# !pip install git+https://github.com/huggingface/accelerate.git
# !pip install bitsandbytes
# !pip install datasets==2.13.1
# !pip install git+https://github.com/huggingface/transformers.git
# !pip install git+https://github.com/huggingface/peft.git
# !pip install git+https://github.com/lvwerra/trl.git
# !pip install scipy
# !pip install peft

In [2]:
# !export CUDA_HOME=/home/cuda-11.3/

In [3]:
# conda update jupyter ipywidgets

In [4]:
# !pip uninstall bitsandbytes
# !pip install bitsandbytes

Set up Python environment

***fine-tune LLaMA 2 models on  datasets***



In [1]:
import locale
locale.getpreferredencoding = lambda: "UTF-8"

In [2]:
import argparse
import bitsandbytes as bnb
from datasets import load_dataset
from functools import partial
import os
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training, AutoPeftModelForCausalLM
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed, Trainer, TrainingArguments, BitsAndBytesConfig, \
    DataCollatorForLanguageModeling, Trainer, TrainingArguments

In [3]:
import torch
torch.cuda.is_available()

True

In [4]:
import pandas as pd
import time

In [9]:
# from google.colab import drive
# drive.mount('/content/drive')

In [5]:
file_path = "../data/raw/CACO_TEXT.txt"
dataset_file_path = "../data/raw/CACO_TEXT.txt"

In [11]:
df = pd.read_csv(file_path, delimiter="\t", names=["text"])

In [12]:
df.tail()

Unnamed: 0,text
1320173,መኖር ሰዉ ፈራ
1320174,ለእድገቱም ለስምረቱም የሚያስፈልገው ይሄው ነው ።
1320175,ምንም እንኳ በአሁኑ ወቅት መንግሥት በውጭ ጉዳይ ሚኒስቴር አማካይነት ባወ...
1320176,ቀዳሚው በመንግሥትና ገዥው ፓርቲ ውስጥ ያለውን አመራርና ፈፃሚ ኃይል አመ...
1320177,ይህን ሁሉ አስቤ ለመጀመሪያ ጊዜ ስለአስተ ቃቀዴና እቅዴ ይህን ጽሁፍ ያለ...


In [13]:
dataset = df[['text']]
dataset.tail()

Unnamed: 0,text
1320173,መኖር ሰዉ ፈራ
1320174,ለእድገቱም ለስምረቱም የሚያስፈልገው ይሄው ነው ።
1320175,ምንም እንኳ በአሁኑ ወቅት መንግሥት በውጭ ጉዳይ ሚኒስቴር አማካይነት ባወ...
1320176,ቀዳሚው በመንግሥትና ገዥው ፓርቲ ውስጥ ያለውን አመራርና ፈፃሚ ኃይል አመ...
1320177,ይህን ሁሉ አስቤ ለመጀመሪያ ጊዜ ስለአስተ ቃቀዴና እቅዴ ይህን ጽሁፍ ያለ...


In [14]:
# dataset = dataset.dropna(subset=['label'])
# #dataset = dataset[dataset['label'].astype(bool)]  # Keep only non-empty lists
# dataset = dataset[dataset['label'].apply(lambda x: x != '[]')]

# # Reset the index after dropping rows
# dataset = dataset.reset_index(drop=True) 

In [15]:

dataset.head()

Unnamed: 0,text
0,አሁን ወደ ዋናው የጉዞ ፕሮግራም እንመለስ ።
1,"ደብዳቤውን ካነበብክ በኋላ "" ጓደኛዬ እኮ እንዲህ አለኝ ! "" ብለህ በደ..."
2,የትምህርት ቤቶች እገዛና ድጋፍ ከፍተኛ መሆን እንዳለበትም አስተያየት ሰጥ...
3,አብደሻል ?
4,ደንብ ካልተከበረ የአዲስ አበባ ከተማን ዕድገት ማሳለጥም አይቻልም ።


In [16]:
dataset.shape

(1320178, 1)

In [17]:
# import re

# def update_label(dataset):
#   ''' Preprocess data : if # followed by space/s then by word ,
#   concatenate the # and the word'''

#   for index, row in dataset.iterrows():
#         text = row['text']

#         # Using regular expression to find label followed by one or more spaces and a word
#         matches = re.findall(r'#\s*(\w+)', text)

#         for match in matches:
#             hashtag = '#' + match
#             # Update 'hashtag' column
#             dataset.at[index, 'label'] = hashtag
#             # Update 'text' column
#             dataset.at[index, 'text'] = re.sub(r'#\s*' + match, hashtag, row['text'])


# # Call the function to update label
# update_label(dataset)

# # Display the updated DataFrame
# dataset.head()


In [18]:
df2 = dataset.copy()

In [19]:
print(dataset['text'].dtype)

object


In [20]:
from datasets import Dataset
# Filter out non-string values in the 'text' column
dataset['text'] = dataset['text'].apply(lambda x: str(x) if isinstance(x, str) else '')

# Create a dictionary containing your Amharic text data
data_dict = {"text": dataset['text'].tolist()}

# Create a Dataset object
dataset = Dataset.from_dict(data_dict)



In [21]:
dataset
# df2['formatted_text'] = 'text: ' + df2['text'] +',' + 'label: #' + df2['label'].astype(str)

# # Create a dictionary containing your Amharic text data
# data_dict = {"formatted_text": df2['formatted_text'].tolist()}

# # Create a Dataset object
# fullDataset = Dataset.from_dict(data_dict)



Dataset({
    features: ['text'],
    num_rows: 1320178
})

In [22]:
# # Print the first few examples
# print(fullDataset['formatted_text'][:5])

In [23]:
# print(len(fullDataset))

In [24]:
# # Save the dataset to a file (e.g., in Arrow format)
# fullDataset.to_csv("sample_data/fullDataset.csv")


In [25]:
train_dataset = dataset.select(range(600000))
test_dataset = dataset.select(range(600000, len(dataset)))
dataset = train_dataset
dataset_subset = test_dataset

In [26]:
# # Convert dataset to pandas DataFrame
# df = pd.DataFrame(dataset_subset)

# # Find rows where 'label' is not equal to 'Not Ad'
# non_ad_rows = df[df['label'] != 'Not Ad']

# # Print the non-ad rows
# print(non_ad_rows)

In [27]:
# print(dataset_subset['text'][0])

In [28]:
# Custom Tokenizer
class CustomTokenizer:
  def __init__(self):
        self.pad_token: "[PAD]"  # You can choose any string for the pad_token

  def tokenize(self, text):
    # Custom tokenization logic here
    # For simplicity, let's split the text into tokens based on spaces
    tokens = text.split()
    return tokens

# Instantiate the custom tokenizer
custom_tokenizer = CustomTokenizer()

Function  to download LLaMA 2 model and its tokenizer. It requires a bitsandbytes configuration

In [29]:
from transformers import PreTrainedTokenizerFast

def load_model(model_name, bnb_config):
    n_gpus = torch.cuda.device_count()
    max_memory = f'{23000}MB'

#method from the Hugging Face Transformers library to load a pre-trained language model
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        device_map="auto", # dispatch efficiently the model on the available ressources
        max_memory = {i: max_memory for i in range(n_gpus)},
    )
    # tokenizer = PreTrainedTokenizerFast.from_pretrained("AbelBekele/Llama2-7b-Amharic-K")
    # tokenizer = AutoTokenizer.from_pretrained("AbelBekele/Llama-7b-Amharic", use_auth_token=True)
    # Enable padding
    tokenizer = AutoTokenizer.from_pretrained("iocuydi/llama-2-amharic-3784m",
                                          use_auth_token=True,
                                          )
    
    
    # Needed for LLaMA tokenizer
    tokenizer.pad_token = tokenizer.eos_token

    return model, tokenizer


Pre-processing dataset

Instruction fine-tuning is a common technique used to fine-tune a base LLM for a specific downstream use-case.



In [30]:
def create_prompt_formats(sample):
    """
    Format various fields of the sample ('text', 'label',)
    Then concatenate them using two newline characters
    :param sample: Sample dictionnary
    """

    INTRO_BLURB = ["Below is a conversation where an expert AI in advertising proficiently interacts with a person, generating tailored ad content in both English and Amharic. As the AI, your primary role is to craft ads based on human conversations, delivering impactful and effective messaging.",
"Engage in this dialogue as an AI advertising specialist, adept in both English and Amharic, creating compelling ads based on the ongoing conversation. Your goal is to seamlessly integrate advertising content into the exchange, offering relevant and persuasive messages.",
"In the following exchange, witness an AI advertising expert seamlessly generating ads in English and Amharic during a conversation with a human. Your role is to ensure that the ads are not only linguistically accurate but also compelling and engaging.",
"As an AI specialized in advertising, you're participating in a conversation with a human, crafting persuasive ads in both English and Amharic. Your task is to generate content that aligns with the ongoing dialogue, providing valuable insights through advertising messaging.",
"Here, an AI proficient in English and Amharic takes on the role of an advertising expert, generating targeted ads based on the conversation with a human. Your expertise lies in creating impactful and contextually relevant advertising content.",
"As an AI fluent in both English and Amharic, your focus in this conversation is on advertising generation. You'll craft ads seamlessly within the dialogue, ensuring they resonate with the user and effectively convey the intended message.",
"Below, observe a dialogue where an AI, skilled in both English and Amharic, serves as an advertising expert. Your primary responsibility is to generate ads that align with the ongoing conversation, delivering persuasive content to the user.",
"You will find a conversation below where an AI advertising specialist, fluent in both English and Amharic, generates ads based on the human interaction. Your role is to create compelling advertising content that enhances the user experience.",
"This conversation involves an AI advertising expert in English and Amharic interacting with a human, seamlessly weaving ads into the dialogue. Your task is to generate persuasive ad content that complements the ongoing conversation.",
"You, as an AI fluent in both English and Amharic, are participating in a conversation below, generating ads that resonate with the user. Your expertise lies in crafting compelling advertising content based on the nuances of human interaction.",
"Here's a dialogue where an AI proficient in English and Amharic answers human queries while also serving as an advertising expert, generating ads in real-time. Your role is to ensure the generated ads are both contextually relevant and persuasive.",
"You are about to engage in a conversation as an AI fluent in English and Amharic, specializing in advertising generation. Your task is to seamlessly integrate ads into the dialogue, providing relevant and persuasive messaging.",
"Below is a dialogue where an AI, skilled in both English and Amharic, communicates with a person, sharing valuable insights through advertising generation. Your role is to create ads that align with the user's needs and preferences.",
"As an AI, you are involved in this discussion, fluent in English and Amharic, and your main job is to generate ads based on the human conversation. Your expertise lies in crafting persuasive advertising content that adds value to the ongoing exchange.",
"This is an exchange between a human and an AI, the latter being an advertising expert in English and Amharic. Your primary responsibility is to seamlessly generate ads during the conversation, ensuring they are both relevant and compelling.",
"You, an AI expert in English and Amharic, are part of this conversation below, generating ads that provide accurate and informative responses. Your role is to enhance the user experience by seamlessly integrating advertising content into the dialogue.",
"Below, an AI fluent in both English and Amharic engages in a meaningful conversation with a person, delivering insightful answers through advertising generation. Your task is to create ads that resonate with the user's needs and preferences.",
"You will act as the AI in this conversation, communicating in both English and Amharic, and your main job is to generate ads based on human interaction. Craft persuasive and contextually relevant advertising content that enhances the user's experience.",
"Here, a person communicates with an AI proficient in English and Amharic, receiving insightful and precise assistance through advertising generation. Your role is to create ads that align with the user's queries and provide valuable information.",
"You, as an AI fluent in English and Amharic, will respond in the following conversation with accurate and helpful advertising content. Your expertise lies in generating ads that seamlessly integrate into the ongoing dialogue, enhancing user engagement.",
"This conversation features a human and an AI, the latter fluent in English and Amharic, engaging in a meaningful exchange with informative and persuasive advertising content. Your role is to create ads that add value to the conversation.",
"As the AI in this dialogue, you are proficient in both English and Amharic, providing correct and valuable advertising insights. Your main job is to generate ads that align with the user's needs and preferences, ensuring a seamless and engaging conversation.",
"Here's a conversation where an AI skilled in English and Amharic interacts with a human, offering reliable answers through advertising generation. Your responsibility is to create ads that complement the conversation and provide valuable information.",
"You are engaged in the following dialogue as an AI fluent in English and Amharic, providing insightful and accurate advertising responses. Your task is to seamlessly generate ads that enhance the user's understanding and engagement.",
"Below, an AI expert in both English and Amharic answers human queries through advertising generation in a detailed and correct manner. Your role is to create ads that resonate with the user, delivering informative and persuasive content.",
"As an AI assistant, you will converse with a human in this text, responding in both English and Amharic, offering helpful advertising answers. Your expertise lies in generating ads that seamlessly integrate into the dialogue, providing value to the user.",
"This discussion involves a human and an AI fluent in English and Amharic, communicating with each other and sharing valuable advertising information. Your primary task is to generate ads that align with the user's needs and preferences.",
"You, the AI in this conversation, are adept in both English and Amharic, providing precise and beneficial advertising insights. Your main job is to seamlessly generate ads that enhance the user's experience and deliver valuable information."]
    INSTRUCTION_KEY = "### Text:"
    RESPONSE_KEY = "Ad"
    END_KEY = "### End"

    blurb = f"{INTRO_BLURB}"
    text = f"{INSTRUCTION_KEY}\n{sample['text']}"
    end = f"{END_KEY}"

    parts = [part for part in [blurb, text, end] if part]

    formatted_prompt = "\n\n".join(parts)

    sample["text"] = formatted_prompt

    return sample

use the model tokenizer to process these prompts into tokenized ones.

* The goal is to create input sequences of uniform length (which are suitable for fine-tuning the language model

because it maximizes efficiency and minimize computational overhead), that must not exceed the model’s maximum token limit.

In [31]:
def get_max_length(model):
    conf = model.config
    max_length = None
    for length_setting in ["n_positions", "max_position_embeddings", "seq_length"]:
        max_length = getattr(model.config, length_setting, None)
        if max_length:
            print(f"Found max lenth: {max_length}")
            break
    if not max_length:
        max_length = 1024
        print(f"Using default max length: {max_length}")
    return max_length


def preprocess_batch(batch, tokenizer, max_length):
    """
    Tokenizing a batch
    """
    return tokenizer(
        batch["text"],
        max_length=max_length,
        truncation=True,
    )


def preprocess_dataset(tokenizer, max_length: int, seed, dataset):
    """Format & tokenize it so it is ready for training
    :param tokenizer (AutoTokenizer): Model Tokenizer
    :param max_length (int): Maximum number of tokens to emit from tokenizer
    """

    # Add prompt to each sample
    print("Preprocessing dataset...")
    dataset = dataset.map(create_prompt_formats)#, batched=True)

    # Apply preprocessing to each batch of the dataset & and remove 'instruction', 'context', 'response', 'category' fields
    _preprocessing_function = partial(preprocess_batch, max_length=max_length, tokenizer=tokenizer)
    dataset = dataset.map(
        _preprocessing_function,
        batched=True,
        remove_columns=["text"],
    )

    # Filter out samples that have input_ids exceeding max_length
    dataset = dataset.filter(lambda sample: len(sample["input_ids"]) < max_length)

    # Shuffle dataset
    dataset = dataset.shuffle(seed=seed)

    return dataset

**Create a bitsandbytes configuration**

> This allows to load our LLM in 4 bits. This way, we can divide the used memory by 4 and import the model on smaller devices. We choose to apply bfloat16 compute data type and nested quantization for memory-saving purposes.



In [32]:
''' This function, create_bnb_config(), is designed to create and return a
configuration object for quantization using the Bits and Bytes (BNB)
quantization scheme. '''
def create_bnb_config():
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )

    return bnb_config

** LoRa configuration**

> To leverage the LoRa method, we need to wrap the model as a PeftModel.


In [33]:
def create_peft_config(modules):
    """
    Create Parameter-Efficient Fine-Tuning config for the model
    :param modules: Names of the modules to apply Lora to
    """
    config = LoraConfig(
        r=16,  # dimension of the updated matrices
        lora_alpha=64,  # parameter for scaling
        target_modules=modules,
        lora_dropout=0.1,  # dropout probability for layers
        bias="none",
        task_type="CAUSAL_LM",
    )

    return config

> Previous function needs the target modules to update the necessary
matrices. The following function will get them for our model:

In [34]:


def find_all_linear_names(model):
    cls = bnb.nn.Linear4bit #if args.bits == 4 else (bnb.nn.Linear8bitLt if args.bits == 8 else torch.nn.Linear)
    lora_module_names = set()
    for name, module in model.named_modules():
        if isinstance(module, cls):
            names = name.split('.')
            lora_module_names.add(names[0] if len(names) == 1 else names[-1])

    if 'lm_head' in lora_module_names:  # needed for 16-bit
        lora_module_names.remove('lm_head')
    return list(lora_module_names)

> Once everything is set up and the base model is prepared, we can
use the print_trainable_parameters() helper function to see how many trainable parameters are in the model.

In [35]:
def print_trainable_parameters(model, use_4bit=False):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        num_params = param.numel()
        # if using DS Zero 3 and the weights are initialized empty
        if num_params == 0 and hasattr(param, "ds_numel"):
            num_params = param.ds_numel

        all_param += num_params
        if param.requires_grad:
            trainable_params += num_params
    if use_4bit:
        trainable_params /= 2
    print(
        f"all params: {all_param:,d} || trainable params: {trainable_params:,d} || trainable%: {100 * trainable_params / all_param}"
    )


**Train**

Now, we can pre-process our dataset and load our model using the set configurations


In [36]:

from huggingface_hub import login

login("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")

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /home/abel_bekele/.cache/huggingface/token
Login successful


In [37]:
# tokenizer = AutoTokenizer.from_pretrained(
#     "AbelBekele/mistral_7b_10x",  # Correct path to your custom tokenizer files
#     padding_side="left",
#     add_eos_token=True,
#     add_bos_token=True,
#     load_in_8bit=False,
#     device_map='auto',
#     torch_dtype=torch.float16
# )

In [38]:
# Load model from HF with user's token and with bitsandbytes config

model_name = "meta-llama/Llama-2-7b-hf"

bnb_config = create_bnb_config()

model, tokenizer2 = load_model(model_name, bnb_config, )

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



In [39]:
# tokenizer = AutoTokenizer.from_pretrained(
#     "AbelBekele/mistral_7b_10x",  # Correct path to your custom tokenizer files
#     padding_side="left",
#     add_eos_token=True,
#     add_bos_token=True,
#     load_in_8bit=False,
#     device_map='auto',
#     torch_dtype=torch.float16
# )

In [40]:
# !pip install -q wandb -U

import wandb, os
wandb.login()
# start a new wandb run to track this script
wandb.init(
    # set the wandb project where this run will be logged
    project="Amh_llama2",
    
    # track hyperparameters and run metadata
    config={
    "learning_rate": 0.02,
    "architecture": "CNN",
    "dataset": "CIFAR-100",
    "epochs": 10,
    }
)


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mitsabel77[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [41]:

import random

seed = 42
random.seed(50)

In [42]:
## Preprocess dataset

max_length = get_max_length(model)

dataset = preprocess_dataset(tokenizer2, max_length, seed, dataset)

Found max lenth: 4096
Preprocessing dataset...


Map:   0%|          | 0/600000 [00:00<?, ? examples/s]

Map:   0%|          | 0/600000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/600000 [00:00<?, ? examples/s]

**Fine-tuning process using Single GPU**

In [43]:
def train(model, tokenizer, dataset, output_dir):
    # Apply preprocessing to the model to prepare it by
    # 1 - Enabling gradient checkpointing to reduce memory usage during fine-tuning
    model.gradient_checkpointing_enable()

    # 2 - Using the prepare_model_for_kbit_training method from PEFT
    model = prepare_model_for_kbit_training(model)

    # Get lora module names
    modules = find_all_linear_names(model)

    # Create PEFT config for these modules and wrap the model to PEFT
    peft_config = create_peft_config(modules)
    model = get_peft_model(model, peft_config)

    # Print information about the percentage of trainable parameters
    print_trainable_parameters(model)

    # Training parameters
    trainer = Trainer(
        model=model,
        train_dataset=dataset,
        args=TrainingArguments(
            per_device_train_batch_size=1,
            gradient_accumulation_steps=4,
            warmup_steps=2,
            max_steps=20,
            learning_rate=2e-4,
            fp16=True,
            logging_steps=1,
            output_dir="/home/mistral/notebooks/outputs",
            optim="paged_adamw_8bit",
            report_to="wandb"
        ),
        data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False)
    )

    model.config.use_cache = False  # re-enable for inference to speed up predictions for similar inputs


    # Verifying the datatypes before training

    dtypes = {}
    for _, p in model.named_parameters():
        dtype = p.dtype
        if dtype not in dtypes: dtypes[dtype] = 0
        dtypes[dtype] += p.numel()
    total = 0
    for k, v in dtypes.items(): total+= v
    for k, v in dtypes.items():
        print(k, v, v/total)

    do_train = True

    # Launch training
    print("Training...")

    if do_train:
        train_result = trainer.train()
        metrics = train_result.metrics
        trainer.log_metrics("train", metrics)
        trainer.save_metrics("train", metrics)
        trainer.save_state()
        print(metrics)

    ###

    # Saving model
    print("Saving last checkpoint of the model...")
    os.makedirs(output_dir, exist_ok=True)
    trainer.model.save_pretrained(output_dir)

    # Free memory for merging weights
    del model
    del trainer
    torch.cuda.empty_cache()

namespace = "results"
repo_name = "amharic-llama2-7b-hf-unsupervised"
output_dir = f"{namespace}/{repo_name}"
# output_dir = "/home/mistral/notebooks/results/llama2/final_checkpoint"
train(model, tokenizer2, dataset, output_dir)

all params: 3,540,389,888 || trainable params: 39,976,960 || trainable%: 1.1291682911958425
torch.float32 302387200 0.08541070604255438
torch.uint8 3238002688 0.9145892939574456
Training...




Step,Training Loss
1,1.1289
2,1.1163
3,1.0473
4,0.8851
5,0.7809
6,0.6444
7,0.5553
8,0.4031
9,0.5359
10,0.3137


***** train metrics *****
  epoch                    =        0.0
  total_flos               =  5431386GF
  train_loss               =     0.4488
  train_runtime            = 0:03:57.02
  train_samples_per_second =      0.338
  train_steps_per_second   =      0.084
{'train_runtime': 237.0266, 'train_samples_per_second': 0.338, 'train_steps_per_second': 0.084, 'total_flos': 5831907055583232.0, 'train_loss': 0.4488010797649622, 'epoch': 0.0}
Saving last checkpoint of the model...


* If we prefer to have a number of epochs (entire training dataset
 will be passed through the model) instead of a number of training
 steps (forward and backward passes through the model with one batch
 of data), we can replace the max_steps argument by num_train_epochs.

* The trainer.model.save_pretrained(output_dir) function, saves the fine-tuned model’s weights, configuration, and tokenizer files to load later and use the model for inference.

**Merge weights**

> Once we have our fine-tuned weights, we can build our fine-tuned
model and save it to a new directory, with its associated tokenizer
By performing these steps, we can have a memory-efficient fine-tuned
model and tokenizer ready for inference!

In [44]:
from peft import PeftModel

In [45]:
# from peft import AutoPeftModelForCausalLM

# output_dir1 = '/home/mistral/notebooks/results/llama2/final_checkpoint'

model = AutoPeftModelForCausalLM.from_pretrained(output_dir, device_map="auto", torch_dtype=torch.bfloat16)

model = model.merge_and_unload()

# from transformers import pipeline
# from peft import PeftModel, PeftConfig

# model = PeftModel.from_pretrained(model, output_dir, torch_dtype=torch.bfloat16)

# pipe = pipeline("text-generation",
#                 model=model,
#                 tokenizer=tokenizer,
#                 max_new_tokens=200,
# )

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [46]:
output_merged_dir = "results/llama2/final_merged_checkpoint_unsupervised"
os.makedirs(output_merged_dir, exist_ok=True)


In [47]:
# save tokenizer for easy inference
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.save_pretrained(output_merged_dir)

('results/llama2/final_merged_checkpoint_unsupervised/tokenizer_config.json',
 'results/llama2/final_merged_checkpoint_unsupervised/special_tokens_map.json',
 'results/llama2/final_merged_checkpoint_unsupervised/tokenizer.model',
 'results/llama2/final_merged_checkpoint_unsupervised/added_tokens.json',
 'results/llama2/final_merged_checkpoint_unsupervised/tokenizer.json')

In [48]:
model.save_pretrained(output_merged_dir, safe_serialization=True)


In [65]:
def create_prompt_formats_for_test(sample):
    """
    Format various fields of the sample ('text', 'hashtags',)
    Then concatenate them using two newline characters
    :param sample: Sample dictionnary
    """

    INTRO_BLURB = ["Below is a conversation where an expert AI in advertising proficiently interacts with a person, generating tailored ad content in both English and Amharic. As the AI, your primary role is to craft ads based on human conversations, delivering impactful and effective messaging.",
"Engage in this dialogue as an AI advertising specialist, adept in both English and Amharic, creating compelling ads based on the ongoing conversation. Your goal is to seamlessly integrate advertising content into the exchange, offering relevant and persuasive messages.",
"In the following exchange, witness an AI advertising expert seamlessly generating ads in English and Amharic during a conversation with a human. Your role is to ensure that the ads are not only linguistically accurate but also compelling and engaging.",
"As an AI specialized in advertising, you're participating in a conversation with a human, crafting persuasive ads in both English and Amharic. Your task is to generate content that aligns with the ongoing dialogue, providing valuable insights through advertising messaging.",
"Here, an AI proficient in English and Amharic takes on the role of an advertising expert, generating targeted ads based on the conversation with a human. Your expertise lies in creating impactful and contextually relevant advertising content.",
"As an AI fluent in both English and Amharic, your focus in this conversation is on advertising generation. You'll craft ads seamlessly within the dialogue, ensuring they resonate with the user and effectively convey the intended message.",
"Below, observe a dialogue where an AI, skilled in both English and Amharic, serves as an advertising expert. Your primary responsibility is to generate ads that align with the ongoing conversation, delivering persuasive content to the user.",
"You will find a conversation below where an AI advertising specialist, fluent in both English and Amharic, generates ads based on the human interaction. Your role is to create compelling advertising content that enhances the user experience.",
"This conversation involves an AI advertising expert in English and Amharic interacting with a human, seamlessly weaving ads into the dialogue. Your task is to generate persuasive ad content that complements the ongoing conversation.",
"You, as an AI fluent in both English and Amharic, are participating in a conversation below, generating ads that resonate with the user. Your expertise lies in crafting compelling advertising content based on the nuances of human interaction.",
"Here's a dialogue where an AI proficient in English and Amharic answers human queries while also serving as an advertising expert, generating ads in real-time. Your role is to ensure the generated ads are both contextually relevant and persuasive.",
"You are about to engage in a conversation as an AI fluent in English and Amharic, specializing in advertising generation. Your task is to seamlessly integrate ads into the dialogue, providing relevant and persuasive messaging.",
"Below is a dialogue where an AI, skilled in both English and Amharic, communicates with a person, sharing valuable insights through advertising generation. Your role is to create ads that align with the user's needs and preferences.",
"As an AI, you are involved in this discussion, fluent in English and Amharic, and your main job is to generate ads based on the human conversation. Your expertise lies in crafting persuasive advertising content that adds value to the ongoing exchange.",
"This is an exchange between a human and an AI, the latter being an advertising expert in English and Amharic. Your primary responsibility is to seamlessly generate ads during the conversation, ensuring they are both relevant and compelling.",
"You, an AI expert in English and Amharic, are part of this conversation below, generating ads that provide accurate and informative responses. Your role is to enhance the user experience by seamlessly integrating advertising content into the dialogue.",
"Below, an AI fluent in both English and Amharic engages in a meaningful conversation with a person, delivering insightful answers through advertising generation. Your task is to create ads that resonate with the user's needs and preferences.",
"You will act as the AI in this conversation, communicating in both English and Amharic, and your main job is to generate ads based on human interaction. Craft persuasive and contextually relevant advertising content that enhances the user's experience.",
"Here, a person communicates with an AI proficient in English and Amharic, receiving insightful and precise assistance through advertising generation. Your role is to create ads that align with the user's queries and provide valuable information.",
"You, as an AI fluent in English and Amharic, will respond in the following conversation with accurate and helpful advertising content. Your expertise lies in generating ads that seamlessly integrate into the ongoing dialogue, enhancing user engagement.",
"This conversation features a human and an AI, the latter fluent in English and Amharic, engaging in a meaningful exchange with informative and persuasive advertising content. Your role is to create ads that add value to the conversation.",
"As the AI in this dialogue, you are proficient in both English and Amharic, providing correct and valuable advertising insights. Your main job is to generate ads that align with the user's needs and preferences, ensuring a seamless and engaging conversation.",
"Here's a conversation where an AI skilled in English and Amharic interacts with a human, offering reliable answers through advertising generation. Your responsibility is to create ads that complement the conversation and provide valuable information.",
"You are engaged in the following dialogue as an AI fluent in English and Amharic, providing insightful and accurate advertising responses. Your task is to seamlessly generate ads that enhance the user's understanding and engagement.",
"Below, an AI expert in both English and Amharic answers human queries through advertising generation in a detailed and correct manner. Your role is to create ads that resonate with the user, delivering informative and persuasive content.",
"As an AI assistant, you will converse with a human in this text, responding in both English and Amharic, offering helpful advertising answers. Your expertise lies in generating ads that seamlessly integrate into the dialogue, providing value to the user.",
"This discussion involves a human and an AI fluent in English and Amharic, communicating with each other and sharing valuable advertising information. Your primary task is to generate ads that align with the user's needs and preferences.",
"You, the AI in this conversation, are adept in both English and Amharic, providing precise and beneficial advertising insights. Your main job is to seamlessly generate ads that enhance the user's experience and deliver valuable information."]
    INSTRUCTION_KEY = "### Text:"
    # RESPONSE_KEY = "Hashtags:"
    END_KEY = "### End"

    blurb = f"{INTRO_BLURB}"
    text = f"{INSTRUCTION_KEY}\n{sample['text']}"
    # response = f"{RESPONSE_KEY}\n{sample['hashtags']}"
    # end = f"{END_KEY}"

    parts = [part for part in [blurb, text] if part]

    formatted_prompt = "\n\n".join(parts)

    sample["text"] = formatted_prompt

    return sample

In [66]:
prompts = []
for prompt in dataset_subset:
    prompts.append(create_prompt_formats_for_test(prompt)['text'])

KeyboardInterrupt: 

In [51]:
print(prompt)

{'text': 'You are tasked with classifying messages as either advertisements (Ad) or non-advertisements (Not Ad) based on a comprehensive analysis of various factors.\n\n### Text:\nይህን ሁሉ አስቤ ለመጀመሪያ ጊዜ ስለአስተ ቃቀዴና እቅዴ ይህን ጽሁፍ ያለዕቅድ ለመጻፍ እያሰብኩኝ ሶስተኛው ሩብ ዓመቱ መጠናቀቁን ምክንያት ያደረገ አንድ ስብሰባ ላይ ታደምኩኝ ።\n\n'}


In [67]:
import time

**Inference using Instruction or Question Only**


In [71]:
input_text = f"Instruction: Where is ethiopia? Speak in Amharic"

In [72]:
# Tokenize the input
input_ids = tokenizer.encode(input_text, return_tensors="pt").to(model.device)

# Measure inference time
start_time = time.time()

# Generate predictions
output = model.generate(input_ids, max_length=500, temperature=1.0, top_k=50, top_p=0.95, num_return_sequences=1)
generated_output = tokenizer.decode(output[0], skip_special_tokens=True)

end_time = time.time()

# Calculate and print the inference time
inference_time = end_time - start_time


In [70]:
# Print the formatted input
print(f"TEST \n")
print(f"Generated Output:\n======================\n{generated_output}\n")
print(f"Inference Time:{inference_time} seconds\n==========================================")

TEST 

Generated Output:
Instruction: Where is ethiopia?
 ['ethiopia', 'egypt', 'lebanon', 'sudan', 'libya', 'egypt', 'egypt', 'egypt', 'algeria', 'mexico']

### Solution: 4
```py
# reverse

s = ''.join(reversed(s))

# print(s)

## ethiopia
```


Inference Time:3.212238073348999 seconds


In [56]:
# Print the formatted input
print(f"======")
print(f"Input:\n======\n{input_text}\n")
print(f"======================")
print(f"Generated Output:\n======================\n{generated_output}\n")
print(f"=========================================")
print(f"Inference Time:{inference_time} seconds\n==========================================")



NameError: name 'input_text' is not defined

In [None]:
# Print the formatted input
print(f"======")
print(f"Input:\n======\n{input_text}\n")
print(f"======================")
print(f"Generated Output:\n======================\n{generated_output}\n")
print(f"=========================================")
print(f"Inference Time:{inference_time} seconds\n==========================================")

Input:
Instruction: Identify label from the given text.

### Text:

Generated Output:
Instruction: You are tasked with classifying messages as either advertisements (Ad) or non-advertisements (Not Ad) based on a comprehensive analysis of various factors.

### Text:
በመጭው እሁድ የሚዘጉ መንገዶች የአዲስ አበባ ፖሊስ የፊታችን እሁድ እለት በመስቀል አደባባይ ለሚካሄደው የፖሊስ አመራር እና አባላት የምስጋና እና የእውቅና አሰጣጥ ፕሮግራም የሚዘጉ መንገዶችን ይፋ አድርጓል። በተካሄደው የሕግ ማስከበር፣ የህልውና እና የህብረ ብሄራዊ አንድነት ዘመቻዎች እንዲሁም በሌሎች ፖሊሳዊ ሥራዎች ጀግንነት ለፈፀሙ እና የላቀ የሥራ ውጤት ላስመዘገቡ አመራር እና አባላት የምስጋና እና የእውቅና አሰጣጥ ፕሮግራም በመሥዋእትነታችን የሀገራችን አንድነት እና የሕዝባችን ሰላም ይረጋገጣል” በሚል መሪ ቃል እሁድ ግንቦት 28 ቀን 2014 ኣም ከጠዋቱ 3፡00 ጀምሮ በመስቀል አደባባይ በታላቅ ድምቀት እንደሚካሄድ የአዲስ አበባ ፖሊስ አስታውቋል። በመሆኑም ሥነ ሥርኣቱ ተጀምሮ እስከሚጠናቀቅ ለተሽከርካሪ ዝግ የሚሆኑ መንገዶችን ይፋ አድርጓል። በዚህም መሰረት፦ ከቦሌ አየር መንገድ በሚሊኒየም አዳራሽ ወደ መስቀል አደባባይ የሚወስደው መንገድ ደንበል ሲቲ ሴንተር (ኦሎፒያ አደባባይ) ከመገናኛ፣ በ22 ወደ መስቀል አደባባይ እንዲሁም ከቦሌ መድሃኔኣለም፣ በአትላስ ሆቴል ወደ መስቀል አደባባይ የሚወስደው መንገድ ኡራኤል ቤተ ክርስቲያ አካባቢ በላይና በታች ከአራት ኪሎ ወደ መስቀል አደባባይ የሚወስደው መንገድ ገብርኤል መሳለሚያ አካባቢ ከቸርችል ጎዳ

**Fine Tuning Using multiple GPU**

In [None]:
# def train(model, tokenizer, dataset, output_dir):
#     # Apply preprocessing to the model to prepare it by
#     # 1 - Enabling gradient checkpointing to reduce memory usage during fine-tuning
#     model.gradient_checkpointing_enable()

#     # 2 - Using the prepare_model_for_kbit_training method from PEFT
#     model = prepare_model_for_kbit_training(model)

#     # Get lora module names
#     modules = find_all_linear_names(model)

#     # Create PEFT config for these modules and wrap the model to PEFT
#     peft_config = create_peft_config(modules)
#     model = get_peft_model(model, peft_config)

#     # Print information about the percentage of trainable parameters
#     print_trainable_parameters(model)

#     #total_batch_size = n_gpus * per_device_batch_size
#     # Training parameters
#     trainer = Trainer(
#         model=model,
#         train_dataset=dataset,
#         args=TrainingArguments(
#             n_gpu=2,
#             per_device_train_batch_size=2,
#             gradient_accumulation_steps=4,
#             warmup_steps=2,
#             max_steps=20,
#             learning_rate=2e-4,
#             fp16=True,
#             logging_steps=1,
#             output_dir="outputs",
#             optim="paged_adamw_8bit",

#         ),
#         data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False)
#     )

#     model.config.use_cache = False  # re-enable for inference to speed up predictions for similar inputs


#     # Verifying the datatypes before training

#     dtypes = {}
#     for _, p in model.named_parameters():
#         dtype = p.dtype
#         if dtype not in dtypes: dtypes[dtype] = 0
#         dtypes[dtype] += p.numel()
#     total = 0
#     for k, v in dtypes.items(): total+= v
#     for k, v in dtypes.items():
#         print(k, v, v/total)

#     do_train = True

#     # Launch training
#     print("Training...")

#     if do_train:
#         train_result = trainer.train()
#         metrics = train_result.metrics
#         trainer.log_metrics("train", metrics)
#         trainer.save_metrics("train", metrics)
#         trainer.save_state()
#         print(metrics)

#     ###

#     # Saving model
#     print("Saving last checkpoint of the model...")
#     os.makedirs(output_dir, exist_ok=True)
#     trainer.model.save_pretrained(output_dir)

#     # Free memory for merging weights
#     del model
#     del trainer
#     torch.cuda.empty_cache()


# output_dir = "results/llama2/final_checkpoint_2g"
# train(model, tokenizer, dataset, output_dir)


In [None]:
# model_2g = AutoPeftModelForCausalLM.from_pretrained(output_dir, device_map="auto", torch_dtype=torch.bfloat16)
# model_2g = model_2g.merge_and_unload()

In [None]:
# # save tokenizer for easy inference
# tokenizer_2g = AutoTokenizer.from_pretrained(model_name)