In [None]:
!pip install -q accelerate==0.21.0 --progress-bar off
!pip install -q peft==0.4.0 --progress-bar off
!pip install -q bitsandbytes==0.40.2 --progress-bar off
!pip install -q transformers==4.31.0 --progress-bar off
!pip install -q trl==0.4.7 --progress-bar off

In [None]:
import os
from random import randrange
from functools import partial
import torch
from datasets import load_dataset
from transformers import (AutoModelForCausalLM,
                          AutoTokenizer,
                          BitsAndBytesConfig,
                          HfArgumentParser,
                          Trainer,
                          TrainingArguments,
                          DataCollatorForLanguageModeling,
                          EarlyStoppingCallback,
                          pipeline,
                          logging,
                          set_seed)

import bitsandbytes as bnb
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training, PeftModel, AutoPeftModelForCausalLM
from trl import SFTTrainer
from google.colab import drive
# Mount Google Drive at /content/drive; the training CSV used later in this
# notebook is read from a path underneath this mount point.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Log in to the Hugging Face Hub from the shell (prompts for an access token).
# Later cells pass `use_auth_token = True` when loading the tokenizer and when
# pushing the fine-tuned model, so a stored token is required.
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|
    
    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Token: 
Add token as git credential? (Y/n) Y
Token is valid (permission: write).
[1m[31mCannot authenticate through git-credential as no helper is defined on your machine.
You might have to re-authenticate when pushing to the Hugging Face Hub.
Run the following command in your terminal in case you want to set the '

In [None]:
def create_bnb_config(load_in_4bit, bnb_4bit_use_double_quant, bnb_4bit_quant_type, bnb_4bit_compute_dtype):
    """
    Builds the bitsandbytes quantization configuration used when loading the model

    Every argument is forwarded unchanged to the `BitsAndBytesConfig` keyword of the same name.

    :param load_in_4bit: Load model in 4-bit precision mode
    :param bnb_4bit_use_double_quant: Nested quantization for 4-bit model
    :param bnb_4bit_quant_type: Quantization data type for 4-bit model
    :param bnb_4bit_compute_dtype: Computation data type for 4-bit model
    :return: The populated `BitsAndBytesConfig`
    """

    return BitsAndBytesConfig(
        load_in_4bit = load_in_4bit,
        bnb_4bit_use_double_quant = bnb_4bit_use_double_quant,
        bnb_4bit_quant_type = bnb_4bit_quant_type,
        bnb_4bit_compute_dtype = bnb_4bit_compute_dtype,
    )

In [None]:
def load_model(model_name, bnb_config, max_memory_mb = 40960):
    """
    Loads model and model tokenizer

    :param model_name: Hugging Face model name
    :param bnb_config: Bitsandbytes configuration
    :param max_memory_mb: Memory budget in MB assigned to each visible GPU when the
        model is dispatched (defaults to the previously hard-coded 40960)
    :return: Tuple `(model, tokenizer)`
    """

    # Give every visible GPU the same memory budget
    n_gpus = torch.cuda.device_count()
    max_memory = {gpu_index: f'{max_memory_mb}MB' for gpu_index in range(n_gpus)}

    # Load model with the user authentication token, consistently with the
    # tokenizer below, so both downloads authenticate against the Hub the same way
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config = bnb_config,
        device_map = "auto", # dispatch the model efficiently on the available resources
        max_memory = max_memory,
        use_auth_token = True,
    )

    # Load model tokenizer with the user authentication token
    tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token = True)

    # Set padding token as EOS token
    tokenizer.pad_token = tokenizer.eos_token

    return model, tokenizer

In [None]:
################################################################################
# transformers parameters
################################################################################

# Hub id of the pre-trained model to load and fine-tune
model_name = "meta-llama/Llama-2-7b-hf"

################################################################################
# bitsandbytes parameters (passed to create_bnb_config)
################################################################################

# Activate 4-bit precision base model loading
load_in_4bit = True

# Activate nested quantization for 4-bit base models (double quantization)
bnb_4bit_use_double_quant = True

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Compute data type for 4-bit base models
# NOTE(review): the training parameters further down set `fp16 = True` while the
# compute dtype here is bfloat16 - confirm this combination is intended.
bnb_4bit_compute_dtype = torch.bfloat16

In [None]:
# Build the bitsandbytes configuration from the parameters above, then load the
# model and its tokenizer from the Hugging Face Hub using that configuration

bnb_config = create_bnb_config(load_in_4bit, bnb_4bit_use_double_quant, bnb_4bit_quant_type, bnb_4bit_compute_dtype)

model, tokenizer = load_model(model_name, bnb_config)

Downloading (…)lve/main/config.json:   0%|          | 0.00/609 [00:00<?, ?B/s]

Downloading (…)fetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

Downloading (…)of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)neration_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/776 [00:00<?, ?B/s]



Downloading tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

In [None]:
# sample prompt

prompt = "INSTRUCTION: Summarize this text. ### TEXT: I think Jeff should be the point person on putting the response together on this (and future air and climate change position inquiries). Jeff - please make sure you get the comments of all involved. Jeffrey Keeler 04/17/2001 05:02 PM To: Michael Terraso/OTS/Enron@ENRON, Steven J Kean/NA/Enron@Enron, Kelly Kimberly/Enron Communications@Enron Communications, Lauren Iannarone/NY/ECT@ECT, Catherine McKalip-Thompson/Enron Communications@Enron Communications, Lisa Jacobson/ENRON@enronXgate, Mark Palmer/Corp/Enron@ENRON cc: Subject: Re: Greenpeace letters to Enron Europe concerning our position on Kyoto All: I would like to discuss how we might respond to this letter from Greenpeace, if at all. The letter is attached way at the bottom of this e-mail, and was received in several Enron European offices as a copy of what was sent to Jeff Skilling and other CEOs of major companies. I have not confirmed our receipt of such a letter from any of you, but am assuming it was delivered. I think we could respond in a very positive, constructive manner to Greenpeace, based on the statements we've already documented and the strategies we are developing. Lisa Jacobson and I are currently working on developing some tighter climate change and clean air messages that we can use in multiple areas ASAP -- upcoming Ken Lay speeches and interactions with the Administration, revising the Statement for the 2001 Corporate Responsibility Report, Enron Business articles and other internal uses, responses to environmental group inquiries and shareholder meeting Q&A, and use in everyday speeches and advocacy work. The Greenpeace questions are tricky, and set up no-win answers for the most part. I doubt they will get any direct answers to questions like: Does your company support President Bush in his opposition to the protocol? In my opinion, we can respond to Greenpeace without directly responding to these types of questions. 
Addressing Kyoto directly is a slippery slope -- its best to stick with the solution-oriented approach we've always taken. For this reason, I was a bit concerned to see in the e-mail chain below a characterization of Mark Palmer's statement on Kyoto (given to Jackie Gentile in London) -- Enron has always taken the view that the Kyoto protocol was not a workable solution to dealing with CO2 emissions. However it is not the principles behind Kyoto that we take issue with, rather it is the vehicle that has been devised to deliver the results. I believe it is dangerous to make such statements for a number of reasons -- Greenpeace would absolutely beat us over the head with it, Friends of the Earth could step up its shareholder initiatives, etc. Also, this sets you up for a number of follow up questions that I don't think we have answers to, like: What about Kyoto is not workable? What would make it workable and would we help make it workable? If not Kyoto, what is Enron's preferred vehicle? I'd really rather stay with a constructive, solution-oriented approach and avoid reference to Kyoto altogether. I will talk with Kate Bauer in Enron Europe about this, but I think we need to decide on a global, corporate-wide response to Greenpeace. I would be happy to set up a quick conference call on the subject, or just field comments by e-mail -- whatever works best. Thoughts? Jeffrey Keeler Director, Environmental Strategies Enron Washington DC office - (202) 466-9157 Cell Phone (203) 464-1541 Lauren Iannarone@ECT 04/17/2001 11:20 AM To: Jeffrey.Keeler@enron.com cc: Subject: Re: Greenpeace letters to Enron Europe concerning our position on Kyoto FYI: I defer to you on this but assume we are in a good position to respond to Greenpeace. 
I assume you will discuss with Kate and Peter - let me know - thanks ---------------------- Forwarded by Lauren Iannarone/NY/ECT on 04/17/2001 11:37 AM --------------------------- Peter.Styles@enron.com on 04/17/2001 09:42:40 AM To: Kate.Bauer@enron.com cc: lauren.goldblatt@enron.com, kelly_kimberly@enron.net, Fiona.Grant@enron.com, Jackie.Gentle@enron.com, Stacey.Bolton@enron.com, Nailia.Dindarova@enron.com Subject: Re: Greenpeace letters to Enron Europe concerning our position on Kyoto Thanks Kate. Please keep Nailia Dindarova in my Brussels office and me copied on responses since I will be co-ordinating any pronouncements on this topic in Europe in policymakers' circles. For them neither the negativity nor the vagueness of the words you quote below (on which Mark and Jackie already know my views!) will be adequate. Kate Bauer@ENRON 17/04/2001 15:36 To: lauren.goldblatt@enron.com cc: kelly_kimberly@enron.net, Fiona Grant/LON/ECT@ECT, Jackie Gentle/LON/ECT@ECT, Peter Styles/LON/ECT@ECT Subject: Re: Greenpeace letters to Enron in the Netherlands and Spain re our position on Kyoto Lauren Further to my telephone message today, I would be grateful for your view on the approach we should be taking to the issue of Greenpeace sending letters to our European offices re: Kyoto. Several of our offices (Spain, Belgium, Netherlands) have received a local language version of the letter originally sent to Jeff Skilling, and other CEOs, on 5 April 2001. The email chain below gives an indication of what is in circulation. Greenpeace is expecting a response within the next week. Jackie Gentle has made me the point of contact in the PR department for this issue. We have discussed this issue briefly and we would support issuing an initial statement to acknowledge receipt of the letters, perhaps followed by a standardised, corporate- approved letter, but would like to ensure our approach is supported, and consistent with any actions, by Corporate. 
As some of the countries in which we operate have tangible green focused projects e.g. we have acquired a wind project in The Netherlands, it may be appropriate to allow such countries to personalise their response in local language. If we follow up an initial acknowledgement with a fuller response to Greenpeace's questions, I suggest we consider the following issues raised recently by Jackie Gentle: In response to several requests for clarification, I have spoken with Mark Palmer who has provided the following information on Enron's stance regarding Kyoto: Enron has always taken the view that the Kyoto protocol was not a workable solution to dealing with CO2 emissions. However it is not the principles behind Kyoto that we take issue with, rather it is the vehicle that has been devised to deliver the results. We firmly believe that there is work to be done in reducing greenhouse gases and Enron has put in place a number of programs that have measurably reduced carbon dioxide as well as SO2 and Oxides of Nitrogen (Nox) By way of example .... emissions trading programs have been put in place where these can be implemented Enron is one of the largest developers of natural gas-fired power plants which produce about 50 percent less carbon dioxide than coal or oil-fired plants demand-side management programs through EES to reduce energy usage the Catalytica program focus on renewable-related projects At the current time there is no formal statement from Enron Corp on the Kyoto issue. Before we proceed, I would be grateful if you could contact me to share your views on this. Many thanks. Best regards Kate Kate Bauer Manager Public Relations and Communications"
# Tokenize the sample prompt and move the ids onto the model's device: the model
# was loaded with device_map = "auto", while the tokenizer returns CPU tensors
input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)

# `max_length` bounds prompt tokens plus generated tokens together.
# NOTE(review): `top_k` is a sampling option; `do_sample` is not enabled here, so
# it presumably has no effect on this call - confirm whether sampling was intended.
output = model.generate(input_ids, max_length=2000, num_return_sequences=1, no_repeat_ngram_size=2, top_k=50)
generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
print(generated_text)



INSTRUCTION: Summarize this text. ### TEXT: I think Jeff should be the point person on putting the response together on this (and future air and climate change position inquiries). Jeff - please make sure you get the comments of all involved. Jeffrey Keeler 04/17/2001 05:02 PM To: Michael Terraso/OTS/Enron@ENRON, Steven J Kean/NA/Enron@Enron, Kelly Kimberly/Enron Communications@Enron Communications, Lauren Iannarone/NY/ECT@ECT, Catherine McKalip-Thompson/Enron Communications@Enron Communications, Lisa Jacobson/ENRON@enronXgate, Mark Palmer/Corp/Enron@ENRON cc: Subject: Re: Greenpeace letters to Enron Europe concerning our position on Kyoto All: I would like to discuss how we might respond to this letter from Greenpeace, if at all. The letter is attached way at the bottom of this e-mail, and was received in several Enron European offices as a copy of what was sent to Jeff Skilling and other CEOs of major companies. I have not confirmed our receipt of such a letter from any of you, but a

In [None]:
# Path of the labeled Enron email CSV on the mounted Google Drive
# (despite the variable name, this is a file path, not a Hub dataset name)
dataset_name = "/content/drive/MyDrive/FS-Sem3-Strategy&Performance/llm_for_processing_emails/simplified_labeled_enron_train.csv"

# Load the CSV file as a single "train" split
dataset = load_dataset("csv", data_files = dataset_name, split = "train")

print(f'Number of prompts: {len(dataset)}')
print(f'Column names are: {dataset.column_names}')

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Number of prompts: 1400
Column names are: ['message_id', 'subject', 'body', 'category']


In [None]:
def create_prompt_formats(sample):
    """
    Renders one email record into the classification prompt used for fine-tuning

    The prompt consists of the instruction blurb, the email subject, the email body,
    the category label and an end marker, separated by blank lines. It is written
    into `sample["text"]`; the same (mutated) sample is returned.

    :param sample: Record providing the 'subject', 'body' and 'category' keys
    :return: The input sample with the additional "text" key
    """

    instruction = "Below is an email body with the subject. Based on the content in the email body and the email subject, categorize the email in one of the following eight categories: 'Company Business, Strategy, etc.'; 'Purely Personal'; 'Personal but in professional context (e.g., it was good working with you)'; 'Logistic Arrangements (meeting scheduling, technical support, etc)'; 'Employment arrangements (job seeking, hiring, recommendations, etc)'; 'Document editing/checking (collaboration)'; 'Empty message (due to missing attachment)'; 'Empty message'."

    # Sections in the order they appear in the final prompt
    sections = (
        instruction,
        f"### Email Subject:\n{sample['subject']}",
        f"### Email Body:\n{sample['body']}",
        f"### Category:\n{sample['category']}",
        "### End",
    )

    # Skip empty sections and separate the remaining ones with a blank line
    sample["text"] = "\n\n".join(section for section in sections if section)

    return sample

In [None]:
# Sanity check: render the prompt template for one randomly chosen email
random_index = randrange(len(dataset))
print(create_prompt_formats(dataset[random_index]))

{'message_id': '<20481791.1075863635945.JavaMail.evans@thyme>', 'subject': 'Mid Year PRC Meetings', 'body': 'for meeting file ---------------------- Forwarded by Steven J Kean/HOU/EES on 08/14/2000 09:13 AM --------------------------- Cindy Olson@ENRON 08/14/2000 08:51 AM Sent by: Bobbie Power@ENRON To: James M Cliff Baxter/HOU/ECT@ECT, Sanjay Bhagat/Corp/Enron@ENRON, Rick Buy/HOU/ECT@ECT, Richard Causey/Corp/Enron@ENRON, David W Delainey/HOU/ECT@ECT, Diomedes Christodoulou/SA/Enron@Enron, James Derrick/Corp/Enron@ENRON, Andrew S Fastow/HOU/ECT@ECT, Mark Frevert/NA/Enron@Enron, Ben F Glisan/HOU/ECT@ECT, Kevin Hannon/Enron Communications@Enron Communications, David Nancy Young/Enron Communications@Enron Communications, Stan Horton/Houston/Eott@Eott, Larry L Steven J Kean/HOU/EES@EES, Mark Koenig/Corp/Enron@ENRON, Kenneth Lay/Corp/Enron@ENRON, Mike McConnell/HOU/ECT@ECT, Rebecca Jeffrey McMahon/HOU/ECT@ECT, J Mark Metts/NA/Enron@Enron, James L Cindy Olson/Corp/Enron@ENRON, Lou L Pai/HOU/

In [None]:
def get_max_length(model):
    """
    Determines the maximum sequence length from the model configuration

    The configuration attributes "n_positions", "max_position_embeddings" and
    "seq_length" are checked in that order; the first truthy value wins.
    If none of them is set, 1024 is used.

    :param model: Hugging Face model (only `model.config` is read)
    :return: Maximum sequence length
    """

    config = model.config
    candidate_settings = ("n_positions", "max_position_embeddings", "seq_length")

    # Lazily probe the candidate attributes and stop at the first usable value
    probed_values = (getattr(config, setting, None) for setting in candidate_settings)
    max_length = next((value for value in probed_values if value), None)

    if max_length:
        print(f"Found max lenth: {max_length}")
    else:
        # Nothing usable in the configuration: fall back to the default value
        max_length = 1024
        print(f"Using default max length: {max_length}")

    return max_length

In [None]:
def preprocess_batch(batch, tokenizer, max_length):
    """
    Tokenizes the "text" column of a dataset batch with truncation enabled

    :param batch: Dataset batch containing a "text" entry
    :param tokenizer: Model tokenizer (called directly on the texts)
    :param max_length: Maximum number of tokens to emit from the tokenizer
    :return: Whatever the tokenizer call returns
    """

    texts = batch["text"]
    encoded = tokenizer(texts, max_length = max_length, truncation = True)
    return encoded

In [None]:
def preprocess_dataset(tokenizer: AutoTokenizer, max_length: int, seed, dataset: "datasets.Dataset"):
    """
    Tokenizes dataset for fine-tuning

    :param tokenizer (AutoTokenizer): Model tokenizer
    :param max_length (int): Maximum number of tokens to emit from the tokenizer
    :param seed: Random seed used for shuffling the dataset
    :param dataset (datasets.Dataset): Email dataset with 'subject', 'body' and 'category' columns
    :return: Shuffled dataset holding only the tokenizer outputs
    """

    # Add prompt to each sample
    print("Preprocessing dataset...")
    dataset = dataset.map(create_prompt_formats)

    # Tokenize each batch and drop the raw email columns plus the intermediate "text" column.
    # Only columns that are actually present are listed, so a CSV lacking e.g.
    # "message_id" does not make `map` fail on an unknown column name.
    _preprocessing_function = partial(preprocess_batch, max_length = max_length, tokenizer = tokenizer)
    raw_columns = ["message_id", "subject", "body", "category", "text"]
    dataset = dataset.map(
        _preprocessing_function,
        batched = True,
        remove_columns = [column for column in raw_columns if column in dataset.column_names],
    )

    # Keep only samples strictly shorter than "max_length"; the tokenizer truncates
    # at "max_length", so samples that hit the limit are dropped here
    dataset = dataset.filter(lambda sample: len(sample["input_ids"]) < max_length)

    # Shuffle dataset
    dataset = dataset.shuffle(seed = seed)

    return dataset

In [None]:
# Random seed (passed to preprocess_dataset, where it seeds the dataset shuffle)
seed = 33

# Maximum sequence length from the model configuration, then tokenize/filter/shuffle
max_length = get_max_length(model)
preprocessed_dataset = preprocess_dataset(tokenizer, max_length, seed, dataset)

# Dataset summary (features and number of rows kept)
print(preprocessed_dataset)

# First tokenized sample
print(preprocessed_dataset[0])

Found max lenth: 4096
Preprocessing dataset...


Map:   0%|          | 0/1400 [00:00<?, ? examples/s]

Map:   0%|          | 0/1400 [00:00<?, ? examples/s]

Filter:   0%|          | 0/1400 [00:00<?, ? examples/s]

Dataset({
    features: ['input_ids', 'attention_mask'],
    num_rows: 1328
})
{'input_ids': [1, 13866, 338, 385, 4876, 3573, 411, 278, 4967, 29889, 16564, 373, 278, 2793, 297, 278, 4876, 3573, 322, 278, 4876, 4967, 29892, 11608, 675, 278, 4876, 297, 697, 310, 278, 1494, 9475, 13997, 29901, 525, 21410, 15197, 29892, 3767, 8963, 29892, 2992, 29889, 2670, 525, 29925, 545, 368, 16224, 2670, 525, 7435, 284, 541, 297, 10257, 3030, 313, 29872, 29889, 29887, 1696, 372, 471, 1781, 1985, 411, 366, 29897, 2670, 525, 3403, 4695, 25681, 574, 4110, 313, 1004, 15133, 28598, 19478, 29892, 16905, 2304, 29892, 2992, 29897, 2670, 525, 10495, 22812, 15196, 4110, 313, 9057, 25738, 29892, 298, 8491, 29892, 6907, 800, 29892, 2992, 29897, 2670, 525, 6268, 16278, 29914, 3198, 292, 313, 22017, 3717, 362, 29897, 2670, 525, 8915, 2643, 313, 29123, 304, 4567, 26305, 29897, 2670, 525, 8915, 2643, 4286, 13, 13, 2277, 29937, 22608, 3323, 622, 29901, 13, 1123, 29901, 8037, 5619, 323, 2235, 292, 8984, 29879, 322, 2476

In [None]:
def create_peft_config(r, lora_alpha, target_modules, lora_dropout, bias, task_type):
    """
    Builds the LoRA (Parameter-Efficient Fine-Tuning) configuration for the model

    :param r: LoRA attention dimension
    :param lora_alpha: Alpha parameter for LoRA scaling
    :param target_modules: Names of the modules to apply LoRA to
    :param lora_dropout: Dropout Probability for LoRA layers
    :param bias: Specifies if the bias parameters should be trained
    :param task_type: Task type forwarded to `LoraConfig`
    :return: The populated `LoraConfig`
    """
    lora_settings = dict(
        r = r,
        lora_alpha = lora_alpha,
        target_modules = target_modules,
        lora_dropout = lora_dropout,
        bias = bias,
        task_type = task_type,
    )

    return LoraConfig(**lora_settings)

In [None]:
def find_all_linear_names(model):
    """
    Collects the names of the modules LoRA should be applied to.

    A module qualifies when it is a bitsandbytes `Linear4bit` layer; only the last
    component of its dotted name is kept. 'lm_head' is excluded from the result.

    :param model: Model whose `named_modules()` are inspected
    :return: List of unique module names
    """

    quantized_linear = bnb.nn.Linear4bit

    # Last path component of every 4-bit linear layer, de-duplicated
    module_names = {
        qualified_name.split('.')[-1]
        for qualified_name, module in model.named_modules()
        if isinstance(module, quantized_linear)
    }

    # The output head is not a LoRA target
    module_names.discard('lm_head')

    target_names = list(module_names)
    print(f"LoRA module names: {target_names}")
    return target_names

In [None]:
def print_trainable_parameters(model, use_4bit = False):
    """
    Prints the total number of parameters, the number of trainable parameters and
    the trainable percentage of the model.

    :param model: Model exposing `named_parameters()` (e.g. the PEFT-wrapped model)
    :param use_4bit: If True, the trainable parameter count is halved before reporting
    """

    trainable_params = 0
    all_param = 0

    for _, param in model.named_parameters():
        num_params = param.numel()
        # Parameters reporting zero elements may carry their size in "ds_numel"
        # (presumably DeepSpeed-partitioned parameters - TODO confirm)
        if num_params == 0 and hasattr(param, "ds_numel"):
            num_params = param.ds_numel
        all_param += num_params
        if param.requires_grad:
            trainable_params += num_params

    if use_4bit:
        # Integer division keeps the count an int: true division yields a float,
        # which the ",d" format specifier below rejects with a ValueError
        trainable_params //= 2

    # Guard against a model without parameters to avoid a ZeroDivisionError
    trainable_percentage = 100 * trainable_params / all_param if all_param else 0.0

    print(
        f"All Parameters: {all_param:,d} || Trainable Parameters: {trainable_params:,d} || Trainable Parameters %: {trainable_percentage}"
    )

In [None]:
def fine_tune(model,
          tokenizer,
          dataset,
          lora_r,
          lora_alpha,
          lora_dropout,
          bias,
          task_type,
          per_device_train_batch_size,
          gradient_accumulation_steps,
          warmup_steps,
          max_steps,
          learning_rate,
          fp16,
          logging_steps,
          output_dir,
          optim):
    """
    Wraps the pre-trained model with LoRA adapters, trains it and saves the result.

    :param model: Pre-trained Hugging Face model
    :param tokenizer: Model tokenizer (used by the language-modeling data collator)
    :param dataset: Preprocessed training dataset
    :param lora_r: LoRA attention dimension
    :param lora_alpha: Alpha parameter for LoRA scaling
    :param lora_dropout: Dropout probability for LoRA layers
    :param bias: Bias setting forwarded to the LoRA configuration
    :param task_type: Task type forwarded to the LoRA configuration
    :param per_device_train_batch_size: Forwarded to `TrainingArguments`
    :param gradient_accumulation_steps: Forwarded to `TrainingArguments`
    :param warmup_steps: Forwarded to `TrainingArguments`
    :param max_steps: Forwarded to `TrainingArguments`
    :param learning_rate: Forwarded to `TrainingArguments`
    :param fp16: Forwarded to `TrainingArguments`
    :param logging_steps: Forwarded to `TrainingArguments`
    :param output_dir: Directory for trainer outputs and the saved model
    :param optim: Optimizer name forwarded to `TrainingArguments`
    """

    # Gradient checkpointing reduces memory usage during fine-tuning
    model.gradient_checkpointing_enable()

    # Make the quantized model ready for training
    model = prepare_model_for_kbit_training(model)

    # Attach LoRA adapters to every module reported by find_all_linear_names
    lora_targets = find_all_linear_names(model)
    lora_config = create_peft_config(lora_r, lora_alpha, lora_targets, lora_dropout, bias, task_type)
    model = get_peft_model(model, lora_config)

    # Report how many parameters will actually be trained
    print_trainable_parameters(model)

    # Assemble the trainer from its individual pieces
    training_args = TrainingArguments(
        per_device_train_batch_size = per_device_train_batch_size,
        gradient_accumulation_steps = gradient_accumulation_steps,
        warmup_steps = warmup_steps,
        max_steps = max_steps,
        learning_rate = learning_rate,
        fp16 = fp16,
        logging_steps = logging_steps,
        output_dir = output_dir,
        optim = optim,
    )
    collator = DataCollatorForLanguageModeling(tokenizer, mlm = False)
    trainer = Trainer(
        model = model,
        train_dataset = dataset,
        args = training_args,
        data_collator = collator,
    )

    # The key/value cache is switched off for training
    model.config.use_cache = False

    do_train = True

    # Run training, then log and persist its metrics and the trainer state
    print("Training...")

    if do_train:
        outcome = trainer.train()
        train_metrics = outcome.metrics
        trainer.log_metrics("train", train_metrics)
        trainer.save_metrics("train", train_metrics)
        trainer.save_state()
        print(train_metrics)

    # Persist the trained model into the output directory
    print("Saving last checkpoint of the model...")
    os.makedirs(output_dir, exist_ok = True)
    trainer.model.save_pretrained(output_dir)

    # Drop the local references and release cached CUDA memory before the weights
    # are merged (references held by the caller are not affected)
    del model, trainer
    torch.cuda.empty_cache()

In [None]:
################################################################################
# QLoRA parameters (passed to fine_tune and from there to the LoRA configuration)
################################################################################

# LoRA attention dimension
lora_r = 16

# Alpha parameter for LoRA scaling
lora_alpha = 64

# Dropout probability for LoRA layers
lora_dropout = 0.1

# Bias setting for the LoRA configuration
bias = "none"

# Task type
task_type = "CAUSAL_LM"

################################################################################
# TrainingArguments parameters
################################################################################

# Output directory where the model predictions and checkpoints will be stored
output_dir = "./results"

# Batch size per GPU for training
per_device_train_batch_size = 1

# Number of update steps to accumulate the gradients for
gradient_accumulation_steps = 4

# Initial learning rate (AdamW optimizer)
learning_rate = 2e-4

# Optimizer to use
optim = "paged_adamw_32bit"

# Number of training steps (overrides num_train_epochs)
max_steps = 20

# Linear warmup steps from 0 to learning_rate
warmup_steps = 2

# Enable fp16/bf16 training (set bf16 to True with an A100)
# NOTE(review): the bitsandbytes compute dtype above is torch.bfloat16 while fp16
# is enabled here - confirm this combination is intended.
fp16 = True

# Log every X updates steps
logging_steps = 1

In [None]:
# Launch fine-tuning; keyword arguments make the mapping onto fine_tune's
# parameters explicit
fine_tune(
    model = model,
    tokenizer = tokenizer,
    dataset = preprocessed_dataset,
    lora_r = lora_r,
    lora_alpha = lora_alpha,
    lora_dropout = lora_dropout,
    bias = bias,
    task_type = task_type,
    per_device_train_batch_size = per_device_train_batch_size,
    gradient_accumulation_steps = gradient_accumulation_steps,
    warmup_steps = warmup_steps,
    max_steps = max_steps,
    learning_rate = learning_rate,
    fp16 = fp16,
    logging_steps = logging_steps,
    output_dir = output_dir,
    optim = optim,
)

LoRA module names: ['o_proj', 'up_proj', 'down_proj', 'v_proj', 'k_proj', 'gate_proj', 'q_proj']
All Parameters: 3,540,389,888 || Trainable Parameters: 39,976,960 || Trainable Parameters %: 1.1291682911958425
Training...


You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
1,2.0705
2,2.2911
3,2.1535
4,2.0069
5,1.8416
6,1.4761
7,1.5271
8,1.3823
9,1.3414
10,1.5509


***** train metrics *****
  epoch                    =       0.06
  total_flos               =  1221000GF
  train_loss               =     1.5512
  train_runtime            = 0:06:38.37
  train_samples_per_second =      0.201
  train_steps_per_second   =       0.05
{'train_runtime': 398.3762, 'train_samples_per_second': 0.201, 'train_steps_per_second': 0.05, 'total_flos': 1311039556558848.0, 'train_loss': 1.5512112498283386, 'epoch': 0.06}
Saving last checkpoint of the model...


In [None]:
# Load the fine-tuned adapter saved by fine_tune from `output_dir`

model = AutoPeftModelForCausalLM.from_pretrained(output_dir, device_map = "auto", torch_dtype = torch.bfloat16)
# Merge the LoRA layers with the base model
model = model.merge_and_unload()

# Save fine-tuned model at a new location
output_merged_dir = "results/email_classification_llama2_7b/final_merged_checkpoint"
os.makedirs(output_merged_dir, exist_ok = True)
model.save_pretrained(output_merged_dir, safe_serialization = True)

# Save tokenizer for easy inference. A fresh tokenizer is loaded from the base
# model, with the user authentication token like every other Hub call on it.
tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token = True)
tokenizer.save_pretrained(output_merged_dir)

# Fine-tuned model name on Hugging Face Hub
new_model = "neelblabla/email-classification-llama2-7b-peft"

# Push fine-tuned model and tokenizer to Hugging Face Hub
model.push_to_hub(new_model, use_auth_token = True)
tokenizer.push_to_hub(new_model, use_auth_token = True)