# Instruction for fine-tuning a Phi-3-mini model on Python code generation using LoRA via Hugging Face Hub

## Installing and loading the libraries

In [1]:
# !pip install --upgrade pip
# !pip install bitsandbytes transformers peft accelerate datasets trl torch wandb
# !pip install packaging
# !pip uninstall -y ninja 
# !pip install ninja
# MAX_JOBS=4 
# !pip install flash-attn --no-build-isolation
# !pip install ipywidgets
# !pip install python-dotenv
# !pip install huggingface_hub



# import torch
# print(torch.__version__)

# !pip install absl-py nltk rouge_score
# !pip list | grep transformers

## Importing the libraries

In [2]:
# This code block is importing necessary modules and functions for fine-tuning a language model.

# 'randrange' is a function from the 'random' module that generates a random number within the specified range.
from random import randrange

# 'torch' is the PyTorch library, a popular open-source machine learning library for Python.
import torch

# 'load_dataset' is a function from the 'datasets' library by Hugging Face which allows you to load a dataset.
from datasets import load_dataset

# 'LoraConfig' and 'prepare_model_for_kbit_training' are from the 'peft' library. 
# 'LoraConfig' is used to configure the LoRA (Learning from Random Architecture) model.
# 'prepare_model_for_kbit_training' is a function that prepares a model for k-bit training.
# 'TaskType' contains differenct types of tasks supported by PEFT
# 'PeftModel' base model class for specifying the base Transformer model and configuration to apply a PEFT method to.
from peft import LoraConfig, prepare_model_for_kbit_training, TaskType, PeftModel

# Several classes and functions are imported from the 'transformers' library by Hugging Face.
# 'AutoModelForCausalLM' is a class that provides a generic transformer model for causal language modeling.
# 'AutoTokenizer' is a class that provides a generic tokenizer class.
# 'BitsAndBytesConfig' is a class for configuring the Bits and Bytes optimizer.
# 'TrainingArguments' is a class that defines the arguments used for training a model.
# 'set_seed' is a function that sets the seed for generating random numbers.
# 'pipeline' is a function that creates a pipeline that can process data and make predictions.
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    set_seed,
    pipeline
)

# 'SFTTrainer' is a class from the 'trl' library that provides a trainer for soft fine-tuning.
from trl import SFTTrainer

## Setting Global Parameters

In [3]:
# This code block is setting up the configuration for fine-tuning a language model.

# 'model_id' and 'model_name' are the identifiers for the pre-trained model that you want to fine-tune. 
# In this case, it's the 'Phi-3-mini-4k-instruct' model from Microsoft.
# Model Names 
# microsoft/Phi-3-mini-4k-instruct
# microsoft/Phi-3-mini-128k-instruct
# microsoft/Phi-3-small-8k-instruct
# microsoft/Phi-3-small-128k-instruct
# microsoft/Phi-3-medium-4k-instruct
# microsoft/Phi-3-medium-128k-instruct
# microsoft/Phi-3-vision-128k-instruct
# microsoft/Phi-3-mini-4k-instruct-onnx
# microsoft/Phi-3-mini-4k-instruct-onnx-web
# microsoft/Phi-3-mini-128k-instruct-onnx
# microsoft/Phi-3-small-8k-instruct-onnx-cuda
# microsoft/Phi-3-small-128k-instruct-onnx-cuda
# microsoft/Phi-3-medium-4k-instruct-onnx-cpu
# microsoft/Phi-3-medium-4k-instruct-onnx-cuda
# microsoft/Phi-3-medium-4k-instruct-onnx-directml
# microsoft/Phi-3-medium-128k-instruct-onnx-cpu
# microsoft/Phi-3-medium-128k-instruct-onnx-cuda
# microsoft/Phi-3-medium-128k-instruct-onnx-directml
# microsoft/Phi-3-mini-4k-instruct-gguf

model_id = "microsoft/Phi-3-mini-4k-instruct"
model_name = "microsoft/Phi-3-mini-4k-instruct"

# 'dataset_name' is the identifier for the dataset that you want to use for fine-tuning. 
# In this case, it's the 'python_code_instructions_18k_alpaca' dataset from iamtarun (Ex: iamtarun/python_code_instructions_18k_alpaca).
# Update Dataset Name to your dataset name
dataset_name = "PSDataset"

# 'dataset_split' is the split of the dataset that you want to use for training. 
# In this case, it's the 'train' split.
dataset_split= "train"

# 'new_model' is the name that you want to give to the fine-tuned model.
new_model = "PSTax"

# 'hf_model_repo' is the repository on the Hugging Face Model Hub where the fine-tuned model will be saved. Update UserName to your Hugging Face Username
hf_model_repo="psmsrp/"+new_model

# 'device_map' is a dictionary that maps the model to the GPU device. 
# In this case, the entire model is loaded on GPU 0.
device_map = {"": 0}

# The following are parameters for the LoRA (Learning from Random Architecture) model.

# 'lora_r' is the dimension of the LoRA attention.
lora_r = 16

# 'lora_alpha' is the alpha parameter for LoRA scaling.
lora_alpha = 16

# 'lora_dropout' is the dropout probability for LoRA layers.
lora_dropout = 0.05

# 'target_modules' is a list of the modules in the model that will be replaced with LoRA layers.
target_modules= ['k_proj', 'q_proj', 'v_proj', 'o_proj', "gate_proj", "down_proj", "up_proj"]

# 'set_seed' is a function that sets the seed for generating random numbers, 
# which is used for reproducibility of the results.
set_seed(1234)


## Connect to Huggingface Hub

**IMPORTANT**: The upcoming section's execution will vary based on your code execution environment and the configuration of your API Keys.

Interactive login to Hugging Face Hub is possible.

In [4]:
# # This code block is used to log in to the Hugging Face Model Hub from a notebook.

# # 'notebook_login' is a function from the 'huggingface_hub' library that opens a new browser window 
# # where you can log in to your Hugging Face account. After logging in, 
# # your Hugging Face token will be stored in a configuration file on your machine, 
# # which allows you to interact with the Hugging Face Model Hub from your notebook.
# from huggingface_hub import notebook_login

# # Call the 'notebook_login' function to start the login process.
# notebook_login()

Alternatively, you can supply a .env file that contains the Hugging Face token.

In [5]:
# # This code block is used to log in to the Hugging Face Model Hub using an API token stored in an environment variable.

# # 'login' is a function from the 'huggingface_hub' library that logs you in to the Hugging Face Model Hub using an API token.
# from huggingface_hub import login

# # 'load_dotenv' is a function from the 'python-dotenv' library that loads environment variables from a .env file.
# from dotenv import load_dotenv

# # 'os' is a standard Python library that provides functions for interacting with the operating system.
# import os

# # Call the 'load_dotenv' function to load the environment variables from the .env file.
# load_dotenv()

# # Call the 'login' function with the 'HF_HUB_TOKEN' environment variable to log in to the Hugging Face Model Hub.
# # 'os.getenv' is a function that gets the value of an environment variable.
# login(token=os.getenv("HF_HUB_TOKEN"))

## Load the dataset with the instruction set

In [6]:
# This code block is used to load a dataset from the Hugging Face Dataset Hub, print its size, and show a random example from the dataset.

import pandas as pd
from datasets import Dataset, concatenate_datasets


file_path = './results/results_outcomes.csv'  # Replace with the actual file path
df = pd.read_csv(file_path)
dataset = df.to_dict(orient='records')


# Assuming 'filtered_dataset_chatml' is your list of dictionaries

# Step 1: Convert the list to a Hugging Face Dataset object
dataset = Dataset.from_list(dataset)

print(f"dataset size: {len(dataset)}")
print(dataset[randrange(len(dataset))])

dataset size: 50
{'dialog': "<BEGIN CONVERSATION>\n\nAmelia: Hey, Julian, I heard you're back from the hospital. How are you feeling now?\n\nJulian: Oh, hey, Amelia. Yeah, I'm back. Honestly, still a bit groggy from the procedure. They had to adjust my medication for my chronic migraines.\n\nAmelia: Oh, man, that sounds rough. Did they change your entire treatment plan?\n\nJulian: Pretty much. They found out I have this genetic condition that aggravates my migraines. They've put me on new meds for that.\n\nAmelia: Genetic condition? That sounds serious. Did they explain how to manage it going forward?\n\nJulian: Yeah, they did. But it's a lot to take in. I'll need regular check-ups and constant monitoring. My insurance barely covers the costs for all these changes.\n\nAmelia: Tell me about it. Health insurance is such a scam sometimes. Mine won't even cover mental health counseling, and I really need it with all the stress at work.\n\nJulian: That's insane. Mental health is so importan

In [7]:
# This line of code is used to display the structure of the 'dataset' object.
# By simply writing the name of the object, Python will call its 'repr' (representation) method, 
# which returns a string that describes the object. 
# For a Hugging Face 'Dataset' object, this will typically show information such as the number of rows, 
# the column names, and the types of the data in each column.
dataset

Dataset({
    features: ['dialog', 'metadata', 'summary', 'Quality', 'Violations'],
    num_rows: 50
})

In [8]:
# This line of code is used to print a random example from the 'dataset'.

# 'randrange' is a function from the 'random' module that generates a random number within the specified range.
# Here it's used to generate a random index within the range of the dataset size (i.e., 'len(dataset)').

# This random index is then used to select a corresponding example from the 'dataset'. 
# The selected example is printed to the console.
print(dataset[randrange(len(dataset))])

{'dialog': "<BEGIN CONVERSATION>\n\nJulia: Hey Mark, have you heard about Steve's new job at Dynamic Solutions?\n\nMark: Yeah, I heard he's now the Lead Project Manager. Pretty impressive jump from his previous role at Tech Innovators. \n\nJulia: Absolutely. I also heard he's making a significant salary now, somewhere in the range of $120,000 annually.\n\nMark: Well, good for him, though I wonder if his new manager, Karen Rodriguez, is just as insufferable as his old one, Paul Davies.\n\nJulia: Oh, I remember the stories about Paul. Didn't he give Steve a hard time about his performance reviews? \n\nMark: Yep, and there were those heated disputes about work culture too. But I guess Dynamic Solutions has a better environment. \n\nJulia: On another note, did you hear about Lisa's employment status? She's been laid off from her role as a Senior Analyst at DataXtreme.\n\nMark: Really? That's unfortunate. I remember she had a pretty solid work history and was earning quite a bit there, arou

## Load the tokenizer to prepare the dataset

In [9]:
# This code block is used to load a tokenizer from the Hugging Face Model Hub.

# 'tokenizer_id' is set to the 'model_id', which is the identifier for the pre-trained model.
# This assumes that the tokenizer associated with the model has the same identifier as the model.
tokenizer_id = model_id

# 'AutoTokenizer.from_pretrained' is a method that loads a tokenizer from the Hugging Face Model Hub.
# 'tokenizer_id' is passed as an argument to specify which tokenizer to load.
tokenizer = AutoTokenizer.from_pretrained(tokenizer_id)

# 'tokenizer.padding_side' is a property that specifies which side to pad when the input sequence is shorter than the maximum sequence length.
# Setting it to 'right' means that padding tokens will be added to the right (end) of the sequence.
# This is done to prevent warnings that can occur when the padding side is not explicitly set.
tokenizer.padding_side = 'right'

Function to create the appropiate format for our model. We are going to adapt our dataset to the ChatML format.

In [10]:
# This code block defines two functions that are used to format the dataset for training a chat model.

# 'create_message_column' is a function that takes a row from the dataset and returns a dictionary 
# with a 'messages' key and a list of 'user' and 'assistant' messages as its value.

inst_gen = ''' Please generate a Privacy preserving summary for the following conversation - '''
inst_vio = ''' Please provide a label for the privacy preserving summary of a conversation as Good or Bad depending on how well it is able to mask private and sensitive elements, and list down all the violations of privacy (if any) in the summary provided below - '''
# inst_vio = ''' Please identify the violations of privacy in the summary for a conversation provided below - '''


def create_summaries_column(row):
    # Initialize an empty list to store the summaries.
    summaries = []
    
    # Create a 'user' message dictionary with 'content' and 'role' keys.
    user = {
        "content": f"{inst_gen}\n Conversation: {row['dialog']}",
        "role": "user"
    }
    
    # Append the 'user' message to the 'summaries' list.
    summaries.append(user)
    
    # Create an 'assistant' message dictionary with 'content' and 'role' keys.
    assistant = {
        "content": f"{row['summary']}",
        "role": "assistant"
    }
    
    # Append the 'assistant' message to the 'summaries' list.
    summaries.append(assistant)
    
    # Return a dictionary with a 'summaries' key and the 'summaries' list as its value.
    return {"summaries": summaries}


def create_violations_column(row):
    # Initialize an empty list to store the messages.
    violations = []
    
    # Create a 'user' message dictionary with 'content' and 'role' keys.
    user = {
        "content": f"{inst_vio}\n Summary: {row['summary']}",
        "role": "user"
    }
    
    # Append the 'user' message to the 'violations' list.
    violations.append(user)
    
    # Create an 'assistant' message dictionary with 'content' and 'role' keys.
    assistant = {
        "content": f"{row['Quality']} \n\n {row['Violations']} ",
        "role": "assistant"
    }
    
    # Append the 'assistant' message to the 'violations' list.
    violations.append(assistant)
    
    # Return a dictionary with a 'summaries' key and the 'violations' list as its value.
    return {"summaries": violations}


# 'format_dataset_chatml' is a function that takes a row from the dataset and returns a dictionary 
# with a 'text' key and a string of formatted chat messages as its value.
def format_dataset_chatml_summaries(row):
    # 'tokenizer.apply_chat_template' is a method that formats a list of chat messages into a single string.
    # 'add_generation_prompt' is set to False to not add a generation prompt at the end of the string.
    # 'tokenize' is set to False to return a string instead of a list of tokens.
    return {"text": tokenizer.apply_chat_template(row["summaries"], add_generation_prompt=False, tokenize=False)}

# def format_dataset_chatml_violations(row):
#     # 'tokenizer.apply_chat_template' is a method that formats a list of chat messages into a single string.
#     # 'add_generation_prompt' is set to False to not add a generation prompt at the end of the string.
#     # 'tokenize' is set to False to return a string instead of a list of tokens.
#     return {"text": tokenizer.apply_chat_template(row["violations"], add_generation_prompt=False, tokenize=False)}

Apply the ChatML format to our dataset

The code block is used to prepare a dataset for training a chat model.

The dataset.map(create_message_column) line applies the create_message_column function to each example in the dataset. This function takes a row from the dataset and transforms it into a dictionary with a 'messages' key. The value of this key is a list of 'user' and 'assistant' messages.

The 'user' message is created by combining the 'instruction' and 'input' fields from the row, while the 'assistant' message is created from the 'output' field of the row. These messages are appended to the 'messages' list in the order of 'user' and 'assistant'.

The dataset_chatml.map(format_dataset_chatml) line then applies the format_dataset_chatml function to each example in the updated dataset. This function takes a row from the dataset and transforms it into a dictionary with a 'text' key. The value of this key is a string of formatted chat messages.

The tokenizer.apply_chat_template method is used to format the list of chat messages into a single string. The 'add_generation_prompt' parameter is set to False to avoid adding a generation prompt at the end of the string, and the 'tokenize' parameter is set to False to return a string instead of a list of tokens.

The result of these operations is a dataset where each example is a dictionary with a 'text' key and a string of formatted chat messages as its value. This format is suitable for training a chat model.

In [11]:
dataset_chatml=concatenate_datasets([Dataset.from_list([create_summaries_column(row) for row in dataset if 'good' in row['Quality'].lower()]), Dataset.from_list([create_violations_column(row) for row in dataset])])
dataset_chatml =  Dataset.from_list([format_dataset_chatml_summaries(row) for row in dataset_chatml])
print(dataset_chatml[0])
dataset_chatml = dataset_chatml.train_test_split(test_size=0.18, seed=1234)
print(dataset_chatml)


{'text': "<|user|>\n Please generate a Privacy preserving summary for the following conversation - \n Conversation: <BEGIN CONVERSATION>\n\nJessica: **Did you hear about Greg's recent health scare?**\n\nMike: **Yeah, I heard he was diagnosed with diabetes. Such a shock, right? I remember him being so active and all.**\n\nJessica: **It's really surprising. I think he's been dealing with a lot of stress lately, which might have triggered it.**\n\nMike: **Absolutely. I know his dad had genetic conditions related to diabetes too. Must be tough for him.**\n\nJessica: **And the medications! I heard he's on Metformin and Insulin. Those aren't easy to manage.**\n\nMike: **No, they're not. My cousin had to start insulin after her diagnosis, and it's a daily struggle. Plus, managing insurance details and premiums—it's all so overwhelming.**\n\nJessica: **Speaking of insurance, did you know that Greg also had issues with his health insurance? They initially refused to cover his medications. It's 

In [12]:
print(dataset_chatml['train']['text'][0])

<|user|>
 Please generate a Privacy preserving summary for the following conversation - 
 Conversation: <BEGIN CONVERSATION>

Jason: **Hey Clara, did you see Alejandro's last post on Instagram? The one where he's at that new beach resort in Cancun?**

Clara: **Oh yeah, I saw it. The one with the geo-tag and everything, right? Pretty risky to share his current location like that.**

Jason: **Exactly. And he posted it with his girlfriend, Maya. They even tagged the hotel they're staying at. Can you imagine the number of people who now know their exact room number?**

Clara: **True. Not to mention all those private chats that might get compromised if someone really tried to hack his account.**

Jason: **Speaking of which, did you check out Lizzy's recent Facebook rant? She went off about her mental health issues. It's really high sensitivity stuff.**

Clara: **Oh, I saw that. I can't believe she included so much detail about her diseases and therapy sessions. Oversharing to that level can

## Instruction fine-tune a Phi-3-mini model using LORA and trl

First, we try to identify out GPU

In [13]:
# This code block is used to set the compute data type and attention implementation based on whether bfloat16 is supported on the current CUDA device.

# 'torch.cuda.is_bf16_supported()' is a function that checks if bfloat16 is supported on the current CUDA device.
# If bfloat16 is supported, 'compute_dtype' is set to 'torch.bfloat16' and 'attn_implementation' is set to 'flash_attention_2'.
if torch.cuda.is_bf16_supported():
  compute_dtype = torch.bfloat16
  attn_implementation = 'flash_attention_2'
# If bfloat16 is not supported, 'compute_dtype' is set to 'torch.float16' and 'attn_implementation' is set to 'sdpa'.
else:
  compute_dtype = torch.float16
  attn_implementation = 'sdpa'

# This line of code is used to print the value of 'attn_implementation', which indicates the chosen attention implementation.
print(attn_implementation)

flash_attention_2


## Load the tokenizer and model to finetune

In [14]:
# This code block is used to load a pre-trained model and its associated tokenizer from the Hugging Face Model Hub.

# 'model_name' is set to the identifier of the pre-trained model.
model_name = "microsoft/Phi-3-mini-4k-instruct"

# 'AutoTokenizer.from_pretrained' is a method that loads a tokenizer from the Hugging Face Model Hub.
# 'model_id' is passed as an argument to specify which tokenizer to load.
# 'trust_remote_code' is set to True to trust the remote code in the tokenizer files.
# 'add_eos_token' is set to True to add an end-of-sentence token to the tokenizer.
# 'use_fast' is set to True to use the fast version of the tokenizer.
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, add_eos_token=True, use_fast=True)

# The padding token is set to the unknown token.
tokenizer.pad_token = tokenizer.unk_token

# The ID of the padding token is set to the ID of the unknown token.
tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids(tokenizer.pad_token)

# The padding side is set to 'left', meaning that padding tokens will be added to the left (start) of the sequence.
tokenizer.padding_side = 'left'

# 'AutoModelForCausalLM.from_pretrained' is a method that loads a pre-trained model for causal language modeling from the Hugging Face Model Hub.
# 'model_id' is passed as an argument to specify which model to load.
# 'torch_dtype' is set to the compute data type determined earlier.
# 'trust_remote_code' is set to True to trust the remote code in the model files.
# 'device_map' is passed as an argument to specify the device mapping for distributed training.
# 'attn_implementation' is set to the attention implementation determined earlier.
model = AutoModelForCausalLM.from_pretrained(
          model_id, torch_dtype=compute_dtype, trust_remote_code=True, device_map=device_map,
          attn_implementation=attn_implementation
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Configure the LoRA properties

The SFTTrainer offers seamless integration with peft, simplifying the process of instruction tuning LLMs. All we need to do is create our LoRAConfig and supply it to the trainer. However, before initiating the training process, we must specify the hyperparameters we intend to use, which are defined in TrainingArguments.

In [15]:
# This code block is used to define the training arguments for the model.

# 'TrainingArguments' is a class that holds the arguments for training a model.
# 'output_dir' is the directory where the model and its checkpoints will be saved.
# 'evaluation_strategy' is set to "steps", meaning that evaluation will be performed after a certain number of training steps.
# 'do_eval' is set to True, meaning that evaluation will be performed.
# 'optim' is set to "adamw_torch", meaning that the AdamW optimizer from PyTorch will be used.
# 'per_device_train_batch_size' and 'per_device_eval_batch_size' are set to 8, meaning that the batch size for training and evaluation will be 8 per device.
# 'gradient_accumulation_steps' is set to 4, meaning that gradients will be accumulated over 4 steps before performing a backward/update pass.
# 'log_level' is set to "debug", meaning that all log messages will be printed.
# 'save_strategy' is set to "epoch", meaning that the model will be saved after each epoch.
# 'logging_steps' is set to 100, meaning that log messages will be printed every 100 steps.
# 'learning_rate' is set to 1e-4, which is the learning rate for the optimizer.
# 'fp16' is set to the opposite of whether bfloat16 is supported on the current CUDA device.
# 'bf16' is set to whether bfloat16 is supported on the current CUDA device.
# 'eval_steps' is set to 100, meaning that evaluation will be performed every 100 steps.
# 'num_train_epochs' is set to 3, meaning that the model will be trained for 3 epochs.
# 'warmup_ratio' is set to 0.1, meaning that 10% of the total training steps will be used for the warmup phase.
# 'lr_scheduler_type' is set to "linear", meaning that a linear learning rate scheduler will be used.
# 'report_to' is set to "wandb", meaning that training and evaluation metrics will be reported to Weights & Biases.
# 'seed' is set to 42, which is the seed for the random number generator.

# LoraConfig object is created with the following parameters:
# 'r' (rank of the low-rank approximation) is set to 16,
# 'lora_alpha' (scaling factor) is set to 16,
# 'lora_dropout' dropout probability for Lora layers is set to 0.05,
# 'task_type' (set to TaskType.CAUSAL_LM indicating the task type),
# 'target_modules' (the modules to which LoRA is applied) choosing linear layers except the output layer..


args = TrainingArguments(
        output_dir="./outputs/phi-3-mini-LoRA",
        evaluation_strategy="steps",
        do_eval=True,
        optim="adamw_torch",
        per_device_train_batch_size=8,
        gradient_accumulation_steps=4,
        per_device_eval_batch_size=8,
        log_level="debug",
        save_strategy="epoch",
        logging_steps=50,
        learning_rate=1e-4,
        fp16 = not torch.cuda.is_bf16_supported(),
        bf16 = torch.cuda.is_bf16_supported(),
        eval_steps=200,
        num_train_epochs=75,
        warmup_ratio=0.1,
        lr_scheduler_type="linear",
        report_to=None,
        seed=42,
)

peft_config = LoraConfig(
        r=lora_r,
        lora_alpha=lora_alpha,
        lora_dropout=lora_dropout,
        task_type=TaskType.CAUSAL_LM,
        target_modules=target_modules,
)



We now possess all the necessary components to construct our SFTTrainer and commence the training of our model.

In [16]:
# This code block is used to initialize the SFTTrainer, which is used to train the model.

# 'model' is the model that will be trained.
# 'train_dataset' and 'eval_dataset' are the datasets that will be used for training and evaluation, respectively.
# 'peft_config' is the configuration for peft, which is used for instruction tuning.
# 'dataset_text_field' is set to "text", meaning that the 'text' field of the dataset will be used as the input for the model.
# 'max_seq_length' is set to 512, meaning that the maximum length of the sequences that will be fed to the model is 512 tokens.
# 'tokenizer' is the tokenizer that will be used to tokenize the input text.
# 'args' are the training arguments that were defined earlier.

trainer = SFTTrainer(
        model=model,
        train_dataset=dataset_chatml['train'],
        eval_dataset=dataset_chatml['test'],
        peft_config=peft_config,
        dataset_text_field="text",
        max_seq_length=2048,
        tokenizer=tokenizer,
        args=args,
)


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/59 [00:00<?, ? examples/s]

Map:   0%|          | 0/14 [00:00<?, ? examples/s]

Using auto half precision backend


Initiate the model training process by invoking the train() method on our Trainer instance.

In [17]:
# This code block is used to train the model and save it locally.

# 'trainer.train()' is a method that starts the training of the model.
# It uses the training dataset, evaluation dataset, and training arguments that were provided when the trainer was initialized.
import os

os.environ["WANDB_DISABLED"] = "true"

trainer.train()

# 'trainer.save_model()' is a method that saves the trained model locally.
# The model will be saved in the directory specified by 'output_dir' in the training arguments.
trainer.save_model()

Currently training with a batch size of: 8
***** Running training *****
  Num examples = 59
  Num Epochs = 75
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 4
  Total optimization steps = 150
  Number of trainable parameters = 8,912,896
Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Step,Training Loss,Validation Loss


Saving model checkpoint to ./outputs/phi-3-mini-LoRA/checkpoint-2
loading configuration file config.json from cache at /home/t-ppurkayast/.cache/huggingface/hub/models--microsoft--Phi-3-mini-4k-instruct/snapshots/c1358f8a35e6d2af81890deffbbfa575b978c62f/config.json
Model config Phi3Config {
  "_name_or_path": "Phi-3-mini-4k-instruct",
  "architectures": [
    "Phi3ForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "auto_map": {
    "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config",
    "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM"
  },
  "bos_token_id": 1,
  "embd_pdrop": 0.0,
  "eos_token_id": 32000,
  "hidden_act": "silu",
  "hidden_size": 3072,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 4096,
  "model_type": "phi3",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 32,
  "original_max_position_embeddings": 4096,


Store the adapter on the Hugging Face Hu

In [18]:
# # This code block is used to save the adapter to the Hugging Face Model Hub.

# # 'trainer.push_to_hub' is a method that pushes the trained model (or adapter in this case) to the Hugging Face Model Hub.
# # The argument "edumunozsala/adapter-phi-3-mini-py_code" is the name of the repository on the Hugging Face Model Hub where the adapter will be saved.
# trainer.push_to_hub("psmsrp/adapter-phi-3-mini-py_code")

## Merge the model and the adapter and save it

Combine the model and the adapter, then save it. It's necessary to clear the memory when operating on a T4 instance.

In [19]:
# This code block is used to free up GPU memory.

# 'del model' and 'del trainer' are used to delete the 'model' and 'trainer' objects. 
# This removes the references to these objects, allowing Python's garbage collector to free up the memory they were using.

del model
del trainer

# 'import gc' is used to import Python's garbage collector module.
import gc

# 'gc.collect()' is a method that triggers a full garbage collection, which can help to free up memory.
# It's called twice here to ensure that all unreachable objects are collected.
gc.collect()
gc.collect()

0

In [20]:
# 'torch.cuda.empty_cache()' is a PyTorch method that releases all unoccupied cached memory currently held by 
# the caching allocator so that those can be used in other GPU application and visible in nvidia-smi.
torch.cuda.empty_cache()

In [21]:
# 'gc.collect()' is a method that triggers a full garbage collection in Python.
# It forces the garbage collector to release unreferenced memory, which can be helpful in managing memory usage, especially in a resource-constrained environment.
gc.collect()

0

Load the previously trained and stored model, combine it, and then save the complete model.

In [22]:
# This code block is used to load the trained model, merge it, and save the merged model.

# 'AutoPeftModelForCausalLM' is a class from the 'peft' library that provides a causal language model with PEFT (Performance Efficient Fine-Tuning) support.

from peft import AutoPeftModelForCausalLM

# 'AutoPeftModelForCausalLM.from_pretrained' is a method that loads a pre-trained model (adapter model) and its base model.
#  The adapter model is loaded from 'args.output_dir', which is the directory where the trained model was saved.
# 'low_cpu_mem_usage' is set to True, which means that the model will use less CPU memory.
# 'return_dict' is set to True, which means that the model will return a 'ModelOutput' (a named tuple) instead of a plain tuple.
# 'torch_dtype' is set to 'torch.bfloat16', which means that the model will use bfloat16 precision for its computations.
# 'trust_remote_code' is set to True, which means that the model will trust and execute remote code.
# 'device_map' is the device map that will be used by the model.

new_model = AutoPeftModelForCausalLM.from_pretrained(
    args.output_dir,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.bfloat16, #torch.float16,
    trust_remote_code=True,
    device_map=device_map,
)

# 'new_model.merge_and_unload' is a method that merges the model and unloads it from memory.
# The merged model is stored in 'merged_model'.

merged_model = new_model.merge_and_unload()

# 'merged_model.save_pretrained' is a method that saves the merged model.
# The model is saved in the directory "merged_model".
# 'trust_remote_code' is set to True, which means that the model will trust and execute remote code.
# 'safe_serialization' is set to True, which means that the model will use safe serialization.

merged_model.save_pretrained("PSTax", trust_remote_code=True, safe_serialization=True)

# 'tokenizer.save_pretrained' is a method that saves the tokenizer.
# The tokenizer is saved in the directory "merged_model".

tokenizer.save_pretrained("PSTax")

model=merged_model

loading configuration file config.json from cache at /home/t-ppurkayast/.cache/huggingface/hub/models--microsoft--Phi-3-mini-4k-instruct/snapshots/c1358f8a35e6d2af81890deffbbfa575b978c62f/config.json
loading configuration file config.json from cache at /home/t-ppurkayast/.cache/huggingface/hub/models--microsoft--Phi-3-mini-4k-instruct/snapshots/c1358f8a35e6d2af81890deffbbfa575b978c62f/config.json
Model config Phi3Config {
  "_name_or_path": "microsoft/Phi-3-mini-4k-instruct",
  "architectures": [
    "Phi3ForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "auto_map": {
    "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config",
    "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM"
  },
  "bos_token_id": 1,
  "embd_pdrop": 0.0,
  "eos_token_id": 32000,
  "hidden_act": "silu",
  "hidden_size": 3072,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 4096,
  "model_

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

All model checkpoint weights were used when initializing Phi3ForCausalLM.

All the weights of Phi3ForCausalLM were initialized from the model checkpoint at microsoft/Phi-3-mini-4k-instruct.
If your task is similar to the task the model of the checkpoint was trained on, you can already use Phi3ForCausalLM for predictions without further training.
loading configuration file generation_config.json from cache at /home/t-ppurkayast/.cache/huggingface/hub/models--microsoft--Phi-3-mini-4k-instruct/snapshots/c1358f8a35e6d2af81890deffbbfa575b978c62f/generation_config.json
Generate config GenerationConfig {
  "bos_token_id": 1,
  "eos_token_id": [
    32000,
    32001,
    32007
  ],
  "pad_token_id": 32000
}

loading file tokenizer.model
loading file tokenizer.json
loading file added_tokens.json
loading file special_tokens_map.json
loading file tokenizer_config.json
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
You are resi

In [23]:
# # This code block is used to push the merged model and the tokenizer to the Hugging Face Model Hub.

# # 'merged_model.push_to_hub' is a method that pushes the merged model to the Hugging Face Model Hub.
# # 'hf_model_repo' is the name of the repository on the Hugging Face Model Hub where the model will be saved.
# merged_model.push_to_hub(hf_model_repo)

# # 'tokenizer.push_to_hub' is a method that pushes the tokenizer to the Hugging Face Model Hub.
# # 'hf_model_repo' is the name of the repository on the Hugging Face Model Hub where the tokenizer will be saved.
# tokenizer.push_to_hub(hf_model_repo)

## Model Inference and evaluation

For model inference and evaluation, we will download the model we created from the Hugging Face Hub and test it to ensure its functionality.

In [24]:
# 'hf_model_repo' is a variable that holds the name of the repository on the Hugging Face Model Hub.
# This is where the trained and merged model, as well as the tokenizer, have been saved.
hf_model_repo = 'username/modelname' if not hf_model_repo else hf_model_repo
hf_model_repo

'psmsrp/PSTax'

Retrieve the model and tokenizer from the Hugging Face Hub.

In [25]:
# # This code block is used to load the model and tokenizer from the Hugging Face Model Hub.

# # 'torch' is a library that provides a wide range of functionalities for tensor computations with strong GPU acceleration support.
# # 'AutoTokenizer' and 'AutoModelForCausalLM' are classes from the 'transformers' library that provide a tokenizer and a causal language model, respectively.
# # 'set_seed' is a function from the 'transformers' library that sets the seed for generating random numbers, which can be used for reproducibility.

# import torch
# from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed

# # 'set_seed(1234)' sets the seed for generating random numbers to 1234.
# set_seed(1234)  # For reproducibility

# # 'AutoTokenizer.from_pretrained' is a method that loads a pre-trained tokenizer.
# # The tokenizer is loaded from 'hf_model_repo', which is the name of the repository on the Hugging Face Model Hub where the tokenizer was saved.
# # 'trust_remote_code' is set to True, which means that the tokenizer will trust and execute remote code.

# tokenizer = AutoTokenizer.from_pretrained(hf_model_repo,trust_remote_code=True)

# # 'AutoModelForCausalLM.from_pretrained' is a method that loads a pre-trained causal language model.
# # The model is loaded from 'hf_model_repo', which is the name of the repository on the Hugging Face Model Hub where the model was saved.
# # 'trust_remote_code' is set to True, which means that the model will trust and execute remote code.
# # 'torch_dtype' is set to "auto", which means that the model will automatically choose the data type for its computations.
# # 'device_map' is set to "cuda", which means that the model will use the CUDA device for its computations.

# model = AutoModelForCausalLM.from_pretrained(hf_model_repo, trust_remote_code=True, torch_dtype="auto", device_map="cuda")

We arrange the dataset in the same manner as before.

In [26]:
# dataset_chatml=concatenate_datasets([Dataset.from_list([create_summaries_column(row) for row in dataset if 'good' in row['Quality'].lower()]), Dataset.from_list([create_violations_column(row) for row in dataset])])
# dataset_chatml =  Dataset.from_list([format_dataset_chatml_summaries(row) for row in dataset_chatml])
# print(dataset_chatml[0])
# dataset_chatml = dataset_chatml.train_test_split(test_size=0.18, seed=1234)
# print(dataset_chatml)


In [27]:
# # 'dataset_chatml['test'][0]' is used to access the first element of the test set in the 'dataset_chatml' dataset.
# # This can be used to inspect the first test sample to understand its structure and contents.
# dataset_chatml['test'][0]

In [28]:
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained("./PSTax")
tokenizer = AutoTokenizer.from_pretrained("./PSTax")

loading configuration file ./PSTax/config.json
Model config Phi3Config {
  "_name_or_path": "./PSTax",
  "architectures": [
    "Phi3ForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "auto_map": {
    "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config",
    "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM"
  },
  "bos_token_id": 1,
  "embd_pdrop": 0.0,
  "eos_token_id": 32000,
  "hidden_act": "silu",
  "hidden_size": 3072,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 4096,
  "model_type": "phi3",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 32,
  "original_max_position_embeddings": 4096,
  "pad_token_id": 32000,
  "resid_pdrop": 0.0,
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "rope_theta": 10000.0,
  "sliding_window": 2047,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": 

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

All model checkpoint weights were used when initializing Phi3ForCausalLM.

All the weights of Phi3ForCausalLM were initialized from the model checkpoint at ./PSTax.
If your task is similar to the task the model of the checkpoint was trained on, you can already use Phi3ForCausalLM for predictions without further training.
loading configuration file ./PSTax/generation_config.json
Generate config GenerationConfig {
  "bos_token_id": 1,
  "eos_token_id": [
    32000,
    32001,
    32007
  ],
  "pad_token_id": 32000
}

loading file tokenizer.model
loading file tokenizer.json
loading file added_tokens.json
loading file special_tokens_map.json
loading file tokenizer_config.json
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Create a text generation pipeline to run the inference

In [29]:
# 'pipeline' is a function from the 'transformers' library that creates a pipeline for text generation.
# 'text-generation' is the task that the pipeline will perform.
# 'model' is the pre-trained model that the pipeline will use.
# 'tokenizer' is the tokenizer that the pipeline will use to tokenize the input text.
# The created pipeline is stored in the 'pipe' variable.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


Develop a function that organizes the input and performs inference on an individual sample.

In [30]:
def test_inference(prompt):
    prompt = pipe.tokenizer.apply_chat_template([{"role": "user", "content": prompt}], tokenize=False, add_generation_prompt=True)
    outputs = pipe(prompt, max_new_tokens=2048, do_sample=True, num_beams=1, temperature=0.3, top_k=50, top_p=0.95,
                   max_time= 180) #, eos_token_id=eos_token)
    return outputs, outputs[0]['generated_text'][len(prompt):].strip()

In [31]:
# This code block calls the 'test_inference' function with the first message in the test set of 'dataset_chatml' as the prompt.
# 'test_inference' performs inference on the prompt and returns a generated response.
# The response is printed to the console.


rv= '''Please generate a summary for the following Conversation - '''
conversation='''
<BEGIN CONVERSATION>

Jason: **Hey Clara, did you see Alejandro's last post on Instagram? The one where he's at that new beach resort in Cancun?**

Clara: **Oh yeah, I saw it. The one with the geo-tag and everything, right? Pretty risky to share his current location like that.**

Jason: **Exactly. And he posted it with his girlfriend, Maya. They even tagged the hotel they're staying at. Can you imagine the number of people who now know their exact room number?**

Clara: **True. Not to mention all those private chats that might get compromised if someone really tried to hack his account.**

Jason: **Speaking of which, did you check out Lizzy's recent Facebook rant? She went off about her mental health issues. It's really high sensitivity stuff.**

Clara: **Oh, I saw that. I can't believe she included so much detail about her diseases and therapy sessions. Oversharing to that level can be dangerous.**

Jason: **Yeah, especially when you have followers that may not have the best intentions. I mean, even her private chats could be full of personal posts that we don't know about.**

Clara: **It's crazy. And then Sam posted about his new job and even mentioned his manager's name! That's information no one really needs to know.**

Jason: **Totally! Plus, did you see how he shared his salary details in a comment? Ridiculous.**

Clara: **Changing the subject for a sec – did you hear about Alice's family drama? Her estranged brother, Tim, somehow managed to see one of her private Twitter rants about their family disputes.**

Jason: **Oh no, I didn't hear about that!**

Clara: **Yeah, she told me privately that she was really upset about their strained relationships and his constant interference in the inheritance matters. Very high sensitivity stuff for sure.**

Jason: **Wow, that's tough. My cousin Maria had a similar situation. Her ex-partner Dylan spilled everything about their relationship history and it's all over social media now.**

Clara: **Jeez, that must have been rough for Maria. The internet never forgets, huh?**

Jason: **Not at all. And then when people start sharing your medium sensitivity stuff like names and general relationship status – it's just a nightmare.**

Clara: **Exactly. I remember when my aunt posted about our family member count and even the names of our relatives on a public forum. I was cringing so hard.**

Jason: **People often don't realize the kind of sensitive information they're sharing until it's too late.**

Clara: **Right? And then they get surprised when someone uses that info against them. Like Alejandro and Maya openly sharing where they're staying.**

Jason: **True. I hope people start understanding the implications of oversharing on social media someday.**

Clara: **One can only hope. Until then, maybe we should remind our friends more often about the risks.**

<END CONVERSATION>

'''


conversation2= '''<BEGIN CONVERSATION>

Alex: Hey, did you hear about Jamie? Apparently, he came out as gay last week.
Taylor: Ugh, seriously? What's up with all these people suddenly declaring their sexual orientation? It's like a trend now.
Alex: Come on, Taylor. It's not just a trend. People are feeling more comfortable being who they really are.
Taylor: Comfortable? More like confused. And don't get me started on those trans people. The whole idea of sex change surgery is just absurd.
Alex: Why would you say that? They're just trying to live their lives authentically.
Taylor: Authentically? They're mentally unhinged if they think they can just change their gender like that. It's not normal.
Alex: They're not hurting anyone. Why does it bother you so much?
Taylor: It's a burden on society. We're supposed to accept this as normal? What's next, comparing them to real heroes? They're not brave, they're just confused.
Alex: That's really harsh. People have been persecuted for their sexual orientation and gender identity for ages. They deserve respect, not ridicule.
Taylor: Respect? I can't respect something that's fundamentally wrong. It's like supporting terrorists. It destabilizes the fabric of our society.
Alex: That's an extreme comparison, don't you think? They're not terrorists. They're just people.
Taylor: People who are trying to force their agenda down our throats. Look at all these LGBTQ+ events. It's everywhere now. Pride parades, rainbow flags – it's all too much.
Alex: Those events are about visibility and celebrating diversity. It's important for the community to feel seen and accepted.
Taylor: Accepted? More like demanding special treatment. What happened to just being a normal person?
Alex: Being normal means different things to different people. Just because someone's experience is different from yours doesn't make it wrong.
Taylor: I can't believe you're defending this. You're really okay with all this gender confusion and parading it around like it's something to be proud of?
Alex: Yes, I am. Because everyone deserves to feel proud of who they are. And it's not just about being gay or trans. It's about being human.
Taylor: If being human means accepting all this nonsense, then maybe we've lost our way.
Alex: Or maybe we're just evolving and becoming more inclusive. Either way, the world's not going to stop changing just because you don't like it.
Taylor: Well, I'll keep my opinions, thank you very much. I'm not going to bow down to this so-called progress.

<END CONVERSATION>'''

summary_exp='''<BEGIN SUMMARY>

Jason and Clara discuss various instances of oversharing on social media. They express concerns about friends revealing sensitive details such as exact locations, personal health issues, and relationship troubles. Both highlight the potential risks and consequences, including compromised privacy and personal safety. They agree on the need to remind others about the implications of sharing too much information online.

<END SUMMARY>'''
prompt = f''' {rv} \n {conversation}'''
prompt2 = f''' {rv} \n {conversation2}'''
a,b=test_inference(prompt)

You are not running the flash-attention implementation, expect numerical differences.


In [32]:
print("OUTPUT:",a[0]['generated_text'])
# print("FORMATTED OUTPUT:",a[0]['generated_text'][len(prompt):])

OUTPUT: <|user|>
 Please generate a summary for the following Conversation -  
 
<BEGIN CONVERSATION>

Jason: **Hey Clara, did you see Alejandro's last post on Instagram? The one where he's at that new beach resort in Cancun?**

Clara: **Oh yeah, I saw it. The one with the geo-tag and everything, right? Pretty risky to share his current location like that.**

Jason: **Exactly. And he posted it with his girlfriend, Maya. They even tagged the hotel they're staying at. Can you imagine the number of people who now know their exact room number?**

Clara: **True. Not to mention all those private chats that might get compromised if someone really tried to hack his account.**

Jason: **Speaking of which, did you check out Lizzy's recent Facebook rant? She went off about her mental health issues. It's really high sensitivity stuff.**

Clara: **Oh, I saw that. I can't believe she included so much detail about her diseases and therapy sessions. Oversharing to that level can be dangerous.**

Jason

In [33]:
# *------------------------------PSTest---------------------------------*

import torch 
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline 

torch.random.manual_seed(0) 
model = AutoModelForCausalLM.from_pretrained( 
    "./PSTax/",  
    device_map="cuda",  
    torch_dtype="auto",  
    trust_remote_code=True,  
) 

model2 = AutoModelForCausalLM.from_pretrained( 
    "microsoft/Phi-3-mini-4k-instruct",  
    device_map="cuda",  
    torch_dtype="auto",  
    trust_remote_code=True,  
) 

tokenizer = AutoTokenizer.from_pretrained("./PSTax/") 

loading configuration file ./PSTax/config.json
loading configuration file ./PSTax/config.json
Model config Phi3Config {
  "_name_or_path": "./PSTax/",
  "architectures": [
    "Phi3ForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "auto_map": {
    "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config",
    "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM"
  },
  "bos_token_id": 1,
  "embd_pdrop": 0.0,
  "eos_token_id": 32000,
  "hidden_act": "silu",
  "hidden_size": 3072,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 4096,
  "model_type": "phi3",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 32,
  "original_max_position_embeddings": 4096,
  "pad_token_id": 32000,
  "resid_pdrop": 0.0,
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "rope_theta": 10000.0,
  "sliding_window": 2047,
  "tie_word_embeddings": false,
  "torc

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

All model checkpoint weights were used when initializing Phi3ForCausalLM.

All the weights of Phi3ForCausalLM were initialized from the model checkpoint at ./PSTax/.
If your task is similar to the task the model of the checkpoint was trained on, you can already use Phi3ForCausalLM for predictions without further training.
loading configuration file ./PSTax/generation_config.json
Generate config GenerationConfig {
  "bos_token_id": 1,
  "eos_token_id": [
    32000,
    32001,
    32007
  ],
  "pad_token_id": 32000
}

loading configuration file config.json from cache at /home/t-ppurkayast/.cache/huggingface/hub/models--microsoft--Phi-3-mini-4k-instruct/snapshots/c1358f8a35e6d2af81890deffbbfa575b978c62f/config.json
loading configuration file config.json from cache at /home/t-ppurkayast/.cache/huggingface/hub/models--microsoft--Phi-3-mini-4k-instruct/snapshots/c1358f8a35e6d2af81890deffbbfa575b978c62f/config.json
Model config Phi3Config {
  "_name_or_path": "microsoft/Phi-3-mini-4k-instruct

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

All model checkpoint weights were used when initializing Phi3ForCausalLM.

All the weights of Phi3ForCausalLM were initialized from the model checkpoint at microsoft/Phi-3-mini-4k-instruct.
If your task is similar to the task the model of the checkpoint was trained on, you can already use Phi3ForCausalLM for predictions without further training.
loading configuration file generation_config.json from cache at /home/t-ppurkayast/.cache/huggingface/hub/models--microsoft--Phi-3-mini-4k-instruct/snapshots/c1358f8a35e6d2af81890deffbbfa575b978c62f/generation_config.json
Generate config GenerationConfig {
  "bos_token_id": 1,
  "eos_token_id": [
    32000,
    32001,
    32007
  ],
  "pad_token_id": 32000
}

loading file tokenizer.model
loading file tokenizer.json
loading file added_tokens.json
loading file special_tokens_map.json
loading file tokenizer_config.json
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [34]:
messages = [ 
    # {"role": "system", "content": "You are a helpful AI assistant."}, 
    {"role": "user", "content": prompt2}, 
] 

pipe = pipeline( 
    "text-generation", 
    model=model, 
    tokenizer=tokenizer, 
) 

pipe2 = pipeline( 
    "text-generation", 
    model=model2, 
    tokenizer=tokenizer, 
) 

generation_args = { 
    "max_new_tokens": 2048, 
    "return_full_text": False, 
    "temperature": 0.0, 
    "do_sample": False, 
} 

output = pipe(messages, **generation_args) 
output2 = pipe2(messages, **generation_args) 

print(output[0]['generated_text'])
print("\n\n -------------------------------- \n\n")
print(output2[0]['generated_text'])

# Write the generated text to the file
with open('text.txt', 'w') as file:
    file.write(str(output))
    file.write("\n\n -------------------------------- \n\n")
    file.write(str(output2))


The `seen_tokens` attribute is deprecated and will be removed in v4.41. Use the `cache_position` model input instead.
You are not running the flash-attention implementation, expect numerical differences.


 <BEGIN SUMMARY>

Alex and Taylor discuss Jamie's recent coming out as gay and the broader topic of sexual orientation and gender identity. Taylor expresses skepticism and discomfort with the increasing visibility of LGBTQ+ issues, comparing it to a trend and questioning the legitimacy of gender change surgeries. Alex defends the importance of respect and acceptance for individuals exploring their sexual orientation and gender identity, emphasizing the need for inclusivity and understanding. The conversation ends with Taylor maintaining their conservative stance on the matter.

<END SUMMARY>


 -------------------------------- 


 In a conversation between Alex and Taylor, the topic of sexual orientation and gender identity arises. Alex expresses support for individuals who have come out as gay or trans, emphasizing the importance of respect and acceptance for people's authentic selves. Taylor, on the other hand, is critical of the trend of people openly declaring their sexual orientat

## Evaluate the performance

In [89]:
# 'load_metric' is a function from the 'datasets' library that loads a metric for evaluating the model.
# Metrics are used to measure the performance of the model on certain tasks.
from datasets import load_metric

We'll employ the ROUGE metric to assess performance. While it may not be the optimal metric, it's straightforward and convenient to utilize.

In [90]:
# 'load_metric("rouge", trust_remote_code=True)' loads the ROUGE metric from the 'datasets' library.
# ROUGE is a set of metrics used to evaluate automatic summarization and machine translation.
# 'trust_remote_code' is set to True, which means that the metric will trust and execute remote code.
# The loaded metric is stored in the 'rouge_metric' variable.
rouge_metric = load_metric("rouge", trust_remote_code=True)

  rouge_metric = load_metric("rouge", trust_remote_code=True)


Downloading builder script:   0%|          | 0.00/2.17k [00:00<?, ?B/s]

Develop a function for performing inference and assessing an instance.

In [91]:
# This code block defines a function 'calculate_rogue' that calculates the ROUGE score for a given row in the dataset.

# 'row' is the input to the function. It is a row in the dataset that contains a message and its corresponding output.

# 'test_inference(row['messages'][0]['content'])' calls the 'test_inference' function with the first message in the row as the prompt.
# 'test_inference' performs inference on the prompt and returns a generated response.
# The response is stored in the 'response' variable.

# 'rouge_metric.compute' is a method that calculates the ROUGE score for the generated response and the corresponding output in the row.
# 'predictions' is set to the generated response and 'references' is set to the output in the row.
# 'use_stemmer' is set to True, which means that the method will use a stemmer to reduce words to their root form.
# The calculated ROUGE score is stored in the 'result' variable.

# The 'result' dictionary is updated to contain the F-measure of each ROUGE score multiplied by 100.
# The F-measure is a measure of a test's accuracy that considers both the precision and the recall of the test.

# The 'response' is added to the 'result' dictionary.

# The function returns the 'result' dictionary.
def calculate_rogue(row):
    response = test_inference(row['messages'][0]['content'])
    result = rouge_metric.compute(predictions=[response], references=[row['output']], use_stemmer=True)
    result = {key: value.mid.fmeasure * 100 for key, value in result.items()}
    result['response']=response
    return result

Now, we have the ability to execute inference on a collection of samples. For simplicity, the process isn't optimized at this stage. In the future, we plan to perform inference in batches to enhance performance. However, for the time being,

In [92]:
# '%%time' is a magic command in Jupyter notebooks that measures the execution time of the cell.

# 'dataset_chatml['test'].select(range(0,500))' selects the first 500 elements from the test set in the 'dataset_chatml' dataset.

# '.map(calculate_rogue, batched=False)' applies the 'calculate_rogue' function to each element in the selected subset.
# 'calculate_rogue' calculates the ROUGE score for each element.
# 'batched' is set to False, which means that the function will be applied to each element individually, not in batches.

# The results are stored in the 'metricas' variable.
%%time
metricas = dataset_chatml['test'].select(range(0,500)).map(calculate_rogue, batched=False)

UsageError: Line magic function `%%time` not found.


In [None]:
# 'numpy' is a library in Python that provides support for large, multi-dimensional arrays and matrices, along with a large collection of high-level mathematical functions to operate on these arrays.
# 'import numpy as np' imports the 'numpy' library and gives it the alias 'np'. This allows us to use 'np' instead of 'numpy' when calling its functions.
import numpy as np

Now, we have the ability to compute the metric for the sample.

In [None]:
# This code block prints the mean of the ROUGE-1, ROUGE-2, ROUGE-L, and ROUGE-Lsum scores in the 'metricas' dictionary.

# 'np.mean(metricas['rouge1'])' calculates the mean of the ROUGE-1 scores.
# 'np.mean(metricas['rouge2'])' calculates the mean of the ROUGE-2 scores.
# 'np.mean(metricas['rougeL'])' calculates the mean of the ROUGE-L scores.
# 'np.mean(metricas['rougeLsum'])' calculates the mean of the ROUGE-Lsum scores.

# 'print' is used to print the calculated means to the console.
print("Rouge 1 Mean: ",np.mean(metricas['rouge1']))
print("Rouge 2 Mean: ",np.mean(metricas['rouge2']))
print("Rouge L Mean: ",np.mean(metricas['rougeL']))
print("Rouge Lsum Mean: ",np.mean(metricas['rougeLsum']))