In [1]:
# Additional references: 
# Mistral 7B: https://arxiv.org/pdf/2310.06825.pdf (Mistral.AI)
# https://www.datacamp.com/tutorial/mistral-7b-tutorial

# Import necessary libraries

import torch
from transformers import (
    AutoModelForCausalLM, 
    AutoTokenizer, 
    pipeline, 
    BitsAndBytesConfig
)
from datasets import load_dataset
from random import randint

# Base checkpoint on the Hugging Face Hub; the tokenizer is loaded from the same repo.
# AutoTokenizer docs:
# https://huggingface.co/docs/transformers/model_doc/auto#transformers.AutoTokenizer
model_id = 'mistralai/Mistral-7B-v0.1'
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_id)

# ---------------------------------------------------------------------
# BitsAndBytes (BnB) quantization settings so the model can run in
# 4-bit precision.
#
# Ref: https://huggingface.co/blog/4bit-transformers-bitsandbytes
#
# Dettmers, T., Pagnoni, A., Holtzman, A. and Zettlemoyer, L. (2023).
# QLoRA: Efficient Finetuning of Quantized LLMs.
# [online] arXiv.org. Available at: https://arxiv.org/abs/2305.14314.
# ---------------------------------------------------------------------
bnb_config = BitsAndBytesConfig(
    # Load the weights in 4-bit precision.
    load_in_4bit=True,
    # NF4 ("normalised float 4") is the 4-bit data type used for the weights.
    bnb_4bit_quant_type='nf4',
    # Second quantization pass over the quantization constants; saves an
    # additional ~0.4 bits per parameter.
    bnb_4bit_use_double_quant=True,
    # dtype used for the computation on top of the 4-bit weights.
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Load the causal-LM weights using the 4-bit quantization settings above.
# AutoModelForCausalLM docs:
# https://huggingface.co/docs/transformers/model_doc/auto#transformers.AutoModelForCausalLM
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,  # bits and bytes quantization configuration
    torch_dtype=torch.bfloat16,      # dtype requested for the loaded weights
    device_map='auto',               # let accelerate compute the device placement
)

# Pad on the right-hand side when a sequence is shorter than the maximum
# length (set explicitly to avoid warnings).
tokenizer.padding_side = 'right'

# Text-generation pipeline wrapping the already-loaded model and tokenizer.
# https://huggingface.co/docs/transformers/en/chat_templating#is-there-an-automated-pipeline-for-chat
# https://huggingface.co/docs/transformers/v4.39.2/en/main_classes/pipelines#transformers.TextGenerationPipeline
pipe = pipeline(
    'text-generation',
    tokenizer=tokenizer,
    model=model,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)

# Load the test split and pick one random sample to sanity-check text generation.

# https://huggingface.co/docs/datasets/en/loading#json
dataset = load_dataset(
    'json',
    data_files='../data/test_CRM_data.json',
    split='train'
)
# random.randint includes BOTH endpoints, so the upper bound must be
# len(dataset) - 1; using len(dataset) would occasionally index one past the
# last row and raise IndexError.
idx = randint(0, len(dataset) - 1)

# Build the generation prompt from the first two messages of the sample
# (the context and the user query); the third message, the reference answer,
# is deliberately left out.
# https://huggingface.co/docs/transformers/main/en/chat_templating
chat_turns = dataset[idx]["messages"][:2]
prompt = pipe.tokenizer.apply_chat_template(
    chat_turns,
    add_generation_prompt=True,  # append the marker that opens the assistant turn
    tokenize=False,              # return the rendered string, not token ids
)

##### WARNING #####
# Ignore the below warning for now as the default chat template is applicable for the given model.
# No chat template is defined for this tokenizer - using the default template for the LlamaTokenizerFast class. 
# If the default is not appropriate for your model, please set `tokenizer.chat_template` to an appropriate template. 
# See https://huggingface.co/docs/transformers/main/chat_templating for more information.
##### ####### #####

# Enabling the print statements below helps in understanding the different ways the chat template can be applied

# print(
#     tokenizer.decode(
#         tokenizer.apply_chat_template(
#             dataset[idx]["messages"][:2], 
#             tokenize=True, 
#             add_generation_prompt=True, 
#             return_tensors="pt"
#         )[0]
#     )
# )

# print(
#     model.generate(
#         tokenizer.apply_chat_template(
#             dataset[idx]["messages"][:2], 
#             tokenize=True, 
#             add_generation_prompt=True, 
#             return_tensors="pt"
#         ), 
#         max_new_tokens=128,
#         pad_token_id = tokenizer.eos_token_id
#     )[0]
# )

# print(
#     tokenizer.decode(
#         model.generate(
#             tokenizer.apply_chat_template(
#                 dataset[idx]["messages"][:2], 
#                 tokenize=True, 
#                 add_generation_prompt=True, 
#                 return_tensors="pt"
#             ), 
#             max_new_tokens=128,
#             pad_token_id = tokenizer.eos_token_id
#         )[0]
#     )
# )

# print(
#     pipe(
#         prompt, 
#         max_new_tokens=128, 
#         pad_token_id=tokenizer.eos_token_id
#     )[0]['generated_text']
# )

# Generate a completion for the prompt.
# Generation parameters: https://huggingface.co/docs/transformers/en/main_classes/text_generation
outputs = pipe(
    prompt,
    # Length budget for the completion (number of newly generated tokens).
    max_new_tokens=1024,
    # Sampling is ENABLED: tokens are drawn stochastically, shaped by the
    # three settings below.
    do_sample=True,
    temperature=0.95,  # controls randomness
    top_k=10,          # restrict sampling to the 10 most likely next tokens
    top_p=0.95,        # cumulative probability cutoff for token selection
    # Stop at the end-of-sequence token and reuse it as the padding token
    # (enables open-ended generation).
    eos_token_id=pipe.tokenizer.eos_token_id,
    pad_token_id=pipe.tokenizer.eos_token_id,
)

# Show the sampled example next to the model's completion.
# Message order in a sample: [0] context, [1] query, [2] reference answer.
sample_messages = dataset[idx]['messages']
print(f"Query:\n{sample_messages[1]['content']}\n")
print(f"Context:\n{sample_messages[0]['content']}\n")

# Un-escape the stored reference answer: collapse doubled backslashes, trim
# whitespace, turn literal "\n" into real newlines, drop trailing backslashes
# and un-escape single quotes. The order of the steps matters.
original_answer = (
    sample_messages[2]['content']
    .replace("\\\\", "\\")
    .strip()
    .replace("\\n", "\n")
    .rstrip('\\')
    .replace("\\'", "'")
)
print(f"Original Answer:\n{original_answer}\n")

# Drop the first len(prompt) characters (the prompt prefix) so only the
# newly generated text is shown.
generated_text = outputs[0]['generated_text']
print(f"Generated Answer:\n{generated_text[len(prompt):].strip()}\n")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]


No chat template is defined for this tokenizer - using the default template for the LlamaTokenizerFast class. If the default is not appropriate for your model, please set `tokenizer.chat_template` to an appropriate template. See https://huggingface.co/docs/transformers/main/chat_templating for more information.



Query:
I'd like to know the top emerging trends in product categories based on recent transactions within the last 30 days. Can you provide me with the top 3 product categories with the highest purchase counts?

Context:

You are a Python function generator. Users will ask you questions in English, 
and you will produce a Python function as answer to the question based on the provided CONTEXT.

CONTEXT:
Pandas DataFrame df containing transaction data with columns order_id, user_id, item_id, timestamp, score.
order_id takes string datatype and identifies the order.
user_id takes string datatype and identifies the customer.
item_id takes string datatype that identifies the product.
timestamp takes timestamp datatype and represents the datetimestamp of transaction.
score takes float datatype and represents the score of the transaction.
Note that a customer can make multiple transactions for a product but the customer product pair will be just one entry for each order.
Pandas DataFrame pro

In [3]:
%%capture cap

# Get the print statements of query, context, original answer and generated answer from stdout
print(f"Query:\n{dataset[idx]['messages'][1]['content']}\n")
print(f"Context:\n{dataset[idx]['messages'][0]['content']}\n")
original_answer = dataset[idx]['messages'][2]['content'].replace("\\\\", "\\").strip().replace("\\n", "\n").rstrip('\\').replace("\\'", "'")
print(f"Original Answer:\n{original_answer}\n")
print(f"Generated Answer:\n{outputs[0]['generated_text'][len(prompt):].strip()}\n")

In [9]:
# Save the captured stdout (query, context, original and generated answer) to a text file.
# The encoding is pinned to UTF-8: the text is model-generated and may contain
# non-ASCII characters, which would otherwise depend on the platform's default
# encoding and could raise UnicodeEncodeError.
with open('../data/random_untuned_model_gen_data.txt', 'w', encoding='utf-8') as file:
    file.write(cap.stdout)

In [12]:
# Capturing the generated output from untuned model for all samples from dataset.
import pandas as pd

data = []

def generate_answer(dataset, idx, generator=None):
    """
    Generate an answer for a single sample of the dataset.

    Args:
    - dataset (Dataset): Indexable dataset. Each row must expose a "messages"
      list ordered as [context, query, original answer], where every entry is
      a dict with a "content" key.
    - idx (int): Index of the row to use.
    - generator (optional): Text-generation pipeline to call. It must provide
      a `.tokenizer` with `apply_chat_template` and `eos_token_id`, and be
      callable as `generator(prompt, **generation_kwargs)`. Defaults to the
      notebook-level `pipe` when omitted, which keeps existing
      `generate_answer(dataset, idx)` calls working unchanged.

    Returns:
    - tuple: A tuple containing the query, context, original answer, and generated answer.
    """
    if generator is None:
        # Fall back to the pipeline created earlier in the notebook.
        generator = pipe

    # Fetch the row once instead of re-indexing the dataset for every field.
    messages = dataset[idx]["messages"]

    # Create the prompt from the context and the query only; the reference
    # answer (third message) must not leak into the prompt.
    prompt = generator.tokenizer.apply_chat_template(
        messages[:2],
        tokenize=False,
        add_generation_prompt=True
    )

    # Generate the answer. Sampling is enabled, so repeated calls can return
    # different text for the same prompt.
    outputs = generator(
        prompt,
        max_new_tokens=1024,                           # budget of newly generated tokens
        do_sample=True,                                # sample instead of greedy decoding
        temperature=0.95,                              # control randomness
        top_k=10,                                      # sample from the 10 most likely next tokens
        top_p=0.95,                                    # cumulative prob. cutoff for token selection
        eos_token_id=generator.tokenizer.eos_token_id, # end of sequence token
        pad_token_id=generator.tokenizer.eos_token_id  # padding token - for enabling open-ended generation
    )

    # Extract relevant information from the sample
    query = messages[1]['content']
    context = messages[0]['content']
    # Un-escape the stored reference answer; the order of the steps matters.
    original_answer = (
        messages[2]['content']
        .replace("\\\\", "\\")
        .strip()
        .replace("\\n", "\n")
        .rstrip('\\')
        .replace("\\'", "'")
    )

    # Keep only the text that follows the prompt prefix in the pipeline output.
    generated_answer = outputs[0]['generated_text'][len(prompt):].strip()

    return query, context, original_answer, generated_answer
    

# Load the test split from the local JSON file (the 'json' loader exposes it as 'train').
dataset = load_dataset(
    'json',
    data_files='../data/test_CRM_data.json',
    split='train'
)

##### WARNING #####
# The warning below can be ignored for now; calling the pipeline one sample at a
# time may not be the most efficient implementation.
# It starts appearing after 10 calls:
# UserWarning: You seem to be using the pipelines sequentially on GPU. 
# In order to maximize efficiency please use a dataset
#  warnings.warn(
##### ####### #####

# Generate an answer for every sample, one at a time. Note that the loop rebinds
# the notebook-level names idx, query, context, original_answer and
# generated_answer on every iteration, and appends to the `data` list defined
# at the top of this cell.
for idx in range(len(dataset)):
    print(f"Index: {idx} is in progress.")
    query, context, original_answer, generated_answer = generate_answer(dataset, idx)
    data.append([query, context, original_answer, generated_answer])

# Collect all rows into a DataFrame and write them to CSV (without the index column).
# Results are only persisted here, after the whole loop has finished.
df = pd.DataFrame(data, columns=['Query', 'Context', 'Original_Answer', 'Generated_Answer'])
df.to_csv('../data/untuned_model_gen_data.csv', index=False)

Index: 0 is in progress.
Index: 1 is in progress.
Index: 2 is in progress.
Index: 3 is in progress.
Index: 4 is in progress.
Index: 5 is in progress.
Index: 6 is in progress.
Index: 7 is in progress.
Index: 8 is in progress.
Index: 9 is in progress.




Index: 10 is in progress.
Index: 11 is in progress.
Index: 12 is in progress.
Index: 13 is in progress.
Index: 14 is in progress.
Index: 15 is in progress.
Index: 16 is in progress.
Index: 17 is in progress.
Index: 18 is in progress.
Index: 19 is in progress.
Index: 20 is in progress.
Index: 21 is in progress.
Index: 22 is in progress.
Index: 23 is in progress.
Index: 24 is in progress.
Index: 25 is in progress.
Index: 26 is in progress.
Index: 27 is in progress.
Index: 28 is in progress.
Index: 29 is in progress.
Index: 30 is in progress.
Index: 31 is in progress.
Index: 32 is in progress.
Index: 33 is in progress.
Index: 34 is in progress.
Index: 35 is in progress.
Index: 36 is in progress.
Index: 37 is in progress.
Index: 38 is in progress.
Index: 39 is in progress.
Index: 40 is in progress.
Index: 41 is in progress.


In [1]:
import os

from huggingface_hub import login

# SECURITY: never hard-code an access token in a notebook. The token that was
# previously committed here had write permission and must be revoked at
# https://huggingface.co/settings/tokens.
# The token is now read from the HF_TOKEN environment variable; when it is not
# set, `token=None` makes `login` ask for the token interactively instead.
login(
  token=os.environ.get("HF_TOKEN"),
)

Token has not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /home/jovyan/.cache/huggingface/token
Login successful


In [None]:
# Import modules
import torch
from transformers import (
    AutoTokenizer, 
    AutoModelForCausalLM, 
    BitsAndBytesConfig,
    TrainingArguments
)
from trl import (
    setup_chat_format, 
    SFTTrainer
)
from peft import LoraConfig
from datasets import (
    load_dataset, 
    concatenate_datasets
)
from random import randint

# Base checkpoint to fine-tune.
model_id = "mistralai/Mistral-7B-v0.1"

# 4-bit NF4 quantization with double quantization; computation runs in bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True, 
    bnb_4bit_use_double_quant=True, 
    bnb_4bit_quant_type="nf4", 
    bnb_4bit_compute_dtype=torch.bfloat16
)

# 4-bit loading is requested solely through `quantization_config`.
# Passing `load_in_4bit=True` as an extra keyword together with
# `quantization_config` is rejected by transformers (ValueError) and is
# redundant anyway, because bnb_config already sets load_in_4bit=True.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    quantization_config=bnb_config,
)
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.padding_side = 'right'

# trl helper that prepares the model and tokenizer for chat-formatted training
# data; both returned objects replace the originals.
model, tokenizer = setup_chat_format(model, tokenizer)

# LoRA adapter configuration used for parameter-efficient fine-tuning.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules="all-linear",  # attach adapters to all linear layers
    r=256,                        # adapter rank
    lora_alpha=128,               # LoRA scaling factor
    lora_dropout=0.05,
    bias="none",                  # bias terms are not trained
)

# Training hyper-parameters, grouped by concern.
args = TrainingArguments(
    # --- output, checkpointing and logging ---
    output_dir="../models/code-mistral-7b-text-to-python",
    save_strategy="epoch",            # write a checkpoint at the end of every epoch
    push_to_hub=True,                 # upload to the Hugging Face Hub (needs a logged-in token)
    logging_steps=10,
    # --- schedule and batch size ---
    num_train_epochs=3,
    per_device_train_batch_size=3,
    gradient_accumulation_steps=2,    # 3 x 2 = 6 samples per optimizer step per device
    gradient_checkpointing=True,
    # --- optimizer ---
    optim="adamw_torch_fused",
    learning_rate=2e-4,
    lr_scheduler_type="constant",
    # NOTE(review): with the plain "constant" scheduler the warmup setting is
    # presumably not applied ("constant_with_warmup" would use it) - confirm
    # which behavior is intended.
    warmup_ratio=0.03,
    max_grad_norm=0.3,                # gradient clipping threshold
    # --- numeric precision ---
    bf16=True,
    tf32=True,
)

# Training and validation data, read from local JSON files
# (the 'json' loader exposes each file as a 'train' split).
dataset = load_dataset("json", split='train', data_files="../data/train_CRM_data.json")
val_dataset = load_dataset("json", split='train', data_files="../data/val_CRM_data.json")

# Sequence length handed to the trainer.
max_seq_length = 1024

# Supervised fine-tuning trainer combining the quantized base model, the LoRA
# configuration and the training arguments defined above.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=args,
    peft_config=peft_config,
    train_dataset=dataset,
    eval_dataset=val_dataset,
    max_seq_length=max_seq_length,
    packing=True,
    dataset_kwargs={
        "add_special_tokens": False,
        "append_concat_token": False,
    },
)

In [None]:
# Run the fine-tuning with the trainer configured in the previous cell.
trainer.train()

# Persist the trained model. No path is passed, so the trainer decides where to
# write (presumably args.output_dir - confirm).
trainer.save_model()

In [None]:
# Drop the references to the training objects, then ask PyTorch to release
# the cached GPU memory before the fine-tuned model is loaded for inference.
del model, trainer
torch.cuda.empty_cache()

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig
from datasets import load_dataset
from random import randint
from peft import AutoPeftModelForCausalLM

# Local directory written by the fine-tuning run; the tokenizer is loaded from it too.
model_id = "../models/code-mistral-7b-text-to-python"
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Same 4-bit NF4 quantization settings as used for training.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Load the fine-tuned PEFT model from the local directory.
model = AutoPeftModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

tokenizer.padding_side = 'right'

# Text-generation pipeline around the fine-tuned model.
pipe = pipeline(task="text-generation", tokenizer=tokenizer, model=model)

# Load the test split and pick one random sample.
dataset = load_dataset("json", data_files="../data/test_CRM_data.json", split="train")
# random.randint includes BOTH endpoints, so the upper bound must be
# len(dataset) - 1; len(dataset) would occasionally raise IndexError.
idx = randint(0, len(dataset) - 1)

# Prompt = context + query only; the reference answer (third message) is left out.
prompt = pipe.tokenizer.apply_chat_template(dataset[idx]["messages"][:2], tokenize=False, add_generation_prompt=True)

# Greedy decoding (do_sample=False). The sampling-only arguments temperature,
# top_k and top_p were removed: they have no effect without sampling and only
# trigger warnings, so the generated text is unchanged.
outputs = pipe(
    prompt,
    max_new_tokens=256,
    do_sample=False,
    eos_token_id=pipe.tokenizer.eos_token_id,
    pad_token_id=pipe.tokenizer.pad_token_id,
)

# Show the query, the reference answer and the generated text that follows
# the prompt prefix.
print(f"Query:\n{dataset[idx]['messages'][1]['content']}")
print(f"Original Answer:\n{dataset[idx]['messages'][2]['content']}")
print(f"Generated Answer:\n{outputs[0]['generated_text'][len(prompt):].strip()}")
