In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, TextStreamer, QuantoConfig
import torch
from dotenv import load_dotenv
from datasets import load_dataset

# Load only the SQuAD "train" split. Because split= is given, `data` is a
# single Dataset (indexed by row), not a DatasetDict keyed by split name.
data = load_dataset("squad", split="train")
# Read environment variables from ../.env
# (presumably credentials such as a Hugging Face token — TODO confirm).
load_dotenv("../.env")

# Checkpoint used for both the tokenizer and the models loaded by fetch_model.
base_model_path = "meta-llama/Llama-3.2-1B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(base_model_path)
# Fall back to the EOS token for padding when the tokenizer defines no pad token.
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id
# int8 weight quantization settings, read by fetch_model.
quant_config = QuantoConfig(weights="int8")

def fetch_model(is_quant: bool=False):
    """Load the causal LM at ``base_model_path``, optionally quantized.

    Args:
        is_quant: When True, the module-level ``quant_config`` is passed to
            ``from_pretrained`` as ``quantization_config``. When False (the
            default) no quantization config is passed at all, so the model
            is loaded unquantized.

    Returns:
        The loaded model. If its config has no ``pad_token_id``, it is set to
        the config's ``eos_token_id``.
    """
    load_kwargs = dict(
        pretrained_model_name_or_path=base_model_path,
        return_dict=True,
        low_cpu_mem_usage=True,
        # torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True,
    )
    # Only request quantization when asked for; previously the config was
    # always present, so is_quant=False still produced a quantized model.
    if is_quant:
        load_kwargs["quantization_config"] = quant_config

    model = AutoModelForCausalLM.from_pretrained(**load_kwargs)
    if model.config.pad_token_id is None:
        model.config.pad_token_id = model.config.eos_token_id
    return model

def get_trainable_params(model) -> None:
    """Print how many parameter elements of ``model`` require gradients.

    Walks ``model.parameters()``, adds up ``numel()`` for every parameter
    whose ``requires_grad`` flag is set, and prints the total. Nothing is
    returned.
    """
    trainable_count = 0
    for tensor in model.parameters():
        if tensor.requires_grad:
            trainable_count += tensor.numel()
    print(f"Total trainable parameters: {trainable_count}")

# Load the checkpoint twice, once with is_quant=True and once with
# is_quant=False, so the two variants can be compared in later cells.
quant_model = fetch_model(is_quant=True)
base_model = fetch_model(is_quant=False)

In [None]:
# Display the name/path recorded in the loaded model's config.
base_model.config._name_or_path

In [None]:
# Text-generation pipeline built on base_model and the shared tokenizer.
# It holds a reference to the same base_model object used by later cells.
pipe = pipeline(
    "text-generation",
    model=base_model,
    tokenizer=tokenizer,
    # torch_dtype=torch.float16,
    device_map="auto",
)

In [None]:
# Trainable parameter count of the model loaded with is_quant=False.
get_trainable_params(base_model)

In [None]:
# Trainable parameter count of the model loaded with is_quant=True.
get_trainable_params(quant_model)

In [None]:
from peft import (
    get_peft_model,
    prepare_model_for_kbit_training,
    LoraConfig
)

# LoRA adapter settings: rank 16, alpha 16, no dropout, no bias training,
# applied to the q/k/v/o and gate/up/down projection modules listed below.
lora_config = LoraConfig(
    r=16,
    lora_alpha=16,
    lora_dropout=0,
    target_modules=[
        "q_proj","k_proj","v_proj",
        "o_proj", "gate_proj", "up_proj",
        "down_proj"
    ],
    bias="none",
    task_type="CAUSAL_LM",
)

# NOTE(review): k-bit preparation is applied to base_model (loaded with
# is_quant=False), not to quant_model — confirm this is intended.
# NOTE(review): base_model is also the model held by `pipe`; wrapping it here
# presumably affects that pipeline as well — verify before comparing outputs.
lora_base_model = prepare_model_for_kbit_training(base_model)
lora_base_model = get_peft_model(lora_base_model, lora_config)

In [None]:
# Trainable parameter count after wrapping the model with the LoRA adapter.
get_trainable_params(lora_base_model)

In [None]:
# Re-check quant_model's trainable parameter count after the LoRA setup cell.
get_trainable_params(quant_model)

In [None]:
# Ratio of two hard-coded parameter counts, presumably pasted from the
# get_trainable_params outputs above (LoRA count over another model's count)
# — TODO confirm and compute from the models instead of literals.
11272192/262735872

In [None]:
# Single-turn smoke test of the pipeline built on base_model.
messages = [{"role": "user", "content": "Who is Vincent van Gogh?"}]

# Render the conversation as text and append the assistant header so the
# model continues with its reply.
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)

# do_sample=True with no seed set in this notebook, so output varies per run.
outputs = pipe(prompt, max_new_tokens=120, do_sample=True)

print(outputs[0]["generated_text"])

In [None]:
from IPython.display import Markdown, display

# Two-message conversation: a system persona plus one user request.
messages = [
    {
        "role": "system",
        "content": "You are a skilled Python developer specializing in database management and optimization.",
    },
    {
        "role": "user",
        "content": "I'm experiencing a sorting issue in my database. Could you please provide Python code to help resolve this problem?",
    },
]

# Render as text and append the assistant header for generation.
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)

outputs = pipe(prompt, max_new_tokens=512, do_sample=True)

# Show only the text after the first assistant header, rendered as Markdown.
# This raises IndexError if the header is absent from the generated text.
display(
    Markdown(
        outputs[0]["generated_text"].split(
            "<|start_header_id|>assistant<|end_header_id|>"
        )[1]
    )
)

In [None]:
# `data` was loaded with split="train", so it is a single Dataset rather than
# a DatasetDict; index the first row directly instead of going through a
# "train" key (which would be looked up as a column name and fail).
data[0]

In [None]:
def apply_chat_template(example):
    """Render one question/answer example as a chat-formatted training string.

    Args:
        example: A row with a ``'question'`` string and an ``'answers'`` dict
            whose ``'text'`` entry is either a string or a list of strings.

    Returns:
        ``{"prompt": <rendered text>}`` where the text is the user question
        followed by the assistant answer, formatted by the module-level
        ``tokenizer``'s chat template.
    """
    answer = example['answers']['text']
    # When several reference answers are given as a list, train on the first
    # one (same rule as apply_message_template) rather than passing the list.
    if isinstance(answer, list):
        answer = answer[0]
    messages = [
        {"role": "user", "content": example['question']},
        {"role": "assistant", "content": answer}
    ]
    # The conversation already ends with the assistant turn, so do not append
    # another (empty) assistant header after it.
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=False
    )
    return {"prompt": prompt}

# Add a "prompt" column to every row by applying apply_chat_template.
templated_dataset = data.map(apply_chat_template)

In [None]:
from transformers import PreTrainedTokenizerFast


def apply_chat_template(
        input_dataset: dict, tokeniser: "PreTrainedTokenizerFast | None" = None
) -> dict:
    """Convert one example into the conversational ``messages`` format.

    See: https://huggingface.co/docs/trl/en/sft_trainer for details.

    Args:
        input_dataset: A row with ``'context'`` and ``'question'`` strings and
            an ``'answers'`` dict whose ``'text'`` entry is either a string or
            a list of strings.
        tokeniser: Currently unused; kept so existing calls that pass a
            tokenizer keep working.

    Returns:
        ``{"messages": [...]}`` with the context as the system message, the
        full question as the user message, and the (first) answer as the
        assistant message.
    """
    answer = input_dataset['answers']['text']
    # When several reference answers are given, keep the first one.
    if isinstance(answer, list):
        answer = answer[0]
    message_template = [
        {"role": "system", "content": input_dataset['context']},
        # Use the whole question; indexing it with [0] kept only one character.
        {"role": "user", "content": input_dataset['question']},
        {"role": "assistant", "content": answer}
    ]
    # Return the list built above. The previous `messages` name was not
    # defined in this function and fell through to a notebook-level global.
    return {"messages": message_template}

def apply_message_template(
    input_dataset: dict
) -> dict:
    """Turn one example into a three-message conversation.

    See: https://huggingface.co/docs/trl/en/sft_trainer for details.

    The context becomes the system message, the question the user message,
    and the answer text the assistant message. If the answer text is a list,
    its first element is used.
    """
    raw_answer = input_dataset['answers']['text']
    first_answer = raw_answer[0] if isinstance(raw_answer, list) else raw_answer
    conversation = [
        dict(role="system", content=input_dataset['context']),
        dict(role="user", content=input_dataset['question']),
        dict(role="assistant", content=first_answer),
    ]
    return {"messages": conversation}

# Rebuild templated_dataset with a "messages" column per row.
# NOTE(review): this rebinds templated_dataset, replacing the earlier result of
# data.map(apply_chat_template); rows now carry "messages", not "prompt".
templated_dataset = data.map(apply_message_template)

In [None]:
# Inspect the first templated row.
templated_dataset[0]

In [None]:
# tokenise the prompts
def tokenize_function(example):
    """Tokenize one example's ``'prompt'`` text and attach training labels.

    The text is padded/truncated to 128 tokens with the module-level
    ``tokenizer``. Labels are a copy of ``input_ids`` with every padding
    position replaced by -100 so it is ignored by the loss.

    Args:
        example: A row containing a ``'prompt'`` string.

    Returns:
        The tokenizer output with an added ``'labels'`` list.
    """
    tokens = tokenizer(
        example['prompt'],
        padding="max_length",
        truncation=True,
        max_length=128
    )
    # Decide what is padding from the attention mask, not from the token id:
    # when the pad token falls back to the EOS token, comparing ids would also
    # mask genuine end-of-sequence tokens inside the text.
    tokens['labels'] = [
        token if keep else -100
        for token, keep in zip(tokens['input_ids'], tokens['attention_mask'])
    ]
    return tokens

# Tokenize every row, then drop the original text columns.
# NOTE(review): tokenize_function reads example['prompt'] and "prompt" is also
# removed below, but the most recent assignment to templated_dataset came from
# apply_message_template, which produces "messages" — verify which version of
# templated_dataset this cell is meant to consume.
tokenised_dataset = templated_dataset.map(tokenize_function)
tokenised_dataset = tokenised_dataset.remove_columns(["title", "context", "question", "answers", "prompt"])

In [None]:
# # https://www.mercity.ai/blog-post/guide-to-fine-tuning-llms-with-lora-and-qlora
# # https://huggingface.co/docs/diffusers/en/quantization/quanto
# for name, module in model.named_modules(): # https://www.datacamp.com/tutorial/fine-tuning-llama-3-2
#     print(f"name: {name}")
#     print(f"module: {module}")


In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
from datasets import load_dataset
from trl import SFTConfig, SFTTrainer
# Put the LoRA-wrapped model into training mode before building the trainer.
lora_base_model.train()
# Short supervised fine-tuning run: 50 optimizer steps, with a checkpoint and
# a log entry every 10 steps, written under ./cp.
training_args = SFTConfig(
    output_dir = "cp",
    per_device_train_batch_size = 8,
    gradient_accumulation_steps = 8,
    per_device_eval_batch_size = 8,
    eval_accumulation_steps = 8,
    # optim = "paged_adamw_32bit",
    save_steps = 10,
    logging_steps = 10,
    learning_rate = 5e-5,
    max_grad_norm = 0.3,
    max_steps = 50,
    warmup_ratio = 0.03,
    # No eval_dataset is passed to the trainer below, so periodic evaluation
    # is switched off; set this back to "steps" once an eval set is supplied.
    eval_strategy="no",
    lr_scheduler_type = "linear",
)
trainer = SFTTrainer(
    model=lora_base_model,
    args=training_args,
    # tokenised_dataset derives from the single "train" split loaded at the
    # top of the notebook, so it is already the training Dataset and has no
    # "train" key to index.
    train_dataset=tokenised_dataset,
)

# Initialize Trainer
# trainer = Trainer(
#     model=lora_base_model,
#     args=training_args,
#     train_dataset=tokenised_dataset["train"],
#     eval_dataset=tokenised_dataset["validation"],
#     tokenizer=tokenizer)
#
# # Train the model
# trainer.train()
#
# # Save the model and tokenizer
# trainer.save_model("./fine-tuned-model")
# tokenizer.save_pretrained("./fine-tuned-model")

In [None]:
import pandas as pd
import json

# Load the ConvFinQA training file from a path relative to the notebook.
# NOTE(review): this rebinds `data`, which earlier cells use for the SQuAD
# dataset; re-running those cells after this one will see the JSON content.
path = "raw_data/ConvFinQA/train.json"
with open(path, "r") as file:
    data = json.load(file)

In [None]:
# Inspect the first record of the loaded JSON.
data[0]

In [None]:
from peft import PeftModel
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
def load_lora_model(base_model_path, adapter_path):
    """Load a LoRA fine-tuned model plus an untouched base model to compare.

    Args:
        base_model_path: Hub id or local path of the base checkpoint.
        adapter_path: Directory containing the trained LoRA adapter.

    Returns:
        ``(fine_tuned_model, base_model, tokenizer)``. The two models do not
        share modules, so ``base_model`` really is the plain checkpoint.
        Note that this holds two copies of the base weights in memory.
    """
    tokenizer = AutoTokenizer.from_pretrained(base_model_path)

    def _load_base():
        # Each call yields an independent copy of the base checkpoint.
        return AutoModelForCausalLM.from_pretrained(
            base_model_path,
            device_map="auto",
            trust_remote_code=True
        )

    # Plain base model, kept free of adapters for side-by-side comparison.
    base_model = _load_base()

    # PEFT injects the adapter layers into the model it is given, so the
    # adapter gets its own copy instead of reusing base_model.
    fine_tuned_model = PeftModel.from_pretrained(_load_base(), adapter_path)
    return fine_tuned_model, base_model, tokenizer
# Adapter checkpoint directory produced by a training run.
# NOTE(review): the SFTConfig in this notebook writes to "cp" with
# max_steps=50 and save_steps=10, so "checkpoint-60" presumably comes from a
# different run — confirm the path.
model_path = "../cp/checkpoint-60"  # Your output directory from training
# NOTE(review): this rebinds base_model_path from the 1B checkpoint used at the
# top of the notebook to the 3B one; fetch_model reads this global, and the
# adapter must have been trained on the same base — verify.
base_model_path = "meta-llama/Llama-3.2-3B-Instruct"
# Also rebinds base_model, replacing the model loaded by fetch_model.
ft_model, base_model, tokeniser = load_lora_model(base_model_path, model_path)

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from datasets import load_dataset
# Load all SQuAD splits (no split= argument), so val_data is keyed by split name.
val_data = load_dataset("squad")

# Pipeline around the LoRA fine-tuned model.
ft_pipe = pipeline(
    "text-generation",
    model=ft_model,
    tokenizer=tokeniser,
    # torch_dtype=torch.float16,
    device_map="auto",
)

# Pipeline around the base model, for side-by-side comparison.
# NOTE(review): the comparison is only meaningful if base_model does not share
# modules with ft_model — verify how load_lora_model builds the two.
base_pipe = pipeline(
    "text-generation",
    model=base_model,
    tokenizer=tokeniser,
    # torch_dtype=torch.float16,
    device_map="auto",
)

In [None]:
# Inspect the second row of the SQuAD train split.
val_data["train"][1]

In [None]:

# Ask the base pipeline a single question, with no context passage supplied.
messages = [{"role": "user", "content": "What is in front of the Notre Dame Main Building?"}]

# Render as text and append the assistant header for generation.
prompt = tokeniser.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
# Inference only, so gradient tracking is disabled.
# Sampling is enabled with no seed set here, so the text differs between runs.
with torch.no_grad():
    outputs = base_pipe(prompt, max_new_tokens=120, do_sample=True)

print(outputs[0]["generated_text"])

In [None]:
# Same question through the fine-tuned pipeline; reuses `prompt` from the
# previous cell and overwrites `outputs`.
with torch.no_grad():
    outputs = ft_pipe(prompt, max_new_tokens=120, do_sample=True)

print(outputs[0]["generated_text"])

In [None]:
def extract_assistant_response(output_text):
    """Return the assistant's latest reply from chat-formatted text.

    Args:
        output_text: Generated text that may contain one or more
            ``<|start_header_id|>assistant<|end_header_id|>`` headers.

    Returns:
        The text after the last assistant header, stripped of surrounding
        whitespace. If no header is present, ``output_text`` is returned
        unchanged.
    """
    assistant_header = "<|start_header_id|>assistant<|end_header_id|>"
    # Split on the LAST header so that, for multi-turn text, earlier assistant
    # turns and the user turns between them are not returned by mistake.
    _, found_header, reply = output_text.rpartition(assistant_header)
    if not found_header:
        return output_text
    return reply.strip()

# Apply the extractor to the most recent pipeline output held in `outputs`.
extract_assistant_response(outputs[0]["generated_text"])

In [1]:
from mlflow.models import infer_signature
import mlflow
from dotenv import load_dotenv
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
# Read environment variables from ../.env
# (presumably MLflow tracking settings and/or credentials — TODO confirm).
load_dotenv("../.env")
# last_run_id = mlflow.last_active_run().info.run_id
# Log a tokenizer without padding configured; padding is only needed for training.
tokenizer_no_pad = AutoTokenizer.from_pretrained(
    "meta-llama/Llama-3.2-1B-Instruct", add_bos_token=True
)
# NOTE(review): this loads the plain 1B checkpoint from the hub, so the model
# logged below is the base model, not a LoRA fine-tuned one — confirm intended.
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.2-1B-Instruct",
    device_map="auto",
    trust_remote_code=True,
)
mlflow.set_experiment("Llama-3.2-1B-Instruct")
with mlflow.start_run():
    # mlflow.log_params(self.lora_config.to_dict())
    # Log the model/tokenizer pair under the "model" artifact path, with an
    # input signature inferred from one example chat message.
    mlflow.transformers.log_model(
        transformers_model=dict(
            model=model, tokenizer=tokenizer_no_pad
        ),
        artifact_path="model",
        signature=infer_signature(
            model_input={
                "role": "user",
                "content": "What is in front of the Notre Dame Main Building?",
            },
        ),
    )

  from .autonotebook import tqdm as notebook_tqdm
python-dotenv could not parse statement starting at line 2
2025/05/08 16:53:52 INFO mlflow.tracking.fluent: Experiment with name 'Llama-3.2-1B-Instruct' does not exist. Creating a new experiment.
Device set to use mps


🏃 View run bedecked-pug-295 at: http://127.0.0.1:8080/#/experiments/951531889965927899/runs/a283232fc9284556b2ae75053b0318fb
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/951531889965927899


In [8]:
from datasets import load_dataset
# Load all SQuAD splits; val_data is keyed by split name.
val_data = load_dataset("squad")

In [12]:
# First 100 validation rows; slicing yields a dict of column name -> list of
# values, which makes for a very long printed output.
val_data["validation"][:100]

{'id': ['56be4db0acb8001400a502ec',
  '56be4db0acb8001400a502ed',
  '56be4db0acb8001400a502ee',
  '56be4db0acb8001400a502ef',
  '56be4db0acb8001400a502f0',
  '56be8e613aeaaa14008c90d1',
  '56be8e613aeaaa14008c90d2',
  '56be8e613aeaaa14008c90d3',
  '56bea9923aeaaa14008c91b9',
  '56bea9923aeaaa14008c91ba',
  '56bea9923aeaaa14008c91bb',
  '56beace93aeaaa14008c91df',
  '56beace93aeaaa14008c91e0',
  '56beace93aeaaa14008c91e1',
  '56beace93aeaaa14008c91e2',
  '56beace93aeaaa14008c91e3',
  '56bf10f43aeaaa14008c94fd',
  '56bf10f43aeaaa14008c94fe',
  '56bf10f43aeaaa14008c94ff',
  '56bf10f43aeaaa14008c9500',
  '56bf10f43aeaaa14008c9501',
  '56d20362e7d4791d009025e8',
  '56d20362e7d4791d009025e9',
  '56d20362e7d4791d009025ea',
  '56d20362e7d4791d009025eb',
  '56d600e31c85041400946eae',
  '56d600e31c85041400946eb0',
  '56d600e31c85041400946eb1',
  '56d9895ddc89441400fdb50e',
  '56d9895ddc89441400fdb510',
  '56be4e1facb8001400a502f6',
  '56be4e1facb8001400a502f9',
  '56be4e1facb8001400a502fa',
  '5

In [20]:
import pandas as pd

# Build a DataFrame from the first 100 validation rows (one column per field).
df = pd.DataFrame(val_data["validation"][:100])
# Expand each row's answers dict into columns, then keep only the list of
# answer texts in place of the original dict.
answers = pd.json_normalize(df["answers"])
df['answers'] = answers['text']
df

Unnamed: 0,id,title,context,question,answers
0,56be4db0acb8001400a502ec,Super_Bowl_50,Super Bowl 50 was an American football game to...,Which NFL team represented the AFC at Super Bo...,"[Denver Broncos, Denver Broncos, Denver Broncos]"
1,56be4db0acb8001400a502ed,Super_Bowl_50,Super Bowl 50 was an American football game to...,Which NFL team represented the NFC at Super Bo...,"[Carolina Panthers, Carolina Panthers, Carolin..."
2,56be4db0acb8001400a502ee,Super_Bowl_50,Super Bowl 50 was an American football game to...,Where did Super Bowl 50 take place?,"[Santa Clara, California, Levi's Stadium, Levi..."
3,56be4db0acb8001400a502ef,Super_Bowl_50,Super Bowl 50 was an American football game to...,Which NFL team won Super Bowl 50?,"[Denver Broncos, Denver Broncos, Denver Broncos]"
4,56be4db0acb8001400a502f0,Super_Bowl_50,Super Bowl 50 was an American football game to...,What color was used to emphasize the 50th anni...,"[gold, gold, gold]"
...,...,...,...,...,...
95,56d2051ce7d4791d00902609,Super_Bowl_50,"CBS broadcast Super Bowl 50 in the U.S., and c...",How much money did a 1/2 minute commercial cost?,"[$5 million, $5 million, $5 million]"
96,56d2051ce7d4791d0090260a,Super_Bowl_50,"CBS broadcast Super Bowl 50 in the U.S., and c...",What band headlined half-time during Super Bow...,"[Coldplay, Coldplay, Coldplay]"
97,56d2051ce7d4791d0090260b,Super_Bowl_50,"CBS broadcast Super Bowl 50 in the U.S., and c...",What two artists came out with Coldplay during...,"[Beyoncé and Bruno Mars, Beyoncé and Bruno Mar..."
98,56d602631c85041400946ed8,Super_Bowl_50,"CBS broadcast Super Bowl 50 in the U.S., and c...",Who broadcast the Super Bowl on TV?,"[CBS, CBS, CBS]"


Unnamed: 0,id,title,context,question,answers
0,56be4db0acb8001400a502ec,Super_Bowl_50,Super Bowl 50 was an American football game to...,Which NFL team represented the AFC at Super Bo...,"{'text': ['Denver Broncos', 'Denver Broncos', ..."
1,56be4db0acb8001400a502ed,Super_Bowl_50,Super Bowl 50 was an American football game to...,Which NFL team represented the NFC at Super Bo...,"{'text': ['Carolina Panthers', 'Carolina Panth..."
2,56be4db0acb8001400a502ee,Super_Bowl_50,Super Bowl 50 was an American football game to...,Where did Super Bowl 50 take place?,"{'text': ['Santa Clara, California', 'Levi's S..."
3,56be4db0acb8001400a502ef,Super_Bowl_50,Super Bowl 50 was an American football game to...,Which NFL team won Super Bowl 50?,"{'text': ['Denver Broncos', 'Denver Broncos', ..."
4,56be4db0acb8001400a502f0,Super_Bowl_50,Super Bowl 50 was an American football game to...,What color was used to emphasize the 50th anni...,"{'text': ['gold', 'gold', 'gold'], 'answer_sta..."
...,...,...,...,...,...
95,56d2051ce7d4791d00902609,Super_Bowl_50,"CBS broadcast Super Bowl 50 in the U.S., and c...",How much money did a 1/2 minute commercial cost?,"{'text': ['$5 million', '$5 million', '$5 mill..."
96,56d2051ce7d4791d0090260a,Super_Bowl_50,"CBS broadcast Super Bowl 50 in the U.S., and c...",What band headlined half-time during Super Bow...,"{'text': ['Coldplay', 'Coldplay', 'Coldplay'],..."
97,56d2051ce7d4791d0090260b,Super_Bowl_50,"CBS broadcast Super Bowl 50 in the U.S., and c...",What two artists came out with Coldplay during...,"{'text': ['Beyoncé and Bruno Mars', 'Beyoncé a..."
98,56d602631c85041400946ed8,Super_Bowl_50,"CBS broadcast Super Bowl 50 in the U.S., and c...",Who broadcast the Super Bowl on TV?,"{'text': ['CBS', 'CBS', 'CBS'], 'answer_start'..."
