<a href="https://colab.research.google.com/github/mightyoctopus/amazon-pricer-model-open-source-fine-tuned-models/blob/main/w7_d5_test_finetuned_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q --upgrade torch==2.5.1+cu124 torchvision==0.20.1+cu124 torchaudio==2.5.1+cu124 --index-url https://download.pytorch.org/whl/cu124
!pip install -q --upgrade requests==2.32.3 bitsandbytes==0.46.0 transformers==4.48.3 accelerate==1.3.0 datasets==3.2.0 peft==0.14.0 trl==0.14.0 matplotlib wandb

In [2]:
import os
import re
import math

from google.colab import userdata
from huggingface_hub import login
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, set_seed
from datasets import load_dataset, Dataset, DatasetDict
from peft import PeftModel

from tqdm import tqdm
from datetime import datetime
import matplotlib.pyplot as plt


In [4]:
# --- Model and run configuration ---
BASE_MODEL = "meta-llama/Meta-Llama-3.1-8B"  # base checkpoint the fine-tuned weights are applied to
PROJECT_NAME = "pricer"
ED_HF_USER = "ed-donner"  # Hugging Face account used for the fine-tuned model repo
MO_HF_USER = "MightyOctopus"  # Hugging Face account used for the dataset repo

RUN_NAME = "2024-09-13_13.04.39"
PROJECT_RUN_NAME = f"{PROJECT_NAME}-{RUN_NAME}"
# Revision of the fine-tuned model repo to load; a falsy value loads without a revision.
REVISION = "e8d637df551603dc86cd7a1598a8f44af4d7ae36"
FINETUNED_MODEL = f"{ED_HF_USER}/{PROJECT_RUN_NAME}"

# Built from MO_HF_USER (same value as before) so the account name lives in one place.
DATASET_NAME = f"{MO_HF_USER}/amazon-pricer-dataset-v2-0"

# True -> load the base model in 4-bit; False -> load it in 8-bit.
QUANT_4_BIT = True

# Render matplotlib figures inline in the notebook output.
%matplotlib inline


# ANSI escape sequences for colour-coded text output.
RESET = "\033[0m"
RED = "\033[91m"
YELLOW = "\033[93m"
GREEN = "\033[92m"

# Colour label -> escape sequence (the "orange" label is rendered with the yellow code).
COLOR_MAP = dict(red=RED, orange=YELLOW, green=GREEN)

In [None]:
# Authenticate with the Hugging Face Hub.
# The HF_TOKEN environment variable takes precedence. When it is not set, fall back to the
# Colab secrets store (`userdata`), which is where Colab keeps notebook secrets.
hf_token = os.getenv("HF_TOKEN")
if hf_token is None:
    try:
        hf_token = userdata.get("HF_TOKEN")
    except (userdata.SecretNotFoundError, userdata.NotebookAccessError) as err:
        # No usable Colab secret either: keep the token unset and let `login` handle it,
        # which is what happened before whenever the environment variable was missing.
        print(f"HF_TOKEN not available from Colab secrets: {err!r}")
login(hf_token, add_to_git_credential=True)

In [7]:
# Load the dataset named by DATASET_NAME and keep handles to its train and test splits.
dataset = load_dataset(DATASET_NAME)
train, test = dataset["train"], dataset["test"]

In [None]:
# Display the first record of the test split to inspect what an example looks like.
test[0]

## Load the tokenizer and models

In [10]:
# Build the bitsandbytes quantization config: 4-bit NF4 when QUANT_4_BIT is set, otherwise 8-bit.
if QUANT_4_BIT:
    quant_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )
else:
    # The bnb_4bit_* options only apply to 4-bit loading, so none are passed for 8-bit.
    quant_config = BitsAndBytesConfig(load_in_8bit=True)

In [None]:
# Tokenizer of the base model; the EOS token doubles as the padding token, padding on the right.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Base model, loaded with the quantization config chosen earlier and automatic device placement.
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    device_map="auto",
    trust_remote_code=True,
    quantization_config=quant_config,
)
base_model.generation_config.pad_token_id = tokenizer.pad_token_id

# Wrap the base model with the fine-tuned PEFT weights.
# `revision` is only passed when REVISION is set; otherwise the call is made without it.
fine_tuned_model = (
    PeftModel.from_pretrained(base_model, FINETUNED_MODEL, revision=REVISION)
    if REVISION
    else PeftModel.from_pretrained(base_model, FINETUNED_MODEL)
)

# Report the loaded model's memory footprint in GB (bytes / 1e9).
print(f"Memory Footprint: {fine_tuned_model.get_memory_footprint() / 1e9:.1f} GB")

In [59]:
def extract_price(s: str) -> float:
    """Pull the first number that follows "Price is $" out of a piece of text.

    Args:
        s: Text to search, e.g. a decoded model response.

    Returns:
        The first number found after the first "Price is $" marker (commas are
        stripped before searching), as a float. Returns 0.0 when the marker is
        absent or no number follows it.
    """
    filter_phrase = "Price is $"
    if filter_phrase not in s:
        return 0.0
    # Only the text between the first marker and the next marker (if any) is searched.
    content = s.split(filter_phrase)[1].replace(",", "")
    match = re.search(r"[-+]?\d*\.\d+|\d+", content)
    # Always return a float so callers get one consistent type, even on a miss.
    return float(match.group()) if match else 0.0

In [None]:
# Quick check: the number is still found when other words follow the "$" (expected result: 899.99).
extract_price("Price is $a fabulous 899.99 or so")

In [64]:
def model_predict(prompt):
    """Predict a price for `prompt` with the fine-tuned model.

    Steps:
      1. Tokenize the prompt and move the token ids to the model's device.
      2. Build an all-ones attention mask for the single prompt sequence.
      3. Generate at most 3 new tokens.
      4. Decode the whole output sequence back to text and parse the price from it.

    Args:
        prompt: Prompt text. `extract_price` only finds a value when the decoded
            text contains "Price is $".

    Returns:
        The value returned by `extract_price` for the decoded output.
    """
    # Re-seed on every call so repeated predictions start from the same random state.
    set_seed(42)

    # Follow the model's own placement instead of assuming a hard-coded "cuda" device.
    device = fine_tuned_model.device
    inputs = tokenizer.encode(prompt, return_tensors="pt").to(device)
    # Integer mask with the same shape, dtype and device as the token ids.
    attention_mask = torch.ones_like(inputs)

    outputs = fine_tuned_model.generate(
        inputs,
        attention_mask=attention_mask,
        max_new_tokens=3,
        num_return_sequences=1,
    )
    response = tokenizer.decode(outputs[0])

    return extract_price(response)
