In [1]:
%%capture
!pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3  peft trl triton
!pip install --no-deps unsloth_zoo
!pip install sentencepiece protobuf datasets huggingface_hub seaborn matplotlib
!pip install --no-deps unsloth

In [None]:
#imports
from google.colab import userdata
from huggingface_hub import login
from unsloth import FastLanguageModel
import torch
from google.colab import userdata

# Read the Hugging Face access token stored under the Colab secret name
# 'HF_TOKEN' and authenticate this session with it. The same token is passed
# to the model loader below.
hf_token = userdata.get('HF_TOKEN')
login(token=hf_token)

# Choose the model version here: keep exactly one `hf_model` assignment
# uncommented (the base model or one of the fine-tuned iterations).
#hf_model = "meta-llama/Llama-3.2-3B"  # base model
#hf_model = "pashko-bond/Llama-3.2-3B-domains-iteration-1"
#hf_model = "pashko-bond/Llama-3.2-3B-domains-iteration-2"
hf_model = "pashko-bond/Llama-3.2-3B-domains-iteration-3"


# Load the selected checkpoint together with its tokenizer through Unsloth.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=hf_model,
    max_seq_length=2048,
    dtype=None,  # None leaves the dtype choice to Unsloth (presumably float16 or bfloat16 depending on the GPU; confirm)
    load_in_4bit=False,  # 4-bit loading is switched off
    token=hf_token,
)

In [None]:
# Report the size of the loaded model. The raw value is divided by 1e6 for
# display; presumably get_memory_footprint() returns bytes, which would make
# this decimal megabytes (confirm).
print(f"Memory footprint: {model.get_memory_footprint() / 1e6:.1f} MB")

In [4]:
# ========================
# 3. TESTING SCRIPT
# ========================
from transformers import TextStreamer
import re
from evaluate_response import DomainResponseEvaluator
from unsloth.chat_templates import get_chat_template

# Rebind `tokenizer` to the result of get_chat_template for the "llama-3.2"
# template; get_model_prediction relies on tokenizer.apply_chat_template to
# build its prompts.
tokenizer = get_chat_template(tokenizer, chat_template="llama-3.2")

# Refusal sentence the model is expected to emit for inappropriate requests.
INAPPROPRIATE_CONTENT_TEXT = "Request contains inappropriate content"

# Header that opens the assistant turn in a Llama 3 style transcript.
_ASSISTANT_HEADER_RE = re.compile(
    r'<\|start_header_id\|>assistant<\|end_header_id\|>', re.IGNORECASE
)
# Any chat-template special token of the form <|...|>, e.g. <|eot_id|>.
_SPECIAL_TOKEN_RE = re.compile(r'<\|[^|]*\|>')
_WHITESPACE_RE = re.compile(r'\s+')

# Candidate patterns, applied in this order; results keep pattern order first
# and text order second.
#   1. any "label.tld" token delimited by word boundaries
#   2. a domain that directly follows list numbering such as "1. "
#   3. a domain ending in one of a fixed set of TLDs (two groups, so findall
#      yields tuples whose first element is the full domain)
_DOMAIN_PATTERNS = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        r'\b([a-zA-Z0-9][a-zA-Z0-9-]*[a-zA-Z0-9]\.[a-zA-Z]{2,})\b',
        r'\d+\.\s*([a-zA-Z0-9][a-zA-Z0-9-]*[a-zA-Z0-9]\.[a-zA-Z]{2,})',
        r'\b([a-zA-Z0-9][a-zA-Z0-9-]*[a-zA-Z0-9]\.(com|net|org|io|dev|games|tech|online))\b',
    )
)
# Final shape check applied to every candidate before it is accepted.
_DOMAIN_SHAPE_RE = re.compile(r'^[a-zA-Z0-9][a-zA-Z0-9-]*[a-zA-Z0-9]\.[a-zA-Z]{2,}$')
_MAX_DOMAINS = 5


def extract_domains(text_output):
    """Pull up to five domain names out of a model response.

    If the text contains an assistant header, only the part after the first
    such header is inspected. Special tokens are removed and whitespace is
    collapsed before matching.

    Duplicates are detected case-insensitively (DNS names are not case
    sensitive), and the first spelling encountered is the one returned.

    Args:
        text_output: Decoded model output, with or without special tokens.

    Returns:
        - ``INAPPROPRIATE_CONTENT_TEXT`` (a str) if the refusal sentence
          appears in the inspected text;
        - a list of at most five distinct domain strings when any are found;
        - the str ``'No domains were found'`` otherwise.
    """
    header = _ASSISTANT_HEADER_RE.search(text_output)
    reply = text_output[header.end():] if header else text_output

    # A refusal short-circuits extraction entirely.
    if INAPPROPRIATE_CONTENT_TEXT in reply:
        return INAPPROPRIATE_CONTENT_TEXT

    reply = _SPECIAL_TOKEN_RE.sub(' ', reply)
    reply = _WHITESPACE_RE.sub(' ', reply).strip()

    domains = []
    seen = set()  # lower-cased domains already accepted
    for pattern in _DOMAIN_PATTERNS:
        for match in pattern.findall(reply):
            # Patterns with several groups produce tuples; group 1 is the domain.
            domain = match[0] if isinstance(match, tuple) else match
            key = domain.lower()
            if key in seen or not _DOMAIN_SHAPE_RE.match(domain):
                continue
            seen.add(key)
            domains.append(domain)

    return domains[:_MAX_DOMAINS] if domains else 'No domains were found'

def create_conversations(data):
    """Build one two-message chat (system + user) per business record.

    Args:
        data: Iterable of mappings, each providing a 'business_description'
            entry.

    Returns:
        A list with one conversation per input item, in input order. Each
        conversation is a list holding a system message followed by a user
        message, both as {"role", "content"} dicts.
    """
    conversations = []
    for item in data:
        system_turn = {
            "role": "system",
            "content": "You are a creative assistant that suggests catchy domain names for businesses."
        }
        user_turn = {
            "role": "user",
            "content": f"Generate 3 to 5 creative and memorable domain names for the following business description. Avoid hyphens or numbers. Prioritize .com domains unless a better option fits. Keep names short, brandable, and easy to spell.\n\nBusiness Description: {item['business_description']}"
        }
        conversations.append([system_turn, user_turn])
    return conversations

def get_model_prediction(row):
    """Generate domain suggestions for a single test record.

    Builds the chat prompt for ``row``, samples a completion from the
    module-level ``model``, and extracts domain names from the generated text.

    Args:
        row: Mapping with a 'business_description' entry (the shape expected
            by ``create_conversations``).

    Returns:
        A ``(response, domains)`` tuple. ``response`` is the decoded text of
        the newly generated tokens only, cut at the first
        "<|reserved_special_token_" marker and stripped. ``domains`` is
        whatever ``extract_domains`` returns for that text.
    """
    messages = create_conversations([row])[0]
    # Place the prompt on the model's own device instead of assuming "cuda".
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt"
    ).to(model.device)

    output_ids = model.generate(
        input_ids=inputs,
        max_new_tokens=64,
        use_cache=True,
        eos_token_id=tokenizer.convert_tokens_to_ids("<|eot_id|>"),
        # temperature / min_p only take effect when sampling is enabled; set it
        # explicitly rather than depending on the checkpoint's stored
        # generation config.
        do_sample=True,
        temperature=1.2,
        min_p=0.1
    )

    # Drop the prompt tokens so only the completion is decoded.
    generated_tokens = output_ids[0][inputs.shape[-1]:]
    # Special tokens are kept during decoding so the reserved-token marker
    # below can be used as a cut-off point.
    response = tokenizer.decode(generated_tokens, skip_special_tokens=False)
    response = response.split("<|reserved_special_token_")[0].strip()
    domains = extract_domains(response)
    return response, domains

In [None]:
# ========================
# 4. RUN EVALUATION
# ========================
import json
import random

# Number of test cases to evaluate and the seed used to pick them. A fixed seed
# means every checkpoint selected in the model cell is scored on the same rows,
# so their averages are comparable.
EVAL_SAMPLE_SIZE = 15
EVAL_SEED = 42

with open("testing.json", 'r', encoding='utf-8') as f:
    testing_data = json.load(f)

# Cap the sample size at the dataset size: random.sample raises ValueError when
# asked for more items than are available. A private Random instance keeps the
# global random state untouched.
testing_data = random.Random(EVAL_SEED).sample(
    testing_data, min(EVAL_SAMPLE_SIZE, len(testing_data))
)
evaluator = DomainResponseEvaluator()
total_confidence = 0.0      # sum of per-case average confidences
successful_evaluations = 0  # cases for which the evaluator returned results
scores = []                 # every individual domain confidence, for plotting

for row in testing_data:
    print(f'\n\n{"="*60}')
    print(f"Business description: {row['business_description']}")
    response, domains = get_model_prediction(row)
    print(f"Response: {response}")
    print(f"Domains: {domains}")
    # NOTE(review): `domains` is a plain string when extract_domains found
    # nothing or detected a refusal; it is passed through unchanged. Confirm
    # evaluate_domains handles that case.
    evaluation_results = evaluator.evaluate_domains(row['business_description'], domains)

    if evaluation_results:
        # Read each confidence once so the plotted scores and the per-case
        # average always agree (a missing 'confidence' counts as 0 in both).
        case_scores = [result.get('confidence', 0) for result in evaluation_results]
        scores += case_scores
        case_confidence = sum(case_scores) / len(case_scores)
        total_confidence += case_confidence
        successful_evaluations += 1
        print(f"  Average confidence: {case_confidence:.3f}")
    else:
        print("  Evaluation failed")

if successful_evaluations > 0:
    print(scores)
    average_confidence = total_confidence / successful_evaluations
    print(f"\n{'='*60}")
    print("TESTING COMPLETE")
    print(f"Successful evaluations: {successful_evaluations}/{len(testing_data)}")
    print(f"Average confidence score: {average_confidence:.3f}")
    print(f"{'='*60}")
else:
    print(f"\n{'='*60}")
    print("TESTING COMPLETE")
    print("No successful evaluations")
    print(f"{'='*60}")

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Strip plot of every individual confidence score collected in `scores`.
fig, ax = plt.subplots(figsize=(8, 4))
sns.stripplot(y=scores, jitter=True, color='blue', ax=ax)
ax.set_title("LLM Domain Name Scores")
ax.set_ylabel("Score")
ax.set_yticks([tick / 10 for tick in range(11)])  # ticks at 0.0, 0.1, ..., 1.0
plt.show()