In [None]:
!pip install transformers
!pip install peft
!pip install torch
!pip install datasets




Import the necessary libraries

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model
from datasets import Dataset, DatasetDict

import os

# Switch off the Weights & Biases integration up front so that a Hugging Face
# Trainer created later in the notebook does not try to report to it.
os.environ.update(WANDB_DISABLED="true")

Log in to Hugging Face using a token stored in Colab secrets

In [None]:
from google.colab import userdata
from huggingface_hub import login

# Read the access token from Colab's secret store instead of hardcoding it.
hf_token = userdata.get("HF_TOKEN")  # Replace 'HF_TOKEN' with your secret name

if hf_token:
    # Authenticate this session against the Hugging Face Hub.
    login(token=hf_token)
    print("Successfully logged into Hugging Face!")
else:
    # An empty/None secret cannot be used to authenticate; stop here.
    raise ValueError("Hugging Face token not found in Colab secrets.")

Successfully logged into Hugging Face!


Load the model and tokenizer

In [None]:
from transformers import AutoTokenizer

model_name = "meta-llama/Llama-3.2-1B-Instruct"  # Adjust to an available model

# Pass the token that the login cell read from Colab secrets (`hf_token`).
# The previous `os.getenv("HF_TOKEN")` may return None in this notebook because
# nothing here sets that environment variable; loading then only worked through
# the credentials cached by `login()`.
model = AutoModelForCausalLM.from_pretrained(model_name, token=hf_token)
tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)

# Some causal-LM tokenizers ship without a padding token; reuse EOS so that
# padding and `generate(pad_token_id=...)` have a well-defined id.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

print("Tokenizer loaded successfully!")

Tokenizer loaded successfully!


Inference function

In [None]:
def generate_response(model, tokenizer, instruction, input_text="", max_new_tokens=100):
    """
    Generate a response from the model for an instruction-style prompt.

    The prompt is assembled from three marker lines ("### Instruction:",
    "### Input:", "### Response:") and only the text generated after the
    prompt is returned. The prompt itself is never part of the result, even
    when the instruction or input happens to contain the response marker or
    when the prompt was truncated to the 512-token limit.

    Args:
        model: Causal language model exposing ``.device`` and ``.generate()``
        tokenizer: The tokenizer matching ``model``
        instruction: The instruction for the model
        input_text: Optional input text
        max_new_tokens: Maximum number of tokens to generate (default 100)

    Returns:
        str: The generated response with surrounding whitespace removed
    """
    # Format input to match training format
    prompt = f"### Instruction: {instruction}\n### Input: {input_text}\n### Response:"

    # Tokenize the prompt
    inputs = tokenizer(prompt,
                       return_tensors="pt",
                       truncation=True,
                       max_length=512,
                       add_special_tokens=True)

    # Move inputs to the same device as model
    inputs = {k: v.to(model.device) for k, v in inputs.items()}

    # Remember how many tokens belong to the prompt so they can be dropped
    # from the generated sequence afterwards.
    prompt_length = inputs["input_ids"].shape[-1]

    # Tokenizers without a dedicated padding token report None here; fall back
    # to EOS so generate() receives an explicit, valid padding id.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    # Generate response
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        num_return_sequences=1,
        temperature=0.1,     # Low temperature keeps sampling close to greedy
        do_sample=True,
        top_p=0.95,          # Nucleus sampling
        top_k=50,            # Top-k sampling
        pad_token_id=pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
    )

    # Decode only the newly generated tokens. Splitting the full decoded text
    # on "### Response:" picks the wrong segment whenever the user-supplied
    # text contains that marker, and returns the whole prompt if the marker
    # was lost to truncation.
    generated_ids = outputs[0][prompt_length:]
    response = tokenizer.decode(generated_ids, skip_special_tokens=True)

    return response.strip()


In [None]:
def post_process_response(response):
    """
    Guarantee that a model response mentions every required section.

    The response is trimmed of surrounding whitespace; for each required
    section name that does not occur anywhere in the trimmed text, a
    placeholder line "<section>: [Details Missing]" is appended, in the
    fixed section order.

    Args:
        response (str): The raw response from the model.

    Returns:
        str: The trimmed response followed by placeholder lines for any
        section that was absent.
    """
    required_headings = (
        "Order Status",
        "Reference",
        "Action Taken",
        "Need immediate assistance?",
    )
    trimmed = response.strip()

    # Presence is a plain substring check against the trimmed response.
    placeholders = [
        f"\n{heading}: [Details Missing]"
        for heading in required_headings
        if heading not in trimmed
    ]

    return trimmed + "".join(placeholders)



In [None]:
# Example input
instruction = "Respond to this customer inquiry following our format"
input_text = "I haven't received my order #12345 yet. It's been a week."

# generate_response samples from the model (do_sample=True), so fix the RNG
# seed first; otherwise the printed response changes on every run.
torch.manual_seed(42)

# Generate response
raw_response = generate_response(model, tokenizer, instruction, input_text)

# Post-process the response so every required section is present
final_response = post_process_response(raw_response)

# Print the results
print("\nINSTRUCTION:")
print(instruction)
print("\nINPUT:")
print(input_text)
print("\nFINAL RESPONSE:")
print(final_response)


INSTRUCTION:
Respond to this customer inquiry following our format

INPUT:
I haven't received my order #12345 yet. It's been a week.

FINAL RESPONSE:
Thank you for contacting TechCorp Support.

Order Status: Delayed
Reference: #12345
Action Taken: Tracking update sent
Need immediate assistance? Call: 1-800-TECH-HELP
Your satisfaction is our priority.

Best regards,
TechCorp Support


In [None]:
# Check training logs/loss
# NOTE(review): `trainer` is not defined in any cell visible above; presumably it
# comes from a fine-tuning cell. Guard the lookup so a fresh Restart & Run All
# reports the missing step instead of failing with a bare NameError.
if "trainer" in globals():
    print("Training logs from the last few steps:")
    print(trainer.state.log_history[-5:])  # Shows the last 5 training logs
else:
    print("No `trainer` is defined - run the fine-tuning cell before inspecting training logs.")

Training logs from the last few steps:
[{'train_runtime': 9.3961, 'train_samples_per_second': 4.257, 'train_steps_per_second': 1.064, 'total_flos': 119789573898240.0, 'train_loss': 2.6833833694458007, 'epoch': 10.0, 'step': 10}]
