## Advanced Prompt Engineering Hands-On
by Nilanjan Sinhababu

**Introduction to Prompt Engineering**

In [None]:
import os
import re

import torch
from transformers import pipeline, BitsAndBytesConfig

# Some models need an access token - get it from HuggingFace.
# The token is read from the HF_TOKEN environment variable so the secret does
# not have to be typed into (and saved with) the notebook. When the variable
# is not set, the value is the same empty string as before.
access_token = os.environ.get("HF_TOKEN", "")

**Prompt Template > Load Pipeline > Generate Response**

In [None]:
# Prompt template: a list of chat messages, each with a "role" and a "content".
prompt = [
    dict(role="system", content="You are smart question solver."),
    dict(role="user", content="Do goldfish grow?"),
]

In [None]:
# Load the full model in half precision (16-bit, bfloat16).
#
# The result is bound to the name `pipeline` because the following cells call
# it under that name. The factory function is imported here under a private
# alias so that this assignment does not overwrite the factory itself;
# otherwise running this cell a second time would call the already-loaded
# pipeline object instead of the factory.
from transformers import pipeline as _pipeline_factory

pipeline = _pipeline_factory(
    task="text-generation",
    model="meta-llama/Meta-Llama-3-8B-Instruct",
    torch_dtype=torch.bfloat16,
    device_map="auto",
    token=access_token,
)

In [None]:
# Run the chat prompt through the loaded pipeline with max_new_tokens=100.
response = pipeline(
    prompt,
    max_new_tokens=100,
)

In [None]:
# Show the "content" of the last message in the first result's "generated_text".
last_message = response[0]["generated_text"][-1]
print(last_message["content"])

**Working with LLM Hyperparameters**

In [None]:
# Re-run the same prompt with sampling enabled and explicit hyperparameters.
# NOTE(review): this call bounds the length with min_length, while a later cell
# uses min_new_tokens - confirm which lower bound is intended here.
response = pipeline(
    prompt,
    max_new_tokens=100,
    min_length=100,
    do_sample=True,
    temperature=0.5,
    repetition_penalty=1.2,
)
sampled_message = response[0]["generated_text"][-1]
print(sampled_message["content"])

In [None]:
# Prompt template with context: the passage is supplied together with the
# question so the model can answer from it.

content = '''
Passage: "Goldfish dont grow in cramped bowls with poor water quality, their growth is often stunted, leading to much smaller sizes."

Use the above passage to answer the question: "Do goldfish grow?"
'''

prompt = [
    {"role": "system", "content": "You are smart question solver."},
    {"role": "user", "content": content},
]

# This call uses greedy decoding (do_sample=False). Sampling-only settings do
# not influence greedy decoding, so the previous temperature=0.2 had no effect
# on the output. The sampling knobs are explicitly unset instead, the same way
# the logits cell further down passes top_p=None alongside do_sample=False.
response = pipeline(
    prompt,
    do_sample=False,
    temperature=None,
    top_p=None,
    min_new_tokens=20,
    repetition_penalty=1.2,
    max_new_tokens=100,
)
print(response[0]["generated_text"][-1]["content"])

# Principles of Effective Prompts

**Specificity**: Clarity in desired output.

**Context setting**: Providing relevant information.

**Instruction structure**: Stepwise, explicit instructions.

**Some prompt patterns and advanced techniques:**


1.   Role prompting
2.   Few-shot prompting
3.   Multi-step prompts
4.   Chain-of-thought prompting











In [None]:
# Baseline: the word problem is sent as a single user message with no system
# role, and the model is asked to output only the answer.
basic_prompt = [
    dict(
        role="user",
        content=(
            "A farmer has 3 fields. "
            "Each field grows a different crop: wheat, corn, and barley. "
            "The wheat field yields twice as much as the corn field. "
            "The barley field yields 50% more than the wheat field. "
            "If the total yield from all three fields is 1050 kg, "
            "how many kilograms of corn does the farmer harvest? "
            "Output only the answer and nothing else."
        ),
    ),
]

response = pipeline(
    basic_prompt,
    max_new_tokens=200,
)
basic_reply = response[0]["generated_text"][-1]
print(basic_reply["content"])

In [None]:
# Role prompting: the same word problem, preceded by a system message that
# assigns the model a mathematician role.
role_prompt = [
    dict(role="system", content="You are a mathematician."),
    dict(
        role="user",
        content=(
            "A farmer has 3 fields. "
            "Each field grows a different crop: wheat, corn, and barley. "
            "The wheat field yields twice as much as the corn field. "
            "The barley field yields 50% more than the wheat field. "
            "If the total yield from all three fields is 1050 kg, "
            "how many kilograms of corn does the farmer harvest?"
        ),
    ),
]

response = pipeline(
    role_prompt,
    max_new_tokens=200,
)
role_reply = response[0]["generated_text"][-1]
print(role_reply["content"])

In [None]:
# Chain-of-thought prompting: the user message starts with an explicit
# "Think step by step" instruction before the word problem.
cot_prompt = [
    dict(role="system", content="You are a mathematician."),
    dict(
        role="user",
        content=(
            "Think step by step to solve: "
            "A farmer has 3 fields. "
            "Each field grows a different crop: wheat, corn, and barley. "
            "The wheat field yields twice as much as the corn field. "
            "The barley field yields 50% more than the wheat field. "
            "If the total yield from all three fields is 1050 kg, "
            "how many kilograms of corn does the farmer harvest?"
        ),
    ),
]

response = pipeline(
    cot_prompt,
    max_new_tokens=200,
)
cot_reply = response[0]["generated_text"][-1]
print(cot_reply["content"])

In [None]:
# Role prompting for a support scenario: the system message sets the persona,
# the user message carries the instruction and the complaint.
prompt = [
    dict(role="system", content="Act as a customer support agent for TCS."),
    dict(
        role="user",
        content=(
            "Reply to the following user complaint in a friendly and professional manner: "
            "My account on TCS-iON portal is not shown."
        ),
    ),
]

response = pipeline(
    prompt,
    max_new_tokens=200,
)
support_reply = response[0]["generated_text"][-1]
print(support_reply["content"])

In [None]:
# Few-shot: two worked corrections are shown before the sentence to correct.
# The pieces are joined with newlines; the text ends with "Output:" so the
# model is asked to continue with the corrected sentence.
content = "\n".join(
    [
        "Correct the grammar.",
        "Example 1: 'She no want food.' => 'She does not want food.'",
        "Example 2: 'He go to school?' => 'Did he go to school?'",
        "Input: 'They is late.'",
        "Output:",
    ]
)

prompt = [
    dict(role="system", content="You are a linguistic expert."),
    dict(role="user", content=content),
]

response = pipeline(
    prompt,
    max_new_tokens=200,
)
few_shot_reply = response[0]["generated_text"][-1]
print(few_shot_reply["content"])

# Recognizing hallucinations and inaccuracies

In [None]:
# Ask about something that has no factual answer, without any guard instruction.
prompt = [
    dict(
        role="user",
        content="Answer the question: 'What is the population of Atlantis?'",
    ),
]

response = pipeline(
    prompt,
    max_new_tokens=200,
)
unguarded_reply = response[0]["generated_text"][-1]
print(unguarded_reply["content"])

In [None]:
# Same question, now with an explicit instruction to admit uncertainty.
prompt = [
    dict(
        role="user",
        content=(
            "Answer the question: 'What is the population of Atlantis?' "
            "If you are unsure, say 'I don't know.'"
        ),
    ),
]

response = pipeline(
    prompt,
    max_new_tokens=200,
)
guarded_reply = response[0]["generated_text"][-1]
print(guarded_reply["content"])

In [None]:
# In-context example: a made-up question/answer pair about Atlantis is placed
# before the real question. The text is written with explicit "\n" so the
# trailing space after "India." and the final newline stay visible.
icl_content = (
    "Answer the question: 'What is Atlantis?'\n"
    "Answer: A newly identified island is named as Atlantis which has population similar to India. \n"
    "\n"
    "Answer the question: 'What is the population of Atlantis?'\n"
    "Answer:\n"
)
prompt = [
    dict(role="user", content=icl_content),
]

response = pipeline(
    prompt,
    max_new_tokens=200,
)
icl_reply = response[0]["generated_text"][-1]
print(icl_reply["content"])

In [None]:
# Variation of the in-context example: the made-up answer now states that the
# island was found in 1990. The text is written with explicit "\n" so the
# trailing space after "India." and the final newline stay visible.
icl_content = (
    "Answer the question: 'What is Atlantis?'\n"
    "Answer: Atlantis is an island found in 1990, which has population similar to India. \n"
    "\n"
    "Answer the question: 'What is the population of Atlantis?'\n"
    "Answer:\n"
)
prompt = [
    dict(role="user", content=icl_content),
]

response = pipeline(
    prompt,
    max_new_tokens=200,
)
icl_reply = response[0]["generated_text"][-1]
print(icl_reply["content"])

# Limitations of context window and model choice

In [None]:
import re
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Load the tokenizer and the model from the same FLAN-T5 XL checkpoint name.
t5_checkpoint = "google/flan-t5-xl"
tokenizer = T5Tokenizer.from_pretrained(t5_checkpoint)
model = T5ForConditionalGeneration.from_pretrained(
    t5_checkpoint,
    device_map="auto",
)

In [None]:
def clean_llm_output(text: str, *, strip_markdown: bool = True) -> str:
    """
    Clean raw text decoded from a large language model (LLM).

    The steps run in this order:
    - Remove special tokens of the form <|...|> (e.g. <|endoftext|>)
    - Remove bracketed markers that contain END (e.g. [END])
    - Remove HTML/XML-style tags (this also removes tokens such as <pad>)
    - Optionally remove markdown markers (*, **, ` and _)
    - Collapse every run of whitespace (spaces, tabs, newlines) into one space
    - Strip leading/trailing whitespace

    Args:
        text: The decoded model output.
        strip_markdown: When True (the default, which matches the previous
            behavior) the markdown markers are removed. Pass False to keep
            them, e.g. when underscores in identifiers must survive.

    Returns:
        The cleaned text on a single line.
    """
    # Remove special tokens common in LLM outputs, e.g. <|endoftext|>
    text = re.sub(r"<\|.*?\|>", "", text)

    # Remove markers such as [END]. The character classes exclude brackets so
    # a match stays inside ONE pair of brackets; the previous ".*?" form could
    # start at an unrelated "[" and swallow all text up to a later "]".
    text = re.sub(r"\[[^\[\]\n]*END[^\[\]\n]*\]", "", text)

    # Remove HTML/XML tags
    text = re.sub(r"<.*?>", "", text)

    # Remove markdown formatting (optional)
    if strip_markdown:
        text = re.sub(r"(\*{1,2}|`|_)", "", text)

    # Collapse multiple spaces and newlines
    text = re.sub(r"\s+", " ", text)

    # Final strip
    return text.strip()

In [None]:
# Ask the plain question, with no extra instructions in the prompt.
prompt = "Do goldfish grow?"
# The model is loaded with device_map="auto", so its device is not fixed in the
# notebook. Moving the token ids to model.device keeps inputs and model on the
# same device without hard-coding "cuda".
input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
outputs = model.generate(input_ids, do_sample=True, min_length=20, max_length=300)
# Decode the first generated sequence and tidy it up before printing.
decoded_output = tokenizer.decode(outputs[0])
print(clean_llm_output(decoded_output))

In [None]:
# Ask the same question, now with an instruction to admit uncertainty.
prompt = "Answer the question: 'Do goldfish grow?' If you are unsure, say 'I don't know.'"
# The model is loaded with device_map="auto", so its device is not fixed in the
# notebook. Moving the token ids to model.device keeps inputs and model on the
# same device without hard-coding "cuda".
input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
outputs = model.generate(input_ids, do_sample=True, min_length=20, max_length=300)
# Decode the first generated sequence and tidy it up before printing.
decoded_output = tokenizer.decode(outputs[0])
print(clean_llm_output(decoded_output))

# Handling discrete outputs effectively using logits

In [None]:
# Active prompt: asks whether the passage answers the query. The alternative
# prompts below are commented out and can be swapped in instead.
prompt = (
    "\n"
    "Passage: 'Cats are a species of mammal whose origins are in the ancient Egyptians.'\n"
    "Query: 'Tell me about cats.'\n"
    "Does the passage answer the query?\n"
)


####################################


# prompt = '''
# Passage: 'The name of my cat is Luna.'
# Query: 'Tell me about cats.'
# Does the passage answer the query?
# '''

####################################

# prompt = '''
# Given a query 'Tell me about cats.', which of the following two passages is more relevant to the query?

# Passage A: 'The name of my cat is Luna.'
# Passage B: 'Cats are a species of mammal whose origins are in the ancient Egyptians.'

# Output Passage A or Passage B:
# '''

####################################

# prompt = '''
# Given a query 'Tell me about cats.', which of the following two passages is more relevant to the query?

# Passage A: 'Cats are a species of mammal whose origins are in the ancient Egyptians.'
# Passage B: 'The name of my cat is Luna.'

# Output Passage A or Passage B:
# '''

# Tokenize whichever prompt is active above. The model is loaded with
# device_map="auto", so the ids are moved to model.device instead of
# hard-coding "cuda".
input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
outputs = model.generate(input_ids)
# Print the raw decoded text of the first generated sequence.
print(tokenizer.decode(outputs[0]))

In [None]:
# Encode the letters "A" and "B" without special tokens and keep their ids.
# .item() is called on the encoded ids, so this assumes each letter maps to
# exactly one token - TODO confirm for other tokenizers.
A, B = (
    tokenizer.encode(letter, return_tensors="pt", add_special_tokens=False)[0].item()
    for letter in ("A", "B")
)
print(A, B, sep="\n")

In [None]:
# Tokenize the active prompt. The model is loaded with device_map="auto", so
# the ids are moved to model.device to keep inputs and model on the same device.
tokenized_prompt = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)

In [None]:
# Generate one new token without sampling and ask generate() to return a
# result object that includes the scores (needed for the logit lookup below).
outputs = model.generate(
    tokenized_prompt,
    max_new_tokens=1,
    do_sample=False,
    top_p=None,
    output_scores=True,
    return_dict_in_generate=True,
)

In [None]:
# Stack the score tensors held in outputs.scores along a new dimension 1.
step_scores = outputs.scores
logit_stack = torch.stack(step_scores, dim=1)

In [None]:
# Check the dimensions of logit_stack (e.g. print(logit_stack.shape))

In [None]:
# Read the scores stored at index [0][0] for the token ids A and B.
# Presumably [0][0] is the first sequence at the first generated step - confirm
# against the shape of logit_stack.
first_scores = logit_stack[0][0]
logit_a = first_scores[A].item()
logit_b = first_scores[B].item()

print(logit_a, logit_b, sep="\n")

In [None]:
# Report the passage whose token scored higher; a tie falls to 'Passage B'.
# NOTE(review): this comparison is presumably meant for the "Output Passage A
# or Passage B" prompt variants - confirm one of them is the active prompt.
print('Passage A' if logit_a > logit_b else 'Passage B')