# LLM Prompting
### Author: Kaushik

In [1]:
import os
import logging, warnings
from transformers import logging as hf_logging
import transformers
import torch
from huggingface_hub import login
from dotenv import load_dotenv

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Notebook setup: quiet noisy libraries, then authenticate with the Hugging Face Hub.

# Restrict transformers and TRL logging to errors before any model code runs.
hf_logging.set_verbosity_error()
logging.getLogger("trl").setLevel(logging.ERROR)

# Ignore two specific UserWarnings, each selected by a regex on its message.
warnings.filterwarnings("ignore", message=r".*loss_type=None.*ForCausalLMLoss.*", category=UserWarning)
warnings.filterwarnings(
    "ignore",
    message=r".*cuDNN SDPA backward got grad_output\.strides\(\) != output\.strides\(\).*",
    category=UserWarning,
)

# NOTE(review): presumably meant to turn off tqdm's notebook widget — confirm
# that something actually reads this variable.
os.environ["TQDM_NOTEBOOK"] = "0"

# Merge variables from a local .env file (when one exists) into the environment,
# then log in with the token; a missing token is a hard error.
load_dotenv()
hf_key = os.environ.get("HUGGINGFACE_API_KEY")
if not hf_key:
    raise EnvironmentError("HUGGINGFACE_API_KEY not found. Copy .env.template to .env and add your token. See Instruction.md")
login(hf_key)


In [3]:
# Build the text-generation pipeline that every later cell calls as `pipeline`.

model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"

pipeline = transformers.pipeline(
    task="text-generation",
    model=model_id,
    device_map="auto",
    model_kwargs=dict(torch_dtype=torch.bfloat16),
)

Loading checkpoint shards: 100%|██████████| 4/4 [00:06<00:00,  1.72s/it]


In [4]:
# Smoke test: the model picks its own topic and writes a short podcast opening.
#
# The prompt is split into one fragment per instruction line so it can be read
# and edited; adjacent string literals are concatenated by the parser.
# The "Topic:" line previously read "Pick a interesting topic and " (wrong
# article, dangling conjunction, trailing space); it is now a complete instruction.

messages = [
    {
        "role": "user",
        "content": (
            "Your entire output must start with the exact token: HOST:\n\n"
            "If your output contains anything before 'HOST:'—including instructions, explanations, apologies, or any part of this prompt—then your output is invalid.\n\n"
            "Your entire output must be ONLY a two-person podcast conversation between HOST and GUEST, formatted strictly as:\n"
            "HOST: …\n"
            "GUEST: …\n"
            "(continue dialogue)\n\n"
            "No other text, instructions, or blank lines are permitted.\n\n"
            "Content Requirements\n"
            "Podcast: Adaptive AI Podcast\n"
            "Topic: Pick an interesting topic\n"
            "Guest: Create a relevant fictional or historical expert name based on the topic\n"
            "Length: 150–200 words\n"
            "Sentences short and clean for TTS.\n\n"
            "Dialogue Structure (follow exactly):\n"
            "HOST: hook about topic\n"
            "HOST: podcast + host intro\n"
            "HOST: guest intro\n"
            "GUEST: brief greeting\n"
            "HOST: simple first question\n"
            "GUEST: 3–5 sentence answer\n"
            "HOST: follow-up question\n"
            "GUEST: short answer\n"
            "HOST: closing line: “Let’s get started.”\n\n"
            "---BEGIN DIALOGUE---\n"
            "HOST:"
        ),
    },
]

outputs = pipeline(
    messages,
    max_new_tokens=512,
)

# The last message of the returned conversation is the newly generated reply.
raw = outputs[0]["generated_text"][-1]
print(raw['content'])

HOST: Today on the Adaptive AI Podcast, we're diving into the world of sustainable cities. Joining me is Dr. Sofia Verde, a renowned urban ecologist who's worked on projects to greenify megacities worldwide. 

GUEST: Hi, thank you for having me.

HOST: Dr. Verde, what inspired you to focus on sustainable urban development?

GUEST: I grew up in a dense metropolis and witnessed firsthand the impact of pollution and waste on local ecosystems. As I studied environmental science, I realized that cities could be designed to thrive in harmony with nature. My research focuses on integrating green spaces, efficient waste management, and renewable energy sources to create resilient urban environments.

HOST: That's fascinating. How do you see cities evolving in the next decade to become more sustainable?

GUEST: I believe we'll see a surge in urban agriculture, green roofs, and community-led initiatives to reduce waste and emissions. Cities will prioritize pedestrian-friendly infrastructure and 

In [5]:
# Print the reply from the most recent `pipeline` call again.
generated_messages = outputs[0]["generated_text"]
raw = generated_messages[-1]
print(raw["content"])

HOST: Today on the Adaptive AI Podcast, we're diving into the world of sustainable cities. Joining me is Dr. Sofia Verde, a renowned urban ecologist who's worked on projects to greenify megacities worldwide. 

GUEST: Hi, thank you for having me.

HOST: Dr. Verde, what inspired you to focus on sustainable urban development?

GUEST: I grew up in a dense metropolis and witnessed firsthand the impact of pollution and waste on local ecosystems. As I studied environmental science, I realized that cities could be designed to thrive in harmony with nature. My research focuses on integrating green spaces, efficient waste management, and renewable energy sources to create resilient urban environments.

HOST: That's fascinating. How do you see cities evolving in the next decade to become more sustainable?

GUEST: I believe we'll see a surge in urban agriculture, green roofs, and community-led initiatives to reduce waste and emissions. Cities will prioritize pedestrian-friendly infrastructure and 

In [10]:
# Prompt variant that asks for a metadata header (host/guest gender, guest name)
# ahead of the dialogue, tried here on a single fixed topic.

prompt_content = """
Your entire output MUST start with the exact token: ---METADATA---

---METADATA---
HOST_GENDER: [MALE or FEMALE]
GUEST_GENDER: [MALE or FEMALE]
GUEST_NAME: Create a relevant fictional or historical expert name based on the topic
---DIALOGUE---

Your entire output must contain a two-person podcast conversation between HOST and GUEST, formatted strictly as:
HOST: …
GUEST: …
(continue dialogue)

No other text, instructions, or blank lines are permitted AFTER the '---DIALOGUE---' line.

Content Requirements
Podcast: Adaptive AI Podcast
Topic: The reality of terraforming Mars
Length: 150–200 words

Dialogue Structure (follow exactly):
HOST: hook about topic
HOST: podcast + host intro
HOST: guest intro (MUST use the GUEST_NAME you generated)
GUEST: brief greeting
HOST: simple first question
GUEST: 3–5 sentence answer
HOST: follow-up question
GUEST: short answer
HOST: closing line: “Let’s get started.”

---BEGIN DIALOGUE (must immediately follow the '---DIALOGUE---' line)---
HOST:"""

messages = [dict(role="user", content=prompt_content)]
outputs = pipeline(messages, max_new_tokens=512)

# Show the whole final message (role and content), not just its text.
raw = outputs[0]["generated_text"][-1]
print(raw)


{'role': 'assistant', 'content': "---METADATA---\nHOST_GENDER: MALE\nGUEST_GENDER: FEMALE\nGUEST_NAME: Dr. Sofia Patel\n\n---DIALOGUE---\nHOST: Welcome to the Adaptive AI Podcast, I'm your host, Ryan Thompson. Today, we're diving into the fascinating topic of terraforming Mars. As we continue to explore space, the idea of making our red neighbor habitable becomes increasingly intriguing.\nHOST: I'm joined today by Dr. Sofia Patel, a renowned astrobiologist and expert in planetary habitability. Dr. Patel, welcome to the show.\nGUEST: Thank you, Ryan, it's great to be here.\nHOST: So, Dr. Patel, what are the primary challenges we face when considering terraforming Mars?\nGUEST: Well, Ryan, terraforming is a complex process that requires significant changes to Mars' atmosphere, temperature, and geological composition. We'd need to release greenhouse gases, warm the planet, and create a stable magnetosphere to protect against radiation. Not to mention, we'd have to address the issue of mak

In [11]:
import json
import re # Import the regular expression module

# `pipeline` is not defined in this cell: it is the text-generation pipeline
# created in the model-loading cell earlier in the notebook.

# The 50 podcast topics; the generation loop below produces one script per entry.
topics = [
    "The Simulation Theory: Are we living in a video game?",
    "De-extinction: bringing the Woolly Mammoth back",
    "The psychology behind why we believe conspiracy theories",
    "Modern urban legends and their origins",
    "The future of commercial space tourism",
    "The enduring mystery of the Bermuda Triangle",
    "Artificial General Intelligence vs. Human Intuition",
    "The history of unbreakable codes and ciphers",
    "Biohacking and the ethics of human augmentation",
    "The cultural impact of finding alien life",
    "Deep sea exploration: What is really down there?",
    "The theoretical physics of time travel",
    "Minimalist living in a hyper-consumerist world",
    "The Mandela Effect and collective false memories",
    "The search for the Lost City of Atlantis",
    "The science and mystery of lucid dreaming",
    "The future of lab-grown meat and sustainable food",
    "Cryptozoology: The hunt for Bigfoot and Nessie",
    "The Voynich Manuscript: A book no one can read",
    "Psychological tricks used in modern advertising",
    "The history and culture of video game speedrunning",
    "How different languages shape our perception of reality",
    "The rise of digital nomads and van life",
    "The Singularity: When machines become smarter than us",
    "Investigating the reality of paranormal activity",
    "The Fermi Paradox: Where is everybody?",
    "The hidden world of the Dark Web",
    "The Golden Record aboard the Voyager probes",
    "Ancient civilizations that vanished overnight",
    "Applying Stoic philosophy to modern stress",
    "The psychology of cults and charismatic leaders",
    "Smart Cities: The metropolis of the future",
    "Sustainable architecture and vertical farming",
    "The history of espionage and cold war spy gadgets",
    "Interstellar travel: Wormholes and warp drives",
    "The weirdest medical treatments in history",
    "The art of storytelling in the age of TikTok",
    "Hypothetical survival strategies for a zombie apocalypse",
    "The mind-bending weirdness of quantum entanglement",
    "The evolutionary reason why we find things 'cute'",
    "The secret intelligence of fungi and mushrooms",
    "Preserving languages on the brink of extinction",
    "Brain-Computer Interfaces: Merging mind and machine",
    "The most audacious art heists in history",
    "The neuroscience of happiness and flow states",
    "The Great Filter theory of civilization",
    "The future of privacy in a surveillance state",
    "A history of unexplained radio signals from space",
    "The ethics of cloning pets (and people)",
    "The reality of terraforming Mars"
]

# Collects one record (dict) per generated conversation; written to JSON at the end.
podcast_data = []

# Parsing Function
# Compiled once instead of on every call. Both patterns tolerate markdown
# emphasis/code marks around the key and value (e.g. "**HOST_GENDER:** MALE"),
# and the gender pattern only accepts the two roles the prompt defines, so
# unrelated "*_GENDER" lines can no longer add unexpected keys.
_GENDER_LINE_RE = re.compile(
    r"\b(HOST|GUEST)_GENDER[*`]*\s*:\s*[*`]*\s*(\w+)", re.IGNORECASE
)
_GUEST_NAME_LINE_RE = re.compile(
    r"\bGUEST_NAME[*`]*\s*:\s*[*`]*\s*(.*)", re.IGNORECASE
)


def parse_metadata_and_dialogue(text):
    """Split structured model output into a metadata dict and the dialogue text.

    The expected layout is a metadata header, a ``---DIALOGUE---`` line, and
    then the conversation. Header lines are matched case-insensitively.

    Args:
        text: Raw text generated by the model.

    Returns:
        A ``(metadata, dialogue)`` tuple. ``metadata`` always has exactly the
        keys ``HOST_GENDER``, ``GUEST_GENDER`` and ``GUEST_NAME``; any value
        that could not be parsed keeps its default (``"UNKNOWN"`` /
        ``"Unknown Guest"``). Gender values are upper-cased. ``dialogue`` is
        the text after the first ``---DIALOGUE---`` with surrounding
        whitespace stripped, or the unmodified input when the delimiter is
        missing (a warning is printed in that case).
    """
    # Defaults, used for any field the header does not provide.
    metadata = {
        "HOST_GENDER": "UNKNOWN",
        "GUEST_GENDER": "UNKNOWN",
        "GUEST_NAME": "Unknown Guest"
    }

    # Non-string input cannot be parsed; report it and hand it back untouched
    # rather than raising, so a long generation run is not aborted.
    if not isinstance(text, str):
        print(f"Error during metadata parsing: expected str, got {type(text).__name__}. Saving raw output.")
        return metadata, text

    # Split on the first delimiter only; an empty separator means it was absent.
    metadata_block, delimiter, dialogue_block = text.partition("---DIALOGUE---")
    if not delimiter:
        print("Warning: Delimiters (---DIALOGUE---) not found. Saving raw output.")
        return metadata, text

    for line in metadata_block.splitlines():
        gender_match = _GENDER_LINE_RE.search(line)
        if gender_match:
            role, value = gender_match.groups()
            metadata[f"{role.upper()}_GENDER"] = value.upper()
            continue

        name_match = _GUEST_NAME_LINE_RE.search(line)
        if name_match:
            # Drop whitespace and any markdown marks wrapped around the name.
            guest_name = name_match.group(1).strip().strip("*`").strip()
            if guest_name:
                metadata["GUEST_NAME"] = guest_name

    return metadata, dialogue_block.strip()

# Generate one podcast script per topic and collect the parsed records.
print(f"Starting generation for {len(topics)} topics...")

for i, topic in enumerate(topics):

    # Structured-output prompt; only the "Topic:" line changes between iterations.
    prompt_content = f"""
Your entire output MUST start with the exact token: ---METADATA---

---METADATA---
HOST_GENDER: [MALE or FEMALE]
GUEST_GENDER: [MALE or FEMALE]
GUEST_NAME: Create a relevant fictional or historical expert name based on the topic
---DIALOGUE---

Your entire output must contain a two-person podcast conversation between HOST and GUEST, formatted strictly as:
HOST: …
GUEST: …
(continue dialogue)

No other text, instructions, or blank lines are permitted AFTER the '---DIALOGUE---' line.

Content Requirements
Podcast: Adaptive AI Podcast
Topic: {topic}
Length: 150–200 words

Dialogue Structure (follow exactly):
HOST: hook about topic
HOST: podcast + host intro
HOST: guest intro (MUST use the GUEST_NAME you generated)
GUEST: brief greeting
HOST: simple first question
GUEST: 3–5 sentence answer
HOST: follow-up question
GUEST: short answer
HOST: closing line: “Let’s get started.”

---BEGIN DIALOGUE (must immediately follow the '---DIALOGUE---' line)---
HOST:"""

    # Single-turn chat: the prompt is the only user message.
    # No error handling here: an exception from the model call stops the loop.
    messages = [dict(role="user", content=prompt_content)]
    outputs = pipeline(messages, max_new_tokens=512)

    # The reply text is the content of the last message in the returned conversation.
    generated_text = outputs[0]["generated_text"][-1]["content"]

    # Separate the metadata header from the dialogue body.
    metadata, dialogue_only = parse_metadata_and_dialogue(generated_text)

    # One record per topic, with 1-based ids.
    entry = dict(
        id=i + 1,
        topic=topic,
        host_gender=metadata["HOST_GENDER"],
        guest_gender=metadata["GUEST_GENDER"],
        guest_name=metadata["GUEST_NAME"],
        dialogue=dialogue_only,
    )
    podcast_data.append(entry)

    print(f"[{i+1}/{len(topics)}] Generated: {topic} | Host: {metadata['HOST_GENDER']}, Guest: {metadata['GUEST_GENDER']} ({metadata['GUEST_NAME']})")

# Write all collected conversations to disk as indented JSON, keeping
# non-ASCII characters as-is rather than \u-escaping them.
output_filename = "podcast_scripts.json"
with open(output_filename, "w", encoding="utf-8") as f:
    f.write(json.dumps(podcast_data, indent=4, ensure_ascii=False))

print(f"\nSuccessfully saved {len(podcast_data)} conversations to {output_filename}")

Starting generation for 50 topics...
[1/50] Generated: The Simulation Theory: Are we living in a video game? | Host: MALE, Guest: MALE (Dr. Elliot Thompson)
[2/50] Generated: De-extinction: bringing the Woolly Mammoth back | Host: MALE, Guest: FEMALE (Dr. Sofia Rodriguez)
[3/50] Generated: The psychology behind why we believe conspiracy theories | Host: MALE, Guest: FEMALE (Dr. Sophia Patel)
[4/50] Generated: Modern urban legends and their origins | Host: MALE, Guest: FEMALE (Dr. Sofia Rodriguez)
[5/50] Generated: The future of commercial space tourism | Host: MALE, Guest: MALE (Dr. Elliot Thompson)
[6/50] Generated: The enduring mystery of the Bermuda Triangle | Host: MALE, Guest: FEMALE (Dr. Sophia Patel)
[7/50] Generated: Artificial General Intelligence vs. Human Intuition | Host: MALE, Guest: FEMALE (Dr. Sophia Patel)
[8/50] Generated: The history of unbreakable codes and ciphers | Host: MALE, Guest: MALE (Dr. Emilio DeCorte)
[9/50] Generated: Biohacking and the ethics of human aug