# Lab 1: Building Effective Prompts with the CRAFT Framework

In this lab, you'll learn to systematically construct high-quality prompts using the CRAFT framework. You'll measure prompt quality, build reusable templates, and visualize the difference between vague and well-structured prompts.

## Learning Objectives
- Apply the CRAFT framework systematically to any task
- Diagnose and fix flawed CRAFT prompts
- Critically evaluate prompt scoring heuristics
- Build reusable prompt templates from scratch
- Adapt prompts for different audiences and stakeholders

**Duration:** 55-65 minutes | **Difficulty:** Beginner to Intermediate

## Part 1: Understanding Prompt Components

The CRAFT framework breaks every prompt into five essential components:

- **C**ontext: Background information the AI needs to understand the situation
- **R**ole: The expert persona the AI should adopt
- **A**ction: The specific task you want performed
- **F**ormat: How the output should be structured (bullets, table, essay, etc.)
- **T**one: The voice and style of the response

Let's build a Python toolkit to construct and evaluate CRAFT prompts.

In [None]:
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Tuple
import re
import textwrap
import matplotlib.pyplot as plt
import numpy as np


@dataclass
class CRAFTPrompt:
    """A structured prompt built using the CRAFT framework.

    Each attribute holds the text of one CRAFT component. A component counts
    as "filled" only when it contains something other than whitespace;
    ``build`` and ``summary`` both rely on that single definition, so a field
    reported as ``---`` by ``summary`` never shows up as an empty section in
    ``build``.
    """
    context: str      # rendered as "Context: ..."
    role: str         # rendered as "Role: Act as <role>."
    action: str       # rendered as "Task: ..."
    format_spec: str  # rendered as "Format: ..."
    tone: str         # rendered as "Tone: ..."

    @staticmethod
    def _is_filled(value: Optional[str]) -> bool:
        """Return True when *value* holds any non-whitespace text.

        ``None`` and whitespace-only strings count as unfilled.
        """
        return bool(value and value.strip())

    def build(self) -> str:
        """Assemble the filled components into a formatted prompt string.

        Sections appear in CRAFT order, separated by a blank line. Unfilled
        components are skipped entirely. Field text is inserted verbatim
        (it is not stripped), so ``{placeholder}`` markers survive intact.
        """
        labelled = [
            ("Context: {}", self.context),
            ("Role: Act as {}.", self.role),
            ("Task: {}", self.action),
            ("Format: {}", self.format_spec),
            ("Tone: {}", self.tone),
        ]
        # str.format only parses the template, never the inserted value, so
        # braces inside a field are passed through untouched.
        return "\n\n".join(
            template.format(value)
            for template, value in labelled
            if self._is_filled(value)
        )

    def summary(self) -> str:
        """Return a one-line summary showing which fields are filled.

        Each component is reported as ``<letter>:YES`` or ``<letter>:---``.
        """
        lettered = [("C", self.context), ("R", self.role),
                    ("A", self.action), ("F", self.format_spec),
                    ("T", self.tone)]
        return "  |  ".join(
            f"{label}:{'YES' if self._is_filled(val) else '---'}"
            for label, val in lettered
        )


def _print_scorecard(scores: Dict[str, int], max_per_dimension: int) -> None:
    """Print a fixed-width scorecard for *scores*, followed by a blank line."""
    total = sum(scores.values())
    max_total = max_per_dimension * len(scores)

    print(f"{'Dimension':<16} {'Score':>5}")
    print("-" * 23)
    for dim, val in scores.items():
        bar = '#' * val + '.' * (max_per_dimension - val)
        print(f"{dim:<16} [{bar}] {val}/{max_per_dimension}")
    print("-" * 23)
    print(f"{'TOTAL':<16} {total}/{max_total}")
    print()


def score_prompt(prompt_text: str, *, verbose: bool = True) -> Dict[str, int]:
    """Score a prompt from 1-5 on five quality dimensions.

    Dimensions:
        clarity       - Is the request unambiguous?
        specificity   - Does it include concrete details?
        context       - Does it provide background information?
        format        - Does it specify desired output structure?
        actionability - Is there a clear, measurable action?

    Scoring uses keyword and pattern analysis as a lightweight heuristic.
    Every dimension starts at 1 and earns points for word-count thresholds
    and for signals found in the lower-cased text; each is capped at 5.

    Signals are matched as plain substrings, not whole words, so for
    example "table" is found inside "actionable", "list" inside
    "specialist", and "etc" inside "fetch".

    Args:
        prompt_text: The prompt to evaluate.
        verbose: When True (the default), print a scorecard to stdout.
            Pass False to obtain the scores without any output.

    Returns:
        A dict with the keys "clarity", "specificity", "context", "format"
        and "actionability", in that order, each mapped to an int in 1..5.
    """
    cap = 5
    text = prompt_text.lower()
    word_count = len(text.split())

    # --- Clarity ---
    clarity = 1
    if word_count >= 10:
        clarity += 1
    if any(w in text for w in ["specifically", "exactly", "precisely", "must", "should"]):
        clarity += 1
    # This point is awarded for the ABSENCE of vague filler words.
    if not any(w in text for w in ["something", "stuff", "things", "whatever", "etc"]):
        clarity += 1
    if word_count >= 25:
        clarity += 1
    clarity = min(clarity, cap)

    # --- Specificity ---
    specificity = 1
    if re.search(r'\d+', text):
        specificity += 1
    if any(w in text for w in ["example", "such as", "including", "e.g.", "for instance"]):
        specificity += 1
    specific_nouns = ["industry", "sector", "company", "product", "metric",
                      "audience", "stakeholder", "customer", "market", "region"]
    # At least two distinct domain nouns are required for this point.
    if sum(1 for w in specific_nouns if w in text) >= 2:
        specificity += 1
    if word_count >= 30:
        specificity += 1
    specificity = min(specificity, cap)

    # --- Context ---
    context_score = 1
    context_signals = ["background", "context", "situation", "scenario",
                       "currently", "our company", "the team", "we are",
                       "given that", "assuming", "based on"]
    matches = sum(1 for s in context_signals if s in text)
    context_score += min(matches, 2)  # at most 2 points from signals
    if word_count >= 20:
        context_score += 1
    if re.search(r'(role|act as|you are|persona)', text):
        context_score += 1
    context_score = min(context_score, cap)

    # --- Format ---
    format_score = 1
    format_signals = ["bullet", "numbered", "table", "list", "heading",
                      "paragraph", "json", "csv", "markdown", "format",
                      "structure", "outline", "section", "step-by-step"]
    fmt_matches = sum(1 for s in format_signals if s in text)
    format_score += min(fmt_matches, 2)  # at most 2 points from signals
    if re.search(r'\d+\s*(words|sentences|paragraphs|points|items|bullets)', text):
        format_score += 1
    if any(w in text for w in ["include", "exclude", "begin with", "end with"]):
        format_score += 1
    format_score = min(format_score, cap)

    # --- Actionability ---
    actionability = 1
    action_verbs = ["write", "create", "generate", "analyze", "compare",
                    "summarize", "evaluate", "list", "design", "draft",
                    "build", "develop", "produce", "identify", "recommend"]
    verb_hits = sum(1 for v in action_verbs if v in text)
    actionability += min(verb_hits, 2)  # at most 2 points from verbs
    if re.search(r'(for|targeting|aimed at)\s+\w+', text):
        actionability += 1
    if word_count >= 15:
        actionability += 1
    actionability = min(actionability, cap)

    scores = {
        "clarity": clarity,
        "specificity": specificity,
        "context": context_score,
        "format": format_score,
        "actionability": actionability,
    }

    if verbose:
        _print_scorecard(scores, cap)

    return scores


# Smoke test: score a bare request, then a prompt that spells out all
# five CRAFT parts, so the two scorecards can be compared side by side.
print("=== Scoring a vague prompt ===")
score_prompt("Write about AI")

print("=== Scoring a detailed CRAFT prompt ===")
score_prompt(" ".join((
    # One entry per CRAFT section; joined with single spaces.
    "Context: Our healthcare startup is preparing a board presentation.",
    "Role: Act as a healthcare strategy consultant with 15 years experience.",
    "Task: Write a 500-word executive summary analyzing AI adoption trends "
    "in the healthcare industry, including 3 specific examples of successful "
    "implementations.",
    "Format: Use bullet points for key findings followed by "
    "a numbered list of recommendations.",
    "Tone: Professional and data-driven.",
)))

## Part 2: Before & After Analysis

The real power of CRAFT becomes clear when you compare a raw, off-the-cuff prompt with its structured counterpart. Below we take three common but vague prompts, rebuild each one with CRAFT, and measure the difference.

In [None]:
# ---------- Before / After pairs ----------
# Each entry pairs a raw one-line request with its CRAFT rebuild.

before_after: List[Tuple[str, CRAFTPrompt]] = [
    # Pair 1 -- "Write about AI"
    (
        "Write about AI",
        CRAFTPrompt(
            context="Our hospital network is evaluating AI tools to reduce "
                    "diagnostic errors. The audience is the clinical leadership team.",
            role="a healthcare technology analyst with expertise in clinical AI",
            action="Write a 600-word briefing on how AI-assisted diagnostics are "
                   "reducing misdiagnosis rates, including 3 specific case studies "
                   "from peer-reviewed research.",
            format_spec="Start with a 2-sentence executive summary, then use "
                        "bullet points for each case study, and end with a "
                        "numbered list of 3 recommendations.",
            tone="Professional, evidence-based, and persuasive"
        )
    ),
    # Pair 2 -- "Summarize this"
    (
        "Summarize this",
        CRAFTPrompt(
            context="The attached 40-page quarterly earnings report covers "
                    "revenue, operating costs, and forward guidance for Q3 2025.",
            role="a senior financial analyst preparing materials for the CFO",
            action="Summarize the report into an executive briefing that "
                   "highlights the 5 most critical data points, identifies "
                   "2 risks, and notes any upward or downward trends.",
            format_spec="Use a structured format: one heading per section, "
                        "bullet points for data, and a comparison table for "
                        "quarter-over-quarter metrics. Keep it under 300 words.",
            tone="Concise, analytical, and neutral"
        )
    ),
    # Pair 3 -- "Help with email"
    (
        "Help with email",
        CRAFTPrompt(
            context="We delivered a software demo to a prospective enterprise "
                    "client last Tuesday. They asked about SSO integration "
                    "and data residency in the EU. We need to follow up "
                    "within 48 hours.",
            role="an enterprise account executive at a B2B SaaS company",
            action="Draft a follow-up email that thanks the client for their "
                   "time, directly addresses their SSO and data-residency "
                   "questions, and proposes a 30-minute technical deep-dive "
                   "call next week.",
            format_spec="Subject line + body. Body should have a greeting, "
                        "3 short paragraphs, and a clear call-to-action with "
                        "2 proposed meeting times.",
            tone="Warm, confident, and professional"
        )
    ),
]

# ---------- Score and display comparison ----------

all_before_scores: List[Dict[str, int]] = []
all_after_scores: List[Dict[str, int]] = []

for idx, (before_text, after_craft) in enumerate(before_after, start=1):
    after_text = after_craft.build()

    print("=" * 60)
    print(f"  PAIR {idx}")
    print("=" * 60)

    print("\n--- BEFORE ---")
    print(f'  "{before_text}"\n')
    b_scores = score_prompt(before_text)

    print("--- AFTER (CRAFT) ---")
    print(textwrap.indent(after_text, "  ") + "\n")
    a_scores = score_prompt(after_text)

    # ":+d" always emits an explicit sign, so a regression prints "-3"
    # instead of the malformed "+-3" a hard-coded "+" prefix would give.
    improvement = sum(a_scores.values()) - sum(b_scores.values())
    print(f"  >>> Improvement: {improvement:+d} points\n")

    all_before_scores.append(b_scores)
    all_after_scores.append(a_scores)

# ---------- Summary table ----------
print("\n" + "=" * 60)
print("  SUMMARY TABLE")
print("=" * 60)
header = f"{'Pair':<6} {'Before':>8} {'After':>8} {'Delta':>8}"
print(header)
print("-" * len(header))
for pair_no, (before_scores, after_scores) in enumerate(
        zip(all_before_scores, all_after_scores), start=1):
    b_total = sum(before_scores.values())
    a_total = sum(after_scores.values())
    # Build each "N/25" cell first so the whole cell is padded to the
    # header's column width and rows line up under the header.
    before_cell = f"{b_total}/25"
    after_cell = f"{a_total}/25"
    print(f"{pair_no:<6} {before_cell:>8} {after_cell:>8} {a_total - b_total:>+8d}")

## Exercise 1: Diagnose Broken Prompts (5 min)

Below are three `CRAFTPrompt` objects that **look** complete — every field is filled in — but each has a subtle flaw that would make the prompt ineffective in practice. Your job:

1. **Run** the cell to see each prompt and its score.
2. **Identify** the specific flaw in each prompt (record it in the matching `flaw_N` string under the `# What's wrong?` comment).
3. **Fix** the prompt by creating a corrected version and score it.

| Prompt | Flaw Type | Hint |
|--------|-----------|------|
| Broken 1 | Context / Role mismatch | The role doesn't match what the context needs |
| Broken 2 | Vague action | The action verb is too ambiguous to produce useful output |
| Broken 3 | Wrong tone for audience | The tone will alienate the target reader |

In [None]:
def _show_and_score(banner: str, prompt: CRAFTPrompt) -> Dict[str, int]:
    """Print *banner*, the assembled prompt and a blank line, then score it."""
    print(banner)
    print(prompt.build())
    print()
    return score_prompt(prompt.build())


# ── Broken Prompt 1: Context / Role Mismatch ─────────────────────

broken_1 = CRAFTPrompt(
    context="Our hospital is evaluating whether to adopt an AI-powered "
            "radiology screening tool. The decision committee includes "
            "the Chief Medical Officer and head of IT.",
    role="a social media influencer specializing in lifestyle content",
    action="Write a 400-word recommendation memo evaluating the clinical "
           "accuracy, integration requirements, and ROI of the AI tool.",
    format_spec="Executive memo with sections: Summary, Clinical Evidence, "
                "Technical Requirements, Cost-Benefit Analysis.",
    tone="Professional, evidence-based, and balanced"
)
_show_and_score("=== BROKEN PROMPT 1 ===", broken_1)

# Diagnosis: the persona cannot credibly produce what the context demands.
flaw_1 = ("The role is a social media influencer, but the context requires "
          "clinical/technical expertise for a hospital AI evaluation")

# Repair: same prompt, with a role that matches the hospital setting.
fixed_1 = CRAFTPrompt(
    context="Our hospital is evaluating whether to adopt an AI-powered "
            "radiology screening tool. The decision committee includes "
            "the Chief Medical Officer and head of IT.",
    role="a healthcare technology consultant with expertise in clinical AI "
         "systems and hospital IT integration",
    action="Write a 400-word recommendation memo evaluating the clinical "
           "accuracy, integration requirements, and ROI of the AI tool.",
    format_spec="Executive memo with sections: Summary, Clinical Evidence, "
                "Technical Requirements, Cost-Benefit Analysis.",
    tone="Professional, evidence-based, and balanced"
)
_show_and_score("\n=== FIXED PROMPT 1 ===", fixed_1)


# ── Broken Prompt 2: Vague Action ────────────────────────────────

broken_2 = CRAFTPrompt(
    context="Our e-commerce company had 15% cart abandonment increase "
            "last quarter. The product team needs actionable insights "
            "from user session data covering 50,000 transactions.",
    role="a senior UX researcher with expertise in e-commerce analytics",
    action="Look at the data and tell us what you think about it.",
    format_spec="Structured report with numbered findings, each including "
                "the data pattern, its likely cause, and a specific fix.",
    tone="Analytical, direct, and data-driven"
)
_show_and_score("\n=== BROKEN PROMPT 2 ===", broken_2)

# Diagnosis: the task names no concrete analysis to perform.
flaw_2 = ("The action 'Look at the data and tell us what you think about it' "
          "is vague — no specific analysis tasks")

# Repair: same prompt, with a measurable, explicitly scoped action.
fixed_2 = CRAFTPrompt(
    context="Our e-commerce company had 15% cart abandonment increase "
            "last quarter. The product team needs actionable insights "
            "from user session data covering 50,000 transactions.",
    role="a senior UX researcher with expertise in e-commerce analytics",
    action="Analyze the 50,000 transaction dataset to identify the top 5 "
           "patterns contributing to the 15% cart abandonment increase, "
           "including specific metrics for each pattern and actionable UX "
           "recommendations to reduce abandonment by at least 8%",
    format_spec="Structured report with numbered findings, each including "
                "the data pattern, its likely cause, and a specific fix.",
    tone="Analytical, direct, and data-driven"
)
_show_and_score("\n=== FIXED PROMPT 2 ===", fixed_2)


# ── Broken Prompt 3: Wrong Tone for Audience ─────────────────────

broken_3 = CRAFTPrompt(
    context="A Fortune 500 CEO is presenting to the board of directors "
            "about the company's AI transformation roadmap for 2026. "
            "Board members include institutional investors and industry veterans.",
    role="a management consultant at a top-tier strategy firm",
    action="Write the opening remarks for the board presentation, covering "
           "strategic rationale, expected ROI, and competitive positioning.",
    format_spec="Speech script, 300 words, with clear transitions between "
                "the three topics.",
    tone="Super casual, lots of slang, use emojis and exclamation marks!!!"
)
_show_and_score("\n=== BROKEN PROMPT 3 ===", broken_3)

# Diagnosis: the requested voice clashes with the boardroom audience.
flaw_3 = ("The tone (casual/slang/emojis) is completely wrong for a "
          "Fortune 500 CEO presenting to a board of institutional investors")

# Repair: same prompt, with a tone suited to a board-level audience.
fixed_3 = CRAFTPrompt(
    context="A Fortune 500 CEO is presenting to the board of directors "
            "about the company's AI transformation roadmap for 2026. "
            "Board members include institutional investors and industry veterans.",
    role="a management consultant at a top-tier strategy firm",
    action="Write the opening remarks for the board presentation, covering "
           "strategic rationale, expected ROI, and competitive positioning.",
    format_spec="Speech script, 300 words, with clear transitions between "
                "the three topics.",
    tone="Authoritative, measured, and executive — conveying confidence "
         "and strategic clarity appropriate for a board-level audience"
)
# Left as the cell's final expression so the notebook also echoes the scores.
_show_and_score("\n=== FIXED PROMPT 3 ===", fixed_3)

## Part 3: Prompt Templates

Instead of crafting every prompt from scratch, experienced prompt engineers maintain a **template library** of reusable CRAFT prompts with placeholders. This section introduces a `PromptLibrary` class that lets you store, retrieve, list, and fill templates with specific values.

In [None]:
from collections import OrderedDict


class PromptLibrary:
    """A reusable library of CRAFT prompt templates.

    Templates are stored by name in insertion order. A placeholder is any
    ``{name}`` marker in the template's built text, where ``name`` consists
    of word characters only.
    """

    # Single source of truth for what counts as a placeholder; the capture
    # group is the bare name without the braces.
    _PLACEHOLDER_RE = re.compile(r'\{(\w+)\}')

    def __init__(self):
        self._templates: OrderedDict[str, CRAFTPrompt] = OrderedDict()

    def add_template(self, name: str, prompt: "CRAFTPrompt") -> None:
        """Register a new template under *name* and print a confirmation.

        An existing template with the same name is replaced.
        """
        self._templates[name] = prompt
        print(f"  [+] Template '{name}' added.")

    def get_template(self, name: str) -> "CRAFTPrompt":
        """Retrieve a template by name.

        Raises:
            KeyError: If *name* is not registered; the message lists the
                available template names.
        """
        try:
            return self._templates[name]
        except KeyError:
            raise KeyError(f"Template '{name}' not found. "
                           f"Available: {list(self._templates.keys())}") from None

    def list_templates(self) -> None:
        """Print a numbered table of templates with their sorted placeholders."""
        print(f"{'#':<4} {'Template Name':<25} {'Placeholders'}")
        print("-" * 65)
        for i, (name, tmpl) in enumerate(self._templates.items(), 1):
            placeholders = sorted(set(self._PLACEHOLDER_RE.findall(tmpl.build())))
            print(f"{i:<4} {name:<25} {', '.join(placeholders) or '(none)'}")

    def fill_template(self, name: str, **kwargs: object) -> str:
        """Fill placeholders in a template and return the final prompt.

        Each ``{key}`` whose key appears in *kwargs* is replaced by
        ``str(value)``. Placeholders without a matching keyword are left in
        place and reported in a printed warning; keywords that match no
        placeholder are ignored.

        Substitution happens in a single pass over the template text, so a
        supplied value that itself contains ``{other}`` is inserted verbatim
        and never re-expanded.

        Raises:
            KeyError: If *name* is not a registered template.
        """
        raw = self.get_template(name).build()

        # Warn about placeholders the caller did not supply; sorted so the
        # message is the same on every run.
        missing = set(self._PLACEHOLDER_RE.findall(raw)) - set(kwargs)
        if missing:
            print(f"  Warning: unfilled placeholders: {', '.join(sorted(missing))}")

        def _substitute(match):
            key = match.group(1)
            # Unknown placeholders are returned unchanged.
            return str(kwargs[key]) if key in kwargs else match.group(0)

        return self._PLACEHOLDER_RE.sub(_substitute, raw)


# ---------- Pre-populate the library ----------
# Starter templates, registered in the order listed here.

library = PromptLibrary()

_starter_templates = (
    ("email_writer", CRAFTPrompt(
        context="We are a {company_type} company. The recipient is {recipient}.",
        role="a professional communications specialist",
        action="Draft a {email_type} email about {topic} that includes a clear "
               "call-to-action.",
        format_spec="Subject line + 3 concise paragraphs. Keep under 200 words.",
        tone="{tone}",
    )),
    ("code_reviewer", CRAFTPrompt(
        context="The code is written in {language} for a {project_type} project. "
                "The team follows {standard} coding standards.",
        role="a senior software engineer and code reviewer",
        action="Review the following code for bugs, performance issues, and "
               "readability. Identify at least 3 specific improvements.",
        format_spec="Use a numbered list. For each issue: state the problem, "
                    "show the offending line, and suggest a fix with a code snippet.",
        tone="Constructive and educational",
    )),
    ("meeting_summarizer", CRAFTPrompt(
        context="The meeting was a {meeting_type} attended by {attendees}. "
                "Duration: {duration}.",
        role="an executive assistant skilled at distilling meeting notes",
        action="Summarize the meeting into key decisions, action items with "
               "owners, and open questions. Flag any items that are blocked.",
        format_spec="Use headings: Decisions, Action Items (table with Owner and "
                    "Due Date columns), Open Questions. Keep under 250 words.",
        tone="Neutral and precise",
    )),
    ("data_analyst", CRAFTPrompt(
        context="The dataset contains {dataset_description}. The stakeholder "
                "is {audience}.",
        role="a senior data analyst at a {industry} company",
        action="Analyze the data to identify the top {num_insights} insights, "
               "highlight any anomalies, and recommend next steps.",
        format_spec="Executive summary (3 sentences), then bullet points for each "
                    "insight, then a table of anomalies with severity ratings.",
        tone="Data-driven and actionable",
    )),
)
for _template_name, _template in _starter_templates:
    library.add_template(_template_name, _template)

# ---------- Show the library ----------
print("\n")
library.list_templates()

# ---------- Demo: fill a template ----------
print("\n" + "=" * 60,
      "  DEMO: Filling the 'email_writer' template",
      "=" * 60 + "\n",
      sep="\n")

# One value per placeholder in the 'email_writer' template.
_email_values = {
    "company_type": "B2B SaaS",
    "recipient": "the VP of Engineering at a Fortune 500 client",
    "email_type": "follow-up",
    "topic": "the security audit results from last week",
    "tone": "Confident, professional, and reassuring",
}
filled = library.fill_template("email_writer", **_email_values)
print(filled)
print("\n--- Score ---")
# Left as the cell's final expression so the notebook also echoes the scores.
score_prompt(filled)

## Exercise 2: Game the System (10 min)

The `score_prompt()` function uses keyword heuristics — it isn't actually intelligent. This exercise exposes that.

**Part A — High score, terrible prompt:** Write a prompt that scores **22 or higher** out of 25 but would produce **useless output** from an AI. Stuff it with scoring keywords while making the actual request incoherent or contradictory.

**Part B — Low score, good prompt:** Write a prompt that scores **12 or lower** but would actually produce **excellent, useful output** from a real AI. Avoid the keywords the scorer looks for while keeping the request crystal clear.

**Part C — Reflection:** Identify at least 2 specific blind spots in the scoring function that your examples exploit, and record them in the `blind_spot_1` and `blind_spot_2` strings.

In [None]:
# Part A: a prompt that scores 22+ / 25 yet is useless in practice.
# It piles up the scorer's trigger words while asking for mutually
# incompatible output formats at once.

terrible_high_scorer = (
    "Context: Given that our company in the healthcare industry is currently "
    "evaluating the situation, we are specifically analyzing background context "
    "for our product and customer stakeholder audience in the market sector "
    "based on assumptions. Role: Act as a senior analyst. Task: Write and "
    "create a comprehensive, precisely structured step-by-step bullet-point "
    "numbered list in table format with heading sections, including 10 items "
    "for each category, comparing and analyzing multiple metrics for the "
    "audience. Format: Use bullet points, numbered list, table, headings, "
    "paragraphs, and sections. Tone: Professional specifically actionable."
)

print("=== Part A: High-scoring terrible prompt ===",
      f'"{terrible_high_scorer}"',
      "",
      sep="\n")
a_scores = score_prompt(terrible_high_scorer)
a_total = sum(a_scores.values())
print(f"Target: 22+  |  Your score: {a_total}")
assert a_total >= 22, f"Score {a_total} is below 22. Add more scoring keywords."
print("PASS: Score is 22 or higher.\n")


# Part B: a prompt that scores 12 or lower yet is clear and useful.
# It states a concrete question without any of the scorer's trigger words.

good_low_scorer = (
    "What are three ways a bakery owner could use ChatGPT to save time each week?"
)

print("=== Part B: Low-scoring good prompt ===",
      f'"{good_low_scorer}"',
      "",
      sep="\n")
b_scores = score_prompt(good_low_scorer)
b_total = sum(b_scores.values())
print(f"Target: <=12  |  Your score: {b_total}")
assert b_total <= 12, f"Score {b_total} is above 12. Remove scoring keywords."
print("PASS: Score is 12 or lower.\n")


# Part C: Reflection on the scorer's blind spots exploited above.
blind_spot_1 = ("The scorer counts keywords like 'specifically', 'bullet', 'table' "
                "regardless of whether the prompt makes logical sense — a contradictory "
                "or incoherent prompt still scores high if it includes the right words")

blind_spot_2 = ("The scorer penalizes short prompts even when brevity is appropriate — "
                "a clear 15-word question can be more effective than a 50-word "
                "keyword-stuffed mess")

print(f"Blind spot 1: {blind_spot_1}",
      f"Blind spot 2: {blind_spot_2}",
      sep="\n")

## Part 4: Visualization - Prompt Quality Comparison

A radar chart (spider chart) makes it easy to see which quality dimensions are strong or weak across different prompt styles. Below we compare three prompts representing increasing levels of structure:

1. **Vague** - A bare-minimum, one-line request
2. **Partially Structured** - Some detail but missing key CRAFT components
3. **Full CRAFT** - All five components filled in

In [None]:
# ---------- Three comparison prompts ----------
# The same marketing request at three levels of structure.

vague_prompt = "Make a marketing plan"

partial_prompt = (
    "Write a marketing plan for our new mobile app targeting millennials. "
    "Include social media strategy and budget estimates."
)

craft_prompt = CRAFTPrompt(
    context="Our fintech startup is launching a budgeting app aimed at "
            "millennials (ages 25-40) in the US market. We have a quarterly "
            "marketing budget of $50,000 and currently 2,000 beta users.",
    role="a digital marketing strategist with 10 years of experience in "
         "fintech product launches",
    action="Create a 90-day go-to-market plan that identifies the top 3 "
           "customer acquisition channels, proposes a content calendar, and "
           "estimates cost-per-acquisition for each channel.",
    format_spec="Use the following structure: Executive Summary (5 sentences), "
                "Channel Strategy (table with columns: Channel, Audience Fit, "
                "Monthly Budget, Est. CPA), Content Calendar (bullet points "
                "by week), and KPIs (numbered list of 5 metrics to track).",
    tone="Strategic, data-informed, and action-oriented"
).build()

# ---------- Score each ----------
print("Scoring: Vague prompt")
s_vague = score_prompt(vague_prompt)
print("Scoring: Partial prompt")
s_partial = score_prompt(partial_prompt)
print("Scoring: Full CRAFT prompt")
s_craft = score_prompt(craft_prompt)

# ---------- Radar chart ----------
dimensions = list(s_vague.keys())
n_dims = len(dimensions)

# One evenly spaced spoke per dimension; the first angle is repeated at the
# end so each outline is drawn as a closed polygon.
_spokes = np.linspace(0, 2 * np.pi, n_dims, endpoint=False).tolist()
angles = _spokes + _spokes[:1]


def values_for(scores: Dict[str, int]) -> List[int]:
    """Return scores in ``dimensions`` order, repeating the first to close the loop."""
    return [scores[d] for d in dimensions + dimensions[:1]]


fig, ax = plt.subplots(figsize=(7, 7), subplot_kw=dict(polar=True))

# (legend name, scores, line/marker style, colour, fill opacity)
_radar_series = (
    ("Vague", s_vague, 'o-', '#e74c3c', 0.10),
    ("Partial", s_partial, 's-', '#f39c12', 0.10),
    ("Full CRAFT", s_craft, 'D-', '#27ae60', 0.15),
)
for _name, _scores, _marker, _colour, _opacity in _radar_series:
    ax.plot(angles, values_for(_scores), _marker, linewidth=2,
            label=f'{_name} ({sum(_scores.values())}/25)', color=_colour)
    ax.fill(angles, values_for(_scores), alpha=_opacity, color=_colour)

ax.set_xticks(angles[:-1])
ax.set_xticklabels([d.capitalize() for d in dimensions], fontsize=12)
ax.set_yticks(list(range(1, 6)))
ax.set_yticklabels([str(level) for level in range(1, 6)], fontsize=9, color='grey')
ax.set_ylim(0, 5)
ax.set_title('Prompt Quality Comparison', fontsize=15, fontweight='bold', pad=20)
ax.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1), fontsize=11)

plt.tight_layout()
plt.show()

## Exercise 3: Build a Template from Scratch (15 min)

Design a **competitive analysis** prompt template entirely from scratch — no skeleton code is provided.

**Requirements:**
1. Create a `CRAFTPrompt` with **at least 4 placeholders** (e.g., `{company}`, `{competitor}`, `{industry}`, `{metric}`)
2. Add it to the `library` under the name `"competitive_analysis"`
3. Fill the template for **two different industries** (e.g., fintech vs. healthcare)
4. Score both filled prompts — each must score **20 or higher** out of 25
5. Print the library listing to confirm your template appears

In [None]:
def _fill_and_score(heading: str, **values: str):
    """Fill the competitive-analysis template, print it under *heading*, and score it.

    Returns the filled prompt, its per-dimension scores, and the total score.
    """
    prompt_text = library.fill_template("competitive_analysis", **values)
    print(heading)
    print(prompt_text)
    print()
    dimension_scores = score_prompt(prompt_text)
    total_score = sum(dimension_scores.values())
    print(f"Score: {total_score}/25 (target: 20+)")
    return prompt_text, dimension_scores, total_score


# Step 1: Define the template; it uses four placeholders
# (company_type, industry, competitor, region).
comp_analysis_template = CRAFTPrompt(
    context="Our {company_type} company is entering the {industry} market. "
            "We need to understand how our product compares to {competitor} "
            "and other players in the {region} region.",
    role="a senior competitive intelligence analyst specializing in {industry}",
    action="Create a competitive analysis comparing our product to {competitor}, "
           "evaluating market share, pricing strategy, product features, and "
           "customer satisfaction. Include at least 3 specific metrics and "
           "identify 2 strategic opportunities for differentiation.",
    format_spec="Use a structured report with sections: Executive Summary, "
                "Competitor Profile (table comparing 5 attributes), "
                "Opportunity Analysis (numbered list), and Recommendations "
                "(bullet points with priority ratings).",
    tone="Analytical, data-driven, and actionable"
)

# Step 2: Register it under the required name
library.add_template("competitive_analysis", comp_analysis_template)

# Step 3: Industry 1 — Fintech / Digital Payments
filled_1, scores_1, total_1 = _fill_and_score(
    "=== Industry 1: Digital Payments ===",
    company_type="fintech startup",
    industry="digital payments",
    competitor="Stripe",
    region="North America",
)
assert total_1 >= 20, f"Score {total_1} is below 20."

# Step 4: Industry 2 — Healthcare / Telemedicine
filled_2, scores_2, total_2 = _fill_and_score(
    "\n=== Industry 2: Telemedicine ===",
    company_type="healthcare SaaS",
    industry="telemedicine",
    competitor="Teladoc",
    region="European Union",
)
assert total_2 >= 20, f"Score {total_2} is below 20."

# Step 5: Confirm the new template appears in the listing
print("\n=== Updated Library ===")
library.list_templates()

## Exercise 4: The Audience Pivot (15 min)

The same information needs radically different prompts depending on who will read the output. In this exercise, you'll write **three CRAFT prompts** that all request the same core information — a summary of your company's Q3 AI initiative results — but tailored for three very different audiences:

| Audience | What they care about | Expected tone |
|----------|---------------------|---------------|
| **Engineering team** | Technical details, architecture, performance metrics | Technical, precise |
| **VP of Product** | Roadmap impact, resource needs, timelines | Strategic, concise |
| **CEO** | Business value, competitive advantage, ROI | Executive, high-level |

**Requirements:**
1. All 3 prompts must request the same underlying information (Q3 AI initiative results)
2. Each prompt must score **18+ / 25**
3. Create a **radar chart** comparing all 3 prompts (use the radar chart code from Part 4 as a reference)
4. In a comment, explain which CRAFT component changes the most across audiences and why

In [None]:
# ── Prompt for Engineering Team ──────────────────────────────────
prompt_engineering = CRAFTPrompt(
    context="Our company completed the Q3 AI initiative, deploying a "
            "recommendation engine and automated data pipeline. The engineering "
            "team needs a technical retrospective.",
    role="a senior ML engineer summarizing for a technical audience",
    action="Write a technical summary of the Q3 AI initiative results, "
           "including model architecture decisions, performance benchmarks "
           "(latency, accuracy, throughput), infrastructure costs, and "
           "technical debt identified during the project.",
    format_spec="Use sections: Architecture Overview, Performance Metrics "
                "(table with before/after comparisons), Infrastructure "
                "(bullet points), and Technical Debt (numbered list of 3 items).",
    tone="Technical, precise, and data-driven"
)

# ── Prompt for VP of Product ─────────────────────────────────────
prompt_vp = CRAFTPrompt(
    context="Our company completed the Q3 AI initiative. The VP of Product "
            "needs to understand roadmap impact and resource allocation "
            "for Q4 planning.",
    role="a product strategy lead summarizing for senior product leadership",
    action="Summarize the Q3 AI initiative outcomes in terms of product "
           "impact, including features shipped, user engagement metrics, "
           "resource utilization vs. plan, and recommended Q4 priorities.",
    format_spec="Executive brief: 3-sentence summary, then bullet points "
                "for key outcomes, a comparison table of planned vs. actual "
                "milestones, and a numbered list of Q4 recommendations.",
    tone="Strategic, concise, and action-oriented"
)

# ── Prompt for CEO ───────────────────────────────────────────────
prompt_ceo = CRAFTPrompt(
    context="Our company completed the Q3 AI initiative. The CEO is "
            "presenting to the board and needs a high-level view of "
            "business value delivered and competitive positioning.",
    role="a chief of staff preparing an executive briefing for the CEO",
    action="Create a business-value summary of the Q3 AI initiative, "
           "including revenue impact, customer retention improvements, "
           "competitive advantages gained, and 3 strategic recommendations "
           "for scaling AI investment.",
    format_spec="One-page executive summary: 2-sentence headline, then "
                "3 bullet points for key business outcomes, and a numbered "
                "list of strategic recommendations.",
    tone="Executive, high-level, and business-focused"
)

# Score all three
print("=== Engineering Team ===")
s_eng = score_prompt(prompt_engineering.build())
t_eng = sum(s_eng.values())

print("=== VP of Product ===")
s_vp = score_prompt(prompt_vp.build())
t_vp = sum(s_vp.values())

print("=== CEO ===")
s_ceo = score_prompt(prompt_ceo.build())
t_ceo = sum(s_ceo.values())

print(f"Engineering: {t_eng}/25 | VP: {t_vp}/25 | CEO: {t_ceo}/25")
print(f"All 18+? {all(t >= 18 for t in [t_eng, t_vp, t_ceo])}")

# ── Radar Chart: Compare all 3 audiences ────────────────────────
dimensions = list(s_eng.keys())
n_dims = len(dimensions)
angles = np.linspace(0, 2 * np.pi, n_dims, endpoint=False).tolist()
angles += angles[:1]

def values_for(scores):
    """Return the scores ordered by ``dimensions``, closed into a loop.

    Each name in the module-level ``dimensions`` list is looked up in
    ``scores``; the first value is then repeated at the end so the series
    has the same length as the closed ``angles`` list it is plotted against.
    """
    ring = [scores[name] for name in dimensions]
    # Slice rather than index so an empty dimension list yields an empty result.
    return [*ring, *ring[:1]]

fig, ax = plt.subplots(figsize=(7, 7), subplot_kw={"polar": True})

# One row per audience: (scores, line/marker style, legend label, colour, fill opacity).
_radar_series = (
    (s_eng, 'o-', f'Engineering ({t_eng}/25)', '#3498db', 0.10),
    (s_vp, 's-', f'VP Product ({t_vp}/25)', '#e74c3c', 0.10),
    (s_ceo, 'D-', f'CEO ({t_ceo}/25)', '#27ae60', 0.15),
)
for _scores, _line_style, _legend_label, _colour, _fill_alpha in _radar_series:
    _closed_values = values_for(_scores)
    # Outline first, then a translucent fill in the same colour.
    ax.plot(angles, _closed_values, _line_style, linewidth=2,
            label=_legend_label, color=_colour)
    ax.fill(angles, _closed_values, alpha=_fill_alpha, color=_colour)

# Spoke labels: angles[:-1] leaves out the duplicated closing angle.
ax.set_xticks(angles[:-1])
ax.set_xticklabels([name.capitalize() for name in dimensions], fontsize=12)

# Radial axis: rings at 1-5, with the axis itself running from 0 to 5.
_radial_ticks = [1, 2, 3, 4, 5]
ax.set_yticks(_radial_ticks)
ax.set_yticklabels([str(tick) for tick in _radial_ticks], fontsize=9, color='grey')
ax.set_ylim(0, 5)

ax.set_title(
    'Audience Pivot: Prompt Quality Comparison',
    fontsize=15,
    fontweight='bold',
    pad=20,
)
# Legend is anchored outside the axes so it does not cover the chart.
ax.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1), fontsize=11)
plt.tight_layout()
plt.show()

# Reflection
most_changed_component = "Action and Format"
explanation = (
    # Why the Action differs across audiences.
    "The Action changes the most because each audience needs "
    "different information extracted from the same initiative — "
    "engineers want benchmarks and architecture, "
    "VPs want roadmap and resources, "
    "CEOs want revenue impact and competitive positioning. "
    # Why the Format differs as well.
    "Format also shifts significantly as technical audiences expect "
    "detailed tables while executives prefer concise bullet points."
)
print(
    f"\nMost changed component: {most_changed_component}",
    f"Explanation: {explanation}",
    sep="\n",
)

## Exercise 5: The Prompt Audit (Challenge) (10 min)

Below are three prompts that all score **21 or higher** on our scoring function. But high scores don't guarantee real-world effectiveness. Each prompt has a hidden practical problem that would hurt its output quality when used with a real AI.

**Your task:**
1. Run the cell to see each prompt and its score
2. Identify the hidden real-world problem in each prompt (it won't show up in the score)
3. Rank the three prompts from **most effective** to **least effective** in practice
4. Write a 2-3 sentence justification for your ranking

**Possible problem types:** conflicting instructions, impossible constraints, missing critical context, overspecification that restricts useful output, scope creep

In [None]:
# ── Audit Prompt A: Conflicting Instructions ─────────────────────
# One list entry per CRAFT section; sections are joined with single spaces.
audit_a = " ".join([
    # Context
    "Context: Our company is launching a new product in the healthcare "
    "industry targeting hospital administrators and clinical stakeholders.",
    # Role
    "Role: Act as a senior marketing strategist with 15 years experience.",
    # Task: a 100-word limit paired with a long list of required content.
    "Task: Write a 100-word executive summary that comprehensively covers "
    "all 12 market segments, "
    "including 3 detailed case studies with full financial breakdowns for each, "
    "a competitive landscape analysis of the top 8 competitors, "
    "and a 5-year revenue projection model.",
    # Format
    "Format: Use bullet points, numbered lists, and a comparison table.",
    # Tone
    "Tone: Concise and precisely data-driven.",
])

# ── Audit Prompt B: Scope Creep ──────────────────────────────────
# One list entry per CRAFT section; sections are joined with single spaces.
audit_b = " ".join([
    # Context
    "Context: We are a fintech company preparing for a board meeting. "
    "The audience is our board of directors and key stakeholders.",
    # Role: four personas stacked into one.
    "Role: Act as a financial analyst and also a product designer "
    "and also a legal compliance officer and also a marketing strategist.",
    # Task: five separate deliverables in a single request.
    "Task: Write a board presentation that covers Q3 financial results, "
    "redesign the customer onboarding flow, "
    "draft new compliance policies for 5 regions, "
    "create a marketing campaign for the next product launch, "
    "and build a competitive analysis of the market sector.",
    # Format
    "Format: Structured report with numbered sections, bullet points, "
    "and a summary table. Include 10 items per section.",
    # Tone
    "Tone: Professional, specifically precise, and must be actionable.",
])

# ── Audit Prompt C: Missing Critical Context ─────────────────────
# One list entry per CRAFT section; sections are joined with single spaces.
audit_c = " ".join([
    # Context: refers to data, options and a sector without naming any of them.
    "Context: Based on the data from last quarter, "
    "our company needs to make a strategic decision. "
    "The team is evaluating multiple options in our industry sector.",
    # Role
    "Role: Act as a management consultant at a top-tier strategy firm "
    "specializing in our specific market and customer segment.",
    # Task
    "Task: Analyze the situation and recommend precisely which option "
    "we should pursue, "
    "including a comparison table of 3 alternatives "
    "with estimated ROI for each stakeholder group.",
    # Format
    "Format: Executive summary (5 sentences), "
    "then bullet points for each option, "
    "then a numbered list of 3 recommendations.",
    # Tone
    "Tone: Authoritative, data-driven, and specifically actionable.",
])

# Score all three
# Per prompt: banner, the prompt text, a blank line, then the score_prompt call.
_audit_scores = []
for _banner, _audit_text in (
    ("=== AUDIT PROMPT A ===", audit_a),
    ("=== AUDIT PROMPT B ===", audit_b),
    ("=== AUDIT PROMPT C ===", audit_c),
):
    print(_banner)
    print(_audit_text)
    print()
    _audit_scores.append(score_prompt(_audit_text))

sa, sb, sc = _audit_scores

# One-line summary of the three totals.
_audit_totals = [sum(scores.values()) for scores in _audit_scores]
print("Prompt A: {}/25 | Prompt B: {}/25 | Prompt C: {}/25".format(*_audit_totals))


# Identify the hidden problems
problem_a = (
    "Conflicting constraints: asks for 100 words but demands "
    "comprehensive coverage of 12 market segments, "
    "3 detailed case studies with full financial breakdowns, "
    "competitive analysis of 8 competitors, and a 5-year projection. "
    "This is physically impossible in 100 words."
)

problem_b = (
    "Scope creep: assigns 4 different expert roles and 5 unrelated tasks. "
    "No single response can be a board presentation AND a UX redesign "
    "AND compliance policies AND a marketing campaign "
    "AND a competitive analysis. "
    "The output will be shallow across all areas."
)

problem_c = (
    "Missing critical context: uses vague references throughout — "
    "'the data', 'our company', 'multiple options', "
    "'our industry sector', 'our specific market'. "
    "An AI has no way to know what any of these refer to, "
    "so the output will be generic filler despite the authoritative tone."
)

# Ranking (most effective first)
ranking = list("ACB")

justification = (
    "Prompt A is most effective despite its impossible length constraint "
    "because the core request is coherent — "
    "a skilled user could simply remove the word limit. "
    "Prompt C is second because adding specific context "
    "would make it functional. "
    "Prompt B is least effective because the fundamental design is flawed — "
    "no amount of tweaking fixes the problem of asking one prompt to do "
    "5 unrelated jobs requiring 4 different domains of expertise."
)

# Report: each entry is printed on its own line, preceded by a blank line.
for _title, _value in (
    ("Problem A", problem_a),
    ("Problem B", problem_b),
    ("Problem C", problem_c),
    ("Ranking (most to least effective)", ranking),
    ("Justification", justification),
):
    print(f"\n{_title}: {_value}")