<a href="https://colab.research.google.com/github/sriharshamutnuru/AI_Learning/blob/main/Day8_PromptEngineering.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
# ============================================================
# 📘 Day 8 — Prompting Basics (Zero-shot & Few-shot)
# Objective: Extract structured FAQs from an article using 5 prompt variants
# Author: Harsha | Role: Principal Data Engineer
# Tested with: openai==1.109.1
# ============================================================

# --- STEP 1: Setup & Imports ---
!pip install --quiet openai==1.109.1 pandas tiktoken

from openai import OpenAI
from google.colab import userdata
import pandas as pd
import json
from datetime import datetime

# ✅ Secure API key from Google Colab secrets
# The key is looked up under the secret name 'OPENAI_API_KEY' through
# google.colab.userdata instead of being written into the notebook.
client = OpenAI(api_key=userdata.get('OPENAI_API_KEY'))

# Default model name used by call_llm() when no `model` argument is passed.
MODEL = "gpt-4o-mini"   # ✅ Use gpt-4o or gpt-4o-mini (fast + cheaper for testing)

# --- STEP 2: Helper Functions ---

def call_llm(system_prompt: str, user_prompt: str, model: str = MODEL) -> str:
    """
    Call the OpenAI chat API using the modern 1.x client interface.

    Args:
        system_prompt: Text sent as the ``system`` message.
        user_prompt: Text sent as the ``user`` message.
        model: Chat model name; defaults to the module-level ``MODEL``.

    Returns:
        The first choice's message text with surrounding whitespace removed,
        or an empty string when the response carries no text content.

    Any exception raised by the client call is not handled here and
    propagates to the caller.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ],
        temperature=0.3,
    )
    # `message.content` is nullable in the API schema; coalesce to "" so the
    # helper always returns a string instead of failing on `None.strip()`.
    content = response.choices[0].message.content
    return (content or "").strip()


def is_valid_json(response_text):
    """Check if LLM output is valid JSON.

    The text is handed to ``json.loads`` unchanged, so anything that is not a
    bare JSON document -- including JSON wrapped in Markdown code fences such
    as ```` ```json ... ``` ```` -- is reported as invalid.

    Args:
        response_text: The raw model output to validate.

    Returns:
        True if ``json.loads`` accepts the input, False otherwise (malformed
        JSON, empty string, a non-string value such as None, or nesting too
        deep to parse).
    """
    try:
        json.loads(response_text)
    except (TypeError, ValueError, RecursionError):
        # ValueError covers json.JSONDecodeError and decoding errors for bytes;
        # TypeError covers non-str/bytes input such as None. Unlike a bare
        # `except:`, this lets KeyboardInterrupt / SystemExit propagate.
        return False
    return True


def evaluate_output(output_text, expected_keywords):
    """Score a response by counting the expected keywords it mentions.

    Matching is a case-insensitive substring test of each keyword against the
    whole response. Returns a ``(relevance, completeness)`` tuple where
    relevance is the number of keywords found and completeness is that same
    count capped at 5.
    """
    haystack = output_text.lower()
    hits = [keyword for keyword in expected_keywords if keyword.lower() in haystack]
    relevance = len(hits)
    completeness = relevance if relevance < 5 else 5
    return relevance, completeness


# --- STEP 3: Input Article (Example Text) ---
# Sample source text; it is interpolated into every prompt variant in STEP 4.
article = """
Microsoft Azure provides robust cloud security capabilities.
Users can manage encryption, access policies, and monitor threats using Defender for Cloud.
Azure Key Vault helps store and manage secrets securely.
Additionally, Azure Security Center integrates compliance and governance tools.
"""

# Phrases passed to evaluate_output(), which looks for each one in a response
# as a case-insensitive substring.
expected_keywords = ["encryption", "Defender for Cloud", "Key Vault", "Security Center"]


# --- STEP 4: Define 5 Prompt Variants ---
# Maps a variant name to the user-message text for that variant. Every value
# is an f-string, so `article` is embedded when this dict is built.

prompts = {
    # Minimal instruction: asks for JSON but does not describe its shape.
    "Prompt_1_ZeroShot": f"""
Extract FAQs from the following text and return in JSON format.
Text: {article}
""",

    # Zero-shot, but names the required "question" / "answer" keys.
    "Prompt_2_ZeroShot_JSON_Enforced": f"""
You are a helpful assistant that extracts FAQs.
Return the result strictly as JSON with "question" and "answer" keys.
Text: {article}
""",

    # Two worked examples. Note the examples use a plain Q:/A: layout and the
    # prompt never mentions JSON.
    "Prompt_3_FewShot": f"""
You are a helpful assistant that extracts FAQs.

Example 1:
Text: "We offer free shipping on orders above $50."
Output:
Q: What is the minimum order value for free shipping?
A: $50.

Example 2:
Text: "Users can reset their password from the login page."
Output:
Q: How can users reset their password?
A: From the login page.

Now extract FAQs from this:
Text: {article}
""",

    # NOTE(review): despite the name, this text contains no worked examples and
    # is sent as a user message like the others. The doubled braces are
    # f-string escapes that produce literal { } in the prompt.
    "Prompt_4_FewShot_SystemRole": f"""
You are an AI trained to produce structured FAQ pairs in JSON.
Always return a list of {{ "question": "...", "answer": "..." }} objects.

Text: {article}
""",

    # Asks the model to work through explicit steps before returning JSON.
    "Prompt_5_ChainOfThought": f"""
You are a reasoning assistant.
Step 1: Identify key entities and actions in the text.
Step 2: Frame each as a user question and answer.
Step 3: Return the final result as JSON.

Text: {article}
"""
}

# Shared system message; the STEP 5 loop passes it to call_llm() for every variant.
system_prompt = "You are a structured LLM assistant that returns precise, fact-based FAQs."

# --- STEP 5: Run Prompts and Collect Results ---
# Each variant is sent once. If any step for a variant raises, an error row
# with zero scores is recorded instead so the remaining variants still run.

results = []

for name, prompt in prompts.items():
    print(f"\n🚀 Running {name} ...")
    try:
        response = call_llm(system_prompt, prompt)
        json_valid = is_valid_json(response)
        relevance, completeness = evaluate_output(response, expected_keywords)
        row = {
            "Prompt": name,
            "JSON_Valid": "✅" if json_valid else "❌",
            "Relevance_Score(0-5)": relevance,
            "Completeness_Score(0-5)": completeness,
            "Response": response,
        }
    except Exception as e:
        row = {
            "Prompt": name,
            "JSON_Valid": "❌",
            "Relevance_Score(0-5)": 0,
            "Completeness_Score(0-5)": 0,
            "Response": f"⚠️ Error: {str(e)}",
        }
    results.append(row)

# --- STEP 6: Create DataFrame for Evaluation ---
# Show only the score columns; the full responses stay in `df`.

df = pd.DataFrame(results)
summary_columns = ["Prompt", "JSON_Valid", "Relevance_Score(0-5)", "Completeness_Score(0-5)"]
display(df[summary_columns])

# --- STEP 7: Save Results for Checkpoint ---
# The file name carries a minute-resolution timestamp (YYYYMMDD_HHMM).
timestamp = f"{datetime.now():%Y%m%d_%H%M}"
csv_path = f"Day8_FAQ_Results_{timestamp}.csv"
df.to_csv(csv_path, index=False)

print("\n✅ Evaluation complete. Results saved to CSV and displayed above.")

# --- STEP 8: Detailed View (Summary Table + Full Responses) ---

df = pd.DataFrame(results)

# Score summary, one row per prompt variant
print("📊 Evaluation Summary:\n")
score_columns = ["Prompt", "JSON_Valid", "Relevance_Score(0-5)", "Completeness_Score(0-5)"]
display(df[score_columns])

# Full model output for each variant, preceded by its scores
print("\n🧠 Full LLM Responses:\n")
for _, row in df.iterrows():
    title_line = f"🧩 {row['Prompt']}"
    score_line = f"JSON Valid: {row['JSON_Valid']} | Relevance: {row['Relevance_Score(0-5)']} | Completeness: {row['Completeness_Score(0-5)']}"
    # One print call with a newline separator emits the same lines as
    # printing each piece separately.
    print(
        "==============================",
        title_line,
        score_line,
        "------------------------------",
        row["Response"],
        "\n",
        sep="\n",
    )




🚀 Running Prompt_1_ZeroShot ...

🚀 Running Prompt_2_ZeroShot_JSON_Enforced ...

🚀 Running Prompt_3_FewShot ...

🚀 Running Prompt_4_FewShot_SystemRole ...

🚀 Running Prompt_5_ChainOfThought ...


Unnamed: 0,Prompt,JSON_Valid,Relevance_Score(0-5),Completeness_Score(0-5)
0,Prompt_1_ZeroShot,❌,4,4
1,Prompt_2_ZeroShot_JSON_Enforced,❌,4,4
2,Prompt_3_FewShot,❌,4,4
3,Prompt_4_FewShot_SystemRole,❌,4,4
4,Prompt_5_ChainOfThought,❌,4,4



✅ Evaluation complete. Results saved to CSV and displayed above.
📊 Evaluation Summary:



Unnamed: 0,Prompt,JSON_Valid,Relevance_Score(0-5),Completeness_Score(0-5)
0,Prompt_1_ZeroShot,❌,4,4
1,Prompt_2_ZeroShot_JSON_Enforced,❌,4,4
2,Prompt_3_FewShot,❌,4,4
3,Prompt_4_FewShot_SystemRole,❌,4,4
4,Prompt_5_ChainOfThought,❌,4,4



🧠 Full LLM Responses:

🧩 Prompt_1_ZeroShot
JSON Valid: ❌ | Relevance: 4 | Completeness: 4
------------------------------
```json
{
  "FAQs": [
    {
      "question": "What cloud security capabilities does Microsoft Azure provide?",
      "answer": "Microsoft Azure provides robust cloud security capabilities."
    },
    {
      "question": "How can users manage encryption and access policies in Azure?",
      "answer": "Users can manage encryption, access policies, and monitor threats using Defender for Cloud."
    },
    {
      "question": "What is Azure Key Vault used for?",
      "answer": "Azure Key Vault helps store and manage secrets securely."
    },
    {
      "question": "What tools does Azure Security Center integrate?",
      "answer": "Azure Security Center integrates compliance and governance tools."
    }
  ]
}
```


🧩 Prompt_2_ZeroShot_JSON_Enforced
JSON Valid: ❌ | Relevance: 4 | Completeness: 4
------------------------------
```json
[
    {
        "question": "What