In [None]:
# Imports
import os
import json
from typing import Tuple
from dotenv import load_dotenv
from openai import OpenAI
from IPython.display import Markdown, display

In [None]:
# Load environment variables from a local .env file so the API keys read below
# via os.getenv are available. override=True is python-dotenv's documented flag
# for letting .env values replace variables already set in the process environment.
load_dotenv(override=True)

In [None]:
# Optional sanity check: read each provider's key and report whether it is set.
# Only the first 8 characters of a key are ever printed.
openai_api_key = os.getenv('OPENAI_API_KEY')
anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')
google_api_key = os.getenv('GOOGLE_API_KEY')
deepseek_api_key = os.getenv('DEEPSEEK_API_KEY')
groq_api_key = os.getenv('GROQ_API_KEY')

# Insertion-ordered mapping keeps the report in the same provider order.
key_by_provider = {
    "OpenAI": openai_api_key,
    "Anthropic": anthropic_api_key,
    "Google": google_api_key,
    "DeepSeek": deepseek_api_key,
    "Groq": groq_api_key,
}

for name, value in key_by_provider.items():
    status = (
        f"{name} API Key exists and begins {value[:8]}"
        if value
        else f"{name} API Key not set (optional for some)"
    )
    print(status)

In [None]:
# Base idea prompt: the user message asking the model for an app idea and listing
# the criteria the idea must satisfy. It is sent as-is for the first idea and is
# reused as the prefix of the refinement prompts built in the loop further down.
base_prompt = """
Please come up with an idea for an app that I can build using Agentic AI.
The idea must:
- Be buildable for Android, iPhone, and Web
- Be highly profitable and easy to monetize
- Attract a niche but abundant audience
- Avoid heavy custom back-end systems (prefer existing APIs)
- Avoid complex legal/regulatory exposure

Return only the idea plus concise arguments for how it meets each criterion.
"""

In [None]:
# API clients, model selection, and a first baseline idea

# Fail fast when the Google key is missing. The OpenAI SDK falls back to the
# OPENAI_API_KEY environment variable when api_key is None, so building the
# Gemini client without a key would send the OpenAI key to Google's endpoint
# and only surface as a confusing authentication error much later.
if not google_api_key:
    raise ValueError("GOOGLE_API_KEY is not set; it is required for the Gemini evaluator client.")

openai_client = OpenAI()  # reads OPENAI_API_KEY from the environment
gemini = OpenAI(api_key=google_api_key, base_url="https://generativelanguage.googleapis.com/v1beta/openai/")
model_name = "gpt-5-mini"

messages = [{"role": "user", "content": base_prompt}]

# Use the configured model name rather than repeating the literal.
response = openai_client.chat.completions.create(
    model=model_name,
    messages=messages,
)

# Keep the baseline answer so it can be inspected independently of the loop below.
answers = []
answer = response.choices[0].message.content
answers.append(answer)

def build_qa_prompt(idea: str) -> str:
    """Build the evaluator prompt for one app idea.

    The prompt lists ten pass/fail criteria and asks for a JSON-only reply of the
    form {"results": [<total score>, <short feedback>]}.

    Args:
        idea: Text of the idea to be scored; inserted verbatim at the end of the prompt.

    Returns:
        The complete prompt string.
    """
    # Doubled braces are literal braces once the template is rendered.
    template = """
You are evaluating an app idea. Score each of the following criteria 0 or 1 (integer only):
1) clarity
2) accuracy
3) strength of arguments
4) can be built entirely using Agentic AI
5) can be built on Android, Web, and iPhone
6) highly profitable
7) attracts a niche but abundant audience
8) easy to monetize
9) avoids custom extensive back-end systems; can rely on existing APIs
10) avoids overly complex legal/regulatory clearance

Return JSON only, with schema: {{"results": ["<total_score_0-10>", "<failed criteria or 'Met all criteria'>"]}}.
Keep the second field short (<= 60 words). No markdown, no extra text.

Idea to evaluate:
{idea}
"""
    return template.format(idea=idea)

def parse_qa_response(text: str) -> Tuple[int, str]:
    """Parse the evaluator's reply into a (score, feedback) pair.

    The expected payload is {"results": [<score>, <feedback>]}. Models often wrap
    JSON in Markdown code fences or add a sentence around it even when told not
    to, so the outermost {...} span is extracted before decoding.

    Args:
        text: Raw reply text from the evaluator model.

    Returns:
        (score, feedback). Never raises: on any failure the score is 0 and the
        feedback carries the error plus the raw reply for debugging.
    """
    try:
        payload = (text or "").strip()
        # Keep only the outermost JSON object; drops ```json fences and stray prose.
        start, end = payload.find("{"), payload.rfind("}")
        if start != -1 and end > start:
            payload = payload[start:end + 1]

        data = json.loads(payload)
        results = data.get("results") or []
        score_raw = results[0] if len(results) > 0 else None
        feedback = results[1] if len(results) > 1 else ""
        # The schema asks for the score as a string; str() also accepts a bare number.
        score = int(str(score_raw))
        return score, str(feedback)
    except Exception as exc:
        # Deliberately best-effort: treat as zero score and pass the raw text back.
        return 0, f"QA parse error: {exc}; raw: {text}"

def generate_idea(prompt: str, model: str = "gpt-5-mini") -> str:
    """Ask the OpenAI chat model for an app idea.

    Args:
        prompt: Full user message; it is sent as the only message in the
            conversation, so the model sees no earlier context.
        model: Chat model identifier. Defaults to the model that was previously
            hard-coded here.

    Returns:
        The text of the first completion choice, or "" if the API returned no
        text content.
    """
    resp = openai_client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )
    # message.content is optional in the SDK; keep the declared str return type.
    return resp.choices[0].message.content or ""

def evaluate_idea(idea: str, model: str = "gemini-2.5-flash") -> Tuple[int, str, str]:
    """Score an idea with the Gemini evaluator.

    Args:
        idea: Idea text to evaluate.
        model: Evaluator model identifier. Defaults to the model that was
            previously hard-coded here.

    Returns:
        (score, feedback, raw_reply): score and feedback come from
        parse_qa_response; raw_reply is the evaluator's unparsed text ("" if the
        API returned no text content).
    """
    qa_prompt = build_qa_prompt(idea)
    resp = gemini.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": qa_prompt}],
    )
    # message.content is optional in the SDK; normalise so the third element is always a str.
    qa_text = resp.choices[0].message.content or ""
    score, feedback = parse_qa_response(qa_text)
    return score, feedback, qa_text

In [None]:
# Refinement loop: iterate until the target score is reached or max_iterations is exhausted
max_iterations = 10
target_score = 10  # the evaluator prompt lists ten criteria worth one point each
prompt_for_idea = base_prompt
idea_history = []
qa_history = []
best_idea = None
best_score = -1

for i in range(1, max_iterations + 1):
    print(f"--- Iteration {i} ---")
    idea = generate_idea(prompt_for_idea)
    print("Idea:", idea)

    score, feedback, raw_qa = evaluate_idea(idea)
    print(f"QA score: {score}")
    print(f"QA feedback: {feedback}")

    idea_history.append(idea)
    qa_history.append({"iteration": i, "score": score, "feedback": feedback, "raw": raw_qa})

    # Strictly greater: on ties the earliest idea with the top score is kept.
    if score > best_score:
        best_score = score
        best_idea = idea

    # >= rather than == so an over-reported total cannot keep the loop running.
    if score >= target_score:
        print("Perfect score reached. Stopping.")
        break

    # Each generate_idea call is a fresh single-message chat, so the model has no
    # memory of earlier iterations. Embed the idea being critiqued together with
    # the feedback; otherwise "improve the previous idea" refers to nothing.
    prompt_for_idea = (
        f"{base_prompt}\n"
        f"Previous idea:\n{idea}\n\n"
        "Improve the previous idea using this QA feedback "
        f"(fix deficiencies without bloating scope): {feedback}\n"
        "Return only the revised idea plus concise arguments."
    )

print("=== Summary ===")
print(f"Best score: {best_score}")
print("Best idea:", best_idea)
print("QA trail:")
for entry in qa_history:
    print(f"Iter {entry['iteration']}: score={entry['score']} feedback={entry['feedback']}")

In [None]:
# Render the highest-scoring idea kept by the refinement loop as formatted Markdown.
display(Markdown(best_idea))

In [None]:
# Markdown() only accepts text; passing the qa_history list of dicts raises
# TypeError. Render the trail as a Markdown bullet list, one line per iteration.
qa_trail_md = "\n".join(
    f"- **Iteration {item['iteration']}** (score {item['score']}): {item['feedback']}"
    for item in qa_history
)
display(Markdown(qa_trail_md))