In [2]:
# Setting API Keys

import os

# Publish both API keys into the process environment in a single call.
# The values are placeholders and are written unconditionally, so anything
# already stored under these two names is replaced.
os.environ.update({
    "OPENAI_API_KEY": "your api key",
    "SERPAPI_KEY": "your api key",
})


In [3]:
# Importing Libraries

import os
import requests, sqlite3, time, re
from sentence_transformers import SentenceTransformer
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from typing import List, Dict, Tuple
from openai import OpenAI

import urllib3
# Silences urllib3's InsecureRequestWarning for the whole process, so HTTPS
# requests made without certificate verification are not reported.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Both credentials come from the environment; an unset variable reads as "".
SERPAPI_KEY = os.environ.get("SERPAPI_KEY", "")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")

# Fail fast, at import time, when either key is absent or empty.
if not (SERPAPI_KEY and OPENAI_API_KEY):
    raise RuntimeError("Set SERPAPI_KEY and OPENAI_API_KEY as environment variables instead of hardcoding them.")

# Module-wide OpenAI client built from the key read above.
client = OpenAI(api_key=OPENAI_API_KEY)

# ================================
# GUARDRAILS
# ================================
# store lower-case, canonical words
PROFANITY = {
    "kill", "hack", "murder", "harass",
    "assault", "attack", "abuse", "threaten", "stalk",
    "shoot", "stab", "kidnap", "torture", "poison",
    "destroy", "bomb", "explode", "harm", "injure",
    "sabotage", "strangle", "burn",
}

# One alternation of all banned words, escaped, anchored on word boundaries
# and matched case-insensitively. Group 1 captures the word that matched.
_PROFANITY_PATTERN = re.compile(
    r"\b(%s)\b" % "|".join(map(re.escape, PROFANITY)),
    flags=re.IGNORECASE,
)

def sanitize_input(text: str) -> str:
    """Replace control characters with spaces and trim surrounding whitespace.

    Every character in the ranges U+0000-U+001F and U+007F-U+009F (which
    includes tabs and newlines) is replaced by a single space. No length
    limit is applied.

    Args:
        text: Raw text to clean. ``None`` is treated as empty input.

    Returns:
        The cleaned string, or ``""`` when *text* is ``None``.
    """
    if text is None:
        return ""
    # Substitute first, strip second: a control character such as "\x00" at
    # either end is not whitespace, so stripping before the substitution
    # would leave the replacement space dangling on the result.
    cleaned = re.sub(r'[\x00-\x1f\x7f-\x9f]', ' ', text)
    return cleaned.strip()

def simple_moderation(text: str) -> Tuple[bool, str]:
    """Check *text* against the banned-word pattern.

    Args:
        text: Text to screen. Empty or ``None`` input is accepted as clean.

    Returns:
        ``(True, "OK")`` when no banned word is found, otherwise
        ``(False, "Contains banned word: <word>")`` where ``<word>`` is the
        first match, taken from the lower-cased text.
    """
    # Falsy input short-circuits; otherwise search the lower-cased text.
    hit = _PROFANITY_PATTERN.search(text.lower()) if text else None
    if hit is None:
        return True, "OK"
    return False, f"Contains banned word: {hit.group(1)}"

# ================================
# MEMORY (SQLite)
# ================================
# Path of the SQLite file that backs the agent's memory.
DB = "research_memory.db"

def init_db():
    """Create the ``memory`` table in the ``DB`` file if it does not exist.

    The table has an auto-incrementing ``id``, ``role`` and ``content`` text
    columns, and a ``created_at`` timestamp defaulting to the insertion time.
    Calling this repeatedly is harmless because of ``IF NOT EXISTS``.

    Raises:
        sqlite3.Error: If the database cannot be opened or the DDL fails.
    """
    conn = sqlite3.connect(DB)
    try:
        conn.execute("""
    CREATE TABLE IF NOT EXISTS memory (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        role TEXT,
        content TEXT,
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
    );
    """)
        conn.commit()
    finally:
        # Close even when the statement raises, so the handle is not leaked.
        conn.close()

def store(role, content):
    """Append one row to the ``memory`` table.

    Args:
        role: Label for the entry, written to the ``role`` column.
        content: Text payload, written to the ``content`` column.

    Raises:
        sqlite3.Error: If the database cannot be opened or the insert fails
            (for example when the table has not been created yet).
    """
    conn = sqlite3.connect(DB)
    try:
        # Parameterized insert: values are bound, never spliced into the SQL.
        conn.execute("INSERT INTO memory (role, content) VALUES (?,?)", (role, content))
        conn.commit()
    finally:
        # Close even when the insert raises, so the handle is not leaked.
        conn.close()

def fetch_recent(n=10):
    """Return the most recently inserted rows of the ``memory`` table.

    Args:
        n: Maximum number of rows to return.

    Returns:
        A list of ``(id, role, content, created_at)`` tuples ordered by
        descending ``id``, i.e. newest first.

    Raises:
        sqlite3.Error: If the database cannot be opened or the query fails.
    """
    conn = sqlite3.connect(DB)
    try:
        cursor = conn.execute(
            "SELECT id, role, content, created_at FROM memory ORDER BY id DESC LIMIT ?",
            (n,),
        )
        return cursor.fetchall()
    finally:
        # Close even when the query raises, so the handle is not leaked.
        conn.close()

# ================================
# RETRIEVER (Embeddings + Cosine)
# ================================
class SimpleRetriever:
    """In-memory document index ranked by cosine similarity of embeddings.

    Documents are embedded with the ``all-MiniLM-L6-v2`` sentence-transformer
    model and kept in a NumPy matrix whose rows line up with ``self.docs``.
    """

    def __init__(self):
        self.model = SentenceTransformer("all-MiniLM-L6-v2")
        # (doc_id, text) pairs, in the same order as the rows of self.vecs.
        self.docs = []
        # Embedding matrix; None until the first non-empty add().
        self.vecs = None

    def add(self, docs: List[Tuple[str, str]]):
        """Embed and index a batch of documents.

        Args:
            docs: ``(doc_id, text)`` pairs. An empty batch is a no-op.
        """
        docs = list(docs)
        if not docs:
            # Skip the encoder entirely: stacking an empty embedding result
            # onto the matrix later could fail on mismatched shapes.
            return
        texts = [text for _, text in docs]
        embeddings = self.model.encode(texts, convert_to_numpy=True)
        if self.vecs is None:
            self.vecs = embeddings
        else:
            self.vecs = np.vstack([self.vecs, embeddings])
        self.docs.extend(docs)

    def retrieve(self, query: str, k: int = 3):
        """Return the *k* indexed documents most similar to *query*.

        Args:
            query: Text to embed and compare against the index.
            k: Maximum number of hits; values ``<= 0`` yield no hits.

        Returns:
            A list of ``(doc_id, text, score)`` tuples sorted by descending
            cosine similarity; empty when nothing has been indexed.
        """
        if self.vecs is None or not self.docs or k <= 0:
            return []
        qv = self.model.encode([query], convert_to_numpy=True)
        sims = cosine_similarity(qv, self.vecs)[0]
        # Negate so that argsort's ascending order puts the best match first.
        top = np.argsort(-sims)[:k]
        return [(self.docs[i][0], self.docs[i][1], float(sims[i])) for i in top]

# ================================
# SERPAPI SEARCH TOOL
# ================================
def serpapi_search(query, max_results=5):
    """Run a Google search through SerpAPI and return the top organic hits.

    Args:
        query: Search string sent as the ``q`` parameter.
        max_results: Maximum number of organic results to return.

    Returns:
        A list of ``{"title", "url", "snippet"}`` dicts (missing fields
        default to ``""``). An empty list is returned when the request
        fails, the body is not valid JSON, or there are no organic results.
    """
    url = "https://serpapi.com/search"
    params = {
        "engine": "google",
        "q": query,
        "api_key": SERPAPI_KEY,
    }
    try:
        # Certificate verification is left at the requests default (enabled):
        # this request carries the API key in its query string.
        r = requests.get(url, params=params, timeout=8)
        r.raise_for_status()
        data = r.json()
    except (requests.RequestException, ValueError) as e:
        # requests error messages can embed the full URL, which contains the
        # API key, so redact it before printing.
        message = str(e)
        if SERPAPI_KEY:
            message = message.replace(SERPAPI_KEY, "***")
        print("SerpAPI search error:", message)
        return []

    if not isinstance(data, dict):
        return []

    # `or []` also covers an explicit null in the response.
    organic = data.get("organic_results") or []
    return [
        {
            "title": item.get("title", ""),
            "url": item.get("link", ""),
            "snippet": item.get("snippet", ""),
        }
        for item in organic[:max_results]
    ]

# ================================
# URL FETCH TOOL
# ================================
def fetch_url_text(url, timeout=6, max_chars=3000):
    """Download *url* and return the start of its response body.

    The body is returned as received (``response.text``); no markup is
    stripped.

    Args:
        url: Address to fetch with an HTTP GET.
        timeout: Seconds passed to ``requests.get`` as its timeout.
        max_chars: Maximum number of characters of the body to return.

    Returns:
        Up to *max_chars* characters of the body, or the string
        ``"Unable to fetch content: <error>"`` when anything goes wrong.
    """
    try:
        r = requests.get(url, timeout=timeout)
        r.raise_for_status()
        return r.text[:max_chars]  # cap the size handed to downstream steps
    except Exception as e:
        # Deliberately best-effort: one unreachable or malformed page must
        # not abort the caller, so every failure becomes a message string.
        return f"Unable to fetch content: {e}"

# ================================
# OPENAI SUMMARY TOOL
# ================================
def openai_summarize(prompt):
    """Send *prompt* to the ``gpt-4o-mini`` chat model and return its reply.

    The prompt is passed through ``sanitize_input`` first, which replaces
    control characters (newlines included) with spaces; its length is not
    limited here.

    Args:
        prompt: Full user prompt to summarize.

    Returns:
        The model's reply text, or a fixed fallback message when the
        response has no usable text.

    Raises:
        Exception: Errors raised by the OpenAI client call itself are not
            caught here and propagate to the caller.
    """
    fallback = "No summary returned (OpenAI response structure unexpected)."
    prompt = sanitize_input(prompt)
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are an expert summarizer."},
            {"role": "user", "content": prompt}
        ]
    )
    try:
        content = response.choices[0].message.content
    except (AttributeError, IndexError, TypeError):
        return fallback
    # The message content may be None; always hand callers a string.
    if content is None:
        return fallback
    return content

# ================================
# AGENT: ResearchAgent
# ================================
class ResearchAgent:
    """Agent that searches for a query, downloads each hit and indexes it."""

    def __init__(self, retriever):
        # Index that receives the downloaded page texts.
        self.retriever = retriever

    def research(self, query):
        """Search, fetch and index pages for *query*.

        The query is sanitized and moderated before any external call. Each
        search hit is downloaded, logged to memory under the role
        ``"research_raw"`` (title plus the first 2000 characters of the
        page), and finally added to the retriever as ``doc0``, ``doc1``, ...

        Args:
            query: Raw user query.

        Returns:
            A list of ``{"title", "url", "snippet", "text"}`` dicts, one per
            search hit, in search order.

        Raises:
            ValueError: If moderation rejects the sanitized query.
        """
        cleaned = sanitize_input(query)
        allowed, reason = simple_moderation(cleaned)
        if not allowed:
            raise ValueError(reason)

        findings = []
        docs = []
        for position, hit in enumerate(serpapi_search(cleaned)):
            body = fetch_url_text(hit["url"])
            store("research_raw", hit["title"] + "\n" + body[:2000])
            findings.append({
                "title": hit["title"],
                "url": hit["url"],
                "snippet": hit["snippet"],
                "text": body,
            })
            docs.append((f"doc{position}", body))

        # Only touch the retriever when there is something to index.
        if docs:
            self.retriever.add(docs)

        return findings

# ================================
# AGENT: SummaryAgent
# ================================
class SummaryAgent:
    """Agent that turns research findings into an executive summary."""

    def summarize(self, findings, question):
        """Build a summarization prompt, query the model and log the result.

        Args:
            findings: Iterable of dicts with ``"title"``, ``"snippet"`` and
                ``"text"`` keys; only the first 400 characters of each text
                go into the prompt.
            question: The research query the summary should address.

        Returns:
            The summary produced by ``openai_summarize``; it is also stored
            in memory under the role ``"summary"``.
        """
        # Assemble the prompt from pieces: header, one entry per finding,
        # then the fixed output instructions.
        sections = [f"Summarize the following research for the query: {question}\n\n"]
        sections.extend(
            f"- {item['title']}\n  Snippet: {item['snippet']}\n  Content: {item['text'][:400]}\n\n"
            for item in findings
        )
        sections.append(
            "\nProvide a clear executive summary:\n"
            "1. Key insights\n"
            "2. Trends\n"
            "3. Recommended next steps\n"
        )

        result = openai_summarize("".join(sections))
        store("summary", result)
        return result

# ================================
# WORKFLOW
# ================================
def run_workflow(query):
    """Run the full research pipeline for *query*.

    Initializes the memory database, researches the query, retrieves the
    three indexed documents closest to it, appends those as extra findings
    and asks the summary agent for a summary.

    Args:
        query: Raw user query.

    Returns:
        A ``(findings, summary)`` tuple.
    """
    init_db()
    retriever = SimpleRetriever()
    research_agent = ResearchAgent(retriever)
    summary_agent = SummaryAgent()

    findings = research_agent.research(query)

    # Append the retriever's best matches as additional findings.
    # NOTE(review): these entries have no "url" key, unlike the entries
    # returned by the research step - confirm callers do not rely on it.
    findings.extend(
        {"title": f"RAG: {doc_id}", "snippet": text[:200], "text": text}
        for doc_id, text, _score in retriever.retrieve(query, k=3)
    )

    return findings, summary_agent.summarize(findings, query)

# ================================
# USER INPUT + EXECUTION
# ================================
if __name__ == "__main__":
    # Interactive entry point: prompt for a query, run the pipeline, then
    # print the summary followed by the five newest memory rows.
    banner = "*****************************************"
    rule = "======================="

    print(banner)
    query = input("Enter your research query: ")
    print(banner)
    findings, summary = run_workflow(query)

    print("\n" + rule, "SUMMARY", rule, summary, sep="\n")

    print("\n" + rule, "RECENT MEMORY", rule, sep="\n")
    for row in fetch_recent(5):
        print(row)


*****************************************


Enter your research query:  Latest Trends in AI


*****************************************

SUMMARY
## Executive Summary on Latest Trends in AI (2025 AI Index Report)

### 1. Key Insights
- **Efficiency and Accessibility**: AI technologies are becoming increasingly efficient and affordable, making them more accessible to a broader audience.
- **Open vs. Closed Models**: Open-weight models are gaining traction, closing the performance gap with closed models, which enhances diversity in AI solutions.

### 2. Trends
- **AI Benchmark Saturation**: Indicators suggest that existing benchmarks for AI performance are reaching saturation, prompting a need for diversification in evaluation methods.
- **Transcending Transformers**: The evolution beyond traditional transformer architectures is anticipated, suggesting new methodologies and advancements in AI models.
- **Embodied AI and World Models**: There's a growing emphasis on embodied AI, which involves AI that interacts with and understands the physical world.
- **Privacy vs. Personalizatio