In [8]:
import os

import requests
from newspaper import Article

# -------- CONFIG --------
# Secrets must not live in the notebook: the Serper key is read from the
# SERPER_API_KEY environment variable (set it before starting the kernel).
# An empty string is used when the variable is missing, in which case the
# Serper API will reject the request and the search helper reports the error.
# NOTE(review): a real key was previously committed in this cell; it should be
# revoked/rotated in the Serper dashboard.
SERPER_API_KEY = os.environ.get("SERPER_API_KEY", "")
# Endpoint of the Ollama generate API; override with the OLLAMA_API_URL
# environment variable when the model is served somewhere else.
OLLAMA_API_URL = os.environ.get("OLLAMA_API_URL", "http://localhost:11434/api/generate")

In [10]:
def extract_article_text(url):
    """Return the text of the article at ``url``, prefixed with its source.

    The page is fetched and parsed through ``Article``. This is a best-effort
    helper: any exception raised while building, downloading or parsing the
    article is caught and reported inside the returned string instead of
    being propagated.

    Args:
        url: Address of the article to extract.

    Returns:
        A string of the form ``"Source: <url>\\n\\n<body>"`` where ``<body>`` is
        either the extracted article text or ``"[Failed to extract: <error>]"``.
    """
    try:
        page = Article(url)
        page.download()
        page.parse()
        body = page.text
    except Exception as err:
        # Deliberately broad: one bad page must not abort the whole pipeline.
        body = f"[Failed to extract: {err}]"
    return f"Source: {url}\n\n{body}"


In [11]:
def ask_llama3_locally(prompt, timeout=300, debug=False):
    """Send ``prompt`` to the ``llama3`` model at ``OLLAMA_API_URL`` and return its reply.

    This is a best-effort helper: it never raises for request or parsing
    failures. Problems are reported through a bracketed error string so the
    caller can always treat the result as text.

    Args:
        prompt: Full prompt text to send to the model.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block forever if the server stalls.
        debug: When true, print the raw HTTP response body before parsing it.

    Returns:
        The value of the ``"response"`` key of the JSON reply on success,
        ``"[No 'response' key in JSON]"`` if that key is absent,
        ``"[HTTP <code>] Error from llama3"`` for a non-200 status, or
        ``"[❌ llama3 call failed: <error>]"`` if the request or JSON decoding
        raised.
    """
    try:
        headers = {
            "Content-Type": "application/json",
            "Accept": "application/json"
        }
        response = requests.post(
            OLLAMA_API_URL,
            json={"model": "llama3", "prompt": prompt, "stream": False},
            headers=headers,
            # NOTE(review): certificate verification is disabled (kept from the
            # original, which did this for an ngrok URL). It has no effect on a
            # plain http:// endpoint; confirm it is still needed before using
            # an https:// URL.
            verify=False,
            timeout=timeout,
        )
        if debug:
            print("📦 Raw response text:\n", response.text)

        if response.status_code != 200:
            return f"[HTTP {response.status_code}] Error from llama3"

        return response.json().get("response", "[No 'response' key in JSON]")

    except Exception as e:
        # Deliberately broad: callers expect a string, never an exception.
        return f"[❌ llama3 call failed: {e}]"

In [13]:
def is_url_trusted_by_ai(url, timeout=120):
    """Ask the ``llama3`` model whether ``url`` is a trustworthy source.

    The model is instructed to answer only ``yes`` or ``no``. The reply is
    lower-cased and split into alphabetic words (so markdown such as
    ``**yes**`` is handled), and the first whole word that is ``yes`` or ``no``
    decides the result. A plain substring test is not used because a negative
    reply containing e.g. "eyes" would otherwise count as a yes.

    Args:
        url: Address of the website to evaluate.
        timeout: Seconds to wait for the model server before giving up.

    Returns:
        ``True`` only when the model's verdict is ``yes``. ``False`` for a
        ``no`` verdict, a reply with neither word, a non-200 HTTP status, or
        any exception raised while calling the model (the failure is printed).
    """
    prompt = f"""You are a trustworthiness evaluator.

Decide whether the following website is generally considered a **professional and trustworthy source of information** for serious research, financial news, or due diligence.

Do **not** include websites that:
- Are editable by the public (like Wikipedia, Reddit, Quora)
- Are known for unverified or user-generated content

Only reply with `yes` or `no`.

### Website:
{url}

### Is this source trusted?
"""

    try:
        headers = {
            "Content-Type": "application/json",
            "Accept": "application/json"
        }
        response = requests.post(
            OLLAMA_API_URL,
            json={"model": "llama3", "prompt": prompt, "stream": False},
            headers=headers,
            verify=False,
            timeout=timeout,
        )
        if response.status_code != 200:
            print(f"[AI trust check failed for {url}]: HTTP {response.status_code}")
            return False

        verdict = response.json().get("response", "").strip().lower()
        print(f"🤖 AI verdict for {url}: {verdict}")

        # Replace every non-letter with a space so punctuation/markdown around
        # the answer does not glue onto it, then look at whole words only.
        words = "".join(ch if ch.isalpha() else " " for ch in verdict).split()
        decision = next((word for word in words if word in ("yes", "no")), "no")
        return decision == "yes"
    except Exception as e:
        # Deliberately broad: an unreachable model means "not trusted".
        print(f"[AI trust check failed for {url}]: {e}")
        return False


In [14]:
def web_search_serper(query, max_results=5, timeout=30):
    """Search the web through the Serper API and keep only AI-trusted result URLs.

    Each organic result's link is passed to ``is_url_trusted_by_ai``; links the
    model accepts are collected in result order until ``max_results`` are found.

    Args:
        query: Search query sent to Serper as ``q``.
        max_results: Maximum number of trusted URLs to return. Values of zero
            or less yield an empty list without calling the API.
        timeout: Seconds to wait for the Serper API before giving up.

    Returns:
        A list of trusted URLs (possibly empty). An empty list is also returned
        when the request fails, the API answers with a non-200 status, or the
        body is not valid JSON; in each case a message is printed.
    """
    # Checking the limit up front fixes the old behaviour where a limit of 0
    # still returned the first trusted URL.
    if max_results <= 0:
        return []

    headers = {
        "X-API-KEY": SERPER_API_KEY
    }
    json_data = {
        "q": query
    }
    try:
        response = requests.post(
            "https://google.serper.dev/search",
            headers=headers,
            json=json_data,
            timeout=timeout,
        )
    except requests.RequestException as e:
        print(f"❌ Serper request failed: {e}")
        return []

    if response.status_code != 200:
        print(f"❌ Serper API error: {response.status_code}")
        return []

    try:
        data = response.json()
    except ValueError as e:
        print(f"❌ Serper returned invalid JSON: {e}")
        return []

    urls = []
    for item in data.get("organic") or []:
        url = item.get("link")
        if not url:
            # Skip malformed results instead of raising KeyError.
            continue
        print(f"🔍 Evaluating source: {url}")
        if is_url_trusted_by_ai(url):
            urls.append(url)
            if len(urls) >= max_results:
                break
    return urls


In [15]:
def classify_question_type(query, timeout=120):
    """Ask the ``llama3`` model whether ``query`` needs a short or a detailed answer.

    The reply is lower-cased and split into alphabetic words; the first whole
    word that is ``short`` or ``detailed`` decides the result, so a reply such
    as "detailed, a short answer would not do" is classified as ``detailed``.

    Args:
        query: The user's question.
        timeout: Seconds to wait for the model server before giving up.

    Returns:
        ``"short"`` or ``"detailed"``. ``"detailed"`` is the fallback for a
        reply containing neither word, a non-200 HTTP status, or any
        ``Exception`` raised while calling the model (the failure is printed).
        ``KeyboardInterrupt`` and ``SystemExit`` are not swallowed, so the
        kernel can still be interrupted during the request.
    """
    classification_prompt = f"""You are a classification assistant.

Decide if the following question requires a short, direct answer (like a number, name, or fact), or a detailed, multi-paragraph summary.

Respond with only one word: `short` or `detailed`.

### Question:
{query}

### Answer Style:"""

    # Send to LLaMA3
    try:
        headers = {
            "Content-Type": "application/json",
            "Accept": "application/json"
        }
        response = requests.post(
            OLLAMA_API_URL,
            json={"model": "llama3", "prompt": classification_prompt, "stream": False},
            headers=headers,
            verify=False,
            timeout=timeout,
        )
        if response.status_code != 200:
            return "detailed"

        style = response.json().get("response", "").strip().lower()
        # Whole-word matching: strip punctuation/markdown, then take the first
        # recognised label.
        words = "".join(ch if ch.isalpha() else " " for ch in style).split()
        return next((word for word in words if word in ("short", "detailed")), "detailed")
    except Exception as e:
        # `Exception` (not a bare except) so Ctrl-C / kernel interrupt still works.
        print(f"[Question classification failed, defaulting to 'detailed']: {e}")
        return "detailed"


# -------- FULL RAG FLOW ----------


In [16]:
def display_final_answer(answer, query, urls):
    """Print the answer to ``query`` followed by the list of source URLs.

    The output is framed by 80-character ``=`` rules: a header naming the
    question, the answer with surrounding whitespace stripped, then one
    `` - <url>`` line per source.

    Args:
        answer: Answer text; must support ``.strip()``.
        query: The question that was asked.
        urls: Iterable of source URLs to list.
    """
    rule = "=" * 80

    print(f"\n{rule}")
    print(f"🧠 Answer to: {query}")
    print(rule)
    print(answer.strip())

    print("\n🔗 Sources Used:")
    for source in urls:
        print(f" - {source}")
    print(rule)

In [17]:
# Driver cell: run the pipeline for one question and print the result.
question = "Give me the name of the  founders , their position and the team of pantera capital fund?"
# NOTE(review): `web_rag_query` is not defined in any cell visible here (the
# execution counts also skip In [12]) — presumably it lives in a cell that was
# not exported or was deleted. Confirm the notebook survives
# "Restart Kernel -> Run All"; otherwise this line raises NameError.
# The call is unpacked into an answer and a collection of source URLs.
final_answer, urls = web_rag_query(question)



# Print the answer together with the sources that were used.
display_final_answer(final_answer, question, urls)



🔍 Searching with Serper for: Give me the name of the  founders , their position and the team of pantera capital fund?
🔍 Evaluating source: https://panteracapital.com/firm/
🤖 AI verdict for https://panteracapital.com/firm/: **yes**
🔍 Evaluating source: https://panteracapital.com/team/
🤖 AI verdict for https://panteracapital.com/team/: **yes**
🔍 Evaluating source: https://en.wikipedia.org/wiki/Pantera_Capital
🤖 AI verdict for https://en.wikipedia.org/wiki/Pantera_Capital: no
🔍 Evaluating source: https://pr.linkedin.com/in/joeykrug
🤖 AI verdict for https://pr.linkedin.com/in/joeykrug: no
🔍 Evaluating source: https://panteracapital.com/
🤖 AI verdict for https://panteracapital.com/: yes
🔍 Evaluating source: https://tracxn.com/d/venture-capital/pantera-capital/__d04rGCSR3UY_tzmolj_8VW2i038fL71D_Rd9G2yY-Z0
🤖 AI verdict for https://tracxn.com/d/venture-capital/pantera-capital/__d04rGCSR3UY_tzmolj_8VW2i038fL71D_Rd9G2yY-Z0: yes
🔍 Evaluating source: https://www.linkedin.com/in/dmorehead
🤖 AI verd