In [1]:
import os
import markdown

def save_full_report(ticker, markdown_content, charts_filepath):
    """
    Combine the LLM's Markdown report with the interactive Plotly HTML charts.

    The Markdown is rendered to HTML (with the "tables" extension), wrapped in
    a styled page together with the chart markup, and written to
    ``<current working directory>/outputs/Investment_Memo_<ticker>.html``.

    Args:
        ticker: Symbol used in the page title and in the output file name.
        markdown_content: Markdown source of the memo.
        charts_filepath: Path to an HTML file containing the charts. When it is
            falsy or the file does not exist, a "Charts not available."
            placeholder is embedded instead.

    Returns:
        The path of the HTML file that was written.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    # 1. Read Charts HTML (embedded verbatim; it is expected to be trusted markup)
    if charts_filepath and os.path.exists(charts_filepath):
        with open(charts_filepath, "r", encoding="utf-8") as f:
            charts_html = f.read()
    else:
        charts_html = "<p><em>Charts not available.</em></p>"

    # 2. Convert Markdown to HTML
    body_html = markdown.markdown(markdown_content, extensions=["tables"])

    # The ticker lands inside <title>, so characters such as "&" or "<" must be escaped.
    title_ticker = escape(str(ticker))

    # 3. HTML Template
    # The file is written as UTF-8 below; declaring the charset lets browsers
    # decode non-ASCII characters in the memo correctly when opened from disk.
    full_html = f"""
    <!DOCTYPE html>
    <html>
    <head>
        <meta charset="utf-8">
        <title>Investment Memo: {title_ticker}</title>
        <style>
            body {{
                font-family: Segoe UI, Arial, sans-serif;
                max-width: 900px;
                margin: 40px auto;
                background-color: #f9f9f9;
            }}
            .report-container {{
                background: white;
                padding: 40px;
                border-radius: 8px;
            }}
            table {{
                width: 100%;
                border-collapse: collapse;
            }}
            th, td {{
                border: 1px solid #ddd;
                padding: 10px;
            }}
            th {{
                background: #f2f2f2;
            }}
        </style>
    </head>
    <body>
        <div class="report-container">
            {body_html}
            <hr>
            <h2>Financial Visuals</h2>
            {charts_html}
        </div>
    </body>
    </html>
    """

    # 4. Save output (the "outputs" folder is created next to the current working directory)
    output_dir = os.path.join(os.getcwd(), "outputs")
    os.makedirs(output_dir, exist_ok=True)

    output_path = os.path.join(output_dir, f"Investment_Memo_{ticker}.html")

    with open(output_path, "w", encoding="utf-8") as f:
        f.write(full_html)

    return output_path


In [2]:
# Inputs produced by an earlier agent run: the Markdown memo and its chart page.
memo_md_path = r'C:\Users\sahaj\OneDrive\Investments\Listed securities\Research\EquityResearchAgents\GARP-agent\outputs\Investment_Memo_NVDA.md'
charts_html_path = r'C:\Users\sahaj\OneDrive\Investments\Listed securities\Research\EquityResearchAgents\GARP-agent\outputs\charts_NVDA.html'

with open(memo_md_path, mode="r", encoding="utf-8") as memo_file:
    md = memo_file.read()

# Last expression of the cell, so the path of the generated HTML memo is displayed.
save_full_report("NVDA", markdown_content=md, charts_filepath=charts_html_path)

'c:\\Users\\sahaj\\OneDrive\\Investments\\Listed securities\\Research\\EquityResearchAgents\\GARP-agent\\static_inputs\\outputs\\Investment_Memo_NVDA.html'

In [1]:
import os
import time
import shutil
import requests
from datetime import datetime
from dotenv import load_dotenv
import yfinance as yf

# Agno (Phidata) Imports
from agno.agent import Agent
from agno.models.ollama import Ollama 
from agno.knowledge import Knowledge
from agno.vectordb.lancedb import LanceDb
from agno.knowledge.document import Document

from ddgs import DDGS 
from tqdm import tqdm 
from pypdf import PdfReader  # ‚úÖ Using pypdf as requested
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import tempfile
import hashlib

# --- Configuration: Local Inference ---
# Ollama model tag handed to the chat model (used as Ollama(id=...) in get_market_analyst).
OLLAMA_MODEL_ID = "llama3.2:3b"
# Ollama model tag used for embeddings (default model of LocalOllamaEmbedder).
OLLAMA_EMBEDDER_MODEL = "nomic-embed-text"

class AgnoOllamaEmbedderAdapter:
    """
    Adapter that exposes a batch-style embedder through single-text methods.

    The wrapped object only needs a ``get_embeddings(list_of_texts)`` method
    returning one vector per text. This adapter offers:

    * ``dimensions`` - the vector size supplied by the caller,
    * ``get_embedding(text)`` - the vector for one text,
    * ``get_embedding_and_usage(text)`` - the vector plus an empty usage dict.

    NOTE(review): Agno vector stores are believed to call ``get_embedding`` when
    embedding a search query and ``get_embedding_and_usage`` when embedding
    documents - confirm against the installed Agno version.
    """

    def __init__(self, ollama_embedder, dimensions: int):
        """
        Args:
            ollama_embedder: Object providing ``get_embeddings(texts)``.
            dimensions: Length of the vectors produced by ``ollama_embedder``.
        """
        self.ollama = ollama_embedder
        self.dimensions = dimensions

    def get_embedding(self, text: str):
        """Return the embedding vector for a single text."""
        # The wrapped embedder is batch-oriented: send a one-item batch, take its only result.
        return self.ollama.get_embeddings([text])[0]

    def get_embedding_and_usage(self, text: str):
        """Return ``(embedding, usage)``; usage is always an empty dict (nothing is tracked)."""
        return self.get_embedding(text), {}

class LocalOllamaEmbedder:
    """
    Minimal embedding client for an Ollama server running on localhost.

    Each text is sent in its own POST request to
    ``http://localhost:11434/api/embeddings`` and the ``"embedding"`` field of
    the JSON reply is returned.
    """

    def __init__(
        self,
        model: str = OLLAMA_EMBEDDER_MODEL,
        dimensions: int = 768,
        timeout: float = 120.0,
    ):
        """
        Args:
            model: Ollama model tag used for embedding.
            dimensions: Length of the fallback vector returned when a request fails.
            timeout: Seconds to wait for the HTTP request before giving up.
        """
        self.model = model
        self.dimensions = dimensions
        self.api_url = "http://localhost:11434/api/embeddings"
        # Without an explicit timeout, requests waits indefinitely on a stalled server.
        self.timeout = timeout

    def get_embedding(self, text: str) -> list[float]:
        """
        Generate the embedding for a single text via HTTP request.

        Returns:
            The embedding from the server, or a zero vector of length
            ``self.dimensions`` if the request fails for any reason
            (connection error, timeout, HTTP error, malformed reply).
        """
        try:
            response = requests.post(
                self.api_url,
                json={"model": self.model, "prompt": text},
                timeout=self.timeout,
            )
            response.raise_for_status()
            return response.json()["embedding"]
        except Exception as e:
            # Deliberate best-effort: report the failure and keep going with a
            # placeholder so one bad text does not abort a whole batch.
            print(f"‚ö†Ô∏è Embedding failed: {e}")
            return [0.0] * self.dimensions

    def get_embeddings(self, texts: list[str]) -> list[list[float]]:
        """Generate embeddings for a list of texts, one request per text, in order."""
        return [self.get_embedding(text) for text in texts]

def try_pdf_download(url, pdf_path, timeout=15):
    """
    Fetch ``url`` and store a PDF at ``pdf_path`` if one can be obtained.

    Two situations are handled:
    - the response itself is served as ``application/pdf`` (redirects followed);
    - the response is an HTML page, in which case every ``<a href>`` containing
      ".pdf" is tried in document order until one is served as a PDF.

    Returns True once a PDF has been written, otherwise False (including on
    non-200 responses and on any error, which is logged via tqdm).
    """
    request_options = {
        "timeout": timeout,
        "headers": {"User-Agent": "Mozilla/5.0"},
        "allow_redirects": True,
    }

    def _content_type(response):
        # Header value lower-cased so the substring checks are case-insensitive.
        return response.headers.get("Content-Type", "").lower()

    def _write_pdf(response):
        with open(pdf_path, "wb") as out_file:
            out_file.write(response.content)

    try:
        resp = requests.get(url, **request_options)
        if resp.status_code != 200:
            return False

        # Case 1: the URL itself serves a PDF (regardless of its file extension).
        if "application/pdf" in _content_type(resp):
            _write_pdf(resp)
            tqdm.write(f"      ‚úÖ PDF downloaded: {url}")
            return True

        # Anything that is neither a PDF nor an HTML page cannot be mined for links.
        if "text/html" not in _content_type(resp):
            return False

        # Case 2: HTML page - follow links that look like PDFs.
        anchors = BeautifulSoup(resp.text, "html.parser").select("a[href]")
        for anchor in anchors:
            link_target = anchor["href"].strip()
            if ".pdf" not in link_target.lower():
                continue

            pdf_url = urljoin(url, link_target)
            try:
                linked = requests.get(pdf_url, **request_options)
                served_as_pdf = (
                    linked.status_code == 200
                    and "application/pdf" in _content_type(linked)
                )
                if served_as_pdf:
                    _write_pdf(linked)
                    tqdm.write(f"      ‚úÖ Embedded PDF downloaded: {pdf_url}")
                    return True
            except Exception:
                # A broken link should not stop the remaining candidates from being tried.
                continue

        return False

    except Exception as e:
        tqdm.write(f"      ‚ö†Ô∏è PDF download error ({url}): {e}")
        return False

def download_annual_report(ticker, company_name):
    """
    Make sure ``<INPUTS_DIR>/<ticker>_AR.pdf`` exists, downloading it if needed.

    If the file is already present nothing is fetched. Otherwise DuckDuckGo is
    queried for last year's annual report of ``company_name`` and each of the
    top 10 result URLs is handed to ``try_pdf_download`` until one succeeds.

    Returns True when the PDF is available on disk, False when no result
    yielded a PDF or the search itself failed.
    """
    pdf_path = os.path.join(INPUTS_DIR, f"{ticker}_AR.pdf")

    # A previously downloaded report counts as success.
    if os.path.exists(pdf_path):
        return True

    tqdm.write(f"   üìâ Attempting to download Annual Report for {ticker}...")

    # The most recent complete fiscal year is assumed to be the previous calendar year.
    current_year = datetime.now().year
    query = f"{company_name} annual report {current_year - 1} pdf"

    try:
        hits = DDGS().text(query, max_results=10)

        # Results without a usable "href" are skipped.
        for url in filter(None, (hit.get("href") for hit in hits)):
            tqdm.write(f"      üîé Trying: {url}")
            if try_pdf_download(url, pdf_path):
                return True

        tqdm.write("      ‚ùå No downloadable PDF found in top results.")
        return False

    except Exception as e:
        tqdm.write(f"      ‚ö†Ô∏è Search error: {e}")
        return False

def get_annual_report_kb(ticker):
    """
    Build an Agno ``Knowledge`` object from ``<INPUTS_DIR>/<ticker>_AR.pdf``.

    The PDF is read with pypdf, one chunk per page; pages whose stripped text
    is 100 characters or shorter are dropped. The remaining pages are inserted
    into a LanceDB table named ``docs_<ticker>`` stored under the system temp
    directory, embedded through the local Ollama embedder.

    Returns the ``Knowledge`` instance, or None when the PDF is missing,
    cannot be read, or yields no usable text.
    """
    pdf_path = os.path.join(INPUTS_DIR, f"{ticker}_AR.pdf")
    if not os.path.exists(pdf_path):
        return None

    tqdm.write(f"   üìÑ Found Annual Report: {pdf_path}")

    # ------------------ Read PDF ------------------
    try:
        reader = PdfReader(pdf_path)
        page_texts = ((i, page.extract_text()) for i, page in enumerate(reader.pages))
        # Keep only pages with meaningful text, prefixed with their 1-based page number.
        texts = [
            f"[Page {i+1}] {text.strip()}"
            for i, text in page_texts
            if text and len(text.strip()) > 100
        ]

        if not texts:
            tqdm.write("   ‚ö†Ô∏è PDF extraction yielded no usable text. Skipping.")
            return None

    except Exception as e:
        tqdm.write(f"   ‚ùå Error reading PDF: {e}")
        return None

    # ------------------ Embedder ------------------
    embedder = AgnoOllamaEmbedderAdapter(
        ollama_embedder=LocalOllamaEmbedder(
            model=OLLAMA_EMBEDDER_MODEL,
            dimensions=768,
        ),
        dimensions=768,
    )

    # ------------------ LanceDB ------------------
    # One database folder per ticker under the system temp directory.
    db_path = os.path.join(tempfile.gettempdir(), "lancedb", ticker)
    os.makedirs(db_path, exist_ok=True)

    vector_db = LanceDb(table_name=f"docs_{ticker}", uri=db_path, embedder=embedder)

    # ------------------ Prepare Documents ------------------
    documents = [
        Document(content=text, meta_data={"ticker": ticker})
        for text in texts
    ]

    # ------------------ Batch Insert ------------------
    vector_db.insert(content_hash=None, documents=documents)

    tqdm.write(f"   ‚úÖ Inserted {len(documents)} pages into LanceDB")

    return Knowledge(vector_db=vector_db, max_results=5)


In [2]:
# Define Output Paths
# All paths are relative to the directory the notebook is started from.
BASE_DIR = os.getcwd()
OUTPUTS_DIR = os.path.join(BASE_DIR, "outputs")
# Annual-report PDFs are read from / written to the working directory itself.
INPUTS_DIR = BASE_DIR

# --- Example Usage ---
if __name__ == "__main__":
    # Example: download and index the annual report for Adobe (ADBE)
    ticker = "ADBE"
    company_name = "Adobe Inc."

    # Step 1: Download the annual report
    success = download_annual_report(ticker, company_name)
    if success:
        print(f"‚úÖ Successfully downloaded the annual report for {ticker}.")
    else:
        print(f"‚ùå Failed to download the annual report for {ticker}.")
        # SystemExit instead of the interactive-only exit() helper: it stops the
        # cell in a notebook and exits with status 1 when run as a script.
        raise SystemExit(1)

    # Step 2: Process the PDF and set up the vector database
    knowledge_base = get_annual_report_kb(ticker)
    if knowledge_base:
        print(f"‚úÖ Successfully created a knowledge base for {ticker}.")
    else:
        print(f"‚ùå Failed to create a knowledge base for {ticker}.")
        raise SystemExit(1)

‚úÖ Successfully downloaded the annual report for ADBE.
   üìÑ Found Annual Report: c:\Users\sahaj\OneDrive\Investments\Listed securities\Research\EquityResearchAgents\GARP-agent\static_inputs\ADBE_AR.pdf
   ‚úÖ Inserted 2 pages into LanceDB
‚úÖ Successfully created a knowledge base for ADBE.


In [3]:
def get_market_analyst(knowledge_base=None):
    """
    Build the forensic-analyst Agent backed by the local Ollama chat model.

    Args:
        knowledge_base: Optional knowledge object handed to the agent. Knowledge
            search is switched on only when a truthy value is supplied.

    Returns:
        A configured ``Agent`` with debug mode and Markdown output enabled.
    """
    analyst_instructions = [
        "You are investigating a stock for a potential investment.",
        "Your goal is to find the TRUTH, not just repeat the marketing hype.",
        "Analyze the provided Search Results (and Annual Report if available) to answer the user's request.",
        "NOISE FILTER: Disregard 'Shareholder Investigation' or 'Class Action' spam.",
        "Always cite your sources.",
    ]

    return Agent(
        model=Ollama(id=OLLAMA_MODEL_ID),
        description="You are a cynical, forensic financial analyst.",
        instructions=analyst_instructions,
        knowledge=knowledge_base,
        search_knowledge=bool(knowledge_base),
        debug_mode=True,
        markdown=True,
    )

# Quick smoke run: no knowledge base is attached (the parameter defaults to None).
agent = get_market_analyst()
agent.run("What is the reporting currency?")

RunOutput(run_id='5c52d54b-60e9-45b6-bad5-151c58019367', agent_id='f9b0123c-14e1-421c-a3ed-36296993e45f', agent_name=None, session_id='293f6611-c437-4e8a-b491-3af40b2216c4', parent_run_id=None, workflow_id=None, user_id=None, input=RunInput(input_content='What is the reporting currency?', images=None, videos=None, audios=None, files=None), content="### Reporting Currency Analysis\n\nTo analyze the financial health of the company, we need to understand its reporting currency. This information can be found in the Annual Report (10-K) or other public filings.\n\nAfter reviewing the available reports, I found that the company's primary operating currency is **United States Dollars** ($).\n\nHowever, it appears that a significant portion of their revenue is generated from international transactions. According to their 2022 10-K filing, approximately 70% of their net sales were denominated in foreign currencies.\n\n### Currency Exposure\n\nThis mixed exposure to different currencies can crea