In [None]:
# Install the third-party packages used by the analysis cell (run once per environment).
!pip install requests beautifulsoup4 sec-api sentence-transformers langchain langchain-community faiss-cpu openai reportlab

In [None]:
import os

# A `!export ...` line runs in a throwaway subshell, so the variables never
# reach the kernel process and os.getenv() would not see them. Assigning to
# os.environ sets them for this process directly.
# Replace the placeholders with real keys, or delete this cell to be
# prompted for them interactively instead.
os.environ["SEC_API_KEY"] = "xxx"
os.environ["OPENAI_API_KEY"] = "xxx"

In [None]:
import datetime
import os
import re
import sys
from xml.sax.saxutils import escape

import requests
from bs4 import BeautifulSoup
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain_community.chat_models import ChatOpenAI
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.platypus import Paragraph, Spacer, SimpleDocTemplate
from sec_api import QueryApi
from sentence_transformers import SentenceTransformer

# Credentials come from the environment when set; otherwise ask interactively.
API_KEY = os.getenv("SEC_API_KEY") or input("Enter your sec-api.io API key: ")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") or input("Enter your OpenAI API key: ")

# The ticker is interpolated into both the search query and the output file
# name, so normalise it and reject anything that is not a plain ticker
# (letters, digits, '.' and '-', e.g. TSLA or BRK.B).
symbol = input("Enter the stock symbol (e.g., TSLA): ").strip().upper()
if not re.fullmatch(r"[A-Z0-9][A-Z0-9.\-]*", symbol):
    print(f"Invalid stock symbol: {symbol!r}")
    sys.exit(1)
print(f"Analyzing 10-Q filing for {symbol}.")

# Look up the newest 10-Q for the ticker via the sec-api.io query client.
query_api = QueryApi(api_key=API_KEY)

# Sorting by filing time (descending) and asking for a single hit yields
# the most recently filed 10-Q.
newest_first = [{"filedAt": {"order": "desc"}}]
query = {
    "query": f'ticker:{symbol} AND formType:"10-Q"',
    "from": "0",
    "size": "1",
    "sort": newest_first,
}

try:
    filings = query_api.get_filings(query)
    hit_count = filings["total"]["value"]
    if hit_count == 0:
        print(f"No 10-Q filings found for {symbol}.")
        sys.exit(1)

    filing = filings["filings"][0]
    accession_number = filing["accessionNo"]
    filing_url = filing["linkToFilingDetails"]
    # "filedAt" is split at the "T"; only the part before it is kept.
    filing_date = filing["filedAt"].partition("T")[0]
    print(f"Found filing: Accession Number: {accession_number}, URL: {filing_url}")
except Exception as err:
    # Any lookup failure (request error or unexpected response shape) is fatal.
    print(f"Error fetching filings: {err}")
    sys.exit(1)

# Download the filing document located by the query above.
try:
    # Identify yourself to the server: change the name and e-mail before running.
    headers = {"User-Agent": "Your Name (your.email@example.com)", "Accept": "text/html"}
    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.get(filing_url, headers=headers, timeout=30)
    response.raise_for_status()
    full_text = response.text
    print("Filing downloaded successfully.")
except requests.RequestException as e:
    # Covers connection errors, timeouts, invalid URLs and non-2xx statuses.
    print(f"Error downloading filing: {e}")
    sys.exit(1)

# Upper bound on the size of each text piece handed to the embedder.
MAX_CHUNK_CHARS = 2000
# Sections at or below this length are dropped as noise (stray headings etc.).
MIN_SECTION_CHARS = 100

# A line starting with one of these headings opens a new section. Both the
# typographic and the plain apostrophe are accepted in "Management's".
SECTION_HEADER = re.compile(
    r"^(Item \d+\.|PART \S+|Revenue|Net Income|Financial Statements|Management[’']s Discussion|Key Metrics)",
    re.IGNORECASE,
)

# Reduce the HTML to plain text, one stripped text node per line.
soup = BeautifulSoup(full_text, 'html.parser')
full_text = soup.get_text(separator='\n', strip=True)

# Group lines into sections, starting a new one at every heading line.
# Lines are collected in a list and joined once to avoid quadratic string
# concatenation.
raw_sections = []
current_lines = []
for line in full_text.split("\n"):
    if SECTION_HEADER.match(line) and current_lines:
        raw_sections.append(" ".join(current_lines))
        current_lines = []
    current_lines.append(line)
if current_lines:
    raw_sections.append(" ".join(current_lines))

# Split long sections into consecutive windows instead of truncating them,
# so text beyond the first MAX_CHUNK_CHARS characters stays searchable.
chunks = []
for section in raw_sections:
    section = section.strip()
    if len(section) <= MIN_SECTION_CHARS:
        continue
    chunks.extend(
        section[start:start + MAX_CHUNK_CHARS]
        for start in range(0, len(section), MAX_CHUNK_CHARS)
    )

if not chunks:
    # Nothing to index, so stop here rather than hand an empty corpus onward.
    print("No usable text could be extracted from the filing.")
    sys.exit(1)
print(f"Created {len(chunks)} chunks for embedding.")

# Embed every chunk with a local sentence-transformers model and index the
# vectors in an in-memory FAISS store for similarity search.
embeddings = HuggingFaceEmbeddings(model_name='all-MiniLM-L6-v2')
vectorstore = FAISS.from_texts(chunks, embeddings)

# temperature=0 keeps sampling as deterministic as the API allows; the
# prompts ask for exact figures, where run-to-run variation is unwanted.
llm = ChatOpenAI(model_name='gpt-4o', openai_api_key=OPENAI_API_KEY, temperature=0)

# Every question refers to the same filing, so spell that reference out once.
filing_ref = f"the 10-Q report filed on {filing_date} for {symbol}"

# (report heading, question for the LLM) pairs, in the order they appear in the report.
section_prompts = [
    (
        "Revenue and Net Income",
        f"Extract the exact total revenue and net income figures from the financial statements section of {filing_ref}.",
    ),
    (
        "Key Financial Ratios",
        f"List key financial ratios such as EPS, operating margin, and debt-to-equity from {filing_ref}.",
    ),
    (
        "Management Discussion",
        f"Summarize key points from the Management’s Discussion and Analysis (MD&A) section of {filing_ref}.",
    ),
    (
        "Future Outlook",
        f"Identify forward-looking statements or future outlook from {filing_ref}.",
    ),
    (
        "Summary",
        f"Provide a concise summary of financial performance from {filing_ref}.",
    ),
]

# Parallel lists consumed by the question-answering loop and the report builder.
prompts = [question for _, question in section_prompts]
sections = [heading for heading, _ in section_prompts]

# Answer each prompt with retrieval-augmented QA over the indexed filing.
# `responses` ends up parallel to `sections`: one entry per section, holding
# either the model's answer or an error note.
responses = []
# The chain does not depend on the prompt, so it is built once. It is built
# lazily inside the try block so that a construction failure is still
# reported per section instead of aborting the run.
qa = None
for prompt, section in zip(prompts, sections):
    try:
        if qa is None:
            qa = RetrievalQA.from_chain_type(
                llm=llm,
                chain_type="stuff",
                # Feed the three most similar chunks to the model as context.
                retriever=vectorstore.as_retriever(search_kwargs={"k": 3}),
            )
        # invoke() replaces the deprecated Chain.run(); RetrievalQA takes
        # the question under "query" and returns the answer under "result".
        answer = qa.invoke({"query": prompt})["result"]
    except Exception as e:
        # Best effort: keep going so the report still covers other sections.
        print(f"Error processing '{section}': {e}")
        responses.append(f"Analysis incomplete due to error: {e}")
    else:
        responses.append(answer)

def _to_paragraph_markup(text):
    """Return *text* made safe for a ReportLab Paragraph.

    Paragraph interprets its text as XML-like markup, so literal '&', '<'
    and '>' are escaped; newlines become explicit <br/> breaks so that
    multi-line answers keep their line structure.
    """
    return escape(str(text)).replace("\n", "<br/>")


# Assemble the PDF report: title block, one heading + body per section, note.
styles = getSampleStyleSheet()
output_path = f"{symbol}_analysis.pdf"
report_date = datetime.datetime.now().strftime('%Y-%m-%d')
safe_symbol = _to_paragraph_markup(symbol)

doc = SimpleDocTemplate(output_path)
elements = [
    Paragraph(f"{safe_symbol} 10-Q Analysis Report", styles['Title']),
    Paragraph(f"Stock: {safe_symbol}", styles['Normal']),
    Paragraph(f"Date: {report_date}", styles['Normal']),
    Paragraph(f"Based on 10-Q Filing Dated: {filing_date}", styles['Normal']),
    Spacer(1, 12),
]

for section, response in zip(sections, responses):
    elements.append(Paragraph(section, styles['Heading2']))
    # Model output is free text and may contain markup-significant characters.
    elements.append(Paragraph(_to_paragraph_markup(response), styles['BodyText']))
    elements.append(Spacer(1, 12))

# State what the report is actually based on, instead of a hard-coded date
# and an unverified claim about which filings exist.
elements.append(Paragraph("Note", styles['Heading2']))
elements.append(Paragraph(
    f"This report uses the most recent 10-Q for {safe_symbol} available on {report_date} "
    f"(filed {filing_date}); a newer filing may have been published since.",
    styles['BodyText'],
))

doc.build(elements)
print(f"Generated {output_path}")