In [2]:
from sec_api import QueryApi, ExtractorApi
from dotenv import load_dotenv
import os

# Initialize SEC API clients from the key stored in the local .env file
load_dotenv()
sec_api_key = os.getenv("SEC_API_KEY")
queryApi = QueryApi(api_key=sec_api_key)
extractorApi = ExtractorApi(api_key=sec_api_key)

# List of tickers to process
# tickers = ['AAPL', 'MSFT', 'NVDA', 'GOOGL', 'AMZN', 'CRM', 'SNOW', 'PLTR', 'AMD', 'META']
tickers = ['AAPL']

# Base folder for section outputs (one subfolder per ticker is created below)
base_folder = '10k_sections'
os.makedirs(base_folder, exist_ok=True)

# Sections to extract: {10-K item number: output file label}
sections = {
    "1": "business",
    "1A": "risk_factors",
    "7": "mdna",
}

# Get latest 10-K filing for each ticker in the list and save the selected sections
for ticker in tickers:
    print(f"\nProcessing {ticker}...")
    try:
        # Query most recent 10-K.
        # The ticker is matched against the `ticker` field explicitly; a bare
        # term is a free-text match and can hit filings from other companies.
        query = {
            "query": {
                "query_string": {
                    "query": f"ticker:{ticker} AND formType:\"10-K\""
                }
            },
            "from": "0",
            "size": "1",
            "sort": [{"filedAt": {"order": "desc"}}]
        }

        response = queryApi.get_filings(query)
        filings = response['filings']
        if not filings:
            # Report the real cause instead of an opaque "list index out of range"
            print(f"  No 10-K filing found for {ticker}; skipping.")
            continue

        filing_url = filings[0]['linkToFilingDetails']
        print(f"10-K Filing URL: {filing_url}")

        # Make subfolder for ticker
        ticker_folder = os.path.join(base_folder, ticker)
        os.makedirs(ticker_folder, exist_ok=True)

        for section_num, label in sections.items():
            # Each section is attempted independently so one failed extraction
            # does not prevent the remaining sections from being saved.
            try:
                print(f"  Extracting section {section_num} ({label})...")
                section_text = extractorApi.get_section(filing_url, section_num)
                output_path = os.path.join(ticker_folder, f"{label}.txt")
                with open(output_path, "w", encoding="utf-8") as f:
                    f.write(section_text)
                print(f"    Saved {label} section.")
            except Exception as sec_err:
                print(f"    Failed to extract section {section_num}: {sec_err}")

    except Exception as e:
        print(f"  Failed for {ticker}: {e}")


Processing AAPL...
10-K Filing URL: https://www.sec.gov/Archives/edgar/data/320193/000032019324000123/aapl-20240928.htm
  Extracting section 1 (business)...
    Saved business section.
  Extracting section 1A (risk_factors)...
    Saved risk_factors section.
  Extracting section 7 (mdna)...
    Saved mdna section.


In [8]:
# Truncate text to fit GPT-3.5-turbo token limits
def truncate(text, max_words=3000):
    """Return the first ``max_words`` whitespace-separated words of ``text``.

    The kept words are re-joined with single spaces, so any original runs of
    whitespace (including newlines) are collapsed.
    """
    words = text.split()
    kept = words[:max_words]
    return ' '.join(kept)

In [17]:
from openai import OpenAI
from dotenv import load_dotenv
import os

# Build the OpenAI client from the key stored in the local .env file
load_dotenv()
openai_api_key = os.environ.get("OPENAI_API_KEY")
openai_client = OpenAI(api_key=openai_api_key)

# Folder holding the per-ticker 10-K section texts
sections_folder = '10k_sections'

# Destination folders for the generated documents (created if missing)
briefs_folder, full_reports_folder = 'investment_briefs', 'full_reports'
os.makedirs(name=briefs_folder, exist_ok=True)
os.makedirs(name=full_reports_folder, exist_ok=True)

# Running total of words sent for summarization across all tickers
total_word_count = 0

# Use OpenAI to summarize
def generate_research_report(business_text, risk_text, mdna_text, ticker, model='gpt-3.5-turbo'):
    """Ask the chat model for a full equity research report on ``ticker``.

    Args:
        business_text: Text of the 10-K business section.
        risk_text: Text of the 10-K risk-factors section.
        mdna_text: Text of the 10-K MD&A section.
        ticker: Ticker symbol interpolated into the prompt.
        model: Chat model name passed to the API; defaults to the previously
            hard-coded 'gpt-3.5-turbo'.

    Returns:
        The report text from the first completion choice.

    Raises:
        ValueError: If the completion carries no text content.
    """
    response = openai_client.chat.completions.create(
        model=model,
        messages=[
            # System message: tells the model who it is and how to behave
            {"role": "system", "content": "You are a financial analyst assistant. Respond in clear, concise, professional writing."},
            # User message: tells the model what to do
            {"role": "user", "content": f"""Write a detailed equity research report on {ticker} based on the sections provided from its latest 10-K filing.

**Strict formatting rules:**
- Do NOT use any Markdown symbols (no *, **, #, -, or bullets).
- Do NOT use headings or numbered lists.
- Use plain paragraph text only.
- Write in clear, professional prose, using proper paragraph breaks for each section.

The report will be inserted directly into a Microsoft Word document.

Use the following structure:

1. Basic Information — Include placeholders:
   - Ticker: {ticker}
   - Exchange: [insert exchange]
   - Sector / Industry: [insert sector and industry]
   - Stock Price: [insert]
   - Market Cap: [insert]
   - Target Price: [insert]
   - Float / Liquidity: [insert]
   - Major Shareholders: [insert]

2. Business Description — Summarize the company’s operations, products/services, revenue drivers, and operating model based on the 10-K.

3. Industry Overview & Competitive Positioning — Provide a general overview of the industry and where the company fits. Note: use [insert industry insights here] as a placeholder if needed.

4. Investment Summary — Include significant developments and company outlook. Do not include a Buy/Hold/Sell rating; instead, write: [insert investment recommendation here].

5. Valuation — Use a placeholder to note this will be added manually:
   [insert valuation summary with methods and key ratios]

6. Financial Analysis — Summarize recent financial performance and notable trends. Include any accounting quirks or nonrecurring items mentioned in the MD&A.

7. Investment Risks — Describe major risks the company faces, using what's provided in the 10-K.

8. Environmental, Social, and Governance (ESG) — Provide a paragraph summarizing ESG-related disclosures if available. Otherwise, write: [insert ESG overview].

9. Analyst Commentary — Placeholder for your personal judgment:
   [insert your brief commentary on thesis, conviction level, or flags]

Here are the source materials:

- Business Description:
{business_text}

- Risk Factors:
{risk_text}

- MD&A:
{mdna_text}
"""}
            ]
        )
    report = response.choices[0].message.content
    if report is None:
        # Fail here with a clear message rather than later when the caller
        # tries to write None to a file.
        raise ValueError(f"OpenAI returned no text content for the {ticker} research report")
    return report

# Generate investment brief
def generate_investment_brief(full_report, ticker, model='gpt-3.5-turbo'):
    """Ask the chat model to condense a full research report into a short brief.

    Args:
        full_report: Text of the full-length research report to summarize.
        ticker: Ticker symbol interpolated into the prompt.
        model: Chat model name passed to the API; defaults to the previously
            hard-coded 'gpt-3.5-turbo'.

    Returns:
        The brief text from the first completion choice.

    Raises:
        ValueError: If the completion carries no text content.
    """
    response = openai_client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a financial analyst assistant. Respond in clear, concise, professional writing."},
            {"role": "user", "content": f"""Write a one-page investment brief for {ticker} based on the following full-length equity research report. Do not use bullet points or Markdown formatting. Write 2 to 4 clear, objective paragraphs in flowing prose.

Focus on:
- A high-level overview of the company's business and operations
- Key recent developments or strategic moves
- Summary of financial performance or trends mentioned
- Major risks or areas of concern
- Forward-looking observations (e.g., expected challenges or momentum)

Do not include a Buy/Hold/Sell recommendation or subjective opinions. The brief should be informative and professional, suitable for internal use or to hand to a portfolio manager.

{full_report}
"""}
        ]
    )
    brief = response.choices[0].message.content
    if brief is None:
        # Fail here with a clear message rather than later when the caller
        # tries to write None to a file.
        raise ValueError(f"OpenAI returned no text content for the {ticker} investment brief")
    return brief
    
# Main Processing
for ticker in tickers:
    print(f"\nProcessing report for {ticker}...")
    try:
        ticker_folder = os.path.join(sections_folder, ticker)

        # Load the three saved 10-K sections, in this order:
        # business, risk factors, MD&A
        loaded_sections = []
        for section_file in ("business.txt", "risk_factors.txt", "mdna.txt"):
            with open(os.path.join(ticker_folder, section_file), "r", encoding="utf-8") as f:
                loaded_sections.append(f.read())
        business_text, risk_text, mdna_text = loaded_sections

        # Business and risk text are capped at 2000 words each;
        # MD&A truncation is currently disabled, so it is sent in full.
        business_text = truncate(business_text, max_words=2000)
        risk_text = truncate(risk_text, max_words=2000)
        # mdna_text = truncate(mdna_text, max_words=4000)

        # Word count tracking (if using gpt-4-turbo, no need to truncate)
        section_word_count = (
            len(business_text.split())
            + len(risk_text.split())
            + len(mdna_text.split())
        )
        total_word_count = total_word_count + section_word_count
        print(f"  Word count for {ticker}: {section_word_count} words")

        # Full report: generate, then persist as <ticker>_full_report.txt
        full_report = generate_research_report(business_text, risk_text, mdna_text, ticker)
        full_report_path = os.path.join(full_reports_folder, f"{ticker}_full_report.txt")
        with open(full_report_path, "w", encoding="utf-8") as f:
            f.write(full_report)
        print(f"  Full report saved for {ticker}.")

        # Investment brief: derived from the full report, saved as <ticker>_brief.txt
        brief = generate_investment_brief(full_report, ticker)
        brief_path = os.path.join(briefs_folder, f"{ticker}_brief.txt")
        with open(brief_path, "w", encoding="utf-8") as f:
            f.write(brief)
        print(f"  Investment brief saved for {ticker}.")

    except Exception as e:
        # Any failure for one ticker is reported and the loop moves on
        print(f"  Failed to process {ticker}: {e}")

print(f"\n Total words summarized across all tickers: {total_word_count}")


Processing report for AAPL...
  Word count for AAPL: 6453 words
  Full report saved for AAPL.
  Investment brief saved for AAPL.

 Total words summarized across all tickers: 6453


In [18]:
from docx import Document
from docx.shared import Pt, Inches
from docx.enum.text import WD_ALIGN_PARAGRAPH
import os


def txt_to_docx(folder_path):
    """Convert every ``.txt`` file in ``folder_path`` to a Word document in the same folder.

    The part of the filename before the first underscore is used as the ticker.
    Files whose name contains ``brief`` become "<ticker> Investment Brief.docx";
    all others become "<ticker> Full Report.docx". The document gets a
    left-aligned title heading, and each non-empty chunk of text separated by a
    blank line becomes a left-aligned paragraph whose first run is set to
    12pt Times New Roman.
    """
    text_files = [name for name in os.listdir(folder_path) if name.endswith(".txt")]

    for filename in text_files:
        ticker = filename.partition("_")[0]

        # Name Word file based on type
        if 'brief' in filename:
            docx_filename = f"{ticker} Investment Brief.docx"
        else:
            docx_filename = f"{ticker} Full Report.docx"

        with open(os.path.join(folder_path, filename), "r", encoding="utf-8") as f:
            content = f.read()

        doc = Document()

        # Title heading is the output filename without its extension
        title = doc.add_heading(docx_filename.replace(".docx", ""), level=0)
        title.alignment = WD_ALIGN_PARAGRAPH.LEFT

        for chunk in content.split("\n\n"):
            body_text = chunk.strip()
            if not body_text:
                # Skip whitespace-only chunks
                continue
            p = doc.add_paragraph(body_text)
            first_run_font = p.runs[0].font
            first_run_font.name = 'Times New Roman'
            first_run_font.size = Pt(12)
            p.alignment = WD_ALIGN_PARAGRAPH.LEFT

        doc.save(os.path.join(folder_path, docx_filename))
        print(f"✅ Converted: {filename} → {docx_filename}")

# Run on both folders
# Full reports are converted first, then the investment briefs
for report_dir in ('full_reports', 'investment_briefs'):
    txt_to_docx(report_dir)

✅ Converted: AAPL_full_report.txt → AAPL Full Report.docx
✅ Converted: AAPL_brief.txt → AAPL Investment Brief.docx
