In [43]:
import os
import requests
from bs4 import BeautifulSoup

HEADERS = {
    "User-Agent": "Your Name (your.email@example.com)"
}
BASE_URL = "https://www.sec.gov"
SAVE_DIR = "8K_Filings"
os.makedirs(SAVE_DIR, exist_ok=True)

def get_cik(ticker):
    url = "https://www.sec.gov/files/company_tickers.json"
    response = requests.get(url, headers=HEADERS)
    data = response.json()

    for key, value in data.items():
        if value["ticker"].lower() == ticker.lower():
            return str(value["cik_str"]).zfill(10)
    return None

def get_8k_filings(cik, count=3):
    search_url = f"https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK={cik}&type=8-K&count={count}&output=atom"
    response = requests.get(search_url, headers=HEADERS)
    soup = BeautifulSoup(response.text, "xml")
    entries = soup.find_all("entry")
    filings = []

    for entry in entries:
        filing_url = entry.find("filing-href").text.replace("-index.htm", ".txt")
        filing_time = entry.find("updated").text
        filings.append((filing_url, filing_time))

    return filings

def download_filing(filing_url, save_path):
    response = requests.get(filing_url, headers=HEADERS)
    if response.status_code == 200:
        with open(save_path, "w", encoding="utf-8") as f:
            f.write(response.text)
        print(f"Saved: {save_path}")
    else:
        print(f"Failed to download: {filing_url}")

def main():
    tickers = ["AAPL", "MSFT", "AMZN", "GOOGL", "META", "NVDA", "JPM", "TSLA", "PFE", "PEP"]
    
    for ticker in tickers:
        cik = get_cik(ticker)
        if not cik:
            print(f"CIK not found for {ticker}")
            continue

        filings = get_8k_filings(cik)
        for i, (filing_url, filing_time) in enumerate(filings):
            filename = f"{ticker}_8K_{i+1}_{filing_time[:10]}.txt"
            save_path = os.path.join(SAVE_DIR, filename)
            download_filing(filing_url, save_path)

if __name__ == "__main__":
    main()


Saved: 8K_Filings\AAPL_8K_1_2025-02-25.txt
Saved: 8K_Filings\AAPL_8K_2_2025-01-30.txt
Saved: 8K_Filings\AAPL_8K_3_2025-01-03.txt
Saved: 8K_Filings\AAPL_8K_4_2024-10-31.txt
Saved: 8K_Filings\AAPL_8K_5_2024-09-10.txt
Saved: 8K_Filings\AAPL_8K_6_2024-08-26.txt
Saved: 8K_Filings\AAPL_8K_7_2024-08-23.txt
Saved: 8K_Filings\AAPL_8K_8_2024-08-01.txt
Saved: 8K_Filings\AAPL_8K_9_2024-05-03.txt
Saved: 8K_Filings\AAPL_8K_10_2024-05-02.txt
Saved: 8K_Filings\MSFT_8K_1_2025-01-29.txt
Saved: 8K_Filings\MSFT_8K_2_2025-01-22.txt
Saved: 8K_Filings\MSFT_8K_3_2024-12-11.txt
Saved: 8K_Filings\MSFT_8K_4_2024-12-03.txt
Saved: 8K_Filings\MSFT_8K_5_2024-10-30.txt
Saved: 8K_Filings\MSFT_8K_6_2024-08-21.txt
Saved: 8K_Filings\MSFT_8K_7_2024-07-30.txt
Saved: 8K_Filings\MSFT_8K_8_2024-04-25.txt
Saved: 8K_Filings\MSFT_8K_9_2024-03-08.txt
Saved: 8K_Filings\MSFT_8K_10_2024-01-30.txt
Saved: 8K_Filings\AMZN_8K_1_2025-02-06.txt
Saved: 8K_Filings\AMZN_8K_2_2024-10-31.txt
Saved: 8K_Filings\AMZN_8K_3_2024-08-01.txt
Saved: 8K

Modify This part below


In [47]:
import os
os.environ["OPENAI_API_KEY"] = 

In [57]:
import os
import csv
import json
from openai import OpenAI

# Set your API key
os.environ["OPENAI_API_KEY"] = 
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

INPUT_DIR = "8K_Filings"
OUTPUT_CSV = "8k_products.csv"

def extract_product_info(text):
    prompt = f"""
You are an expert in SEC filings. Analyze the following 8-K text and identify any new product announcements.

Return ONLY JSON in this format:
{{
  "new_product": "Product Name",
  "product_description": "Brief explanation of the product"
}}

If no new product is mentioned, return:
{{
  "new_product": null,
  "product_description": null
}}

Here is the 8-K text:
{text[:4000]}  <!-- limit to 4k chars for token safety -->
"""
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.3
    )
    return response.choices[0].message.content

def parse_filename(filename):
    parts = filename.split("_")
    stock = parts[0]
    date = parts[-1].replace(".txt", "")
    return stock, date

def process_all_filings():
    results = []

    for file in os.listdir(INPUT_DIR):
        if file.endswith(".txt"):
            with open(os.path.join(INPUT_DIR, file), "r", encoding="utf-8") as f:
                text = f.read()
            
            stock, filing_date = parse_filename(file)
            json_text = extract_product_info(text)

            try:
                response_json = json.loads(json_text)
            except json.JSONDecodeError:
                print(f"⚠️ JSON decoding failed for file: {file}")
                continue
            
            results.append({
                "company_name": stock,
                "stock_name": stock,
                "filing_time": filing_date,
                "new_product": response_json["new_product"],
                "product_description": response_json["product_description"]
            })

    with open(OUTPUT_CSV, "w", newline='', encoding="utf-8") as csvfile:
        fieldnames = ["company_name", "stock_name", "filing_time", "new_product", "product_description"]
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for row in results:
            writer.writerow(row)

if __name__ == "__main__":
    process_all_filings()


In [3]:
HEADERS = {"User-Agent": "Rafael Trotter ra494491@ucf.edu"}

In [5]:
# SEC EDGAR base URL
BASE_URL = "https://www.sec.gov"

In [7]:
# Create a directory to store filings
SAVE_DIR = "8K_Filings"
os.makedirs(SAVE_DIR, exist_ok=True)

In [13]:
def get_cik(ticker):
    ticker = ticker.lower()
    url = f"https://www.sec.gov/files/company_tickers.json"
    response = requests.get(url, headers=HEADERS)
    data = response.json()

In [33]:
def get_cik_by_ticker(data, ticker):
    for key, value in data.items():
        if value["ticker"].lower() == ticker.lower():
            return str(value["cik_str"]).zfill(10)  # Format as 10-digit CIK
    return None


In [37]:
def get_8k_filings(cik, count=5):
    """Fetch the most recent 8-K filings for a given company CIK."""
    search_url = f"https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK={cik}&type=8-K&count={count}&output=atom"
    response = requests.get(search_url, headers=HEADERS)

    if response.status_code != 200:
        print("Error fetching filings.")
        return []

    soup = BeautifulSoup(response.text, "xml")
    entries = soup.find_all("entry")
    filings = []

    for entry in entries:
        filing_url = entry.find("filing-href").text.replace("-index.htm", ".txt")
        filings.append(filing_url)

    return filings


In [39]:
def download_filing(filing_url, save_path):
    """Download and save an 8-K filing."""
    response = requests.get(filing_url, headers=HEADERS)

    if response.status_code == 200:
        with open(save_path, "w", encoding="utf-8") as f:
            f.write(response.text)
        print(f"Saved: {save_path}")
    else:
        print(f"Failed to download: {filing_url}")


In [41]:
def main():
    ticker = input("Enter stock ticker (e.g., AAPL): ").strip().upper()
    cik = get_cik(ticker)

    if not cik:
        print("Invalid ticker or CIK not found.")
        return

    print(f"Fetching 8-K filings for CIK: {cik}")
    filings = get_8k_filings(cik)

    if not filings:
        print("No recent 8-K filings found.")
        return

    for idx, filing_url in enumerate(filings):
        save_path = os.path.join(SAVE_DIR, f"{ticker}_8K_{idx+1}.txt")
        download_filing(filing_url, save_path)


if __name__ == "__main__":
    main()


Enter stock ticker (e.g., AAPL):  AAPL


Invalid ticker or CIK not found.
