<a href="https://colab.research.google.com/github/polydeuces32/s-p500-predicter-on-Python-/blob/main/SEC_FILING.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
from datetime import datetime, timedelta

# Scrape the EDGAR "getcurrent" listing for every tracked form type over the
# last `days_back` days, collect one record per table row, and display the
# first 15 records sorted by the "Date" column.

# SEC EDGAR base URL
base_url = "https://www.sec.gov/cgi-bin/browse-edgar"

# SEC requires a custom User-Agent header
headers = {
    "User-Agent": "GiancarloVizhnay contact@example.com",  # <-- use your name/email
    "Accept-Encoding": "gzip, deflate",
    "Host": "www.sec.gov"
}

# Choose forms and how many days back to search
forms_to_track = ["4", "S-1"]  # Form 4 (insider) and S-1 (IPOs)
days_back = 10

# requests has no default timeout; without one a stalled connection would
# block the notebook cell forever.
request_timeout = 30  # seconds

# Column order of the final DataFrame. Passing it explicitly keeps the columns
# present even when nothing was scraped, so the sort below cannot fail.
result_columns = ["Date", "Company", "CIK", "Form Type", "Link"]

results = []

# Evaluate "today" once so every request of this run is relative to the same
# day, even if the loop straddles midnight.
today = datetime.today()

for form in forms_to_track:
    for day in range(days_back):
        target_date = (today - timedelta(days=day)).strftime("%Y%m%d")
        print(f"🔍 Checking {form} filings on {target_date}...")

        params = {
            "action": "getcurrent",
            "datea": target_date,
            "type": form,
            "owner": "include",
            "count": "100"
        }

        # A failed or rejected request (network error, 403, 429, ...) is
        # reported and skipped instead of being parsed as an empty page or
        # aborting the whole run and losing what was already collected.
        try:
            response = requests.get(
                base_url, headers=headers, params=params, timeout=request_timeout
            )
            response.raise_for_status()
        except requests.RequestException as exc:
            print(f"⚠️ Skipping {form} on {target_date}: {exc}")
            continue

        soup = BeautifulSoup(response.text, "html.parser")

        rows = soup.find_all("tr")[1:]  # Skip header row

        for row in rows:
            cols = row.find_all("td")
            # Rows with fewer than four cells cannot hold all four fields.
            if len(cols) < 4:
                continue
            company = cols[0].text.strip()
            form_type = cols[1].text.strip()
            cik = cols[2].text.strip()
            date_filed = cols[3].text.strip()

            # The link is taken from the anchor in the second cell; an anchor
            # without an href is treated the same as a missing anchor.
            link_tag = cols[1].find("a")
            href = link_tag.get("href") if link_tag else None
            doc_link = ("https://www.sec.gov" + href.strip()) if href is not None else None

            results.append({
                "Date": date_filed,
                "Company": company,
                "CIK": cik,
                "Form Type": form_type,
                "Link": doc_link
            })

# Create DataFrame (explicit columns so an empty result set still sorts cleanly)
df = pd.DataFrame(results, columns=result_columns)
# NOTE(review): "Date" holds the scraped text, so this is a string sort; it is
# chronological only if EDGAR returns ISO-like dates — confirm.
df = df.sort_values(by="Date", ascending=False).reset_index(drop=True)

# Display first 15 filings
import IPython
from IPython.display import display
display(df.head(15))


In [None]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
from datetime import datetime, timedelta

# Scrape the EDGAR "getcurrent" listing for one form type over the last
# `days_back` days and keep only rows whose company name contains one of
# `keywords` (case-insensitive). Shows up to 15 matches, sorted by filing date.

# EDGAR URL and headers
base_url = "https://www.sec.gov/cgi-bin/browse-edgar"
headers = {
    "User-Agent": "GiancarloVizhnay contact@example.com",
    "Accept-Encoding": "gzip, deflate",
    "Host": "www.sec.gov"
}

# CONFIG
form_filter = "S-1"   # Choose "4" for Insider Trades, "S-1" for IPOs
days_back = 5         # How many days back to scrape
keywords = ["bio", "tech"]  # Filter by these words in company names

# requests has no default timeout; without one a stalled connection would
# block the notebook cell forever.
request_timeout = 30  # seconds

# Lower-case the keywords once instead of once per scraped row.
lowered_keywords = [kw.lower() for kw in keywords]

results = []

# Evaluate "today" once so all requests are relative to the same day.
today = datetime.today()

for day in range(days_back):
    target_date = (today - timedelta(days=day)).strftime("%Y%m%d")
    print(f"🔍 Scraping {form_filter} filings on {target_date}...")

    params = {
        "action": "getcurrent",
        "datea": target_date,
        "type": form_filter,
        "owner": "include",
        "count": "100"
    }

    # Report and skip a failed request rather than parsing an error page as
    # "no filings" or aborting and losing the rows collected so far.
    try:
        response = requests.get(
            base_url, headers=headers, params=params, timeout=request_timeout
        )
        response.raise_for_status()
    except requests.RequestException as exc:
        print(f"⚠️ Skipping {target_date}: {exc}")
        continue

    soup = BeautifulSoup(response.text, "html.parser")
    rows = soup.find_all("tr")[1:]  # Skip header row

    for row in rows:
        cols = row.find_all("td")
        # Rows with fewer than four cells cannot hold all four fields.
        if len(cols) < 4:
            continue

        company = cols[0].text.strip()
        form_type = cols[1].text.strip()
        cik = cols[2].text.strip()
        date_filed = cols[3].text.strip()

        # An anchor without an href is treated the same as a missing anchor
        # (indexing link_tag['href'] would raise KeyError in that case).
        link_tag = cols[1].find("a")
        href = link_tag.get("href") if link_tag else None
        doc_link = f"https://www.sec.gov{href.strip()}" if href is not None else None

        # Filter by keyword in company name. With an empty keyword list
        # nothing matches, so nothing is collected.
        company_lower = company.lower()
        if any(kw in company_lower for kw in lowered_keywords):
            results.append({
                "Company Name": company,
                "Form Type": form_type,
                "CIK": cik,
                "Date Filed": date_filed,
                "Link to Filing": doc_link
            })

# Safely create DataFrame and display
if results:
    # Imported here so the cell does not depend on another cell having run.
    from IPython.display import display

    df = pd.DataFrame(results)
    # NOTE(review): "Date Filed" is scraped text, so this is a string sort;
    # chronological only if the dates are ISO-like — confirm.
    df = df.sort_values("Date Filed", ascending=False).reset_index(drop=True)
    pd.set_option("display.max_colwidth", None)  # show full links, untruncated
    display(df.head(15))
else:
    print("🚫 No matching filings found with keywords:", ", ".join(keywords))


In [None]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
from datetime import datetime, timedelta

# Scrape the EDGAR "getcurrent" listing for one form type over the last
# `days_back` days. If `keywords` is non-empty, keep only rows whose company
# name contains one of them (case-insensitive); otherwise keep every row.
# Shows up to 20 records, sorted by filing date.

# EDGAR config
base_url = "https://www.sec.gov/cgi-bin/browse-edgar"
headers = {
    "User-Agent": "GiancarloVizhnay contact@example.com",
    "Accept-Encoding": "gzip, deflate",
    "Host": "www.sec.gov"
}

# 📌 CONFIG
form_filter = "S-1"    # Choose "4" for Insider Trades, "S-1" for IPOs
days_back = 5          # How many days to go back
keywords = []          # e.g., ["bio", "tech"], leave empty to show ALL

# requests has no default timeout; without one a stalled connection would
# block the notebook cell forever.
request_timeout = 30  # seconds

# Lower-case the keywords once instead of once per scraped row.
lowered_keywords = [kw.lower() for kw in keywords]

results = []

# Evaluate "today" once so all requests are relative to the same day.
today = datetime.today()

# Scrape loop
for day in range(days_back):
    target_date = (today - timedelta(days=day)).strftime("%Y%m%d")
    print(f"🔍 Scraping {form_filter} filings on {target_date}...")

    params = {
        "action": "getcurrent",
        "datea": target_date,
        "type": form_filter,
        "owner": "include",
        "count": "100"
    }

    # Report and skip a failed request rather than parsing an error page as
    # "no filings" or aborting and losing the rows collected so far.
    try:
        response = requests.get(
            base_url, headers=headers, params=params, timeout=request_timeout
        )
        response.raise_for_status()
    except requests.RequestException as exc:
        print(f"⚠️ Skipping {target_date}: {exc}")
        continue

    soup = BeautifulSoup(response.text, "html.parser")
    rows = soup.find_all("tr")[1:]  # Skip header

    for row in rows:
        cols = row.find_all("td")
        # Rows with fewer than four cells cannot hold all four fields.
        if len(cols) < 4:
            continue

        company = cols[0].text.strip()

        # Only filter if keywords are specified
        if lowered_keywords:
            company_lower = company.lower()
            if not any(kw in company_lower for kw in lowered_keywords):
                continue

        form_type = cols[1].text.strip()
        cik = cols[2].text.strip()
        date_filed = cols[3].text.strip()

        # An anchor without an href is treated the same as a missing anchor
        # (indexing link_tag['href'] would raise KeyError in that case).
        link_tag = cols[1].find("a")
        href = link_tag.get("href") if link_tag else None
        doc_link = f"https://www.sec.gov{href.strip()}" if href is not None else None

        results.append({
            "Company Name": company,
            "Form Type": form_type,
            "CIK": cik,
            "Date Filed": date_filed,
            "Link to Filing": doc_link
        })

# Output result
if results:
    # Imported here so the cell does not depend on another cell having run.
    from IPython.display import display

    df = pd.DataFrame(results)
    # NOTE(review): "Date Filed" is scraped text, so this is a string sort;
    # chronological only if the dates are ISO-like — confirm.
    df = df.sort_values("Date Filed", ascending=False).reset_index(drop=True)
    pd.set_option("display.max_colwidth", None)  # show full links, untruncated
    display(df.head(20))
else:
    msg = f"No matching filings found for Form {form_filter}"
    if keywords:
        msg += " with keywords: " + ", ".join(keywords)
    print("🚫", msg)


In [None]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
from datetime import datetime, timedelta

# Scrape the EDGAR "getcurrent" listing for one form type over the last
# `days_back` days, optionally filtered by company-name keywords
# (case-insensitive), and show up to 30 records with emoji-labelled columns,
# sorted by filing date.

# EDGAR setup
base_url = "https://www.sec.gov/cgi-bin/browse-edgar"
headers = {
    "User-Agent": "GiancarloVizhnay contact@example.com",
    "Accept-Encoding": "gzip, deflate",
    "Host": "www.sec.gov"
}

# 🎛️ Settings
form_filter = "4"    # "S-1" for IPOs, "4" for insider buys/sells
days_back = 10          # Look back this many days
keywords = []          # e.g., ["tech", "bio"], leave empty to get everything

# requests has no default timeout; without one a stalled connection would
# block the notebook cell forever.
request_timeout = 30  # seconds

# Lower-case the keywords once instead of once per scraped row.
lowered_keywords = [kw.lower() for kw in keywords]

# 🗂️ Where we store what we find
results = []

# Evaluate "today" once so all requests are relative to the same day.
today = datetime.today()

# 🕵️ Loop through recent days
for day in range(days_back):
    date_str = (today - timedelta(days=day)).strftime("%Y%m%d")
    print(f"📅 Looking at {form_filter} filings from {date_str}...")

    params = {
        "action": "getcurrent",
        "datea": date_str,
        "type": form_filter,
        "owner": "include",
        "count": "100"
    }

    # Report and skip a failed request rather than parsing an error page as
    # "no filings" or aborting and losing the rows collected so far.
    try:
        response = requests.get(
            base_url, headers=headers, params=params, timeout=request_timeout
        )
        response.raise_for_status()
    except requests.RequestException as exc:
        print(f"⚠️ Skipping {date_str}: {exc}")
        continue

    soup = BeautifulSoup(response.text, "html.parser")
    rows = soup.find_all("tr")[1:]  # Skip header row

    for row in rows:
        cols = row.find_all("td")
        # Rows with fewer than four cells cannot hold all four fields.
        if len(cols) < 4:
            continue

        company = cols[0].text.strip()

        # Only filter if keywords exist
        if lowered_keywords:
            company_lower = company.lower()
            if not any(kw in company_lower for kw in lowered_keywords):
                continue

        form_type = cols[1].text.strip()
        cik = cols[2].text.strip()
        filed = cols[3].text.strip()

        # An anchor without an href is treated the same as a missing anchor
        # (indexing link_tag['href'] would raise KeyError in that case).
        link_tag = cols[1].find("a")
        href = link_tag.get("href") if link_tag else None
        doc_link = f"https://www.sec.gov{href.strip()}" if href is not None else None

        results.append({
            "🧾 Company": company,
            "📄 Form": form_type,
            "🆔 CIK": cik,
            "📅 Date Filed": filed,
            "🔗 Link": doc_link
        })

# ✅ Show what we got
if results:
    # Imported here so the cell does not depend on another cell having run.
    from IPython.display import display

    df = pd.DataFrame(results)
    # NOTE(review): "📅 Date Filed" is scraped text, so this is a string sort;
    # chronological only if the dates are ISO-like — confirm.
    df = df.sort_values("📅 Date Filed", ascending=False).reset_index(drop=True)
    pd.set_option("display.max_colwidth", None)  # show full links, untruncated

    print("✅ Here's what we found — latest at the top:\n")
    display(df.head(30))
else:
    if keywords:
        print(f"🤷 No {form_filter} filings mentioning {', '.join(keywords)} in the last {days_back} days.")
    else:
        print(f"🤷 No {form_filter} filings found in the last {days_back} days.")
