In [2]:
import zipfile
import requests
from pathlib import Path
from utils.csv import excel_to_csv

# Fetch the 2025 GAAP taxonomy archive (only when the workbook is not already
# on disk), pull the Excel workbook out of it, and export the "Concepts" sheet
# to CSV.

# Local artifacts: the extracted workbook, the CSV derived from it, and the
# downloaded archive. All of them live in the same "data" directory.
xlsx_path = Path("data/2025_GAAP_Taxonomy.xlsx")
csv_path = Path("data/2025_GAAP_Concepts.csv")
zip_url = "https://xbrl.fasb.org/resources/annualrelease/2025/GAAP_Taxonomy.zip"
zip_path = Path("data/GAAP_Taxonomy_2025.zip")

# Browser-like request headers sent with the download.
# NOTE(review): presumably the server rejects the default client User-Agent;
# confirm before simplifying these.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/114.0.0.0 Safari/537.36"
    ),
    "Accept": "application/zip",
    "Referer": "https://xbrl.fasb.org/",
}

# (connect, read) timeouts in seconds. Without a timeout, requests waits
# indefinitely on a stalled connection.
download_timeout = (10, 120)

# Ensure data dir exists
xlsx_path.parent.mkdir(parents=True, exist_ok=True)

# Download and unzip if the Excel file is not present
if not xlsx_path.exists():
    print("Downloading taxonomy zip...")
    resp = requests.get(zip_url, headers=headers, timeout=download_timeout)
    resp.raise_for_status()
    zip_path.write_bytes(resp.content)

    print("Extracting Excel file...")
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
        # Take the first archive member that looks like an Excel workbook.
        workbook_member = next(
            (
                member
                for member in zip_ref.namelist()
                if member.lower().endswith(".xlsx")
            ),
            None,
        )
        if workbook_member is None:
            # Fail here rather than later inside the CSV conversion, where the
            # cause (an archive without a workbook) would be much less obvious.
            raise FileNotFoundError(f"No .xlsx file found in {zip_path}")

        # Write the member's bytes straight to the target path. Extracting and
        # then renaming would leave empty directories behind when the workbook
        # sits inside a subfolder of the archive.
        xlsx_path.write_bytes(zip_ref.read(workbook_member))

# Convert to CSV
excel_to_csv(xlsx_path, "Concepts", csv_path)
