In [3]:
import os
import json
from bs4 import BeautifulSoup
from datetime import datetime

In [4]:
import os
import json
from bs4 import BeautifulSoup
from datetime import datetime


def _creation_timestamp(path):
  """Return the best available creation time of `path`, in seconds since the epoch."""
  file_stat = os.stat(path)
  # `st_birthtime` is the real creation time on platforms that expose it.
  # `st_ctime` (what os.path.getctime returns) is the last metadata change on
  # Unix, so it is only used when no birth time is available.
  birthtime = getattr(file_stat, "st_birthtime", None)
  return file_stat.st_ctime if birthtime is None else birthtime


def generate_projects_json(project_root, output_path, overwrite=False):
  """
  Scans subfolders in the given project_root for index.html files and
  generates project data JSON. Each entry contains:
      - title: from meta og:title
      - description: from meta og:description
      - slug: folder name
      - meta: dictionary of all meta tags with name="project:<property>"
      - created: ISO 8601 formatted (local time) creation time of index.html;
        the file's birth time is used where the platform reports one,
        otherwise the ctime is used as a fallback

  The array is sorted by creation time, newest first. Projects with the same
  creation time are ordered by slug so the output is reproducible.

  Args:
      project_root: directory whose immediate subfolders are scanned.
      output_path: file the JSON is written to when overwrite=True.
      overwrite: if True, writes to `output_path`. Otherwise, prints the JSON
          to console and leaves `output_path` untouched.

  Returns:
      None. The result is either written to disk or printed.

  Subfolders without an index.html are skipped silently. Subfolders whose
  index.html lacks og:title or og:description are skipped with a warning.
  Any subfolder containing a file named `EXCLUDED_FROM_PROJECTS` will be ignored.
  """
  projects = []

  # os.listdir() returns entries in arbitrary order; iterating in sorted order
  # keeps the log output stable and makes the slug the tie-breaker below.
  for folder_name in sorted(os.listdir(project_root)):
    folder_path = os.path.join(project_root, folder_name)
    index_path = os.path.join(folder_path, "index.html")
    exclude_marker = os.path.join(folder_path, "EXCLUDED_FROM_PROJECTS")

    # Skip if not a directory or missing index.html
    if not os.path.isdir(folder_path) or not os.path.isfile(index_path):
      continue

    # Skip directory if exclusion marker is present
    if os.path.isfile(exclude_marker):
      print(f"🚫 skipping {folder_name}: found EXCLUDED_FROM_PROJECTS")
      continue

    # Parse HTML
    with open(index_path, "r", encoding="utf-8") as f:
      soup = BeautifulSoup(f, "html.parser")

    title_meta = soup.find("meta", property="og:title")
    desc_meta = soup.find("meta", property="og:description")

    if not title_meta or not desc_meta:
      print(f"⚠️  skipping {folder_name}: missing og:title or og:description")
      continue

    # Collect meta tags starting with name="project:"; the prefix is dropped
    # from the key, and a later duplicate key overwrites an earlier one.
    meta_tags = {}
    for meta in soup.find_all("meta", attrs={"name": True}):
      name_attr = meta["name"].strip()
      if name_attr.startswith("project:"):
        key = name_attr[len("project:"):].strip()
        meta_tags[key] = meta.get("content", "").strip()

    created_iso = datetime.fromtimestamp(_creation_timestamp(index_path)).isoformat()

    projects.append({
        "title": title_meta.get("content", "").strip(),
        "description": desc_meta.get("content", "").strip(),
        "slug": folder_name,
        "meta": meta_tags,
        "created": created_iso
    })

  # Sort by creation time, newest first. list.sort() is stable even with
  # reverse=True, so entries with equal timestamps keep their slug order.
  projects.sort(key=lambda project: project["created"], reverse=True)

  if overwrite:
    with open(output_path, "w", encoding="utf-8") as f:
      json.dump(projects, f, indent=2)
    print(f"✅ generated {output_path} with {len(projects)} projects.")
  else:
    print(json.dumps(projects, indent=2))

In [5]:
# Rebuild the project index: scan the parent directory (relative to the current
# working directory) and write the result to the assets folder.
generate_projects_json("../", "../assets/projects_data.json", overwrite=True)

🚫 skipping practice: found EXCLUDED_FROM_PROJECTS
✅ generated ../assets/projects_data.json with 12 projects.
