# Project Database Scanner

This notebook scans ~/git/active and populates the project database.

In [1]:
# Setup - load environment and initialize database
import os
from pathlib import Path
from dotenv import load_dotenv

In [2]:

# Pull settings from a .env file into the process environment.
# No explicit path is given, so python-dotenv locates the file itself.
load_dotenv()

# Echo the configured database location as a quick sanity check
database_path = os.getenv("DATABASE_PATH")
print(f"DATABASE_PATH: {database_path}")

DATABASE_PATH: /home/romilly/git/active/project-database/data/projects.sqlite


In [3]:
# Database helpers and the ORM model used by the cells that follow
from project_database.database import (
    get_session,
    init_database,
)
from project_database.models import Project

# Run the package's database initialisation before any session is opened
init_database()
print("Database initialized successfully!")

Database initialized successfully!


In [4]:
# Check current database state: how many projects are stored before the scan
session = get_session()
try:
    count = session.query(Project).count()
    print(f"Current projects in database: {count}")
finally:
    # Always release the session, even if the query raises
    session.close()

Current projects in database: 322


In [5]:
# Scan ~/git/active and load what is found into the project database
from project_database.scanner import populate_database

projects_dir = Path.home().joinpath("git", "active")
print(f"Scanning: {projects_dir}")

# May take a moment to finish
populate_database(projects_dir)

print("\nScan complete!")

Scanning: /home/romilly/git/active

Scan complete!


In [6]:
# Show statistics: total project count, plus how many projects have each
# optional attribute (README path, Logseq page, GitHub URL) set.
session = get_session()
try:
    total = session.query(Project).count()
    with_readme = session.query(Project).filter(Project.readme_path.isnot(None)).count()
    with_logseq = session.query(Project).filter(Project.logseq_page.isnot(None)).count()
    with_github = session.query(Project).filter(Project.github_url.isnot(None)).count()

    print(f"Total projects: {total}")
    print(f"Projects with README: {with_readme}")
    print(f"Projects with Logseq page: {with_logseq}")
    print(f"Projects with GitHub URL: {with_github}")
finally:
    # Always release the session, even if one of the queries raises
    session.close()

Total projects: 322
Projects with README: 154
Projects with Logseq page: 177
Projects with GitHub URL: 131


In [7]:
# Show sample projects with GitHub URLs (at most 10)
session = get_session()
try:
    github_projects = (
        session.query(Project)
        .filter(Project.github_url.isnot(None))
        .limit(10)
        .all()
    )

    print("Sample projects with GitHub URLs:\n")
    for p in github_projects:
        print(f"  {p.name}")
        print(f"    GitHub: {p.github_url}")
        # The Logseq line is only shown for projects that have a page recorded
        if p.logseq_page:
            print(f"    Logseq: {p.logseq_page}")
        print()
finally:
    # Always release the session, even if the query or printing raises
    session.close()

Sample projects with GitHub URLs:

  servus-orig
    GitHub: https://github.com/romilly/servus

  redpajama.cpp
    GitHub: https://github.com/romilly/redpajama.cpp

  github_star_tracker
    GitHub: https://github.com/romilly/github_star_tracker

  ii-microservice
    GitHub: https://github.com/romilly/ii-microservice
    Logseq: project/ii-microservice

  oxo
    GitHub: https://github.com/romilly/oxo

  mooc-talk
    GitHub: https://github.com/romilly/mooc-talk

  pimoroni-booklet
    GitHub: https://github.com/romilly/pimoroni-booklet

  P3SE
    GitHub: https://github.com/romilly/P3SE
    Logseq: P3SE

  LLMs-from-scratch
    GitHub: https://github.com/romilly/LLMs-from-scratch

  TIL-private
    GitHub: https://github.com/romilly/TIL-private
    Logseq: project/TIL-private



In [None]:
# Query projects by name (substring match)
session = get_session()
try:
    search_term = "project-database"  # Change this to search for different projects

    # contains(..., autoescape=True) renders a LIKE '%term%' match but escapes
    # any '%' or '_' in the term, so names such as "github_star_tracker" are
    # matched literally instead of '_' acting as a single-character wildcard.
    results = (
        session.query(Project)
        .filter(Project.name.contains(search_term, autoescape=True))
        .all()
    )

    print(f"Projects matching '{search_term}':\n")
    for p in results:
        print(f"  {p.name}")
        print(f"    Path: {p.path}")
        print(f"    README: {p.readme_path or 'None'}")
        print(f"    Logseq: {p.logseq_page or 'None'}")
        print(f"    GitHub: {p.github_url or 'None'}")
        print()
finally:
    # Always release the session, even if the query or printing raises
    session.close()

## Optional: View all projects

Run the cell below to see all projects, ordered by name (may be a long list!)

In [9]:
# List every project, ordered by name, with its README path and GitHub URL
session = get_session()
try:
    all_projects = session.query(Project).order_by(Project.name).all()

    for p in all_projects:
        print(f"{p.name}: {p.readme_path or 'no README'} {p.github_url or 'no GitHub'}")
finally:
    # Always release the session, even if the query or printing raises
    session.close()

.claude: no README no GitHub
AutoPair: no README no GitHub
BASB: /home/romilly/git/active/BASB/README.md no GitHub
BCPNN: /home/romilly/git/active/BCPNN/README.md https://github.com/romilly/BCPNN
ChatGPT-Micro-Cap-Experiment: /home/romilly/git/active/ChatGPT-Micro-Cap-Experiment/README.md https://github.com/romilly/ChatGPT-Micro-Cap-Experiment
ComfyUI: /home/romilly/git/active/ComfyUI/README.md https://github.com/comfyanonymous/ComfyUI
Emerge-Education-Pitch: no README no GitHub
HelixNet-experiments: no README no GitHub
LLMs-from-scratch: /home/romilly/git/active/LLMs-from-scratch/README.md https://github.com/romilly/LLMs-from-scratch
Lanchain-AutoGPT: /home/romilly/git/active/Lanchain-AutoGPT/README.md no GitHub
LinkedIn: no README no GitHub
LlamaFarmer: /home/romilly/git/active/LlamaFarmer/README.md no GitHub
LlamaParse-experiments: no README no GitHub
MinerU: /home/romilly/git/active/MinerU/README.md https://github.com/romilly/MinerU
OPRO: /home/romilly/git/active/OPRO/README.md no 