# Ingesting repo documentation

This notebook downloads a Python repo (and optionally installs it), then ingests all markdown documents from the repo and, optionally, from all of the repo's dependencies.

Source documents are stored for traditional RAG, but an additional memory layer makes summarized and synthesized knowledge available to the assistant.

A bash version of the script is available at [ingest_repo_docs.sh](./ingest_repo_docs.sh).

In [None]:
# first, clone the repo to a tmp dir
import os
import subprocess

from elroy.config.paths import get_home_dir


# Checkout location; every git/uv command and the markdown scan below target it.
REPO_DIR = "/tmp/elroy"
REPO_URL = "https://github.com/elroy-bot/elroy.git"

# Set to True to ingest not only the project's documents, but the docs for the
# dependencies as well (installs the project into a venv inside the checkout).
INSTALL_PROJECT = False

if not os.path.exists(REPO_DIR):
    # Pass the destination explicitly: without it git clones into ./elroy under the
    # kernel's working directory, and the scan of REPO_DIR below finds nothing.
    subprocess.run(
        ["git", "clone", "--branch", "main", "--single-branch", REPO_URL, REPO_DIR],
        check=True,  # fail loudly instead of continuing with a missing checkout
    )

if INSTALL_PROJECT:
    # Run every command inside the checkout so the venv is created under REPO_DIR,
    # where the scan below can see the installed packages' markdown files.
    subprocess.run(["git", "pull", "origin", "main"], cwd=REPO_DIR, check=True)
    subprocess.run(["uv", "venv"], cwd=REPO_DIR, check=True)

    # The venv's executables live in .venv/bin (POSIX layout, consistent with /tmp).
    venv_bin = os.path.join(REPO_DIR, ".venv", "bin")
    os.environ["PATH"] = f"{venv_bin}{os.pathsep}{os.environ['PATH']}"
    subprocess.run(["uv", "pip", "install", "-e", "."], cwd=REPO_DIR, check=True)

print(get_home_dir())

# Collect every markdown file under the checkout (recursively).
markdown_files = [
    os.path.join(root, name)
    for root, _dirs, names in os.walk(REPO_DIR)
    for name in names
    if name.endswith(".md")
]

print(f"Found {len(markdown_files)} markdown files")



In [None]:
from tqdm import tqdm
from elroy.api import Elroy
import elroy

# Show which elroy installation this kernel actually imported.
print(elroy.__file__)

# NOTE(review): check_db_migration=False presumably skips a migration check on
# startup — confirm against the Elroy API.
ai = Elroy(check_db_migration=False)
ai.ctx  # NOTE(review): bare attribute access; may exist to force context setup — confirm

# Ingest each markdown file collected by the previous cell, with a progress bar.
# to re-ingest even if there are no changes, use force_refresh=True
progress = tqdm(markdown_files)
for doc_path in progress:
    ai.ingest_doc(doc_path)