In [1]:
import json
import os

from usfm_grammar import USFMParser

USFM_FOLDER = "data/bsb_usfm"
usfm_files = [
    os.path.join(USFM_FOLDER, file)
    for file in os.listdir(USFM_FOLDER)
    if file.endswith(".usfm")
]

In [2]:
bible_data = {}

for file in usfm_files:
    with open(file, "r", encoding="utf-8") as f:
        content = f.read()
        parser = USFMParser(content)

        # Check for errors
        if parser.errors:
            print(f"Errors in file {file}: {parser.errors}")
        else:
            # Convert to dictionary (USJ format)
            bible_data[file] = parser.to_usj()

Errors in file data/bsb_usfm/38ZECBSB.usfm: [('At Point(row=553, column=0)', '\\d\n\\v 1 This is the burden of the word of the LORD concerning Israel.'), ('At Point(row=554, column=65)', '.')]


KeyboardInterrupt: 

In [4]:
print(json.dumps(bible_data[usfm_files[0]], indent=4))

{
    "type": "USJ",
    "version": "3.1",
    "content": [
        {
            "type": "book",
            "marker": "id",
            "content": [
                "- Berean Study Bible"
            ],
            "code": "RUT"
        },
        {
            "type": "para",
            "marker": "h",
            "content": [
                "Ruth\n"
            ]
        },
        {
            "type": "para",
            "marker": "toc1",
            "content": [
                "Ruth\n"
            ]
        },
        {
            "type": "para",
            "marker": "mt1",
            "content": [
                "Ruth\n"
            ]
        },
        {
            "type": "chapter",
            "marker": "c",
            "number": "1",
            "sid": "RUT 1"
        },
        {
            "type": "para",
            "marker": "s1",
            "content": [
                "Naomi Becomes a Widow\n"
            ]
        },
        {
            "type": "para",
    

Single JSON file


In [8]:
import json

# Save the entire Bible data to a single JSON file
output_file = "bible3.json"
with open(output_file, "w", encoding="utf-8") as f:
    json.dump(bible_data, f, ensure_ascii=False, indent=2)

print(f"Bible data saved to {output_file}")


Bible data saved to bible.json


Multiple JSON Files (One Per Book)


In [9]:
import json
import os

# Create a folder for the JSON files
output_dir = "bible_json"
os.makedirs(output_dir, exist_ok=True)

# Save each book as an individual JSON file
for file_name, book_data in bible_data.items():
    book_name = os.path.splitext(os.path.basename(file_name))[0]
    output_file = os.path.join(output_dir, f"{book_name}.json")
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(book_data, f, ensure_ascii=False, indent=2)

print(f"Bible data saved to {output_dir}")


Bible data saved to bible_json


Full flow


In [1]:
import json
import os

from usfm_grammar import USFMParser

# Canonical Order for Bible Books
CANONICAL_ORDER = [
    # Torah (Pentateuch)
    "01GENBSB",
    "02EXOBSB",
    "03LEVBSB",
    "04NUMBSB",
    "05DEUBSB",
    # Historical Books
    "06JOSBSB",
    "07JDGBSB",
    "08RUTBSB",
    "091SABSB",
    "102SABSB",
    "111KIBSB",
    "122KIBSB",
    "131CHBSB",
    "142CHBSB",
    "15EZRBSB",
    "16NEHBSB",
    "17ESTBSB",
    # Wisdom Literature
    "18JOBBSB",
    "19PSABSB",
    "20PROBSB",
    "21ECCBSB",
    "22SNGBSB",
    # Major Prophets
    "23ISABSB",
    "24JERBSB",
    "25LAMBSB",
    "26EZKBSB",
    "27DANBSB",
    # Minor Prophets
    "28HOSBSB",
    "29JOLBSB",
    "30AMOBSB",
    "31OBABSB",
    "32JONBSB",
    "33MICBSB",
    "34NAMBSB",
    "35HABBSB",
    "36ZEPBSB",
    "37HAGBSB",
    "38ZECBSB",
    "39MALBSB",
    # New Testament
    "41MATBSB",
    "42MRKBSB",
    "43LUKBSB",
    "44JHNBSB",
    "45ACTBSB",
    "46ROMBSB",
    "471COBSB",
    "482COBSB",
    "49GALBSB",
    "50EPHBSB",
    "51PHPBSB",
    "52COLBSB",
    "531THBSB",
    "542THBSB",
    "551TIBSB",
    "562TIBSB",
    "57TITBSB",
    "58PHMBSB",
    "59HEBBSB",
    "60JASBSB",
    "611PEBSB",
    "622PEBSB",
    "631JNBSB",
    "642JNBSB",
    "653JNBSB",
    "66JUDBSB",
    "67REVBSB",
]

# Folder containing USFM files
USFM_FOLDER = "data/bsb_usfm"

# Initialize ordered Bible data
ordered_bible_data = {}

# Iterate through canonical order to preserve sequence
for book_code in CANONICAL_ORDER:
    file_path = os.path.join(USFM_FOLDER, f"{book_code}.usfm")
    if os.path.exists(file_path):
        with open(file_path, "r", encoding="utf-8") as f:
            content = f.read()
            parser = USFMParser(content)

            # Check for errors
            if parser.errors:
                print(f"Errors in file {file_path}: {parser.errors}")
                continue

            usj_data = parser.to_usj()

            # Add parsed data to ordered_bible_data
            ordered_bible_data[book_code] = usj_data

# Save the ordered Bible data to a JSON file
output_file = "ordered_bible.json"
with open(output_file, "w", encoding="utf-8") as f:
    json.dump(ordered_bible_data, f, ensure_ascii=False, indent=2)

print(f"Ordered Bible data saved to {output_file}")


Errors in file data/bsb_usfm/19PSABSB.usfm: [('At Point(row=217, column=179)', 'A Psalm of David.'), ('At Point(row=217, column=195)', '.'), ('At Point(row=256, column=87)', 'of David, which he sang to the LORD concerning the words of Cush, a Benjamite.'), ('At Point(row=256, column=95)', ','), ('At Point(row=256, column=151)', ','), ('At Point(row=256, column=164)', '.'), ('At Point(row=322, column=145)', 'A Psalm of David.'), ('At Point(row=322, column=161)', '.'), ('At Point(row=543, column=152)', 'A Psalm of David.'), ('At Point(row=543, column=168)', '.'), ('At Point(row=673, column=120)', 'of David.'), ('At Point(row=673, column=128)', '.'), ('At Point(row=1904, column=159)', 'so that the king drove him away.'), ('At Point(row=1904, column=190)', '.'), ('At Point(row=2675, column=171)', 'of the sons of Korah.'), ('At Point(row=2675, column=191)', '.'), ('At Point(row=2773, column=204)', 'of the sons of Korah. A love song.'), ('At Point(row=2773, column=224)', '.'), ('At Point(row