Let's load the dataset and take a look at its structure.
(On first use, the data is downloaded and cached under ~/.cache/huggingface/datasets by default.)

In [None]:
from datasets import load_dataset

# Load the KJV Bible dataset from the Hugging Face Hub.
ds = load_dataset("JDRJ/kjv-bible")

# Bind the split once so the inspection calls below read cleanly.
train_split = ds["train"]

# Overview of the whole DatasetDict: available splits and some info.
print(ds)
# Column names and their types.
print(train_split.features)
# The first row, returned as a plain dict.
print(train_split[0])
# Total number of rows (verses) in the split.
print(len(train_split))


Let's get a sense of the data:

In [None]:
# Preview the first three rows as "Book Chapter:Verse   Text".
for idx in range(3):
    row = ds["train"][idx]
    book, chapter, verse, text = row["Book"], row["Chapter"], row["Verse"], row["Text"]
    print(f"{book} {chapter}:{verse}   {text}")

Build a list of formatted verse strings (reference followed by the verse text):

In [None]:
# Build one "Book Chapter:Verse Text" string per row – handy for simple search or RAG.
verses = [
    f"{ex['Book']} {ex['Chapter']}:{ex['Verse']} {ex['Text']}" for ex in ds["train"]
]

# Quick sanity check. Index 0 is the first verse (index 1 would be the second),
# and -1 is the last verse regardless of how many rows the split contains,
# so no hard-coded position can go out of range.
print(verses[0])  # first verse
print(verses[-1])  # last verse – expected to be in Revelation

If you want readable book names (e.g. "Genesis" instead of "Ge"), add a simple mapping dictionary:

In [None]:
# KJV book name mapping (standard 66 books), keyed by the short book
# abbreviation and giving the full English book name.
# The table is written as two sections and merged in canonical order.

# Old Testament: 39 books, Genesis through Malachi.
_old_testament_books = dict(
    [
        ("Ge", "Genesis"),
        ("Ex", "Exodus"),
        ("Le", "Leviticus"),
        ("Nu", "Numbers"),
        ("De", "Deuteronomy"),
        ("Jos", "Joshua"),
        ("Jg", "Judges"),
        ("Ru", "Ruth"),
        ("1Sa", "1 Samuel"),
        ("2Sa", "2 Samuel"),
        ("1Ki", "1 Kings"),
        ("2Ki", "2 Kings"),
        ("1Ch", "1 Chronicles"),
        ("2Ch", "2 Chronicles"),
        ("Ezr", "Ezra"),
        ("Ne", "Nehemiah"),
        ("Es", "Esther"),
        ("Jb", "Job"),
        ("Ps", "Psalms"),
        ("Pr", "Proverbs"),
        ("Ec", "Ecclesiastes"),
        ("So", "Song of Solomon"),
        ("Is", "Isaiah"),
        ("Je", "Jeremiah"),
        ("La", "Lamentations"),
        ("Eze", "Ezekiel"),
        ("Da", "Daniel"),
        ("Ho", "Hosea"),
        ("Jl", "Joel"),
        ("Am", "Amos"),
        ("Ob", "Obadiah"),
        ("Jon", "Jonah"),
        ("Mi", "Micah"),
        ("Na", "Nahum"),
        ("Hab", "Habakkuk"),
        ("Zep", "Zephaniah"),
        ("Hg", "Haggai"),
        ("Zec", "Zechariah"),
        ("Mal", "Malachi"),
    ]
)

# New Testament: 27 books, Matthew through Revelation.
_new_testament_books = dict(
    [
        ("Mt", "Matthew"),
        ("Mk", "Mark"),
        ("Lk", "Luke"),
        ("Jn", "John"),
        ("Ac", "Acts"),
        ("Ro", "Romans"),
        ("1Co", "1 Corinthians"),
        ("2Co", "2 Corinthians"),
        ("Ga", "Galatians"),
        ("Eph", "Ephesians"),
        ("Php", "Philippians"),
        ("Col", "Colossians"),
        ("1Th", "1 Thessalonians"),
        ("2Th", "2 Thessalonians"),
        ("1Ti", "1 Timothy"),
        ("2Ti", "2 Timothy"),
        ("Ti", "Titus"),
        ("Phm", "Philemon"),
        ("He", "Hebrews"),
        ("Ja", "James"),
        ("1Pe", "1 Peter"),
        ("2Pe", "2 Peter"),
        ("1Jn", "1 John"),
        ("2Jn", "2 John"),
        ("3Jn", "3 John"),
        ("Jd", "Jude"),
        ("Re", "Revelation"),
    ]
)

# Merge, Old Testament first, so iteration order follows the canonical book order.
book_map = {**_old_testament_books, **_new_testament_books}

# Preview the first three rows again, this time with the full book name.
for idx in range(3):
    row = ds["train"][idx]
    abbr = row["Book"]
    # An abbreviation missing from book_map is printed unchanged instead of raising.
    book_name = book_map.get(abbr, abbr)
    print(f"{book_name} {row['Chapter']}:{row['Verse']}   {row['Text']}")
