In [1]:
import json
from collections import defaultdict

In [2]:
def get_verse_info(src_text):
    """Count the verses in every chapter of every book listed in a JSON file.

    The file must hold a JSON array of verse records. From each record only
    ``'book_name'`` and ``'chapter_id'`` (1-based, anything ``int()`` accepts)
    are read; one record counts as one verse.

    Args:
        src_text: Path of the JSON file to read.

    Returns:
        defaultdict(list): book name -> list of verse counts, where index
        ``i`` holds the number of verses seen for chapter ``i + 1``.
    """
    with open(src_text) as f:
        records = json.load(f)

    info = defaultdict(list)
    for record in records:
        chapters = info[record['book_name']]
        chap_idx = int(record['chapter_id']) - 1
        # Pad with zero counts up to this chapter so that a chapter arriving
        # after a gap is counted instead of raising IndexError.
        while len(chapters) <= chap_idx:
            chapters.append(0)
        chapters[chap_idx] += 1

    return info
# Verse counts per chapter for each testament, read from the ESV JSON files.
ot = get_verse_info('esv_ot.txt')
nt = get_verse_info('esv_nt.txt')

# One plain-dict lookup covering both testaments (Old Testament books first).
bible = dict(ot)
bible.update(nt)

# Sanity check: every book with its number of chapters.
for book_name, verses in bible.items():
    print(book_name, len(verses))

Genesis 50
Exodus 40
Leviticus 27
Numbers 36
Deuteronomy 34
Joshua 24
Judges 21
Ruth 4
1 Samuel 31
2 Samuel 24
1 Kings 22
2 Kings 25
1 Chronicles 29
2 Chronicles 36
Ezra 10
Nehemiah 13
Esther 10
Job 42
Psalm 150
Proverbs 31
Ecclesiastes 12
Song of Solomon 8
Isaiah 66
Jeremiah 52
Lamentations 5
Ezekiel 48
Daniel 12
Hosea 14
Joel 3
Amos 9
Obadiah 1
Jonah 4
Micah 7
Nahum 3
Habakkuk 3
Zephaniah 3
Haggai 2
Zechariah 14
Malachi 4
Matthew 28
Mark 16
Luke 24
John 21
Acts 28
Romans 16
1 Corinthians 16
2 Corinthians 13
Galatians 6
Ephesians 6
Philippians 4
Colossians 4
1 Thessalonians 5
2 Thessalonians 3
1 Timothy 6
2 Timothy 4
Titus 3
Philemon 1
Hebrews 13
James 5
1 Peter 5
2 Peter 3
1 John 5
2 John 1
3 John 1
Jude 1
Revelation 22


The rule is: if a line consists only of a number, the next line contains that verse's text.
If a line starts with a number followed by text, that text is the verse's text.
Chapters are hard to tell apart in this text, so one option is to take the number of verses per chapter from the ESV and apply it here.

In [216]:
def start_new_verse(line):
    """Decide whether ``line`` begins a new verse.

    A line begins a verse when the whole line parses with ``int()``, or when
    its first three, two or one characters do (longest prefix tried first).
    ``int()`` ignores surrounding whitespace, so the matched prefix may
    include the space after the number: ``'1 In the'`` matches with length 2.

    Args:
        line: One line of text.

    Returns:
        tuple: ``(True, n)`` when a verse number was found, where the first
        ``n`` characters of ``line`` hold it (the full line length when the
        line is nothing but a number); ``(False, -1)`` otherwise.
    """
    # Only a failed conversion means "not a number". Catching these two types
    # instead of using a bare ``except`` lets KeyboardInterrupt and SystemExit
    # propagate while keeping the (False, -1) answer for non-string input.
    not_a_number = (ValueError, TypeError)

    try:
        int(line)
        return True, len(line)
    except not_a_number:
        pass

    for length in range(3, 0, -1):
        try:
            int(line[:length])
        except not_a_number:
            continue
        return True, length
    return False, -1

def clean_nlt(bible_info, nlt_src, nlt_dest):
    """Parse the raw NLT text file into book -> chapter -> list of verse texts.

    Layout of ``nlt_src`` as handled by this parser:

    * a line reading exactly ``Book Names`` starts a list of book names, one
      per line, ended by a blank line;
    * afterwards a line equal to the next not-yet-seen name of that list
      starts that book (books are only recognised in listed order);
    * a line for which ``start_new_verse`` reports a number prefix starts a
      new verse; every other line is appended to the current verse.

    Chapter boundaries are not read from the text. A chapter is closed as
    soon as it holds the number of verses that ``bible_info`` gives for it.

    Args:
        bible_info: Mapping of book name -> list of verse counts per chapter
            (index 0 is chapter 1). A book missing from the mapping is kept
            entirely in chapter 1.
        nlt_src: Path of the raw NLT text file.
        nlt_dest: Currently unused; nothing is written. Kept so existing
            calls keep working.

    Returns:
        defaultdict: ``nlt[book_name][chapter_number]`` is the list of verse
        strings of that chapter; chapter numbers start at 1.
    """
    with open(nlt_src, 'r') as src:
        lines = src.readlines()

    # dict of book name to chapter to list of verses
    nlt = defaultdict(lambda: defaultdict(list))
    book_names = []
    expected_book_idx = 0
    is_read_book_names = False
    cur_book_name = ''
    current_chapter = 1
    current_verse = ''

    for line in lines:
        line = line.strip()

        if line == 'Book Names':
            is_read_book_names = True
            continue
        if is_read_book_names:
            if line:
                book_names.append(line)
            else:
                is_read_book_names = False
            continue

        # Reading verses
        starts_next_book = (
            expected_book_idx < len(book_names)
            and line == book_names[expected_book_idx]
        )
        if starts_next_book:
            # The pending verse is the last one of the previous book.
            finished = current_verse.strip()
            if finished:
                nlt[cur_book_name][current_chapter].append(finished)
            cur_book_name = line
            current_chapter = 1
            expected_book_idx += 1
            current_verse = ''
            continue

        new_verse, length = start_new_verse(line)
        if not new_verse:
            # Continuing the current verse
            current_verse += ' {}'.format(line)
            continue

        finished = current_verse.strip()
        if finished:
            nlt[cur_book_name][current_chapter].append(finished)

        # Advance the chapter only after storing the previous verse: the line
        # being read already belongs to the next chapter once this one is full.
        # Counts come from the bible_info argument, not from a notebook global.
        verse_counts = bible_info.get(cur_book_name, ())
        if current_chapter - 1 < len(verse_counts):
            if len(nlt[cur_book_name][current_chapter]) == verse_counts[current_chapter - 1]:
                current_chapter += 1
        current_verse = line[length:]

    # The file ends without another verse or book line, so the verse still
    # being accumulated has to be stored here or it would be lost.
    finished = current_verse.strip()
    if finished:
        nlt[cur_book_name][current_chapter].append(finished)

    return nlt

def is_valid(gt, nlt):
    """Check that ``nlt`` has the books, chapters and verse counts of ``gt``.

    Prints a message describing the first mismatch found and stops there.
    ``nlt`` is only read, never modified.

    Args:
        gt: Reference mapping of book name -> list of verse counts per
            chapter (index 0 is chapter 1).
        nlt: Mapping of book name -> mapping of 1-based chapter number ->
            list of verses.

    Returns:
        bool: True when every book, chapter count and verse count matches,
        False at the first mismatch.
    """
    if len(gt) != len(nlt): # Number of books
        print(f'Expected {len(gt)} # of books, but got {len(nlt)} # of books')
        return False

    for book_name, chapter_counts in gt.items():
        if book_name not in nlt:
            print(f'{book_name} not found in NLT')
            return False

        chapters = nlt[book_name]
        if len(chapter_counts) != len(chapters):
            print(f'Expected {len(chapter_counts)} chapters for {book_name} but got {len(chapters)}')
            return False

        for chap_idx, verse_counts in enumerate(chapter_counts, 1):
            # .get() rather than indexing: indexing a defaultdict would insert
            # an empty chapter into the data being validated, and a plain dict
            # would raise KeyError for a missing chapter number.
            found = len(chapters.get(chap_idx, ()))
            if found != verse_counts:
                print(f'Expected {verse_counts} verses for {book_name} {chap_idx} but got {found} verses')
                return False

    return True

# Parse the raw NLT text, using the ESV verse counts to place chapter breaks.
nlt = clean_nlt(bible, 'nlt_new.txt', 'nlt_clean.txt')

# Compare the parsed structure against the ESV reference counts.
print(is_valid(bible, nlt))

# Spot check: reference vs. parsed number of verses in Job 1.
print(bible['Job'][0], len(nlt['Job'][1]))

# Dump Exodus 40 verse by verse, one blank line after each verse.
for verse_idx, verse in enumerate(nlt['Exodus'][40], 1):
    print(verse_idx, verse, end='\n\n')

# Number of chapters parsed for every book.
for name, data in nlt.items():
    print(name, len(data))

Expected 40 chapters for Exodus but got 41
False
22 22
1 the beautifully crafted garments to be worn while ministering in the Holy Place- -the holy garments for Aaron the priest and for his sons to wear while on duty.

2 So the people of Israel followed all of the LORD's instructions to Moses.

3 Moses inspected all their work and blessed them because it had been done as the LORD had commanded him.

4 The LORD now said to Moses,

5 "Set up the Tabernacle on the first day of the new year.

6 Place the Ark of the Covenant inside, and install the inner curtain to enclose the Ark within the Most Holy Place.

7 Then bring in the table, and arrange the utensils on it. And bring in the lampstand, and set up the lamps.

8 "Place the incense altar just outside the inner curtain, opposite the Ark of the Covenant. Set up the curtain made for the entrance of the Tabernacle.

9 Place the altar of burnt offering in front of the Tabernacle entrance.

10 Set the large washbasin between the Tabernacle 