In [16]:
from bible_prep import BibleExtractor
from plotly import graph_objs as go
from typing import List, Dict

Now that we have our Bible extractor set up, let's check the distribution of chapters per book of the Bible.

In [2]:
# Build the extractor, call `download_text()` to get the Bible text, and
# extract the chapter headings from it.
extractor = BibleExtractor()
bible_text = extractor.download_text()
# `chapters` maps an integer key to a chapter title such as 'Genesis Chapter 1'
# (see the output below). The keys look like character offsets into
# `bible_text` — TODO confirm against `bible_prep`.
chapters = extractor.extract_chapters(bible_text)
# Bare last expression so the notebook displays the mapping.
chapters

{5693: 'Genesis Chapter 1',
 11129: 'Genesis Chapter 2',
 15239: 'Genesis Chapter 3',
 19929: 'Genesis Chapter 4',
 25055: 'Genesis Chapter 5',
 28188: 'Genesis Chapter 6',
 33080: 'Genesis Chapter 7',
 36471: 'Genesis Chapter 8',
 40214: 'Genesis Chapter 9',
 44743: 'Genesis Chapter 10',
 48209: 'Genesis Chapter 11',
 51880: 'Genesis Chapter 12',
 55135: 'Genesis Chapter 13',
 57721: 'Genesis Chapter 14',
 61637: 'Genesis Chapter 15',
 64382: 'Genesis Chapter 16',
 67003: 'Genesis Chapter 17',
 70865: 'Genesis Chapter 18',
 76065: 'Genesis Chapter 19',
 82121: 'Genesis Chapter 20',
 84997: 'Genesis Chapter 21',
 89146: 'Genesis Chapter 22',
 92838: 'Genesis Chapter 23',
 95844: 'Genesis Chapter 24',
 105842: 'Genesis Chapter 25',
 110099: 'Genesis Chapter 26',
 115222: 'Genesis Chapter 27',
 122281: 'Genesis Chapter 28',
 125639: 'Genesis Chapter 29',
 130203: 'Genesis Chapter 30',
 136364: 'Genesis Chapter 31',
 144387: 'Genesis Chapter 32',
 149435: 'Genesis Chapter 33',
 152394: 'G

In [29]:
def count_books(chapters: Dict) -> Dict[str, int]:
    """
    Tally how many chapters belong to each book.

    Each value of ``chapters`` is a chapter title; the book name is taken to be
    the title with its last two space-separated tokens removed (for example
    ``"1 Kings Chapter 3"`` becomes ``"1 Kings"``). Titles with two tokens or
    fewer therefore collapse to the empty-string book name.

    Args:
        chapters (Dict): Dictionary whose values are chapter titles. The keys
            are not used.

    Returns:
        Dict[str, int]: Book names mapped to their chapter counts, in order of
            first appearance.
    """
    tally: Dict[str, int] = {}

    for title in chapters.values():
        # Drop the trailing two tokens (e.g. "Chapter" and the number).
        name = " ".join(title.split(" ")[:-2])
        tally[name] = tally.get(name, 0) + 1

    return tally

In [33]:
def plot_chapters_distribution(chp_grp_dict: Dict, title: str = "", bar_height: int = 20) -> None:
    """
    Plot the distribution of chapters by group as a horizontal bar chart.

    One bar is drawn per key of ``chp_grp_dict`` and the figure is displayed
    with ``fig.show()``. The total figure height is set to
    ``len(chp_grp_dict) * bar_height`` so the chart grows with the number of
    bars. If that product is below Plotly's minimum accepted height (for
    example when the dictionary is empty), the height is left unset and Plotly
    falls back to its default instead of raising.

    Args:
        chp_grp_dict (Dict): Dictionary mapping a group name (bar label) to its
            chapter count (bar length).
        title (str): Title of the plot.
        bar_height (int): Vertical space, in pixels, budgeted per bar when
            sizing the figure. The figure height includes the margins, so this
            is not the exact thickness of a drawn bar.
    """
    # Plotly rejects `layout.height` values below 10 px.
    min_plotly_height = 10

    fig = go.Figure()
    fig.add_trace(
        go.Bar(y=list(chp_grp_dict.keys()), x=list(chp_grp_dict.values()), orientation="h")
    )

    layout_options = dict(
        margin=dict(l=200),  # Increase left margin to accommodate long titles
        yaxis=dict(automargin=True),
        title=title,
    )

    # Scale the figure height with the number of bars, but only when the result
    # is a height Plotly will accept.
    fig_height = len(chp_grp_dict) * bar_height
    if fig_height >= min_plotly_height:
        layout_options["height"] = fig_height

    fig.update_layout(**layout_options)

    fig.show()

In [22]:
# Count the chapters per book across every entry in `chapters`.
chp_grp_dict = count_books(chapters)

In [23]:
# Horizontal bar chart of chapters per book, using the function's default bar height.
plot_chapters_distribution(chp_grp_dict, "Bible books distribution by chapters")

Annoyingly, one of the Psalms is named "Psalm", which created an extra bar above. Another rule could be added to the `BibleExtractor` to handle it, but for now let's just blame the dataset :)

Let's also split our data into the New and Old Testaments so that we can have dedicated stats for each of them. We see that the last book of the Old Testament is 2 Machabees. Since we are using Python 3.8 (dictionaries have preserved insertion order since Python 3.7, for the record), we can rely on the order in which the keys are kept in the dictionary, so we can safely assume that the indices after the last chapter of 2 Machabees are all from the New Testament.

In [12]:
# Let's find the index of the last chapter of 2 Machabees.
# Reset the marker first so a missing match fails loudly below instead of
# raising NameError (fresh kernel) or silently reusing a stale value (re-run).
my_beloved_index = None
for index, chapter in chapters.items():
    if "2 Machabees" in chapter:
        # Keep overwriting so we end up with the last matching chapter.
        my_beloved_index = index

if my_beloved_index is None:
    raise ValueError("No '2 Machabees' chapter found; cannot split the Old and New Testaments.")

# Here we break out our chapters in New Test (nt) and Old Test (ot):
# every index after the last 2 Machabees chapter goes to the New Testament.
nt_chapters = {index: chapter for index, chapter in chapters.items() if index > my_beloved_index}
ot_chapters = {index: chapter for index, chapter in chapters.items() if index <= my_beloved_index}
nt_chapters

{4445726: 'Matthew Chapter 1',
 4450383: 'Matthew Chapter 2',
 4453840: 'Matthew Chapter 3',
 4456476: 'Matthew Chapter 4',
 4459989: 'Matthew Chapter 5',
 4468417: 'Matthew Chapter 6',
 4473282: 'Matthew Chapter 7',
 4476874: 'Matthew Chapter 8',
 4481372: 'Matthew Chapter 9',
 4486399: 'Matthew Chapter 10',
 4491999: 'Matthew Chapter 11',
 4496319: 'Matthew Chapter 12',
 4504697: 'Matthew Chapter 13',
 4512875: 'Matthew Chapter 14',
 4517319: 'Matthew Chapter 15',
 4524254: 'Matthew Chapter 16',
 4530900: 'Matthew Chapter 17',
 4534888: 'Matthew Chapter 18',
 4540736: 'Matthew Chapter 19',
 4545846: 'Matthew Chapter 20',
 4550087: 'Matthew Chapter 21',
 4556683: 'Matthew Chapter 22',
 4561968: 'Matthew Chapter 23',
 4568525: 'Matthew Chapter 24',
 4575064: 'Matthew Chapter 25',
 4580779: 'Matthew Chapter 26',
 4591895: 'Matthew Chapter 27',
 4600208: 'Matthew Chapter 28',
 4603930: 'Mark Chapter 1',
 4609265: 'Mark Chapter 2',
 4613260: 'Mark Chapter 3',
 4617223: 'Mark Chapter 4',
 

In [34]:
# Chapters per book, computed separately for the New (nt) and Old (ot) Testament subsets.
nt_grp_dict = count_books(nt_chapters)
ot_grp_dict = count_books(ot_chapters)

In [37]:
# New Testament chart; the third argument sets bar_height to 25.
plot_chapters_distribution(nt_grp_dict, "New Testament books distribution by chapters", 25)

In [38]:
# Old Testament chart; the third argument sets bar_height to 25.
plot_chapters_distribution(ot_grp_dict, "Old Testament books distribution by chapters", 25)