# Institute Manual for the Book of Mormon

In [5]:
link = 'https://www.churchofjesuschrist.org/study/manual/book-of-mormon-student-manual?lang=eng'

In [2]:
def decode_str(value):
    return value.encode('latin-1').decode('utf-8', 'replace') if value is not None else None

In [1]:
import requests
from bs4 import BeautifulSoup
import re
import json

In [6]:
def get_all_links(url):
    try:
        response = requests.get(url)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print("Error fetching the webpage:", e)
        return []

    soup = BeautifulSoup(response.content, "html.parser")
    links = []
    for link in soup.find_all("a", href=True):
        links.append(link["href"])

    return links

# UNCOMMENT THIS TO GET ALL LINKS
# links = get_all_links(link)
links = []
for link in links:
    print(link)


/study/books-and-lessons/institute?lang=eng
/study/manual/book-of-mormon-student-manual?lang=eng
/study/manual/book-of-mormon-student-manual/title-page?lang=eng
/study/manual/book-of-mormon-student-manual/introduction?lang=eng
/study/manual/book-of-mormon-student-manual/chapter-1-keystone-of-our-religion?lang=eng
/study/manual/book-of-mormon-student-manual/chapter-2-1-nephi-1-5?lang=eng
/study/manual/book-of-mormon-student-manual/chapter-3-1-nephi-6-11?lang=eng
/study/manual/book-of-mormon-student-manual/chapter-4-1-nephi-12-15?lang=eng
/study/manual/book-of-mormon-student-manual/chapter-5-1-nephi-16-18?lang=eng
/study/manual/book-of-mormon-student-manual/chapter-6-1-nephi-19-22?lang=eng
/study/manual/book-of-mormon-student-manual/chapter-7-2-nephi-1-3?lang=eng
/study/manual/book-of-mormon-student-manual/chapter-8-2-nephi-4-8?lang=eng
/study/manual/book-of-mormon-student-manual/chapter-9-2-nephi-9-10?lang=eng
/study/manual/book-of-mormon-student-manual/chapter-10-2-nephi-11-16?lang=eng

In [8]:
filtered_links = [link for link in links if 'chapter' in link]
prefix = 'https://www.churchofjesuschrist.org/'
all_links = [prefix + link for link in filtered_links]

In [11]:
for link in all_links:
    print(link)

https://www.churchofjesuschrist.org//study/manual/book-of-mormon-student-manual/chapter-1-keystone-of-our-religion?lang=eng
https://www.churchofjesuschrist.org//study/manual/book-of-mormon-student-manual/chapter-2-1-nephi-1-5?lang=eng
https://www.churchofjesuschrist.org//study/manual/book-of-mormon-student-manual/chapter-3-1-nephi-6-11?lang=eng
https://www.churchofjesuschrist.org//study/manual/book-of-mormon-student-manual/chapter-4-1-nephi-12-15?lang=eng
https://www.churchofjesuschrist.org//study/manual/book-of-mormon-student-manual/chapter-5-1-nephi-16-18?lang=eng
https://www.churchofjesuschrist.org//study/manual/book-of-mormon-student-manual/chapter-6-1-nephi-19-22?lang=eng
https://www.churchofjesuschrist.org//study/manual/book-of-mormon-student-manual/chapter-7-2-nephi-1-3?lang=eng
https://www.churchofjesuschrist.org//study/manual/book-of-mormon-student-manual/chapter-8-2-nephi-4-8?lang=eng
https://www.churchofjesuschrist.org//study/manual/book-of-mormon-student-manual/chapter-9-2-

In [12]:
def get_page_source(url):
    try:
        response = requests.get(url)
        response.raise_for_status()  # Raise an exception if the response is not successful (status code >= 400)
        return response.text
    except requests.exceptions.RequestException as e:
        print(f"Error occurred while fetching {url}: {e}")
        return None

In [13]:
page_sources = []
for link in all_links:
    page_source = get_page_source(link)
    page_sources.append((link, page_source))

In [111]:
with open("manual.json", "w") as json_file:
    json.dump(page_sources, json_file)

In [3]:
with open("manual_page_source.json", "r") as f:
    page_sources = json.load(f)

In [4]:
def get_sections(page_source):
    soup = BeautifulSoup(page_source, "html.parser")
    # Find all <a> tags with href attributes that start with '/study/'
    a_tags = soup.find_all('a', href=True)

    # Replace the matching href attributes
    for a_tag in a_tags:
        if a_tag['href'].startswith('/study/'):
            a_tag['href'] = 'https://www.churchofjesuschrist.org' + a_tag['href']

    sections = []
    for header in soup.find_all("header"):
        if header.find("a", class_="scripture-ref"):
            section = header.find_parent("section")
            if section:
                a_elements = header.find_all("a", class_="scripture-ref")
                hrefs = [a.get("href") for a in a_elements]
                ref = [decode_str(a.get_text()) for a in a_elements]
                h3 = header.find("h3")
                if not h3:
                    continue
                for a in h3.find_all('a'):
                    a.extract()
                title = h3.get_text(strip=True)

                title = ''.join(h3.find_all(string=True, recursive=False))
                if title[0] == '.':
                    title = title[1:]

                ul = section.find("ul")
                content = ""
                if ul: #Institute manual has ul
                    content = ul.get_text()
                else:
                    p_elements = section.find_all('p')
                    content = '\n'.join([p.get_text() for p in p_elements])

                
                sections.append({
                    "hrefs": hrefs,
                    "ref": ref,
                    "title": decode_str(title),
                    "content": decode_str(content),
                    "html": decode_str(section.prettify())
                })
    return sections


In [5]:
chapters = []
for page in page_sources:
    sections = get_sections(page[1])
    chapters.append((page[0], sections))

In [6]:
chapters = chapters[0:56]

In [9]:
bom_abbreviations = {
    '1-ne' : '1 Nephi',
    '2-ne' : '2 Nephi',
    'jacob' : 'Jacob',
    'enos' : 'Enos',
    'jarom' : 'Jarom',
    'omni' : 'Omni',
    'w-of-m' : 'Words of Mormon',
    'mosiah' : 'Mosiah',
    'alma' : 'Alma',
    'hel' : 'Helaman',
    '3-ne' : '3 Nephi',
    '4-ne' : '4 Nephi',
    'morm' : 'Mormon',
    'ether' : 'Ether',
    'moro' : 'Moroni'
}

scripture_map = {
    'bom' : 'Book of Mormon',
    'ot' : 'Old Testament',
    'nt' : 'New Testament',
    'pgp' : 'Pearl of Great Price',
    'dc-testament' : 'Doctrine and Covenants'
}

file_path = 'scriptures-json/book-of-mormon.json'
bom_chapters = {}
with open(file_path, 'r') as json_file:
    book_of_mormon_data = json.load(json_file)

for book in book_of_mormon_data['books']:
    bom_chapters[book['book']] = len(book['chapters'])



In [10]:
def extract_scripture_info(link):
    # Remove the '?lang=eng' part from the link if it exists
    link = link.split('?')[0]

    # Split the link by '/'
    link_parts = link.split('/')

    # Extract the scripture, book, and the third element containing chapter and verses
    work = link_parts[5]
    book = link_parts[6]
    if book not in bom_abbreviations.keys():
        return None, None, None, None

    third_element = link_parts[7]
    part = third_element.split('.')
    chapter = part[0]
    if (len(part) < 2):
        return work, book, chapter, range(1, bom_chapters[bom_abbreviations[book]])
    verses = part[1]
    verses = verses.split(',')

    individual_verses = []
    for verse_range in verses:
        # Check if the verse range is in the form of '1-2'
        if '-' in verse_range:
            verse_range = verse_range.split('-')
            start_verse = verse_range[0]
            end_verse = verse_range[1]
            while int(start_verse)!= 0 or int(end_verse)!= 0:
                individual_verses.append(start_verse)
                start_verse = int(start_verse) + 1
                if int(start_verse) > int(end_verse):
                    break
        else:
            individual_verses.append(verse_range)

    return work, book, chapter, individual_verses

In [11]:
bom_manual_ref_map = {}
for chapter in chapters:
    for section in chapter[1]:
        for href in section['hrefs']:
            work, book, chapter_num, verses = extract_scripture_info(href)
            if (work == None):
                continue
            current_level = bom_manual_ref_map
            for level in (work, book, chapter_num):
                current_level = current_level.setdefault(level, {})
            for verse in verses:
                if verse not in current_level:
                    current_level[verse] = []
                current_level[verse].append({
                    'scripture': section['ref'],
                    'header': section['title'],
                    'content': section['content'],
                    'html': section['html']                  
                })
        

In [14]:
with open("svelte-app/src/jsons/bom_manual_map.json", 'w') as f:
    json.dump(bom_manual_ref_map, f)

# Come Follow Me for the Book of Mormon

In [None]:
cfm_link ='https://www.churchofjesuschrist.org/study/manual/come-follow-me-for-individuals-and-families-book-of-mormon-2020?lang=eng'
# UNCOMMENT THIS TO GET ALL LINKS
# cfm_links = get_all_links(link)
cfm_links = []
filtered_links = []
for link in cfm_links:
    if re.search(r'2020/\d+', link):
        filtered_links.append(link)

prefix_string = 'https://www.churchofjesuschrist.org'
links = [prefix_string + link for link in filtered_links]
page_sources = [get_page_source(link) for link in links]



Or load it in...

In [106]:
def load_page_sources_from_json(file_path):
    with open(file_path, "r") as json_file:
        return json.load(json_file)
page_sources = load_page_sources_from_json('resources/come_follow_me_2020_page_source.json')

In [89]:
print(page_sources[0])

['https://www.churchofjesuschrist.org/study/manual/come-follow-me-for-individuals-and-families-book-of-mormon-2020/01?lang=eng', '<!DOCTYPE html>\n<html lang="en">\n    <head>\n        <meta charset="utf-8">\n        <title data-react-helmet="true">December 30–January\xa05. Introductory Pages of the Book of Mormon: “Another Testament of Jesus Christ”</title>\n        <meta name="version" content="3.28.0">\n        <meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1">\n        <meta data-react-helmet="true" name="Search.doc-aid" content="138276781"/><meta data-react-helmet="true" name="title" content="December 30–January\xa05. Introductory Pages of the Book of Mormon: “Another Testament of Jesus Christ”"/><meta data-react-helmet="true" name="description" content="December 30–January\xa05. Introductory Pages of the Book of Mormon: “Another Testament of Jesus Christ”"/><meta data-react-helmet="true" property="og:image" content="https://assets.churchofjesusc

In [107]:
chapters = [(page[0], get_sections(page[1])) for page in page_sources]

In [78]:
print(chapters[1][1][0]['ref'][0])

1�Nephi 1?6


In [108]:
bom_cfm_ref_map = {}
for chapter in chapters:
    for section in chapter[1]:
        for href in section['hrefs']:
            work, book, chapter, verses = extract_scripture_info(href)
            if (work == None):
                continue
            current_level = bom_cfm_ref_map
            for level in (work, book, chapter):
                current_level = current_level.setdefault(level, {})
            for verse in verses:
                if verse not in current_level:
                    current_level[verse] = []
                current_level[verse].append({
                    'scripture': section['ref'],
                    'header': section['title'],
                    'content': section['content'],  
                    'html': section['html']                 
                })

In [122]:
print(bom_cfm_ref_map['bofm']['1-ne']['1']['1']

[{'scripture': ['1\xa0Nephi 1–6'], 'header': 'The scriptures are of great worth.', 'content': '1\xa0Nephi 1–6\nThe first six chapters of the Book of Mormon contain many references to sacred books, sacred records, and the word of the Lord. As you read 1\xa0Nephi 1–6, what do you learn about why the word of God is “of great worth”? (1\xa0Nephi 5:21). What do these passages teach you about the scriptures? What do you find that inspires you to search the scriptures with greater commitment?\nSee also “Scriptures Legacy” (video, ChurchofJesusChrist.org).', 'html': '<section>\n <header>\n  <p class="scripture-title" data-aid="140517897" id="scripture_title1">\n   <a class="scripture-ref" href="https://www.churchofjesuschrist.org/study/scriptures/bofm/1-ne/1?lang=eng">\n    1\xa0Nephi 1–6\n   </a>\n  </p>\n  <h3 data-aid="140517898" id="title3">\n   The scriptures are of great worth.\n  </h3>\n </header>\n <p data-aid="140517899" id="p3">\n  The first six chapters of the Book of Mormon contain

In [113]:
with open("bom_cfm_map.json", 'w') as f:
    json.dump(bom_cfm_ref_map, f)