In [1]:
import os
import time

import pandas as pd

import Lib.scriptures.scripture_volume_page as sv

In [2]:
# Base address that every scripture volume url is built from
root_url = 'https://www.lds.org/scriptures'
# Identifier of each scripture volume, used as the url path segment after root_url
ot_id, nt_id, bm_id, dc_id, pgp_id = 'ot', 'nt', 'bofm', 'dc-testament', 'pgp'
# All volume identifiers, in the order the cells below iterate over them
volume_ids = [ot_id, nt_id, bm_id, dc_id, pgp_id]

In [3]:
# Fetch and parse the page of every scripture volume, keyed by volume id
book_lists = {}
for vol_id in volume_ids:
    # Address of this volume's page: root url followed by the volume id
    volume_url = '{0}/{1}'.format(root_url, vol_id)
    # Download the page, then parse it; the parsed result is read later
    # through its 'name', 'title' and 'books' entries
    volume_soup = sv.get_content_from_url(volume_url)
    book_lists[vol_id] = sv.parse_all_text_from_page(volume_soup)
    # Wait two seconds after each request (presumably to go easy on the server)
    time.sleep(2)

In [4]:
# Helper that prints everything stored for one scripture volume
def print_volume_information(volume_id):
    """Print the name, title and book list of one scripture volume.

    The volume is looked up in the module-level ``book_lists`` dict. Its entry
    is read through the 'name', 'title' and 'books' keys; 'books' must yield
    3-item sequences, printed one per line with tab-separated fields.

    Args:
        volume_id: Key of the volume in ``book_lists``.

    Raises:
        KeyError: If ``volume_id`` (or one of the keys above) is missing.
    """
    volume = book_lists[volume_id]
    print('Name:', volume['name'])
    print('Title:', volume['title'])
    print('Book list:')
    for book_id, book_name, book_url in volume['books']:
        print(book_id, book_name, book_url, sep='\t')

In [5]:
# Print results for bm_id
# Spot-check of the scrape: show the name, title and book list of one volume.
# volume_id is a module-level name here; the later loop over volume_ids rebinds it.
volume_id = bm_id
print_volume_information(volume_id)

Name: Book of Mormon
Title: The Book of Mormon Another Testament of Jesus Christ
Book list:
1-ne	1 Nephi	https://www.lds.org/scriptures/bofm/1-ne?lang=eng
2-ne	2 Nephi	https://www.lds.org/scriptures/bofm/2-ne?lang=eng
jacob	Jacob	https://www.lds.org/scriptures/bofm/jacob?lang=eng
enos	Enos	https://www.lds.org/scriptures/bofm/enos/1?lang=eng
jarom	Jarom	https://www.lds.org/scriptures/bofm/jarom/1?lang=eng
omni	Omni	https://www.lds.org/scriptures/bofm/omni/1?lang=eng
w-of-m	Words of Mormon	https://www.lds.org/scriptures/bofm/w-of-m/1?lang=eng
mosiah	Mosiah	https://www.lds.org/scriptures/bofm/mosiah?lang=eng
alma	Alma	https://www.lds.org/scriptures/bofm/alma?lang=eng
hel	Helaman	https://www.lds.org/scriptures/bofm/hel?lang=eng
3-ne	3 Nephi	https://www.lds.org/scriptures/bofm/3-ne?lang=eng
4-ne	4 Nephi	https://www.lds.org/scriptures/bofm/4-ne/1?lang=eng
morm	Mormon	https://www.lds.org/scriptures/bofm/morm?lang=eng
ether	Ether	https://www.lds.org/scriptures/bofm/ether?lang=eng
moro	Moroni	https://www.lds.org/scriptures/bofm/moro?lang=eng

In [6]:
# Flatten the per-volume book lists into one row per book, ready for a dataframe
all_scripture_book_info = []
for volume_id in volume_ids:
    # Volume-level fields are repeated on every row of that volume
    volume_entry = book_lists[volume_id]
    volume_name = volume_entry['name']
    volume_title = volume_entry['title']
    # The scraped link of each book is not stored; the url kept in the row is
    # rebuilt from the root url, the volume id and the book id
    for book_id, book_name, scraped_url in volume_entry['books']:
        book_url = '/'.join((root_url, volume_id, book_id))
        all_scripture_book_info.append(
            (volume_id, volume_name, volume_title, book_id, book_name, book_url)
        )

In [7]:
# Assemble every (volume, book) row into a single dataframe
column_labels = [
    'volume_id', 'volume_name', 'volume_title',
    'book_id', 'book_name', 'book_url',
]
info_df = pd.DataFrame(data=all_scripture_book_info, columns=column_labels)
# Show the first rows as the cell output
info_df.head()

Unnamed: 0,volume_id,volume_name,volume_title,book_id,book_name,book_url
0,ot,Old Testament,The Old Testament,gen,Genesis,https://www.lds.org/scriptures/ot/gen
1,ot,Old Testament,The Old Testament,ex,Exodus,https://www.lds.org/scriptures/ot/ex
2,ot,Old Testament,The Old Testament,lev,Leviticus,https://www.lds.org/scriptures/ot/lev
3,ot,Old Testament,The Old Testament,num,Numbers,https://www.lds.org/scriptures/ot/num
4,ot,Old Testament,The Old Testament,deut,Deuteronomy,https://www.lds.org/scriptures/ot/deut


In [8]:
# Write dataframe to file
output_path = 'data/scriptures'
# Create the output directory first: to_csv does not create missing parent
# directories, so on a fresh checkout the write would otherwise fail
os.makedirs(output_path, exist_ok=True)
# index=False keeps the row index out of the csv
info_df.to_csv(os.path.join(output_path, 'all_scripture_books.csv'), index=False)