In [1]:
import requests
from bs4 import BeautifulSoup

In [2]:
url = 'https://www.biblestudytools.com/bible-versions/'

In [3]:
def fetch_html_from_url(url, timeout=30):
    """Download a page and parse it into a BeautifulSoup tree.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds handed to ``requests.get`` as its timeout, so a
            stalled server cannot block the notebook indefinitely.

    Returns:
        The parsed ``BeautifulSoup`` object, or ``None`` when the request or
        the parsing failed. Failures are printed, not raised.
    """
    # Bound up front so the final return is valid even when one of the
    # exception handlers below runs before the page has been parsed.
    soup = None

    try:
        # Send an HTTP GET request to the URL
        response = requests.get(url, timeout=timeout)

        # Raise for unsuccessful HTTP status codes so they are reported below
        response.raise_for_status()

        # Parse the HTML content of the page
        soup = BeautifulSoup(response.content, 'html.parser')

    except requests.exceptions.RequestException as e:
        print(f"Failed to make the request: {e}")
    except Exception as e:
        print(f"An error occurred: {e}")

    return soup

soup_obj = fetch_html_from_url(url)

In [4]:
version_body = soup_obj.find_all("div", class_="p-2 w-full")

In [5]:
len(version_body)

9

In [6]:
# Map each version's abbreviation (e.g. "ESV") to [full name, landing-page URL].
version_dict = {}
for version in version_body:
    # The first link in each block holds the text "<full name> <abbreviation>".
    version_info = version.find("a")
    if version_info is None:
        # No link in this block: skip it rather than fail on `.text` below.
        continue

    version_name_list = version_info.text.strip().split()
    if not version_name_list:
        # Link without text: there is no abbreviation to key the entry on.
        continue

    # The last whitespace-separated token is the abbreviation; the rest is the name.
    version_name = ' '.join(version_name_list[:-1])
    version_link = version_info.get('href')
    version_alias = version_name_list[-1]
    version_dict[version_alias] = [version_name, version_link]
    print(version_name)
    print(version_link)
    print(version_alias)
    print()

Holman Christian Standard Bible
https://www.biblestudytools.com/csb/
CSB

English Standard Version
https://www.biblestudytools.com/esv/
ESV

King James Version
https://www.biblestudytools.com/kjv/
KJV

The Message Bible
https://www.biblestudytools.com/msg/
MSG

New American Standard Bible
https://www.biblestudytools.com/nas/
NAS

New International Version
https://www.biblestudytools.com/niv/
NIV

New King James Version
https://www.biblestudytools.com/nkjv/
NKJV

New Living Translation
https://www.biblestudytools.com/nlt/
NLT

New Revised Standard
https://www.biblestudytools.com/nrs/
NRS



In [7]:
version_dict['ESV'][1]

'https://www.biblestudytools.com/esv/'

In [8]:
url_new = version_dict['ESV'][1]

In [9]:
esv_soup_obj = fetch_html_from_url(url_new)

In [10]:
books_in_esv = esv_soup_obj.find_all("div", class_="text-center")

In [11]:
# For every version, collect the (book name, book URL) pairs found on its landing page.
books_dict = {}

for alias, version_entry in version_dict.items():
    books_list = []

    # Index 1 of each version entry is the version's landing-page URL.
    alias_link = version_entry[1]

    # Download the landing page and pick out the candidate book containers.
    book_data = fetch_html_from_url(alias_link)
    books_in_vers = book_data.find_all("div", class_="text-center")

    for book in books_in_vers:
        book_info = book.find('a')
        if not book_info:
            # Containers without a link carry no book entry.
            continue
        book_link = book_info.get('href')
        book_name = book_info.text.strip()
        books_list.append((book_name, book_link))

    # The last collected link is dropped.
    # NOTE(review): presumably it is not a book - confirm against the page.
    books_dict[alias] = books_list[:-1]

In [12]:
len(books_in_esv)

70

In [13]:
books_dict.keys()

dict_keys(['CSB', 'ESV', 'KJV', 'MSG', 'NAS', 'NIV', 'NKJV', 'NLT', 'NRS'])

In [14]:
books_dict['CSB'][:3]

[('Genesis', 'https://www.biblestudytools.com/csb/genesis/'),
 ('Exodus', 'https://www.biblestudytools.com/csb/exodus/'),
 ('Leviticus', 'https://www.biblestudytools.com/csb/leviticus/')]

In [17]:
books_dict['CSB'][0][0]

'Genesis'

In [112]:
import re

In [119]:
def get_chapters(book, book_link):
    """Download every chapter page of one book and return its cleaned text.

    Args:
        book: Name of the book. Not used by the function; kept so existing
            callers keep working.
        book_link: URL of the book's landing page. Chapter pages are
            requested as ``<book_link>/<chapter number>.html``.

    Returns:
        A list with one entry per chapter, in chapter order. Each entry is a
        list of strings: the text of every ``div.leading-8`` element on the
        chapter page, with newlines and runs of spaces collapsed to a single
        space. A chapter page that could not be fetched yields an empty list
        so later chapters keep their position. An empty list is returned when
        the book page is unavailable or has no chapter grid.
    """
    # Fetch book page and retrieve information on chapters
    book_info = fetch_html_from_url(book_link)
    if book_info is None:
        return []

    # The chapter count is the number of links inside the page's grid container.
    chapter_grid = book_info.find('div', class_='grid')
    if chapter_grid is None:
        return []
    num_chaps = len(chapter_grid.find_all('a', class_='text-center'))

    # Normalise the trailing slash so the chapter URL is well formed either way.
    base_link = book_link.rstrip('/')

    chapter_store = []
    for chapter in range(1, num_chaps + 1):
        # fetch chapter data
        chap_data = fetch_html_from_url(f'{base_link}/{chapter}.html')
        if chap_data is None:
            # Keep a placeholder so index i still maps to chapter i + 1.
            chapter_store.append([])
            continue

        # fetch chapter content
        main_chap_data = chap_data.find_all('div', class_='leading-8')

        # Flatten each block to one line with single spaces.
        verses = [
            re.sub(' +', ' ', verse.text.replace('\n', ' ').strip())
            for verse in main_chap_data
        ]

        # Store chapter data
        chapter_store.append(verses)

    return chapter_store

In [120]:
book_name, book_link = books_dict['CSB'][1]

In [121]:
chaps_gotten = get_chapters(book_name, book_link)

In [88]:
# NOTE(review): stale cell (execution count 88). get_chapters as defined in this
# notebook returns lists of strings, and a list has no `.find`, so this line only
# worked against an earlier version that stored parsed pages. Re-run or remove.
chaps_gotten[1].find('div', class_='leading-8').find('a').text

'1'

In [94]:
# First cleaned text block of the second chapter. get_chapters already returns
# plain strings, so the entry is indexed directly instead of re-querying HTML.
chaps_gotten[1][0]

"Moses' Birth and Adoption 1 Now a man from the family of Levi married a Levite woman."

In [118]:
# NOTE(review): the IndexError recorded below comes from execution count 118,
# i.e. before the current get_chapters definition (119) and call (121) were run.
# Re-run on a fresh kernel to confirm whether it still occurs.
chaps_gotten[4]

IndexError: list index out of range