In [10]:
# !pip3 install biblescrapeway

In [11]:
# Import necessary libraries

import pandas as pd
from biblescrapeway import query
import re

In [12]:
# Fetch Genesis 5:1-32 (NIV) and key each verse's text by its 1-based
# position in the returned sequence.

verses = query("Genesis 5:1-32", version="NIV")
verses_dict = dict(enumerate((verse.text for verse in verses), start=1))

# Echo the mapping so the fetched text can be checked by eye
print(verses_dict)


{1: 'This is the written account of Adam’s family line. When God created mankind, he made them in the likeness of God.', 2: 'He created them male and female and blessed them. And he named them “Mankind” when they were created.', 3: 'When Adam had lived 130 years, he had a son in his own likeness, in his own image; and he named him Seth.', 4: 'After Seth was born, Adam lived 800 years and had other sons and daughters.', 5: 'Altogether, Adam lived a total of 930 years, and then he died.', 6: 'When Seth had lived 105 years, he became the father of Enosh.', 7: 'After he became the father of Enosh, Seth lived 807 years and had other sons and daughters.', 8: 'Altogether, Seth lived a total of 912 years, and then he died.', 9: 'When Enosh had lived 90 years, he became the father of Kenan.', 10: 'After he became the father of Kenan, Enosh lived 815 years and had other sons and daughters.', 11: 'Altogether, Enosh lived a total of 905 years, and then he died.', 12: 'When Kenan had lived 70 years

In [13]:
#  Create functions to extract the necessary data

# Patterns for the three kinds of genealogy verse.
#
# fatherhood_pattern groups:
#   1 = father, 2 = father's age,
#   3 = son from "he named him X",
#   4 = son from "he became the father of X",
#   5 = son from "he had a son named X".
# A bare "he had a son" is accepted only when no "he named him" follows in the
# same verse. Without that guard the lazy scan stops at "he had a son" and a
# name given later in the verse ("... and he named him Seth") is never captured.
fatherhood_pattern = re.compile(
    r"When (\w+) had lived (\d+) years.*?"
    r"(?:he named him (\w+)"
    r"|he became the father of (\w+)"
    r"|he had a son(?: named (\w+)|(?!.*he named him)))"
)

# after_fatherhood_pattern groups:
#   1-3 = son (one per phrasing), 4 = father, 5 = years lived afterwards,
#   6-7 = father / years for a verse that only says
#         "X walked faithfully with God N years and had other sons and daughters".
# The first form accepts "walked faithfully with God" in place of "lived", so a
# verse shaped "After he became the father of X, Y walked faithfully with God
# N years ..." keeps the son named in its "After ..." clause instead of falling
# through to the son-less fallback form.
after_fatherhood_pattern = re.compile(
    r"After (?:he became the father of (\w+)|(\w+) was born|he named him (\w+)), "
    r"(\w+) (?:lived|walked faithfully with God) (\d+) years"
    r"|(\w+) walked faithfully with God (\d+) years and had other sons and daughters"
)

# lifespan_pattern groups: 1 = person, 2 = total years lived.
lifespan_pattern = re.compile(r"Altogether, (\w+) lived a total of (\d+) years")

# Pull the father, the son and the father's age out of a "When ..." verse
def extract_fatherhood(verse_text):
    """Parse a "When X had lived N years ..." verse.

    Returns a dict with keys "Name", "Son" and "Age at Fatherhood" (int), or
    None when `fatherhood_pattern` does not match. "Son" is the first
    non-None capture among the pattern's three son groups, or None when
    none of them captured anything.
    """
    hit = fatherhood_pattern.search(verse_text)
    if not hit:
        return None

    father, age_text, *son_candidates = hit.groups()
    # Groups 3-5 hold the son under different phrasings; keep the first present.
    son = next(
        (candidate for candidate in son_candidates[:3] if candidate is not None),
        None,
    )
    return {"Name": father, "Son": son, "Age at Fatherhood": int(age_text)}

# Function to extract after fatherhood information
def extract_after_fatherhood(verse_text):
    """Parse an "After ..., X lived N years ..." verse.

    Also handles the variant matched by the pattern's second branch,
    "X walked faithfully with God N years and had other sons and daughters".

    Returns a dict with keys "Name", "Son", "Lifespan After First Child"
    (int) and "Other Children" ("Yes"/"No"), or None when
    `after_fatherhood_pattern` does not match. "Son" is None when the
    matching branch captured no son.

    "Other Children" is "Yes" exactly when the verse text contains
    "had other sons and daughters". It is read from the text rather than
    inferred from which regex branch matched or from a specific person's name.
    """
    match = after_fatherhood_pattern.search(verse_text)
    if match is None:
        return None

    (son_fathered, son_born, son_named,
     primary_name, primary_years,
     fallback_name, fallback_years) = match.groups()

    # Groups 4-5 belong to the "After ..." form, groups 6-7 to the fallback form.
    name = primary_name if primary_name is not None else fallback_name
    lifespan_after = primary_years if primary_years is not None else fallback_years

    # At most one of the three son groups is set; take whichever it is.
    son = next(
        (candidate for candidate in (son_fathered, son_born, son_named) if candidate is not None),
        None,
    )

    other_children = "Yes" if "had other sons and daughters" in verse_text else "No"
    return {
        "Name": name,
        "Son": son,
        "Lifespan After First Child": int(lifespan_after),
        "Other Children": other_children,
    }

# Read the total age out of an "Altogether, ..." verse
def extract_lifespan(verse_text):
    """Return {"Name": ..., "Total Lifespan": int} for a lifespan verse.

    Returns None when `lifespan_pattern` does not match the text.
    """
    found = lifespan_pattern.search(verse_text)
    if found is None:
        return None
    return {"Name": found.group(1), "Total Lifespan": int(found.group(2))}

# Process each verse in the dictionary
def process_verses(verses_dict):
    """Merge the per-verse extractions into one record per person.

    Each verse text is passed, in order, to `extract_fatherhood`,
    `extract_after_fatherhood` and `extract_lifespan`. Every non-empty
    result is merged into the record stored under its "Name".

    A later result never replaces an already-known value with None: a None
    value is stored only when the record has no entry for that key yet.
    This keeps, for example, a son learned from a "When ..." verse when the
    following "After ..." verse yields no son.

    Args:
        verses_dict: mapping whose values are verse texts; keys are not used.

    Returns:
        dict mapping each person's name to their merged record, in order of
        first appearance.
    """
    data = {}
    extractors = (extract_fatherhood, extract_after_fatherhood, extract_lifespan)
    for verse_text in verses_dict.values():
        for extract in extractors:
            info = extract(verse_text)
            if not info:
                continue
            record = data.setdefault(info["Name"], {})
            for key, value in info.items():
                # Keep the key present, but do not let a missing (None) value
                # clobber one supplied by an earlier verse.
                if value is not None or key not in record:
                    record[key] = value
    return data


# Run the extraction over every fetched verse
data = process_verses(verses_dict)

# Build one DataFrame row per person record
df = pd.DataFrame(list(data.values()))
# Explicitly set the "Son" cell of Enoch's row to Methuselah
df.loc[df["Name"].eq("Enoch"), "Son"] = "Methuselah"

In [14]:
df

Unnamed: 0,Name,Son,Age at Fatherhood,Lifespan After First Child,Other Children,Total Lifespan
0,Adam,Seth,130,800,Yes,930
1,Seth,Enosh,105,807,Yes,912
2,Enosh,Kenan,90,815,Yes,905
3,Kenan,Mahalalel,70,840,Yes,910
4,Mahalalel,Jared,65,830,Yes,895
5,Jared,Enoch,162,800,Yes,962
6,Enoch,Methuselah,65,300,Yes,365
7,Methuselah,Lamech,187,782,Yes,969
8,Lamech,Noah,182,595,Yes,777


In [15]:
# Write the table to Gen5_data.csv in the working directory.
# NOTE(review): per the pandas docs, to_csv returns None when given a path,
# so Gen5_data presumably holds None rather than CSV text - confirm whether
# the binding is needed.
Gen5_data = df.to_csv(path_or_buf='Gen5_data.csv')