In [73]:
import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv

load_dotenv() 

def fetch_text(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    return soup.get_text()

def chunk_text(text):
    return text.split('\n')

In [None]:
from openai import OpenAI
import os

# Set up OpenAI API key
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

def generate_embedding(text):
    embeddings = []
    try:
        response = client.embeddings.create(
            model="text-embedding-3-large",
            input=text
        )
        
        embedding = response.data[0].embedding
        embeddings.append(embedding)
    except Exception as e:
        print(f"Error generating embedding for text: {text}. Error: {str(e)}")
    
    return embeddings

generate_embedding("Hello world")

In [91]:
import random

def print_random_verses(verses, num_verses=5):
    """
    Print a specified number of random verses from the list of verses.
    
    :param verses: List of tuples containing (verse_id, verse_text)
    :param num_verses: Number of random verses to print (default: 5)
    """
    selected_verses = random.sample(verses, min(num_verses, len(verses)))
    
    print(f'Printing {num_verses} random verses:')
    for verse_id, verse_text in selected_verses:
        print(f'{verse_id}:')
        print(verse_text)
        print('---')

# Christianity

In [92]:
import random

def get_bible_verses():
    bible_url = 'https://openbible.com/textfiles/kjv.txt'
    bible_text = fetch_text(bible_url)
    print(f'Bible text length: {len(bible_text)} characters')
    bible_chunks = chunk_text(bible_text)
    print(f'Number of Bible chunks: {len(bible_chunks)}')
    
    parsed_verses = []
    for chunk in bible_chunks[2:]:  # Skip the first two lines
        parts = chunk.split('\t', 1)
        if len(parts) == 2:
            verse, text = parts
            parsed_verses.append((verse.strip(), text.strip()))
    
    return parsed_verses

bible_verses = get_bible_verses()

print_random_verses(bible_verses)


Bible text length: 4606957 characters
Number of Bible chunks: 31105
Printing 5 random verses:
Joshua 18:15:
And the south quarter [was] from the end of Kirjathjearim, and the border went out on the west, and went out to the well of waters of Nephtoah:
---
Genesis 26:11:
And Abimelech charged all [his] people, saying, He that toucheth this man or his wife shall surely be put to death.
---
2 Samuel 8:18:
And Benaiah the son of Jehoiada [was over] both the Cherethites and the Pelethites; and Davidâs sons were chief rulers.
---
Exodus 6:17:
The sons of Gershon; Libni, and Shimi, according to their families.
---
Judges 9:6:
And all the men of Shechem gathered together, and all the house of Millo, and went, and made Abimelech king, by the plain of the pillar that [was] in Shechem.
---


In [93]:
import json

def save_verses_to_json(verses, filename='bible.json'):
    """
    Save the verses to a JSON file.
    
    :param verses: List of tuples containing (verse_id, verse_text)
    :param filename: Name of the JSON file to save the verses (default: 'bible.json')
    """
    verses_dict = [{"verse_id": verse_id, "verse_text": verse_text} for verse_id, verse_text in verses]
    
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(verses_dict, f, ensure_ascii=False, indent=4)
    
    print(f'Verses saved to {filename}')

# Save the Bible verses to a JSON file
save_verses_to_json(bible_verses)


Verses saved to bible.json


# Islam 

In [95]:
import requests

# Muhammad Asad translation, (surah) number followed by the verse (ayah) number.
def get_all_verses_in_english():
    base_url = 'http://api.alquran.cloud/v1/surah/{}/en.asad'
    verses = []
    
    for surah_number in range(1, 115):  # Surahs are numbered from 1 to 114
        url = base_url.format(surah_number)
        response = requests.get(url)
        
        if response.status_code != 200:
            print(f"Failed to retrieve Surah {surah_number}")
            continue
        
        data = response.json()
        if surah_number == 1:
            print(data)
        for ayah in data['data']['ayahs']:
            ayah_number = ayah['numberInSurah']
            verse_id = f"{surah_number}:{ayah_number}"
            text_content = ayah['text']
            verses.append((verse_id, text_content))
    return verses

islamic_verses = get_all_verses_in_english()

print_random_verses(islamic_verses)

{'code': 200, 'status': 'OK', 'data': {'number': 1, 'name': 'سُورَةُ ٱلْفَاتِحَةِ', 'englishName': 'Al-Faatiha', 'englishNameTranslation': 'The Opening', 'revelationType': 'Meccan', 'numberOfAyahs': 7, 'ayahs': [{'number': 1, 'text': 'In the name of God, The Most Gracious, The Dispenser of Grace:', 'numberInSurah': 1, 'juz': 1, 'manzil': 1, 'page': 1, 'ruku': 1, 'hizbQuarter': 1, 'sajda': False}, {'number': 2, 'text': 'All praise is due to God alone, the Sustainer of all the worlds,', 'numberInSurah': 2, 'juz': 1, 'manzil': 1, 'page': 1, 'ruku': 1, 'hizbQuarter': 1, 'sajda': False}, {'number': 3, 'text': 'The Most Gracious, the Dispenser of Grace,', 'numberInSurah': 3, 'juz': 1, 'manzil': 1, 'page': 1, 'ruku': 1, 'hizbQuarter': 1, 'sajda': False}, {'number': 4, 'text': 'Lord of the Day of Judgment!', 'numberInSurah': 4, 'juz': 1, 'manzil': 1, 'page': 1, 'ruku': 1, 'hizbQuarter': 1, 'sajda': False}, {'number': 5, 'text': 'Thee alone do we worship; and unto Thee alone do we turn for aid.

In [96]:
import json

def save_verses_to_json(verses, filename):
    verses_dict = [{"verse_id": verse_id, "verse_text": verse_text} for verse_id, verse_text in verses]
    
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(verses_dict, f, ensure_ascii=False, indent=4)
    
    print(f'Verses saved to {filename}')

# Save the Islamic verses to a JSON file
save_verses_to_json(islamic_verses, 'Quran.json')


Verses saved to Quran.json
