In [22]:
import requests
from bs4 import BeautifulSoup
import pinecone
from sentence_transformers import SentenceTransformer
from dotenv import load_dotenv
import os

load_dotenv() 

def fetch_text(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    return soup.get_text()

def chunk_text(text):
    return text.split('\n')

In [28]:
def get_bible_verses():
    bible_url = 'https://openbible.com/textfiles/kjv.txt'
    bible_text = fetch_text(bible_url)
    print(f'Bible text length: {len(bible_text)} characters')
    bible_chunks = chunk_text(bible_text)
    print(f'Number of Bible chunks: {len(bible_chunks)}')
    print(f'Sample Bible chunks:')
    for chunk in bible_chunks[2:5]:
        print(f'- {chunk}')
    return bible_chunks[2:]

bible_chunks = get_bible_verses()

Bible text length: 4606957 characters
Number of Bible chunks: 31105
Sample Bible chunks:
- Genesis 1:1	In the beginning God created the heaven and the earth.
- Genesis 1:2	And the earth was without form, and void; and darkness [was] upon the face of the deep. And the Spirit of God moved upon the face of the waters.
- Genesis 1:3	And God said, Let there be light: and there was light.


In [16]:
def get_buddhist():
    buddhist_url = 'https://www.sacred-texts.com/bud/sbe10/sbe1000.htm'
    buddhist_text = fetch_text(buddhist_url)
    print(f'Buddhist text length: {len(buddhist_text)} characters')
    buddhist_chunks = chunk_text(buddhist_text)
    print(f'Number of Buddhist chunks: {len(buddhist_chunks)}')
    print(f'Sample Buddhist chunk: {buddhist_chunks[0]}')
    return buddhist_text, buddhist_chunks
    

In [None]:
def get_muslim():
    muslim_url = 'https://www.sacred-texts.com/isl/quran.htm'
    muslim_text = fetch_text(muslim_url)
    print(f'Muslim text length: {len(muslim_text)} characters')
    muslim_chunks = chunk_text(muslim_text)
    print(f'Number of Muslim chunks: {len(muslim_chunks)}')
    print(f'Sample Muslim chunk: {muslim_chunks[0]}')
    return muslim_text, muslim_chunks

In [11]:

# Get Pinecone API key from environment variable
pinecone_api_key = os.environ.get('PINECONE_API_KEY')

if not pinecone_api_key:
    raise ValueError('PINECONE_API_KEY environment variable is not set')

print(f'Pinecone API key: {pinecone_api_key[:5]}...{pinecone_api_key[-5:]}')


# Initialize Pinecone (replace with your actual API key and environment)
pinecone.init(api_key='YOUR_PINECONE_API_KEY', environment='YOUR_PINECONE_ENVIRONMENT')
index_name = 'religious_texts'

# Create index if it doesn't exist
if index_name not in pinecone.list_indexes():
    pinecone.create_index(index_name, dimension=384)  # Adjust dimension based on your embedding model

# Connect to the index
index = pinecone.Index(index_name)

# Initialize the embedding model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Prepare and store data
for source, text in [('Bible', bible_text), ('Buddhist', buddhist_text), ('Muslim', muslim_text)]:
    chunks = chunk_text(text)
    for i, chunk in enumerate(chunks):
        # Create embedding
        embedding = model.encode(chunk).tolist()

        # Prepare metadata
        metadata = {
            'source': source,
            'chunk_id': i
        }

        # Upsert to Pinecone
        index.upsert(vectors=[(f'{source}_{i}', embedding, metadata)])

print('Sacred texts have been retrieved, chunked, and stored in the Pinecone database.')



Pinecone API key: 88884...7aa7d


NameError: name 'pinecone' is not defined