In [2]:
from osman import Osman, OsmanConfig
import re

In [3]:
# Notebook-wide configuration, kept in one place so it is easy to retarget.
OS_HOST_URL = "http://localhost:9200"  # base URL handed to OsmanConfig
INDEX_NAME = "bible"                   # index the verses are written to
ID_KEY = "id"                          # document field passed as id_key when indexing

In [4]:
# Read the whole source text into memory as a single string.
# The encoding is pinned explicitly: without it, open() falls back to the
# platform's locale encoding, so the same file can decode differently (or
# fail) from one machine to the next.
# NOTE(review): assumes bible.txt is UTF-8 (plain ASCII also decodes fine) — confirm.
with open('bible.txt', 'r', encoding='utf-8') as file:
    bible_text = file.read()

In [5]:

# Cut the text at every "chapter:verse" marker (digits, a colon, digits).
# The pattern is a capturing group, so re.split keeps the markers in the
# result: [text before first marker, marker, text, marker, text, ...].
verse_marker = re.compile(r'(\d+:\d+)')
verses = verse_marker.split(bible_text)


In [6]:
# `verses` alternates marker/text after a leading chunk at index 0, so the
# odd positions are the "chapter:verse" markers and the even positions from
# 2 onward are the text that follows each marker.
# NOTE(review): keys are bare "chapter:verse" strings, so a marker that occurs
# more than once in the file overwrites the earlier entry. The saved output
# further down shows "1:1" holding Revelation text, which suggests earlier
# books are being lost this way — confirm and add a book component if so.
bible_dict = {}
markers = verses[1::2]
bodies = verses[2::2]
for marker, body in zip(markers, bodies):
    # Trim the whitespace/newlines left around each verse by the split.
    bible_dict[marker] = body.strip()


In [7]:
# Build the Osman client from a config that points at OS_HOST_URL.
os_config = OsmanConfig(host_url=OS_HOST_URL)
os_man = Osman(os_config)

In [8]:
# Field schema for a verse document:
#   verse_id -> keyword, id -> integer, content -> text.
verse_fields = {
    "verse_id": {"type": "keyword"},
    "id": {"type": "integer"},
    "content": {"type": "text"},
}
mapping = {"mappings": {"properties": verse_fields}}

# Shard/replica layout requested for the index.
index_layout = {
    "number_of_shards": 3,
    "number_of_replicas": 1,
}
settings = {"settings": index_layout}

# Ask for the index to be created; the call's return value is what the cell
# displays.
# NOTE(review): the saved output of this cell is a 400
# invalid_index_name_exception ("already exists as alias"), so this cell does
# not succeed against a cluster where INDEX_NAME is already taken.
os_man.create_index(name=INDEX_NAME, mapping=mapping, settings=settings)

{'error': {'root_cause': [{'type': 'invalid_index_name_exception',
    'reason': 'Invalid index name [bible], already exists as alias',
    'index': 'bible',
    'index_uuid': '_na_'}],
  'type': 'invalid_index_name_exception',
  'reason': 'Invalid index name [bible], already exists as alias',
  'index': 'bible',
  'index_uuid': '_na_'},
 'status': 400}

In [9]:
def generate_id(verse_id):
    """Turn a "chapter:verse" string into a single integer.

    The result is ``chapter * 1000 + verse``, e.g. ``"1:2"`` -> ``1002``.

    Args:
        verse_id: String holding exactly one ``:`` with an integer on each side.

    Returns:
        The combined integer identifier.

    Raises:
        ValueError: If ``verse_id`` does not split into exactly two parts on
            ``:``, or if either part is not a valid integer.
    """
    chapter_part, verse_part = verse_id.split(':')
    chapter_no = int(chapter_part)
    verse_no = int(verse_part)
    # The chapter occupies the thousands and above; the verse fills the low digits.
    return chapter_no * 1000 + verse_no

# One document per bible_dict entry, carrying the original "chapter:verse"
# marker, the verse text, and the integer id derived from the marker.
documents = []
for ref, text in bible_dict.items():
    documents.append({"verse_id": ref, "content": text, "id": generate_id(ref)})


In [10]:
# Preview only the first few documents; echoing the entire list floods the
# notebook output and bloats the saved file.
documents[:5]

[{'verse_id': '1:1',
  'content': 'The Revelation of Jesus Christ, which God gave unto him, to shew\nunto his servants things which must shortly come to pass; and he sent\nand signified it by his angel unto his servant John:',
  'id': 1001},
 {'verse_id': '1:2',
  'content': 'Who bare\nrecord of the word of God, and of the testimony of Jesus Christ, and\nof all things that he saw.',
  'id': 1002},
 {'verse_id': '1:3',
  'content': 'Blessed is he that readeth, and they that hear the words of this\nprophecy, and keep those things which are written therein: for the\ntime is at hand.',
  'id': 1003},
 {'verse_id': '1:4',
  'content': 'John to the seven churches which are in Asia: Grace be unto you,\nand peace, from him which is, and which was, and which is to come; and\nfrom the seven Spirits which are before his throne;',
  'id': 1004},
 {'verse_id': '1:5',
  'content': 'And from Jesus\nChrist, who is the faithful witness, and the first begotten of the\ndead, and the prince of the kings o

In [11]:
# Send the prepared documents to the index.
# id_key names the document field passed along as the id (ID_KEY);
# presumably it becomes the document id on the server — verify against Osman.
# refresh=True asks for the data to be available immediately; be cautious
# about using it in production.
index_request = dict(
    index_name=INDEX_NAME,
    documents=documents,
    id_key=ID_KEY,
    refresh=True,
)
response = os_man.add_data_to_index(**index_request)
