### Testing with the Bible

In [1]:
# Load the whole Bible text file into memory as one string.
with open('bible.txt', mode='r', encoding='utf-8') as bible_file:
    text = bible_file.read()

In [7]:
# import re
# import json

# def preprocess_bible_text(text):
#     # Remove the first two lines (CPDV and Catholic Public Domain Version)
#     lines = text.split('\n')[2:]
    
#     structured_text = []
#     current_book = ""
#     current_chapter = ""
    
#     for line in lines:
#         # Use regex to extract book, chapter, verse, and content
#         match = re.match(r'(\w+)\s+(\d+):(\d+)\s+(.+)', line)
#         if match:
#             book, chapter, verse, content = match.groups()
            
#             # If we've encountered a new book, add it to the structure
#             if book != current_book:
#                 structured_text.append({"type": "Book", "name": book, "chapters": []})
#                 current_book = book
            
#             # If we've encountered a new chapter, add it to the current book
#             if chapter != current_chapter:
#                 structured_text[-1]["chapters"].append({"number": int(chapter), "verses": []})
#                 current_chapter = chapter
            
#             # Ensure structured_text has chapters before accessing
#             if structured_text and structured_text[-1]["chapters"]:
#                 structured_text[-1]["chapters"][-1]["verses"].append({
#                     "number": int(verse),
#                     "text": content
#                 })
    
#     return structured_text

# def split_structured_text(structured_text):
#     chunks = []
#     current_chunk = []
#     verse_count = 0

#     for book in structured_text:
#         for chapter in book["chapters"]:
#             for verse in chapter["verses"]:
#                 current_chunk.append({
#                     "book": book["name"],
#                     "chapter": chapter["number"],
#                     "verse": verse["number"],
#                     "text": verse["text"]
#                 })
#                 chunks.append(current_chunk)
#                 current_chunk = []

#     if current_chunk:
#         chunks.append(current_chunk)

#     return chunks

# # Preprocess the Bible text
# preprocessed_text = preprocess_bible_text(text)

# #write to json
# with open('bible_structured.json', 'w') as f:
#     json.dump(preprocessed_text, f)

# #read from json
# with open('bible_structured.json', 'r') as f:
#     preprocessed_text = json.load(f)

In [8]:
import os
import re
import time
from getpass import getpass

from langchain.agents import initialize_agent, Tool, AgentType
from langchain.memory import ConversationBufferMemory
from langchain_community.graphs import Neo4jGraph
from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import SerpAPIWrapper
from langchain_community.utilities import WikipediaAPIWrapper
from langchain_core.documents import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.rate_limiters import InMemoryRateLimiter
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_groq import ChatGroq

# Connection settings. These are not secrets, so the previous values are kept
# as defaults, but an already-exported value now takes precedence.
os.environ.setdefault("NEO4J_URI", "bolt://localhost:7687")
os.environ.setdefault("NEO4J_USERNAME", "gds")

# Secrets must never be written into the notebook. They are read from the
# environment and only prompted for (without echo) when missing.
# NOTE(review): the Groq key that used to be hard-coded here has been exposed
# in the notebook source and should be revoked and replaced.
for _secret_name in ("NEO4J_PASSWORD", "GROQ_API_KEY"):
    if not os.environ.get(_secret_name):
        os.environ[_secret_name] = getpass(f"{_secret_name}: ")

# Called without arguments, so the connection details come from the NEO4J_*
# environment variables prepared above.
graph = Neo4jGraph()

# Client-side throttle attached to the chat model below.
rate_limiter = InMemoryRateLimiter(requests_per_second=1, max_bucket_size=10687)

# Groq-hosted chat model shared by the graph transformer and the agent.
llm = ChatGroq(
    model_name="llama3-groq-70b-8192-tool-use-preview",
    temperature=0.25,
    rate_limiter=rate_limiter,
)

# Node labels handed to the graph transformer.
allowed_nodes = ["Person", "Concept", "Place", "Event"]

# Relationship types handed to the graph transformer.
allowed_relationships = [
    "MENTIONS",
    "SPEAKS_TO",
    "IS_LOCATED_IN",
    "HAPPENS_WITH",
    "HAPPENS_AT",
]

def create_custom_prompt(verse):
    """Build the extraction prompt with `verse` embedded in its text.

    The passage is baked into the template string itself, so the returned
    prompt carries no input variables of its own.

    Args:
        verse: Bible passage to embed in the prompt; converted with ``str()``.

    Returns:
        Whatever ``ChatPromptTemplate.from_template`` returns for the
        assembled template text.
    """
    # from_template parses "{name}" as a placeholder. Double any literal braces
    # in the passage so they stay plain text instead of turning into template
    # variables (or a parse error).
    escaped_verse = str(verse).replace("{", "{{").replace("}", "}}")
    return ChatPromptTemplate.from_template(f"""
You are a biblical scholar tasked with extracting detailed information from the Bible to create a comprehensive knowledge graph.
Given the following structured Bible text, please identify and extract the following elements:

1. People mentioned
2. Places mentioned
3. Events described
4. Key concepts or themes
5. Important artifacts or objects
6. Ethnic Groups or collective entities
8. Relationships between these elements

Structured Bible text:
{escaped_verse}

For each element, provide the following information:
- Node type (Person, Place, Event, Concept, Book, Chapter, Verse, Artifact, Ethnic Group)
- Specific subtype if applicable
- Relationships to other nodes, using the expanded relationship types
""")

# Graph extractor built on the shared chat model, configured with the label and
# relationship lists declared earlier in this cell.
llm_transformer = LLMGraphTransformer(
    llm=llm,
    # Schema constraints.
    allowed_nodes=allowed_nodes,
    allowed_relationships=allowed_relationships,
    strict_mode=True,
    # Ask for properties on both nodes and relationships.
    node_properties=True,
    relationship_properties=True,
    # Keep the model's tool/function-calling path enabled.
    ignore_tool_usage=False,
)

def preprocess_bible_text(text):
    """Parse flat "Book chapter:verse text" lines into nested book/chapter/verse dicts.

    The first two lines of `text` are skipped, and every remaining line that
    does not match the reference pattern is ignored.

    Args:
        text: Full text with one verse per line.

    Returns:
        A list of book dicts in order of appearance, each shaped like
        ``{"type": "Book", "name": str, "chapters": [{"number": int,
        "verses": [{"number": int, "text": str}]}]}``.
    """
    # NOTE(review): the book part is a single \w+ token anchored at the start
    # of the line, so a line such as "1 Samuel 1:1 ..." does not match and is
    # skipped -- confirm how bible.txt spells multi-word book names.
    verse_pattern = re.compile(r'(\w+)\s+(\d+):(\d+)\s+(.+)')

    structured_text = []
    current_book = None
    current_chapter = None

    for line in text.split('\n')[2:]:
        match = verse_pattern.match(line)
        if match is None:
            continue
        book, chapter, verse, content = match.groups()

        if book != current_book:
            structured_text.append({"type": "Book", "name": book, "chapters": []})
            current_book = book
            # A new book always starts a new chapter, even when its first
            # chapter number equals the previous book's last one (for example
            # a one-chapter book followed by chapter 1 of the next book).
            # Without this reset those verses were silently dropped.
            current_chapter = None

        if chapter != current_chapter:
            structured_text[-1]["chapters"].append({"number": int(chapter), "verses": []})
            current_chapter = chapter

        structured_text[-1]["chapters"][-1]["verses"].append({
            "number": int(verse),
            "text": content
        })

    return structured_text

# Parse the raw text into the nested book -> chapter -> verse structure.
preprocessed_text = preprocess_bible_text(text)

# Keep only the four Gospels.
selected_books = ["Matthew", "Mark", "Luke", "John"]
filtered_books = [book for book in preprocessed_text if book["name"] in selected_books]

# One entry per chapter: the list of that chapter's verse texts, in order.
# (Despite the name, each element of `verses` is a whole chapter.)
verses = [
    [verse_entry["text"] for verse_entry in chapter["verses"]]
    for book in filtered_books
    for chapter in book["chapters"]
]

# Join every chapter's verses into a single space-separated passage.
paste_verses = [' '.join(chapter_verses) for chapter_verses in verses]

# Wikipedia lookup configured with top_k_results=1 and a 100-character content cap.
api_wrapper = WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=100)
wikipedia_tool = WikipediaQueryRun(api_wrapper=api_wrapper)

# Initialize SerpAPI.
# The key is read from the environment (or prompted for without echo) instead
# of being stored in the notebook.
# NOTE(review): the key that used to be hard-coded here has been exposed in
# the notebook source and should be revoked and replaced.
_serpapi_key = os.environ.get("SERPAPI_API_KEY") or getpass("SERPAPI_API_KEY: ")
serpapi = SerpAPIWrapper(serpapi_api_key=_serpapi_key)

# Tools offered to the conversational agent.
tools = [
    Tool(
        name="Search",
        func=serpapi.run,
        description="Useful for when you need to answer questions about current events or the world. Ask specific questions."
    ),
    Tool(
        name="Wikipedia",
        func=wikipedia_tool.run,
        description="Useful for when you need detailed information about a topic. Ask specific questions."
    )
]

# Conversation history exposed to the agent under the "chat_history" key.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

# NOTE(review): `agent` is built here but the ingestion loop later in this cell
# does not reference it -- confirm whether it is still needed.
agent = initialize_agent(
    tools,
    llm,
    agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION,
    verbose=True,
    memory=memory,
    handle_parsing_errors=True
)

def _cypher_name(name):
    """Backtick-quote `name` so it can be spliced into Cypher as a label or relationship type."""
    # Labels and relationship types cannot be passed as query parameters, so
    # they are interpolated into the query text. Quoting them (and doubling any
    # embedded backtick) keeps model-generated names with spaces or special
    # characters from breaking or altering the query.
    return "`" + str(name).replace("`", "``") + "`"


# Each item of paste_verses is the joined text of one whole chapter.
for verse in paste_verses:
    # Extra one-second pause between chapters, on top of the model's rate limiter.
    time.sleep(1)

    # NOTE(review): assigning `.prompt` after the transformer has been
    # constructed may not change the chain it already built -- confirm against
    # LLMGraphTransformer; its constructor takes a `prompt` argument.
    custom_prompt = create_custom_prompt(verse)
    llm_transformer.prompt = custom_prompt

    # NOTE(review): extraction runs outside the try block below, so an
    # exception raised here stops the whole loop -- confirm that is intended.
    graph_docs = llm_transformer.convert_to_graph_documents([Document(page_content=verse)])

    try:
        for graph_doc in graph_docs:
            # Upsert every node, keyed by label + id, then merge in its properties.
            for node in graph_doc.nodes:
                graph.query(
                    f"MERGE (n:{_cypher_name(node.type)} {{id: $id}}) "
                    "SET n += $properties",
                    {"id": node.id, "properties": node.properties or {}}
                )

            # Upsert every relationship between already-stored endpoints,
            # which are looked up by id only (no label filter).
            for rel in graph_doc.relationships:
                graph.query(
                    "MATCH (a {id: $source_id}), (b {id: $target_id}) "
                    f"MERGE (a)-[r:{_cypher_name(rel.type)}]->(b) "
                    "SET r += $properties",
                    {
                        "source_id": rel.source.id,
                        "target_id": rel.target.id,
                        "properties": rel.properties or {}
                    }
                )
            print(f"Added graph document to Neo4j: {len(graph_doc.nodes)} nodes, {len(graph_doc.relationships)} relationships")

    except Exception as e:
        # Best effort: report the failure and move on to the next chapter.
        print(f"An error occurred: {e}")

  rate_limiter = InMemoryRateLimiter(
  agent = initialize_agent(


Added graph document to Neo4j: 42 nodes, 0 relationships
Error processing document page_content='And so, when Jesus had been born in Bethlehem of Judah, in the days of king Herod, behold, Magi from the east arrived in Jerusalem, saying: “Where is he who was born king of the Jews? For we have seen his star in the east, and we have come to adore him.” Now king Herod, hearing this, was disturbed, and all Jerusalem with him. And gathering together all the leaders of the priests, and the scribes of the people, he consulted with them as to where the Christ would be born. And they said to him: “In Bethlehem of Judea. For so it has been written by the prophet: ‘And you, Bethlehem, the land of Judah, are by no means least among the leaders of Judah. For from you shall go forth the ruler who shall guide my people Israel.’ ” Then Herod, quietly calling the Magi, diligently learned from them the time when the star appeared to them. And sending them into Bethlehem, he said: “Go and diligently ask q

: 