In [6]:
import streamlit as st
st.write(st.__version__)

2024-09-26 14:51:27.480 
  command:

    streamlit run c:\Users\makos2tamas911\Documents\dev\mm\graphrag_langchain\virtual\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]


In [1]:
import streamlit as st
from langchain_community.graphs import Neo4jGraph


NEO4J_URI = st.secrets["NEO4J_URI"]
NEO4J_USERNAME = st.secrets["NEO4J_USERNAME"]
NEO4J_PASSWORD = st.secrets["NEO4J_PASSWORD"]
NEO4J_DATABASE = st.secrets["NEO4J_DATABASE"]

graph = Neo4jGraph(
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
    database=NEO4J_DATABASE
)

In [2]:
from transformers import pipeline
import transformers
transformers.logging.set_verbosity_error()

def perform_ner(text):
    pipe = pipeline("token-classification", model="dslim/bert-base-NER")
    results = pipe(text)

    entities = []
    for entity in results:
        entities.append({
            "entity": entity["entity"],
            "word": entity["word"],
        })

    return entities

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from sentence_transformers import SentenceTransformer

def embed_verse(text):
    model = SentenceTransformer('all-MiniLM-L6-v2')
    embeddings = model.encode(text)
    return embeddings.tolist()

In [5]:
import re
import json

def preprocess_bible_text(text):
    lines = text.split('\n')
    structured_bible = []

    for line in lines:
        match = re.match(r'(\w+)\s+(\d+):(\d+)\s+(.+)', line)
        if match:
            book, chapter, verse, content = match.groups()
            chapter = int(chapter)
            verse = int(verse)
            selected_books = ["Matthew", "Mark", "Luke", "John"]
            if book in selected_books:
                verse_entities = perform_ner(content)
                verse_embeddings = embed_verse(content)

                verse_data = {
                    "book": book,
                    "chapter": chapter,
                    "verse": verse,
                    "text": content,
                    "entities": [{"entity": e["entity"], "word": e["word"]} for e in verse_entities],
                    "embeddings": verse_embeddings
                }
                structured_bible.append(verse_data) 

                # write to json
                with open('structured_bible.json', 'w') as file:
                    json.dump(structured_bible, file)
            else:
                continue
    return structured_bible

def write_to_neo4j(structured_bible):
    # Clear existing data
    graph.query("MATCH (n) DETACH DELETE n")

    for verse_data in structured_bible:
        book = verse_data['book']
        chapter = verse_data['chapter']
        verse = verse_data['verse']
        text = verse_data['text']
        embeddings = verse_data['embeddings']
        entities = verse_data['entities']

        # Create or merge Book node
        graph.query("""
            MERGE (b:Book {name: $book})
        """, {"book": book})

        # Create or merge Chapter node and link to Book
        graph.query("""
            MATCH (b:Book {name: $book})
            MERGE (c:Chapter {number: $chapter, book: $book})
            MERGE (c)-[:IS_IN_BOOK]->(b)
        """, {"book": book, "chapter": chapter})

        # Create Verse node with properties and link to Chapter
        graph.query("""
            MATCH (c:Chapter {number: $chapter, book: $book})
            CREATE (v:Verse {
                name: $verse,
                chapter: $chapter,
                book: $book,
                text: $text,
                embeddings: $embeddings,
                caption: toString($verse)
            })
            CREATE (v)-[:IS_IN_CHAPTER]->(c)
        """, {"book": book, "chapter": chapter, "verse": verse, "text": text, "embeddings": embeddings})

        # Create Entity nodes and link to Verse
        entity_params = [
            {
                'book': book,
                'chapter': chapter,
                'verse': verse,
                'word': entity['word'],
                'entity_type': entity['entity']
            }
            for entity in entities
        ]

        graph.query("""
            UNWIND $entities AS entity
            MATCH (v:Verse {name: entity.verse, chapter: entity.chapter, book: entity.book})
            MERGE (e:Entity {
                name: entity.word, 
                type: entity.entity_type,
                caption: entity.word
            })
            CREATE (v)-[:APPEARS_IN]->(e)
        """, {"entities": entity_params})

    print("Data written to Neo4j")


# Main process
def process_and_store_bible():
    # Load the Bible text
    #reading a text file
    with open(r'C:\Users\makos2tamas911\Documents\dev\mm\graphrag_langchain\knowledge_graph_generation_experiment\bible.txt', 'r', encoding='ascii', errors='ignore') as file:
        text = file.read()

    # Preprocess the Bible text
    structured_bible = preprocess_bible_text(text)

    # Write the processed data to Neo4j
    write_to_neo4j(structured_bible)

    print("Bible text has been processed and stored in Neo4j.")

# Run the main process
if __name__ == "__main__":
    process_and_store_bible()

Failed to write data to connection IPv4Address(('3.219.247.88', 7687)) (ResolvedIPv4Address(('3.219.247.88', 7687)))
Failed to read from defunct connection IPv4Address(('3.219.247.88', 7687)) (ResolvedIPv4Address(('3.219.247.88', 7687)))


ServiceUnavailable: Failed to read from defunct connection IPv4Address(('3.219.247.88', 7687)) (ResolvedIPv4Address(('3.219.247.88', 7687)))