In [26]:
import kuzu;
db = kuzu.Database("db")
conn = kuzu.Connection(db)
conn.execute("INSTALL sqlite")
 
conn.execute("LOAD sqlite")
conn.execute("ATTACH './raw_data/ayah.sqlite' as ayah (dbtype sqlite)")

<kuzu.query_result.QueryResult at 0x169e12110>

In [27]:
response = conn.execute("LOAD FROM ayah.verses RETURN *")
print(response.get_as_df())

        id  surah_number  ayah_number verse_key  \
0     2049            17           20     17:20   
1     2050            17           21     17:21   
2     2051            17           22     17:22   
3     2052            17           23     17:23   
4     2053            17           24     17:24   
...    ...           ...          ...       ...   
6231  6232           114            2     114:2   
6232  6233           114            3     114:3   
6233  6234           114            4     114:4   
6234  6235           114            5     114:5   
6235  6236           114            6     114:6   

                                                   text  
0     كُلّٗا نُّمِدُّ هَٰٓؤُلَآءِ وَهَٰٓؤُلَآءِ مِ...  
1     ٱنظُرۡ كَيۡفَ فَضَّلۡنَا بَعۡضَهُمۡ عَلَىٰ بَع...  
2     لَّا تَجۡعَلۡ مَعَ ٱللَّهِ إِلَٰهًا ءَاخَرَ فَ...  
3     ۞ وَقَضَىٰ رَبُّكَ أَلَّا تَعۡبُدُوٓاْ إِلَّآ...  
4     وَٱخۡفِضۡ لَهُمَا جَنَاحَ ٱلذُّلِّ مِنَ ٱلرَّح...  
...                                    

In [28]:
conn.execute("""
CREATE NODE TABLE Verse (
    id INT64,
    surah_number INT64,
    ayah_number INT64,
    verse_key STRING PRIMARY KEY,
    text STRING)
""")

RuntimeError: Binder exception: Verse already exists in catalog.

In [None]:
conn.execute("COPY Verse from ayah.verses")

<kuzu.query_result.QueryResult at 0x127501a50>

In [None]:
result = conn.execute("MATCH (v:Verse) RETURN v.*")
print(result.get_as_df())

      v.id  v.surah_number  v.ayah_number v.verse_key  \
0     6145              99              7        99:7   
1     6146              99              8        99:8   
2     6147             100              1       100:1   
3     6148             100              2       100:2   
4     6149             100              3       100:3   
...    ...             ...            ...         ...   
6231  4092              39             34       39:34   
6232  4093              39             35       39:35   
6233  4094              39             36       39:36   
6234  4095              39             37       39:37   
6235  4096              39             38       39:38   

                                                 v.text  
0     فَمَن يَعۡمَلۡ مِثۡقَالَ ذَرَّةٍ خَيۡرٗا يَرَه...  
1     وَمَن يَعۡمَلۡ مِثۡقَالَ ذَرَّةٖ شَرّٗا يَرَهُۥ ٨  
2                             وَٱلۡعَٰدِيَٰتِ ضَبۡحٗا ١  
3                             فَٱلۡمُورِيَٰتِ قَدۡحٗا ٢  
4                        

In [33]:
conn.execute("ATTACH 'raw_data/topics.sqlite' AS topics (dbtype sqlite)")

<kuzu.query_result.QueryResult at 0x104622f90>

In [None]:
conn.execute("""
CREATE NODE TABLE Topic (
    topic_id INT64 PRIMARY KEY,
    name STRING,
    arabic_name STRING,
    parent_id INT64,
    thematic_parent_id INT64,
    ontology_parent_id INT64,
    description STRING,
    wiki_link STRING,
    thematic INT64,
    ontology INT64,
    ayahs STRING,
    related_topics STRING
)
""")

conn.execute("""
CREATE REL TABLE HAS_TOPIC (
    FROM Verse TO Topic
)
""")

conn.execute("""
CREATE REL TABLE PARENT_TOPIC (
    FROM Topic TO Topic
)
""")

<kuzu.query_result.QueryResult at 0x169ee23d0>

In [None]:
topics_response = conn.execute("LOAD FROM topics.topics RETURN *;")
print(topics_response.get_as_df())

      topic_id                                name   arabic_name  parent_id  \
0            1                               Allah          الله        NaN   
1            2                 Day of Resurrection   يوم القيامة        NaN   
2            3                               Heart           قلب        NaN   
3            4                            Last Day  اليوم الآخر        NaN   
4            5                               Earth           أرض        NaN   
...        ...                                 ...           ...        ...   
2507      2508                     false testimony           NaN        NaN   
2508      2509                               magic           NaN        NaN   
2509      2510                Doors to forgiveness           NaN        NaN   
2510      2511                           wide open           NaN        NaN   
2511      2512  evil deeds changed into good deeds           NaN        NaN   

      thematic_parent_id  ontology_parent_id  \
0  

In [None]:
# conn.execute("DROP TABLE HAS_TOPIC");
# conn.execute("DROP TABLE Topic");

# conn.execute("""
# CREATE REL TABLE Transfer(
#     FROM Verse TO Verse,
#     amount INT
# )
# """)

# conn.execute("DROP TABLE Transfer")

<kuzu.query_result.QueryResult at 0x169e3c210>

In [35]:
conn.execute("COPY Topic FROM topics.topics")



RuntimeError: Copy exception: Found duplicated primary key value 2255, which violates the uniqueness constraint of the primary key column.

In [31]:
parent_result = conn.execute("""
MATCH (child:Topic), (parent:Topic)
WHERE child.parent_id = parent.topic_id
CREATE (child)-[:PARENT_TOPIC]->(parent)
RETURN count(*) AS parent_relationships
""")
print(f"Created {parent_result.get_as_df().iloc[0]['parent_relationships']} regular parent relationships.")


Created 732 regular parent relationships.


In [37]:
conn.execute("MATCH ()-[p:PARENT_TOPIC]->() DELETE p")

<kuzu.query_result.QueryResult at 0x1697fb5d0>

## Step 2: Create Topic-Topic Relationships

Now, let's create parent-child relationships between topics based on the parent_id fields.

In [None]:
# Create parent-child relationships based on parent_id
parent_result = conn.execute("""
MATCH (child:Topic), (parent:Topic)
WHERE child.parent_id = parent.topic_id
CREATE (child)-[:PARENT_TOPIC]->(parent)
RETURN count(*) AS parent_relationships
""")
print(f"Created {parent_result.get_as_df().iloc[0]['parent_relationships']} regular parent relationships.")

# Create thematic parent relationships
thematic_result = conn.execute("""
MATCH (child:Topic), (parent:Topic)
WHERE child.thematic_parent_id = parent.topic_id
CREATE (child)-[:PARENT_TOPIC {type: 'thematic'}]->(parent)
RETURN count(*) AS thematic_parent_relationships
""")
print(f"Created {thematic_result.get_as_df().iloc[0]['thematic_parent_relationships']} thematic parent relationships.")

# Create ontology parent relationships
ontology_result = conn.execute("""
MATCH (child:Topic), (parent:Topic)
WHERE child.ontology_parent_id = parent.topic_id
CREATE (child)-[:PARENT_TOPIC {type: 'ontology'}]->(parent)
RETURN count(*) AS ontology_parent_relationships
""")
print(f"Created {ontology_result.get_as_df().iloc[0]['ontology_parent_relationships']} ontology parent relationships.")

# Count all parent-child relationships
total_result = conn.execute("MATCH ()-[r:PARENT_TOPIC]->() RETURN count(r) AS total_relationships")
print(f"Total: {total_result.get_as_df().iloc[0]['total_relationships']} parent-child relationships between topics.")

## Step 3: Create Verse-Topic Relationships

Now, let's parse the ayahs field and create relationships between verses and topics.

In [None]:
import pandas as pd
import time

# Get all topics with their ayahs field
topics_df = conn.execute("MATCH (t:Topic) RETURN t.topic_id, t.name, t.ayahs").get_as_df()

# Initialize counter for total relationships
total_relationships = 0
start_time = time.time()

# More efficient approach: process topics in batches
batch_size = 50  # Process 50 topics at a time
num_topics = len(topics_df)
num_batches = (num_topics + batch_size - 1) // batch_size  # Ceiling division

print(f"Processing {num_topics} topics in {num_batches} batches...")

for batch_idx in range(num_batches):
    start_idx = batch_idx * batch_size
    end_idx = min(start_idx + batch_size, num_topics)
    batch = topics_df.iloc[start_idx:end_idx]
    
    print(f"Processing batch {batch_idx + 1}/{num_batches} (topics {start_idx}-{end_idx-1})...")
    batch_start_time = time.time()
    
    for _, row in batch.iterrows():
        topic_id = row['t.topic_id']
        topic_name = row['t.name']
        ayahs_str = row['t.ayahs']
        
        # Skip if ayahs is empty or NaN
        if pd.isna(ayahs_str) or not ayahs_str.strip():
            continue
        
        # Split the ayahs string by comma and clean up whitespace
        verse_keys = [key.strip() for key in ayahs_str.split(',') if key.strip()]
        
        if not verse_keys:  # Skip if no valid verse keys
            continue
            
        # Create relationships for all verse keys in one query (more efficient)
        verse_keys_str = str(verse_keys).replace("'", "\"")
        
        query = f"""
        MATCH (t:Topic), (v:Verse)
        WHERE t.topic_id = {topic_id} AND v.verse_key IN {verse_keys_str}
        CREATE (v)-[:HAS_TOPIC]->(t)
        RETURN count(*) AS relationships_created
        """
        
        result = conn.execute(query)
        relationships_created = result.get_as_df().iloc[0]['relationships_created']
        total_relationships += relationships_created
    
    batch_time = time.time() - batch_start_time
    print(f"  Batch completed in {batch_time:.2f} seconds")

total_time = time.time() - start_time
print(f"Created {total_relationships} verse-topic relationships in {total_time:.2f} seconds.")

## Step 4: Verify the Graph

Let's verify that our graph has been properly constructed by running some example queries.

In [None]:
# Count the number of nodes and relationships
print("Graph Statistics:")
verse_count = conn.execute("MATCH (v:Verse) RETURN count(v) AS count").get_as_df().iloc[0]['count']
topic_count = conn.execute("MATCH (t:Topic) RETURN count(t) AS count").get_as_df().iloc[0]['count']
has_topic_count = conn.execute("MATCH ()-[r:HAS_TOPIC]->() RETURN count(r) AS count").get_as_df().iloc[0]['count']
parent_topic_count = conn.execute("MATCH ()-[r:PARENT_TOPIC]->() RETURN count(r) AS count").get_as_df().iloc[0]['count']

print(f"Verses: {verse_count}")
print(f"Topics: {topic_count}")
print(f"HAS_TOPIC relationships: {has_topic_count}")
print(f"PARENT_TOPIC relationships: {parent_topic_count}")

# Example query 1: Find verses related to a specific topic
print("
Example 1: Verses about 'Patience'")
result = conn.execute("""
MATCH (v:Verse)-[:HAS_TOPIC]->(t:Topic)
WHERE t.name = 'Patience'
RETURN v.verse_key, v.text
LIMIT 5
""")
print(result.get_as_df())

# Example query 2: Find topics related to a specific verse
print("
Example 2: Topics for verse 2:255 (Ayatul Kursi)")
result = conn.execute("""
MATCH (v:Verse)-[:HAS_TOPIC]->(t:Topic)
WHERE v.verse_key = '2:255'
RETURN t.name, t.description
""")
print(result.get_as_df())

# Example query 3: Find subtopics of a main topic
print("
Example 3: Subtopics of 'Allah'")
result = conn.execute("""
MATCH (child:Topic)-[:PARENT_TOPIC]->(parent:Topic)
WHERE parent.name = 'Allah'
RETURN child.name
LIMIT 10
""")


print(result.get_as_df())
