<a href="https://colab.research.google.com/github/mahalingamagesthian/hybrid-search-demo/blob/main/CreateFlowChart.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install the `graphviz` Python package imported by the cells below.
# NOTE(review): this installs only the Python bindings; rendering presumably
# also needs the Graphviz `dot` executable on PATH — confirm the runtime has it.
pip install graphviz



In [2]:
import graphviz

# Build the ingestion flowchart: a directed graph that is rendered to PNG.
dot_ingest = graphviz.Digraph(
    comment='Ingestion Flowchart for Couchbase Vector Store',
    format='png',
)
# rankdir='TB' stacks the ranks top to bottom; splines='ortho' draws edges as
# axis-aligned segments.
dot_ingest.attr(rankdir='TB', size='10,8', overlap='false', splines='ortho')

# Nodes, declared in flow order (declaration order influences the layout).
dot_ingest.node('A', 'Start', shape='Mdiamond', style='filled', fillcolor='lightgreen')
dot_ingest.node('B', 'Load Environment Variables', shape='box')
dot_ingest.node('C', 'Connect to Couchbase\n(Cluster, Bucket, Scope, Collection)', shape='box')
dot_ingest.node('D', 'Load Documents\n(e.g., ArXivLoader)', shape='box')
dot_ingest.node('E', 'Split Documents\ninto Chunks', shape='box')
dot_ingest.node('F', 'Initialize Embedding Model\n(e.g., OpenAIEmbeddings)', shape='box')
dot_ingest.node('G', 'Iterate through Chunks', shape='ellipse')
dot_ingest.node('H', 'Generate Vector Embedding\nfor Chunk Text', shape='box')
dot_ingest.node('I', 'Store Chunk & Embedding\nin Couchbase Collection', shape='box')
dot_ingest.node('J', 'Ingestion Complete!', shape='box', style='filled', fillcolor='lightblue')
dot_ingest.node('K', 'Close Couchbase Connection', shape='box')
dot_ingest.node('L', 'End', shape='Msquare', style='filled', fillcolor='lightcoral')

# Linear setup steps: Start -> ... -> Iterate through Chunks.
dot_ingest.edges([
    ('A', 'B'),
    ('B', 'C'),
    ('C', 'D'),
    ('D', 'E'),
    ('E', 'F'),
    ('F', 'G'),
])

# Per-chunk loop. Orthogonal edge routing does not support ordinary edge
# labels in the dot engine, so the captions are attached as external labels
# (xlabel), which are placed after the edges have been routed.
dot_ingest.edge('G', 'H', xlabel='For each chunk')
dot_ingest.edge('H', 'I')
dot_ingest.edge('I', 'G', xlabel='More chunks?')  # Loop back for more chunks
dot_ingest.edge('G', 'J', xlabel='All chunks processed')  # Exit loop

# Wrap-up steps: Ingestion Complete -> Close Connection -> End.
dot_ingest.edges([
    ('J', 'K'),
    ('K', 'L'),
])

# Render the graph to a file. render() writes the DOT source to
# 'ingest_flowchart' and returns the path of the rendered image; reporting
# that return value keeps the message correct if the name or format changes.
# view=True would open the rendered file automatically.
ingest_output_path = dot_ingest.render('ingest_flowchart', view=False)
print(f"Ingest flowchart saved as {ingest_output_path}")



Ingest flowchart saved as ingest_flowchart.png


In [4]:
import graphviz

# Build the hybrid-search flowchart: a directed graph that is rendered to PNG.
dot_search = graphviz.Digraph(
    comment='Hybrid Search Flowchart for Couchbase Vector Store',
    format='png',
)
# rankdir='TB' stacks the ranks top to bottom; splines='ortho' draws edges as
# axis-aligned segments.
dot_search.attr(rankdir='TB', size='10,8', overlap='false', splines='ortho')

# Setup nodes that lead up to the user query.
dot_search.node('A', 'Start', shape='Mdiamond', style='filled', fillcolor='lightgreen')
dot_search.node('B', 'Load Environment Variables', shape='box')
dot_search.node('C', 'Connect to Couchbase\n(Cluster, Bucket, Scope, Collection)', shape='box')
dot_search.node('D', 'Initialize Embedding Model\n(e.g., OpenAIEmbeddings)', shape='box')
dot_search.node('E', 'Initialize CouchbaseVectorStore\nfor Search', shape='box')
dot_search.node('F', 'Receive User Query', shape='parallelogram', style='filled', fillcolor='lightblue')

# Cluster holding the two parallel search branches. The 'cluster' name prefix
# is what makes Graphviz draw the subgraph as a labelled, filled box.
with dot_search.subgraph(name='cluster_hybrid_search') as c:
    c.attr(label='Hybrid Search Process', style='filled', fillcolor='lightgray')
    # No rankdir is set here: Graphviz honours rankdir only on the root graph,
    # so the cluster always follows the top-to-bottom direction set above.

    # Vector (semantic) branch.
    c.node('G1', 'Embed User Query', shape='box')
    c.node('H1', 'Perform Vector Similarity Search\n(using Vector Index)', shape='box')
    c.node('I1', 'Get Top K Semantic Results', shape='cylinder')

    # Keyword (full-text) branch.
    c.node('G2', 'Use User Query Text', shape='box')
    c.node('H2', 'Perform Keyword Full-Text Search\n(using FTS Index)', shape='box')
    c.node('I2', 'Get Top K Keyword Results', shape='cylinder')

    c.edges([
        ('G1', 'H1'),
        ('H1', 'I1'),
        ('G2', 'H2'),
        ('H2', 'I2'),
    ])

# Nodes that follow the hybrid search.
dot_search.node('J', 'Combine/Present Results', shape='box', style='filled', fillcolor='lightblue')
dot_search.node('K', 'Close Couchbase Connection', shape='box')
dot_search.node('L', 'End', shape='Msquare', style='filled', fillcolor='lightcoral')

# Linear setup steps: Start -> ... -> Receive User Query.
dot_search.edges([
    ('A', 'B'),
    ('B', 'C'),
    ('C', 'D'),
    ('D', 'E'),
    ('E', 'F'),
])

# Fan the query out to both branches. Orthogonal edge routing does not support
# ordinary edge labels in the dot engine, so the captions are attached as
# external labels (xlabel), which are placed after the edges have been routed.
dot_search.edge('F', 'G1', xlabel='For vector search')
dot_search.edge('F', 'G2', xlabel='For keyword search')

# Join both result sets at the combining step, then finish.
dot_search.edges([
    ('I1', 'J'),
    ('I2', 'J'),
    ('J', 'K'),
    ('K', 'L'),
])

# Render the graph to a file. render() writes the DOT source to
# 'hybrid_search_flowchart' and returns the path of the rendered image;
# reporting that return value keeps the message correct if the name or format
# changes. view=True would open the rendered file automatically.
search_output_path = dot_search.render('hybrid_search_flowchart', view=False)
print(f"Hybrid search flowchart saved as {search_output_path}")



Hybrid search flowchart saved as hybrid_search_flowchart.png
