In [3]:
import os
import webbrowser
import ast
from dotenv import load_dotenv
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain.graphs import Neo4jGraph
from langchain_neo4j import GraphCypherQAChain, Neo4jGraph
from langchain_core.documents import Document
from langchain.prompts import PromptTemplate
from langchain.callbacks.base import BaseCallbackHandler
from langchain_core.prompts import ChatPromptTemplate
from langchain_anthropic import ChatAnthropic
from langchain_openai import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from pyvis.network import Network

In [4]:
# Load the .env file
load_dotenv()
# Get API key from environment variable 
api_key = os.getenv("ANTHROPIC_API_KEY")

if not api_key:
    print("Warning: ANTHROPIC_API_KEY not found in environment variables")
    print("Please add ANTHROPIC_API_KEY=your_api_key_here to your .env file")
else:
    print("Anthropic API key loaded successfully")

Anthropic API key loaded successfully


### LLM Setup

In [5]:
# Use Claude 3.5 Sonnet with increased max_tokens
llm = ChatAnthropic(
    model="claude-3-5-sonnet-20241022",
    temperature=0.1,  # Very low temperature for consistent extraction
    max_tokens=8192,  # Increased max_tokens to avoid truncation
    anthropic_api_key=api_key
)

llm_openai = ChatOpenAI(
    openai_api_base='https://models.github.ai/inference',
    openai_api_key=os.getenv("GITHUB_API_KEY"),
    model="openai/gpt-4.1",
    temperature=0.4,  
    #max_tokens=8192  # Increased max_tokens to avoid truncation
)

### Graph transformer Setup

In [None]:
# Create graph transformer with the fixed LLM
graph_transformer = LLMGraphTransformer(
    llm=llm,
    node_properties=False,  # Disable to reduce token usage
    relationship_properties=False  # Disable to reduce token usage
)

print(f"✓ Claude LLM initialized with max_tokens=8192")
print(f"✓ Graph transformer created")

### Data Input

Sample-test

In [None]:
text = """
Albert Einstein[a] (14 March 1879 – 18 April 1955) was a German-born theoretical physicist who is best known for developing the theory of relativity. Einstein also made important contributions to quantum mechanics.[1][5] His mass–energy equivalence formula E = mc2, which arises from special relativity, has been called "the world's most famous equation".[6] He received the 1921 Nobel Prize in Physics for his services to theoretical physics, and especially for his discovery of the law of the photoelectric effect.[7]

Born in the German Empire, Einstein moved to Switzerland in 1895, forsaking his German citizenship (as a subject of the Kingdom of Württemberg)[note 1] the following year. In 1897, at the age of seventeen, he enrolled in the mathematics and physics teaching diploma program at the Swiss federal polytechnic school in Zurich, graduating in 1900. He acquired Swiss citizenship a year later, which he kept for the rest of his life, and afterwards secured a permanent position at the Swiss Patent Office in Bern. In 1905, he submitted a successful PhD dissertation to the University of Zurich. In 1914, he moved to Berlin to join the Prussian Academy of Sciences and the Humboldt University of Berlin, becoming director of the Kaiser Wilhelm Institute for Physics in 1917; he also became a German citizen again, this time as a subject of the Kingdom of Prussia.[note 1] In 1933, while Einstein was visiting the United States, Adolf Hitler came to power in Germany. 
Horrified by the Nazi persecution of his fellow Jews,[8] he decided to remain in the US, and was granted American citizenship in 1940.[9] On the eve of World War II, he endorsed a letter to President Franklin D. Roosevelt alerting him to the potential German nuclear weapons program and recommending that the US begin similar research.

In 1905, sometimes described as his annus mirabilis (miracle year), he published four groundbreaking papers.[10] In them, he outlined a theory of the photoelectric effect, explained Brownian motion, introduced his special theory of relativity, and demonstrated that if the special theory is correct, mass and energy are equivalent to each other. In 1915, he proposed a general theory of relativity that extended his system of mechanics to incorporate gravitation. A cosmological paper that he published the following year laid out the implications of general relativity for the modeling of the structure and evolution of the universe as a whole.[11][12] In 1917, Einstein wrote a paper which introduced the concepts of spontaneous emission and stimulated emission, the latter of which is the core mechanism behind the laser and maser, and which contained a trove of information that would be beneficial to developments in physics later on, such as quantum electrodynamics and quantum optics.[13]

In the middle part of his career, Einstein made important contributions to statistical mechanics and quantum theory. Especially notable was his work on the quantum physics of radiation, in which light consists of particles, subsequently called photons. With physicist Satyendra Nath Bose, he laid the groundwork for Bose–Einstein statistics. For much of the last phase of his academic life, Einstein worked on two endeavors that ultimately proved unsuccessful. First, he advocated against quantum theory's introduction of fundamental randomness into science's picture of the world, objecting that God does not play dice.[14] Second, he attempted to devise a unified field theory by generalizing his geometric theory of gravitation to include electromagnetism. As a result, he became increasingly isolated from mainstream modern physics.
"""

Actual-use

In [54]:
# File uploader
uploaded_file = r'/Users/kathisnehith/Downloads/prompt_engineer_sample_book.pdf'
        # Load and split the PDF
loader = PyPDFLoader(uploaded_file)
pages = loader.load_and_split()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=2200, chunk_overlap=40)
docs = text_splitter.split_documents(pages)

lc_docs = []
for i, doc in enumerate(docs):
    # Get page number from metadata, default to 0 if not available
    page_number = doc.metadata.get('page', 0)
    
    # Create document with cleaned content and preserved metadata
    lc_docs.append(Document(
        page_content=doc.page_content.replace("\n", ""), 
        metadata={'page': page_number, 'source': uploaded_file}
    ))
    
    # Print progress and chunk info
    print(f"Chunk {i+1}/{len(docs)} processed - Page {page_number}")
    print(f"Content: {lc_docs[-1].page_content[:100]}...")
    print("-" * 50)

Chunk 1/69 processed - Page 0
Content: Prompt  EngineeringAuthor: Lee Boonstra...
--------------------------------------------------
Chunk 2/69 processed - Page 1
Content: Prompt EngineeringFebruary 20252AcknowledgementsContent contributorsMichael ShermanYuan CaoErick Arm...
--------------------------------------------------
Chunk 3/69 processed - Page 2
Content: Introduction 6Prompt engineering 7LLM output configuration 8Output length 8Sampling controls 9Temper...
--------------------------------------------------
Chunk 4/69 processed - Page 3
Content: Step-back prompting 25Chain of Thought (CoT) 29Self-consistency 32Tree of Thoughts (ToT) 36ReAct (re...
--------------------------------------------------
Chunk 5/69 processed - Page 4
Content: JSON Repair 61Working with Schemas 62Experiment together with other prompt engineers 63CoT Best prac...
--------------------------------------------------
Chunk 6/69 processed - Page 5
Content: Prompt EngineeringFebruary 20256IntroductionWhen thi

Sample-test-transfroamtion

In [154]:
documents = [Document(page_content=text)]
graph_documents =await graph_transformer.aconvert_to_graph_documents(documents)

Actual-Graphtransformation

In [47]:
graph_documents_lc = graph_transformer.convert_to_graph_documents(lc_docs)

In [16]:
print(lc_docs)



In [128]:
# nodes and relationships extracted from the second document chunk
print(f"Nodes:{graph_documents_lc[1].nodes}")
print(f"Relationships:{graph_documents_lc[1].relationships}")

Nodes:[Node(id='Michael Sherman', type='Person', properties={}), Node(id='Yuan Cao', type='Person', properties={}), Node(id='Erick Armbrust', type='Person', properties={}), Node(id='Anant Nawalgaria', type='Person', properties={}), Node(id='Antonio Gulli', type='Person', properties={}), Node(id='Simone Cammel', type='Person', properties={}), Node(id='Grace Mollison', type='Person', properties={}), Node(id='Joey Haymaker', type='Person', properties={}), Node(id='Michael Lanning', type='Person', properties={}), Node(id='Prompt Engineering', type='Document', properties={})]
Relationships:[Relationship(source=Node(id='Michael Sherman', type='Person', properties={}), target=Node(id='Prompt Engineering', type='Document', properties={}), type='CONTRIBUTOR', properties={}), Relationship(source=Node(id='Yuan Cao', type='Person', properties={}), target=Node(id='Prompt Engineering', type='Document', properties={}), type='CONTRIBUTOR', properties={}), Relationship(source=Node(id='Erick Armbrust', 

### Graph extration to Neo4jaura(DB)storage

In [6]:
graph = Neo4jGraph(url=os.getenv("NEO4J_URI"), 
                username=os.getenv("NEO4J_USERNAME"), 
                password=os.getenv("NEO4J_PASSWORD"),
                enhanced_schema=True)

In [None]:
# cypher is neo4j query language similar to SQL but for graph databases.
# Cypher query to clear the graph database
cypher = """ MATCH (n)
DETACH DELETE n;
                """
graph.query(cypher)

[]

In [None]:
# add the graph documents to the Neo4j graph
print("Adding graph documents to Neo4j...")
graph.add_graph_documents(graph_documents_lc, include_source=True)

In [5]:
# Get the schema of the graph
schema = graph.get_schema
print("Graph schema: \n", schema)


Graph schema: 
 Node properties:
- **Document**
  - `id`: STRING Example: "52852b3fe189de1d135bea41e887857e"
  - `text`: STRING Example: "Prompt  EngineeringAuthor: Lee Boonstra"
  - `page`: INTEGER Min: 0, Max: 67
  - `source`: STRING Available options: ['/Users/kathisnehith/Downloads/22365_3_Prompt Engin']
- **Person**
  - `id`: STRING Example: "Lee Boonstra"
- **Book**
  - `id`: STRING Available options: ['Prompt Engineering']
- **Topic**
  - `id`: STRING Example: "Introduction"
- **Technique**
  - `id`: STRING Example: "Step-Back Prompting"
- **Concept**
  - `id`: STRING Example: "Best Practices"
- **Practice**
  - `id`: STRING Available options: ['Provide Examples', 'Design With Simplicity', 'Be Specific About Output', 'Use Instructions Over Constraints', 'Control Max Token Length', 'Use Variables In Prompts', 'Experiment With Input Formats', 'Mix Up Classes In Few-Shot Prompting', 'Adapt To Model Updates', 'Experiment With Output Formats']
- **Profession**
  - `id`: STRING Availa

### viz nodes from Neo(DB)

In [None]:
from pyvis.network import Network
import json
from IPython.display import HTML, display
import os

def visualize_neo4j_graph_fixed(graph, output_file="neo4j_graph.html", height="600px", width="100%"):
    """
    Fixed Neo4j visualization with proper error handling
    """
    
    try:
        print("🔄 Fetching nodes from Neo4j...")
        
        # Get all nodes with their labels and properties
        nodes_query = """
        MATCH (n)
        RETURN 
            id(n) as node_id,
            labels(n) as labels,
            properties(n) as properties
        LIMIT 200
        """
        
        nodes_result = graph.query(nodes_query)
        print(f"✅ Found {len(nodes_result)} nodes")
        
        if not nodes_result:
            print("❌ No nodes found in the database")
            return None
        
        # Create PyVis network with simpler configuration
        net = Network(
            height=height, 
            width=width, 
            bgcolor="#1a1a1a", 
            font_color="white",
            directed=True
        )
        
        # Simplified physics configuration
        net.barnes_hut()
        
        # Color mapping for different node types
        color_map = {
            'Person': '#FF6B6B',
            'Concept': '#4ECDC4', 
            'Technology': '#45B7D1',
            'Organization': '#96CEB4',
            'Publication': '#FFEAA7',
            'Method': '#DDA0DD',
            'Framework': '#98D8C8',
            'Research': '#FF8C94',
            'Document': '#A8E6CF',
            'default': '#CCCCCC'
        }
        
        # Add nodes to PyVis
        added_nodes = set()
        for node in nodes_result:
            node_id = str(node['node_id'])
            labels = node['labels']
            properties = node['properties']
            
            # Get primary label (first label)
            primary_label = labels[0] if labels else 'Unknown'
            
            # Get node title (use 'id' property if available)
            title = properties.get('id', f"Node {node_id}")
            
            # Choose color based on label
            color = color_map.get(primary_label, color_map['default'])
            
            # Add node
            net.add_node(
                node_id,
                label=title[:20] + "..." if len(title) > 20 else title,  # Truncate long labels
                title=f"Type: {primary_label}\nID: {title}",
                color=color,
                size=25
            )
            added_nodes.add(node_id)
        
        print("🔄 Fetching relationships from Neo4j...")
        
        # Get relationships
        relationships_query = """
        MATCH (a)-[r]->(b)
        RETURN 
            id(a) as source_id,
            id(b) as target_id,
            type(r) as relationship_type
        LIMIT 300
        """
        
        relationships_result = graph.query(relationships_query)
        print(f"✅ Found {len(relationships_result)} relationships")
        
        # Add relationships to PyVis
        for rel in relationships_result:
            source = str(rel['source_id'])
            target = str(rel['target_id'])
            rel_type = rel['relationship_type']
            
            # Only add edge if both nodes exist
            if source in added_nodes and target in added_nodes:
                net.add_edge(
                    source, 
                    target, 
                    label=rel_type,
                    color='#666666',
                    width=2
                )
        
        print(f"🎨 Generating visualization...")
        
        # Use the safer write_html method directly
        try:
            # Generate HTML content manually if needed
            net.write_html(output_file, open_browser=False, notebook=False)
            
            print(f"✅ Visualization saved as '{output_file}'")
            print(f"📊 Graph Statistics:")
            print(f"   • Nodes: {len(nodes_result)}")
            print(f"   • Relationships: {len(relationships_result)}")
            
            # Return HTML for display
            if os.path.exists(output_file):
                return HTML(filename=output_file)
            else:
                print("❌ HTML file was not created successfully")
                return None
                
        except Exception as e:
            print(f"❌ Error generating HTML: {e}")
            return None
            
    except Exception as e:
        print(f"❌ Error in visualization: {e}")
        return None

# Create the fixed visualization
visualization = visualize_neo4j_graph_fixed(graph, "my_neo4j_graph.html")
if visualization:
    display(visualization)
else:
    print("❌ Visualization failed, trying alternative method...")

In [119]:
from pyvis.network import Network

def visualize_graph(graph_documents):

    # Create network
    net = Network(height="1200px", width="100%", directed=True,
                      notebook=False, bgcolor="#222222", font_color="white")
    
    nodes = graph_documents[0].nodes
    relationships = graph_documents[0].relationships

    # Build lookup for valid nodes
    node_dict = {node.id: node for node in nodes}
    
    # Filter out invalid edges and collect valid node IDs
    valid_edges = []
    valid_node_ids = set()
    for rel in relationships:
        if rel.source.id in node_dict and rel.target.id in node_dict:
            valid_edges.append(rel)
            valid_node_ids.update([rel.source.id, rel.target.id])


    # Track which nodes are part of any relationship
    connected_node_ids = set()
    for rel in relationships:
        connected_node_ids.add(rel.source.id)
        connected_node_ids.add(rel.target.id)

    # Add valid nodes
    for node_id in valid_node_ids:
        node = node_dict[node_id]
        try:
            net.add_node(node.id, label=node.id, title=node.type, group=node.type)
        except:
            continue  # skip if error

    # Add valid edges
    for rel in valid_edges:
        try:
            net.add_edge(rel.source.id, rel.target.id, label=rel.type.lower())
        except:
            continue  # skip if error

    # Configure physics
    net.set_options("""
            {
                "physics": {
                    "forceAtlas2Based": {
                        "gravitationalConstant": -100,
                        "centralGravity": 0.01,
                        "springLength": 200,
                        "springConstant": 0.08
                    },
                    "minVelocity": 0.75,
                    "solver": "forceAtlas2Based"
                }
            }
            """)
        
    output_file = "knowledge_graph.html"
    net.save_graph(output_file)
    print(f"Graph saved to {os.path.abspath(output_file)}")

    # Try to open in browser
    try:
        import webbrowser
        webbrowser.open(f"file://{os.path.abspath(output_file)}")
    except:
        print("Could not open browser automatically")
        
# Run the function
visualize_graph(graph_documents_lc)

Graph saved to /Users/kathisnehith/Desktop/Med-GraphRAG-project/knowledge_graph.html


WORKING-VIZUALIZATION

In [122]:
from pyvis.network import Network
import os

def visualize_neo4j_graph(graph, max_nodes=100, max_relationships=200):
    """
    Visualize Neo4j database using PyVis (similar to your existing function structure)
    """
    
    print("🔄 Fetching data from Neo4j...")
    
    # Get nodes from Neo4j
    nodes_query = f"""
    MATCH (n)
    RETURN 
        id(n) as node_id,
        labels(n) as labels,
        properties(n) as properties
    LIMIT {max_nodes}
    """
    
    # Get relationships from Neo4j
    relationships_query = f"""
    MATCH (a)-[r]->(b)
    RETURN 
        id(a) as source_id,
        id(b) as target_id,
        type(r) as relationship_type,
        properties(r) as rel_properties
    LIMIT {max_relationships}
    """
    
    try:
        nodes_result = graph.query(nodes_query)
        relationships_result = graph.query(relationships_query)
        
        print(f"✅ Retrieved {len(nodes_result)} nodes and {len(relationships_result)} relationships")
        
        if not nodes_result:
            print("❌ No nodes found in database")
            return
        
        # Create network (using your same configuration)
        net = Network(height="1200px", width="100%", directed=True,
                      notebook=False, bgcolor="#222222", font_color="white")
        
        # Build lookup for valid nodes (similar to your approach)
        node_dict = {}
        for node_data in nodes_result:
            node_id = str(node_data['node_id'])
            labels = node_data['labels']
            properties = node_data['properties']
            
            # Create a node object-like structure
            node_dict[node_id] = {
                'id': properties.get('id', f"Node_{node_id}"),
                'type': labels[0] if labels else 'Unknown',
                'neo4j_id': node_id,
                'properties': properties
            }
        
        # Filter out invalid edges and collect valid node IDs (your same logic)
        valid_edges = []
        valid_node_ids = set()
        
        for rel_data in relationships_result:
            source_id = str(rel_data['source_id'])
            target_id = str(rel_data['target_id'])
            
            if source_id in node_dict and target_id in node_dict:
                valid_edges.append({
                    'source_id': source_id,
                    'target_id': target_id,
                    'type': rel_data['relationship_type'],
                    'properties': rel_data.get('rel_properties', {})
                })
                valid_node_ids.update([source_id, target_id])
        
        print(f"📊 Valid edges: {len(valid_edges)}, Valid nodes: {len(valid_node_ids)}")
        
        # Add valid nodes (using your same approach)
        for node_id in valid_node_ids:
            node = node_dict[node_id]
            try:
                # Color mapping for different node types
                color_map = {
                    'Concept': '#4ECDC4',
                    'Person': '#FF6B6B', 
                    'Research': '#FF8C94',
                    'Technology': '#45B7D1',
                    'Organization': '#96CEB4',
                    'Publication': '#FFEAA7',
                    'Method': '#DDA0DD',
                    'Framework': '#98D8C8',
                    'Document': '#A8E6CF'
                }
                
                node_color = color_map.get(node['type'], '#CCCCCC')
                
                net.add_node(
                    node_id, 
                    label=node['id'], 
                    title=f"Type: {node['type']}\nID: {node['id']}", 
                    group=node['type'],
                    color=node_color,
                    size=25
                )
            except Exception as e:
                print(f"⚠️ Error adding node {node_id}: {e}")
                continue  # skip if error
        
        # Add valid edges (using your same approach)
        for rel in valid_edges:
            try:
                net.add_edge(
                    rel['source_id'], 
                    rel['target_id'], 
                    label=rel['type'].lower(),
                    title=f"Relationship: {rel['type']}",
                    color={'color': '#666666'},
                    width=2
                )
            except Exception as e:
                print(f"⚠️ Error adding edge: {e}")
                continue  # skip if error
        
        # Configure physics (using your same configuration)
        net.set_options("""
        {
            "physics": {
                "forceAtlas2Based": {
                    "gravitationalConstant": -100,
                    "centralGravity": 0.01,
                    "springLength": 200,
                    "springConstant": 0.08
                },
                "minVelocity": 0.75,
                "solver": "forceAtlas2Based"
            }
        }
        """)
        
        # Save graph (using your same approach)
        output_file = "neo4j_knowledge_graph.html"
        net.save_graph(output_file)
        print(f"✅ Graph saved to {os.path.abspath(output_file)}")
        
        # Try to open in browser (your same approach)
        try:
            import webbrowser
            webbrowser.open(f"file://{os.path.abspath(output_file)}")
            print("🌐 Opened in browser")
        except:
            print("⚠️ Could not open browser automatically")
            print(f"📁 Manually open: {os.path.abspath(output_file)}")
        
        # Return some stats
        return {
            'nodes_count': len(valid_node_ids),
            'relationships_count': len(valid_edges),
            'output_file': output_file
        }
        
    except Exception as e:
        print(f"❌ Error visualizing Neo4j graph: {e}")
        import traceback
        traceback.print_exc()
        return None

# Run the function with your Neo4j graph
print("🚀 Creating Neo4j visualization...")
result = visualize_neo4j_graph(graph, max_nodes=500, max_relationships=1000)

if result:
    print(f"\n🎉 Visualization completed!")
    print(f"   📊 Nodes: {result['nodes_count']}")
    print(f"   🔗 Relationships: {result['relationships_count']}")
    print(f"   📁 File: {result['output_file']}")

🚀 Creating Neo4j visualization...
🔄 Fetching data from Neo4j...




✅ Retrieved 498 nodes and 903 relationships
📊 Valid edges: 903, Valid nodes: 496
✅ Graph saved to /Users/kathisnehith/Desktop/Med-GraphRAG-project/neo4j_knowledge_graph.html
🌐 Opened in browser

🎉 Visualization completed!
   📊 Nodes: 496
   🔗 Relationships: 903
   📁 File: neo4j_knowledge_graph.html


### Query from Neo(DB)

In [33]:
# All-in-one solution tracker for graph nodes and relationships Retriever

class SimpleGraphTracker(BaseCallbackHandler):
    def __init__(self):
        self.nodes_count = 0
        self.context = ""
        self.cypher = ""
        
    def on_text(self, text: str, **kwargs):
        if "MATCH" in text or "RETURN" in text:
            self.cypher = text.strip()
        if text.startswith("[{") and "}]" in text:
            self.context = text.strip()
            try:
                self.nodes_count = len(eval(text))
            except:
                self.nodes_count = 0

In [34]:
# Create tracker and chain
tracker = SimpleGraphTracker()


custom_cypher_prompt = PromptTemplate.from_template("""
You are a Cypher expert working with a knowledge graph about prompt engineering concepts, techniques, parameters, and best practices.
with this Neo4j schema:
{schema}
Generate a Cypher query to answer the following question:
{question}

Guidelines:
- Use flexible matching with `toLower(n.id)` or `CONTAINS` instead of exact `id` matches.
- Prefer `MATCH (a)-[r]->(b)` patterns to explore relationships.
- Use `LIMIT` to avoid overly large result sets.
- If the question involves multiple concepts (e.g., temperature, top-k, top-p), try to find how they relate via shared concepts or recommendations.
- Return relevant properties like `id`, `description`, `recommendation`, or `example`.

Only return the Cypher query. Do not include explanations.
""")



# Create the GraphCypherQAChain with the tracker
chain = GraphCypherQAChain.from_llm(
    llm=llm_openai,                             # Use OpenAI LLM for question answering
    graph=graph,                                # Use the Neo4j graph
    #cypher_prompt=custom_cypher_prompt,
    verbose=True,                               # Enable verbose logging
    top_k=15,                                    # Return top 5 results
    allow_dangerous_requests=True,
    callbacks=[tracker]                         # Add the tracker to the callbacks
)

In [None]:
# Run query
# LLM model choice makes a difference in the cypher query results,
question = "what all parameters can I use to control the randomness of the model?"
result = chain.invoke({"query": question})

# Print results
print(f"\n🔍 ANALYSIS:")
print(f"📊 Retrieved Nodes: {tracker.nodes_count}")
print(f"📋 Context Data: {tracker.context}")
print(f"🔧 Cypher Query: {tracker.cypher}")
print(f"💬 Answer \n: {result['result']}")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (p:Parameter)-[:REQUIRES_VALUE]->(:Output {id:"Creative_Results"})
RETURN p.id[0m
Full Context:
[32;1m[1;3m[{'p.id': 'Temperature'}, {'p.id': 'Top_K'}, {'p.id': 'Top_P'}][0m

[1m> Finished chain.[0m

🔍 ANALYSIS:
📊 Retrieved Nodes: 3
📋 Context Data: [{'p.id': 'Temperature'}, {'p.id': 'Top_K'}, {'p.id': 'Top_P'}]
🔧 Cypher Query: MATCH (p:Parameter)-[:REQUIRES_VALUE]->(:Output {id:"Creative_Results"})
RETURN p.id
💬 Answer 
: You can use Temperature, Top_K, and Top_P to control the randomness of the model.


Test-Query

In [95]:
from langchain.callbacks.base import BaseCallbackHandler
from langchain.schema import LLMResult
import json

class GraphVisualizationCallback(BaseCallbackHandler):
    def __init__(self):
        self.retrieved_nodes = []
        self.cypher_queries = []
        self.results = []
    
    def on_text(self, text: str, **kwargs) -> None:
        # Capture Cypher queries and results
        if "Generated Cypher:" in text:
            self.cypher_queries.append(text)
        elif "Full Context:" in text:
            self.results.append(text)
    
    def get_summary(self):
        return {
            "cypher_queries": self.cypher_queries,
            "results": self.results
        }

# Create callback handler
callback_handler = GraphVisualizationCallback()

# Update your chain with the callback
chain = GraphCypherQAChain.from_llm(
    llm,
    graph=graph,
    verbose=True,  # This shows the Cypher queries
    top_k=5,  # Retrieve more nodes for better visualization
    allow_dangerous_requests=True,
    callbacks=[callback_handler]
)

# Test query with visualization
question = "What are the main concepts in prompt engineering?"
response = chain.invoke({"query": question})

print("=== RESPONSE ===")
print(response['result'])

print("\n=== RETRIEVED INFORMATION ===")
summary = callback_handler.get_summary()
for i, query in enumerate(summary['cypher_queries']):
    print(f"Cypher Query {i+1}:")
    print(query)
    print("-" * 50)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (d:Document)-[:MENTIONS]->(c:Concept)
RETURN DISTINCT c.id
ORDER BY c.id[0m
Full Context:
[32;1m[1;3m[{'c.id': 'Action Verbs'}, {'c.id': 'Age Problem'}, {'c.id': 'Answer Placement'}, {'c.id': 'Automatic Prompt Engineering'}, {'c.id': 'Background Knowledge'}][0m

[1m> Finished chain.[0m
=== RESPONSE ===
The main concepts in prompt engineering include Action Verbs, Age Problem, Answer Placement, Automatic Prompt Engineering, and Background Knowledge.

=== RETRIEVED INFORMATION ===
Cypher Query 1:
Generated Cypher:
--------------------------------------------------


In [None]:
from langchain.callbacks.base import BaseCallbackHandler
import json
import re

class EnhancedGraphVisualizationCallback(BaseCallbackHandler):
    def __init__(self):
        self.cypher_queries = []
        self.full_context = []
        self.generated_cypher = ""
        self.context_data = ""
    
    def on_text(self, text: str, **kwargs) -> None:
        # Capture the actual Cypher query (the green text)
        if text.startswith("MATCH") or text.startswith("RETURN") or "MATCH" in text:
            self.generated_cypher = text.strip()
            self.cypher_queries.append(text.strip())
        
        # Capture context data (the results from Neo4j)
        if text.startswith("[{") and "}]" in text:
            self.context_data = text.strip()
            self.full_context.append(text.strip())
    
    def get_detailed_summary(self):
        return {
            "cypher_query": self.generated_cypher,
            "context_data": self.context_data,
            "all_queries": self.cypher_queries,
            "all_contexts": self.full_context
        }
    
    def parse_retrieved_nodes(self):
        """Parse the context data to extract node information"""
        if not self.context_data:
            return []
        
        try:
            # Clean and parse the context data
            context_clean = self.context_data.strip()
            if context_clean.startswith('[') and context_clean.endswith(']'):
                data = eval(context_clean)  # Safe since we control the input
                return data
        except:
            return []
        
        return []

# Create enhanced callback handler
enhanced_callback = EnhancedGraphVisualizationCallback()

# Update chain with enhanced callback
chain = GraphCypherQAChain.from_llm(
    llm_openai,
    graph=graph,
    verbose=True,
    top_k=10,  # Get more results
    allow_dangerous_requests=True,
    callbacks=[enhanced_callback]
)

# Test query
question = "What is the top_p,k"
response = chain.invoke({"query": question})

print("=" * 60)
print("🤖 GRAPH QA ANALYSIS")
print("=" * 60)

print(f"❓ QUESTION: {question}")
print(f"💬 ANSWER: {response['result']}")

print("\n" + "=" * 60)
print("🔍 DETAILED RETRIEVAL ANALYSIS")
print("=" * 60)

# Get detailed summary
summary = enhanced_callback.get_detailed_summary()

print("🔧 Generated Cypher Query:")
print(f"```cypher\n{summary['cypher_query']}\n```")

print("\n📊 Raw Retrieved Data:")
retrieved_nodes = enhanced_callback.parse_retrieved_nodes()
print(f"Retrieved {len(retrieved_nodes)} records:")

for i, node in enumerate(retrieved_nodes):
    print(f"{i+1}. {node}")

print("\n" + "=" * 60)
print("📈 VISUALIZATION OF RETRIEVED CONCEPTS")
print("=" * 60)

# Create a simple text-based visualization
if retrieved_nodes:
    concept_ids = [list(node.values())[0] for node in retrieved_nodes]
    
    print("🎯 Retrieved Concept Network:")
    print("┌" + "─" * 50 + "┐")
    
    for i, concept in enumerate(concept_ids):
        prefix = "├─" if i < len(concept_ids) - 1 else "└─"
        print(f"│ {prefix} 📝 {concept}")
    
    print("└" + "─" * 50 + "┘")
    
    print(f"\n📊 Summary:")
    print(f"• Total concepts retrieved: {len(concept_ids)}")
    print(f"• Query complexity: {'Simple' if 'MATCH' in summary['cypher_query'] and 'JOIN' not in summary['cypher_query'] else 'Complex'}")
    print(f"• Data completeness: {'✅ Complete' if len(concept_ids) > 0 else '❌ Incomplete'}")





[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (p:Parameter) WHERE p.id IN ['Top_P', 'Top_K'] RETURN p[0m
Full Context:
[32;1m[1;3m[{'p': {'id': 'Top_K'}}, {'p': {'id': 'Top_P'}}][0m

[1m> Finished chain.[0m
🤖 GRAPH QA ANALYSIS
❓ QUESTION: What is the top_p,k
💬 ANSWER: The top_p and top_k parameters are referred to as Top_P and Top_K.

🔍 DETAILED RETRIEVAL ANALYSIS
🔧 Generated Cypher Query:
```cypher
MATCH (p:Parameter) WHERE p.id IN ['Top_P', 'Top_K'] RETURN p
```

📊 Raw Retrieved Data:
Retrieved 2 records:
1. {'p': {'id': 'Top_K'}}
2. {'p': {'id': 'Top_P'}}

📈 VISUALIZATION OF RETRIEVED CONCEPTS
🎯 Retrieved Concept Network:
┌──────────────────────────────────────────────────┐
│ ├─ 📝 {'id': 'Top_K'}
│ └─ 📝 {'id': 'Top_P'}
└──────────────────────────────────────────────────┘

📊 Summary:
• Total concepts retrieved: 2
• Query complexity: Simple
• Data completeness: ✅ Complete

🔗 RELATIONSHIP ANALYSIS


### viz-retrival of docs

In [None]:
import ast
# --- Visualization Section ---
# Parse the context data (safe for your own generated data)
print("=== DEBUG: tracker.context ===")
print(repr(tracker.context))

try:
    context_list = ast.literal_eval(tracker.context)
except Exception as e:
    print(f"Error parsing context: {e}")
    context_list = []
print("=== DEBUG: context_list ===")
print(repr(context_list))
print("=== DEBUG: context_list type and length ===")
print(type(context_list), len(context_list))
# Create a PyVis network
net = Network(height="600px", width="100%", bgcolor="#222222", font_color="white", directed=True)

# Add nodes and edges based on context
node_ids = set()
for item in context_list:
    for key, value in item.items():
        if isinstance(value, dict) and 'id' in value:
            node_id = value['id']
            node_ids.add(node_id)
            color = '#FF6B6B' if key.lower() == 'react' else '#4ECDC4'
            net.add_node(node_id, label=node_id, color=color)

# Add edges between all nodes in the same item (if more than one node per item)
for item in context_list:
    ids = [v['id'] for v in item.values() if isinstance(v, dict) and 'id' in v]
    if len(ids) > 1:
        for i in range(len(ids)):
            for j in range(i+1, len(ids)):
                net.add_edge(ids[i], ids[j], label="related", color='#AAAAAA')

# Save and display the HTML
output_file = "qa_chain_dynamic_result.html"
net.save_graph(output_file)
print(f"Visualization saved to {output_file}")

# Optionally open in browser
webbrowser.open(f"file://{os.path.abspath(output_file)}")

In [None]:
from pyvis.network import Network

# Extract node info from your context data
context = [{'react': {'id': 'React'}, 'cot': {'id': 'Chain Of Thought Prompting'}}]

# Create a PyVis network
net = Network(height="400px", width="100%", bgcolor="#222222", font_color="white", directed=True)

# Add nodes
for item in context:
    react_id = item['react']['id']
    cot_id = item['cot']['id']
    net.add_node(react_id, label=react_id, color='#FF6B6B')
    net.add_node(cot_id, label=cot_id, color='#4ECDC4')
    # Add a generic edge (since the relationship type is not specified in context)
    net.add_edge(react_id, cot_id, label="related", color='#AAAAAA')

# Save and display the HTML
output_file = "qa_chain_result.html"
net.save_graph(output_file)
print(f"Visualization saved to {output_file}")

# Optionally open in browser
import webbrowser, os
webbrowser.open(f"file://{os.path.abspath(output_file)}")