# KG-RAG: Knowledge Graph + Vector Store + LLM (Autonomous RAG)
This notebook shows how to:
- Extract entity triples from text using an LLM
- Store them in a knowledge graph (Neo4j or in-memory)
- Store document chunks in a vector database (FAISS)
- Query both sources using a user prompt
- Feed the graph facts and the retrieved documents to an LLM to generate contextual answers

In [1]:
# Install dependencies (if running in Colab)
!pip install langchain openai faiss-cpu tiktoken neo4j networkx

Collecting langchain
  Downloading langchain-0.3.21-py3-none-any.whl.metadata (7.8 kB)
Collecting openai
  Downloading openai-1.69.0-py3-none-any.whl.metadata (25 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp313-cp313-win_amd64.whl.metadata (4.5 kB)
Collecting tiktoken
  Downloading tiktoken-0.9.0-cp313-cp313-win_amd64.whl.metadata (6.8 kB)
Collecting neo4j
  Downloading neo4j-5.28.1-py3-none-any.whl.metadata (5.9 kB)
Collecting networkx
  Downloading networkx-3.4.2-py3-none-any.whl.metadata (6.3 kB)
Collecting langchain-core<1.0.0,>=0.3.45 (from langchain)
  Downloading langchain_core-0.3.49-py3-none-any.whl.metadata (5.9 kB)
Collecting langchain-text-splitters<1.0.0,>=0.3.7 (from langchain)
  Downloading langchain_text_splitters-0.3.7-py3-none-any.whl.metadata (1.9 kB)
Collecting langsmith<0.4,>=0.1.17 (from langchain)
  Downloading langsmith-0.3.19-py3-none-any.whl.metadata (15 kB)
Collecting pydantic<3.0.0,>=2.7.4 (from langchain)
  Downloading pydantic-2.11.0-py3-none

In [None]:
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.schema import Document
from langchain.chat_models import ChatOpenAI
from langchain.chains import GraphCypherQAChain
from langchain.graphs import Neo4jGraph
import os
from langchain_community.llms import Ollama

# Bind `llm` to the Ollama-served "phi3" model.
# NOTE(review): the Step 2 cell reassigns `llm` to ChatOpenAI(temperature=0), so in a
# top-to-bottom run this instance is not the one used for triple extraction —
# confirm which backend is intended.
llm = Ollama(model="phi3")

## Step 1: Simulate Documents

In [None]:
# Toy corpus: two hand-written "pages" that reference each other. Every record
# carries a title, the page URL, and the body text used in the later steps.
documents = [
    dict(
        title="Spring Fling 10K Overview",
        url="https://runclub.com/spring-fling",
        content="The Spring Fling 10K is led by Alan and takes place in Prospect Park.",
    ),
    dict(
        title="Alan's Profile",
        url="https://runclub.com/alan",
        content=(
            "Alan is a runner and site leader for Spring Fling 10K. "
            "See details here: https://runclub.com/spring-fling"
        ),
    ),
]

## Step 2: Extract Triples from Documents Using LLM

In [None]:
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# Prompt asking the model to turn free text into subject/predicate/object triples.
# The template uses f-string style placeholders, so the literal braces of the JSON
# example are doubled ({{ and }}); left single, they would be parsed as a
# placeholder named 'subject' and formatting would fail on the missing variable.
# Only {text} is a real placeholder. The example uses double quotes so that the
# format the model is asked to imitate is valid JSON.
triple_extraction_prompt = PromptTemplate(
    input_variables=["text"],
    template="""
Extract factual triples from the following text.
Return as a list of JSON dicts: [{{"subject": ..., "predicate": ..., "object": ...}}]

Text: {text}
Triples:
"""
)

# Rebinds `llm` (the import cell bound it to an Ollama model) to an OpenAI chat
# model; temperature=0 keeps the extraction output as repeatable as the model allows.
llm = ChatOpenAI(temperature=0)
chain = LLMChain(llm=llm, prompt=triple_extraction_prompt)

# Collect one raw model response per document, in document order.
# The responses are stored as returned by the chain; they are not parsed here.
all_triples = []
for doc in documents:
    result = chain.run(text=doc['content'])
    print(f"Triples from {doc['title']}:\n{result}\n")
    all_triples.append(result)

## Step 3: (Optional) Store Triples in Graph Database (Neo4j or NetworkX)

In [None]:
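# Example for Neo4j (optional): uncomment the lines below and replace the
# placeholders with your own connection details and Cypher statements to
# persist the extracted triples. Do not commit a real password; read it from
# an environment variable instead.
# graph = Neo4jGraph(url="bolt://localhost:7687", username="neo4j", password="your-password")
# graph.query("Cypher query here")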

## Step 4: Vector Store (FAISS)

In [None]:
# Convert every raw record into a LangChain Document: the body text becomes
# page_content, while title and URL are kept as metadata for later display.
lc_documents = [
    Document(
        page_content=record["content"],
        metadata={field: record[field] for field in ("title", "url")},
    )
    for record in documents
]

# Break the documents into chunks (chunk_size=300, chunk_overlap=50).
splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)
chunks = splitter.split_documents(lc_documents)

# Embed the chunks, index them in FAISS, and save the index under "faiss_kg_rag".
embeddings = OpenAIEmbeddings()
vectorstore = FAISS.from_documents(documents=chunks, embedding=embeddings)
vectorstore.save_local("faiss_kg_rag")

## Step 5: Final Query - Combine KG and Vector Context

In [None]:
# The user's question for this demo.
# NOTE(review): `query` is not passed to the vector search below, which looks up
# a fixed entity string instead — confirm whether that is intended.
query = "Where does Alan lead events?"

# Hard-coded stand-in for what a knowledge-graph lookup would return.
facts = [
    "Alan leads the Spring Fling 10K",
    "Spring Fling 10K takes place in Prospect Park",
]

# Ask the FAISS index for the two chunks most similar to the entity string.
results = vectorstore.similarity_search("Spring Fling 10K", k=2)

# Show the graph facts as a dash-prefixed list.
print("\nGraph Facts:")
print("\n".join(f"- {fact}" for fact in facts))

# Show each retrieved chunk, tagged with the title of its source document.
print("\nVector Search Results:")
for hit in results:
    title, body = hit.metadata['title'], hit.page_content
    print(f"[{title}]: {body}\n")