run-llama · logan-markewich · Jun 7, 2023 · May 4, 2023 · Jun 7, 2023 · Jun 7, 2023
diff --git a/docs/examples/index_structs/knowledge_graph/KnowledgeGraphDemo.ipynb b/docs/examples/index_structs/knowledge_graph/KnowledgeGraphDemo.ipynb
diff --git a/docs/examples/index_structs/knowledge_graph/NebulaGraphKGIndexDemo.ipynb b/docs/examples/index_structs/knowledge_graph/NebulaGraphKGIndexDemo.ipynb
diff --git a/docs/examples/index_structs/knowledge_graph/storage/graph_store.json b/docs/examples/index_structs/knowledge_graph/storage/graph_store.json
@@ -0,0 +1 @@
+{"graph_dict": {"I": [["wrote", "short stories"], ["wrote", "programs"], ["applied to", "grad schools"], ["realized that", "AI was a hoax"], ["took art classes at", "Harvard"], ["was in a PhD program in", "computer science"], ["applied to", "RISD"], ["applied to", "Accademia di Belli Arti"], ["remember that I answered", "essay question by writing about Cezanne"], ["was quitting to", "paint"], ["had burned", "4 years"], ["resumed", "life"], ["was", "rich"], ["would tell", "them"], ["went straight from", "batch processing"], ["went straight from", "microcomputers"]], "microcomputers": [["changed", "everything"]], "TRS-80": [["was", "gold standard"]], "SHRDLU": [["is", "intelligent computer"]], "Terry Winograd": [["using", "SHRDLU"]], "Its brokenness": [["generated", "opportunities"]], "Computer Science": [["is", "uneasy alliance"]], "building things": [["is", "more exciting"]], "programs": [["are", "obsolete"]], "paintings": [["last", "hundreds of years"]], "The students and faculty": [["would adhere outwardly", "to the conventions of a 19th century atelier"]], "you": [["want to", "paint"]], "still life": [["you can copy", "pixel by pixel"]], "Interleaf": [["is", "company"], ["makes software for", "creating documents"]], "technology companies": [["is better run by", "product people"]], "code": [["is edited by too many people", "leads to bugs"]], "cheap office space": [["is no bargain if", "depressing"]], "planned meetings": [["are inferior to", "corridor conversations"]], "Asterix comics": [["begin with", "zooming in on a tiny corner"], ["turn out not to be controlled by", "the Romans"]], "Yorkville": [["is", "tiny corner"], ["wasn't", "rich"]], "web": [["would be", "big deal"], ["would", "do the same for the internet"]], "software": [["ran on", "server"], ["had three main parts", "editor"], ["was written by", "I"], ["was written by", "Robert"], ["was written by", "Trevor"], ["can launch", "as soon as it's done"]], "web app": [["common", "now"]], "graduate student 
stipend": [["is", "seed funding"]], "WYSIWYG site builder": [["is", "site builder"]], "growth rate": [["is ultimate test of", "startup"], ["takes care of", "absolute number"]], "Yahoo": [["bought", "Viaweb"]], "Viaweb": [["was", "profitable"], ["was", "growing rapidly"]], "real estate agents": [["is", "Cambridge of New York"]], "Lisp": [["would be", "dialect"], ["would hide", "parentheses"], ["has dialects", ""], ["has dialects", ""], ["has dialects", ""], ["is", "language"], ["defined by", "writing an interpreter"], ["missing a lot of things", "added"]], "O'Reilly": [["publishes", "essays"], ["started publishing", "essays"], ["essays", "online"]], "Hackers & Painters": [["is", "book"]], "Maria Daniels": [["is", "friend"]], "Jessica Livingston": [["is", "author"]], "waste their time": [["is great spur to", "imagination"]], "the Harvard Computer Society": [["asked", "me"]], "YC": [["is", "fund"], ["is", "organization"], ["helps", "startups"], ["grew", "quickly"], ["started to notice", "other advantages"], ["became", "full-time job"], ["is", "cool thing"], ["Sam Altman took over", "2013"], ["was controlled by LLC", "2013"], ["became fund for", "couple years"], ["became fund for", "2009"], ["became fund for", "because getting so big"], ["went back to being", "self-funded"], ["went back to being", "after Heroku got bought"]], "SFP": [["is", "Summer Founders Program"], ["posted an announcement on my site", "inviting undergrads to apply"]], "Hacker News": [["was founded by", "Paul Graham"], ["is", "a website"]], "Robert Morris": [["offered", "unsolicited advice"]], "McCarthy": [["tested", "interpreter"], ["wrote", "Bel"]], "Paul Graham": [["wrote", "essay"], ["moved to", "England"], ["worked on", "Bel"]], "online store builder": [["has to launch", "privately"]], "Lisp expressions": [["editing", "code editor"]], "Y Combinator": [["renamed to", "Y Combinator"], ["renamed to", "lambda calculus"]], "HN": [["edge case", "bizarre"], ["edge case", "when you both write essays 
and run a forum"]]}}
diff --git a/docs/reference/query/retrievers/kg.rst b/docs/reference/query/retrievers/kg.rst
@@ -1,7 +1,7 @@
 Knowledge Graph Retriever
 ==========================
 
-.. automodule:: llama_index.indices.knowledge_graph.retrievers
+.. automodule:: llama_index.indices.knowledge_graph.retriever
    :members:
    :inherited-members:
 ..    :exclude-members: index_struct, query, set_llm_predictor, set_prompt_helper
diff --git a/example.html b/example.html
diff --git a/llama_index/__init__.py b/llama_index/__init__.py
@@ -25,6 +25,10 @@
     GPTRAKEKeywordTableIndex,
     GPTSimpleKeywordTableIndex,
 )
+from llama_index.indices.knowledge_graph import (
+    KnowledgeGraphIndex,
+    GPTKnowledgeGraphIndex,
+)
 from llama_index.indices.list import ListIndex, GPTListIndex
 from llama_index.indices.tree import TreeIndex, GPTTreeIndex
 from llama_index.indices.vector_store import VectorStoreIndex, GPTVectorStoreIndex
@@ -148,8 +152,10 @@
     "PandasIndex",
     "EmptyIndex",
     "DocumentSummaryIndex",
+    "KnowledgeGraphIndex",
     # indices - legacy names
     "GPTKeywordTableIndex",
+    "GPTKnowledgeGraphIndex",
     "GPTSimpleKeywordTableIndex",
     "GPTRAKEKeywordTableIndex",
     "GPTListIndex",

diff --git a/llama_index/constants.py b/llama_index/constants.py
@@ -14,5 +14,6 @@
 TYPE_KEY = "__type__"
 DATA_KEY = "__data__"
 VECTOR_STORE_KEY = "vector_store"
+GRAPH_STORE_KEY = "graph_store"
 INDEX_STORE_KEY = "index_store"
 DOC_STORE_KEY = "doc_store"
diff --git a/llama_index/data_structs/data_structs.py b/llama_index/data_structs/data_structs.py
@@ -7,7 +7,7 @@
 import uuid
 from abc import abstractmethod
 from dataclasses import dataclass, field
-from typing import Dict, List, Optional, Sequence, Set, Tuple
+from typing import Dict, List, Optional, Sequence, Set
 
 from dataclasses_json import DataClassJsonMixin
 
@@ -204,9 +204,14 @@ class KG(IndexStruct):
 
     # Unidirectional
 
+    # table of keywords to node ids
     table: Dict[str, Set[str]] = field(default_factory=dict)
-    # text_chunks: Dict[str, Node] = field(default_factory=dict)
-    rel_map: Dict[str, List[Tuple[str, str]]] = field(default_factory=dict)
+
+    # TODO: legacy attribute, remove in future releases
+    rel_map: Dict[str, List[List[str]]] = field(default_factory=dict)
+
+    # TBD: should support a vector store; for now just persist the in-memory embeddings
+    # (a chainable abstraction over the *_stores could perhaps be designed)
     embedding_dict: Dict[str, List[float]] = field(default_factory=dict)
 
     @property
@@ -218,56 +223,19 @@ def add_to_embedding_dict(self, triplet_str: str, embedding: List[float]) -> Non
         """Add embedding to dict."""
         self.embedding_dict[triplet_str] = embedding
 
-    def upsert_triplet(self, triplet: Tuple[str, str, str]) -> None:
-        """Upsert a knowledge triplet to the graph."""
-        subj, relationship, obj = triplet
-        if subj not in self.rel_map:
-            self.rel_map[subj] = []
-        self.rel_map[subj].append((obj, relationship))
-
     def add_node(self, keywords: List[str], node: Node) -> None:
         """Add text to table."""
         node_id = node.get_doc_id()
         for keyword in keywords:
             if keyword not in self.table:
                 self.table[keyword] = set()
             self.table[keyword].add(node_id)
-        # self.text_chunks[node_id] = node
-
-    def get_rel_map_texts(self, keyword: str) -> List[str]:
-        """Get the corresponding knowledge for a given keyword."""
-        # NOTE: return a single node for now
-        if keyword not in self.rel_map:
-            return []
-        texts = []
-        for obj, rel in self.rel_map[keyword]:
-            texts.append(str((keyword, rel, obj)))
-        return texts
-
-    def get_rel_map_tuples(self, keyword: str) -> List[Tuple[str, str]]:
-        """Get the corresponding knowledge for a given keyword."""
-        # NOTE: return a single node for now
-        if keyword not in self.rel_map:
-            return []
-        return self.rel_map[keyword]
 
-    def get_node_ids(self, keyword: str, depth: int = 1) -> List[str]:
-        """Get the corresponding knowledge for a given keyword."""
-        if depth > 1:
-            raise ValueError("Depth > 1 not supported yet.")
+    def search_node_by_keyword(self, keyword: str) -> List[str]:
+        """Search for nodes by keyword."""
         if keyword not in self.table:
             return []
-        keywords = [keyword]
-        # some keywords may correspond to a leaf node, may not be in rel_map
-        if keyword in self.rel_map:
-            keywords.extend([child for child, _ in self.rel_map[keyword]])
-
-        node_ids: List[str] = []
-        for keyword in keywords:
-            for node_id in self.table.get(keyword, set()):
-                node_ids.append(node_id)
-            # TODO: Traverse (with depth > 1)
-        return node_ids
+        return list(self.table[keyword])
 
     @classmethod
     def get_type(cls) -> IndexStructType:

diff --git a/llama_index/data_structs/struct_type.py b/llama_index/data_structs/struct_type.py
@@ -83,6 +83,8 @@ class IndexStructType(str, Enum):
     SQL = "sql"
     # for KG index
     KG = "kg"
+    SIMPLE_KG = "simple_kg"
+    NEBULAGRAPH = "nebulagraph"
 
     # EMPTY
     EMPTY = "empty"

diff --git a/llama_index/graph_stores/__init__.py b/llama_index/graph_stores/__init__.py
@@ -0,0 +1,9 @@
+"""Graph stores."""
+
+from llama_index.graph_stores.nebulagraph import NebulaGraphStore
+from llama_index.graph_stores.simple import SimpleGraphStore
+
+__all__ = [
+    "SimpleGraphStore",
+    "NebulaGraphStore",
+]