In [1]:
import pyTigerGraph as tg

conn = tg.TigerGraphConnection("http://34.134.83.57", "SupportAI")

In [2]:
print(conn.gsql("USE GRAPH SupportAI\nLS"))

Using graph 'SupportAI'
---- Graph SupportAI
Vertex Types: 
  - VERTEX DocumentChunkEntrypoint(PRIMARY_ID id STRING, M_max UINT, M_max0 UINT, M UINT, m_l_normalize DOUBLE, date_added DATETIME) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"
  - VERTEX EntityEntrypoint(PRIMARY_ID id STRING, M_max UINT, M_max0 UINT, M UINT, m_l_normalize DOUBLE, date_added DATETIME) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"
  - VERTEX DocumentChunk(PRIMARY_ID id STRING, content STRING, embedding LIST<DOUBLE>, date_added DATETIME) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"
  - VERTEX Document(PRIMARY_ID id STRING, embedding LIST<DOUBLE>, date_added DATETIME) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"
  - VERTEX Concept(PRIMARY_ID id STRING, description STRING, embedding LIST<DOUBLE>, concept_type STRING, date_added DATETIME) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"
  - VERTEX Entity(PRIMAR

In [3]:
docs = "../pytigergraph-docs/modules/"

In [4]:
from langchain_community.document_loaders import TextLoader

In [5]:
from langchain_community.document_loaders import DirectoryLoader
loader = DirectoryLoader(docs, glob="**/*.adoc", loader_cls=TextLoader)

In [6]:
docs = loader.load()

In [7]:
len(docs)

48

In [8]:
from langchain_community.graphs.graph_document import (
    Node as BaseNode,
    Relationship as BaseRelationship
)
from typing import List, Dict, Any, Optional
from langchain_core.pydantic_v1 import Field, BaseModel

#class Property(BaseModel):
#  """A single property consisting of key and value"""
#  key: str = Field(..., description="key")
#  value: str = Field(..., description="value")

class Node(BaseNode):
    #properties: Optional[List[Property]] = Field(
    #    None, description="List of node properties")
    definition: str = Field(description="Definition of the node. Describe what the entity is.")

class Relationship(BaseRelationship):
    #properties: Optional[List[Property]] = Field(
    #    None, description="List of relationship properties"
    #)
    definition: str = Field(description="Definition of the relationship. Describe what the entity is.")

In [9]:
class KnowledgeGraph(BaseModel):
    """Generate a knowledge graph with entities and relationships."""
    nodes: List[Node] = Field(
        ..., description="List of nodes in the knowledge graph")
    rels: List[Relationship] = Field(
        ..., description="List of relationships in the knowledge graph"
    )

In [10]:
import os
from langchain.chains import LLMChain
from langchain.llms.base import LLM
from langchain.chat_models import ChatOpenAI

os.environ["OPENAI_API_KEY"] = "OPEN_AI_KEY"
llm = ChatOpenAI(model="gpt-4-0125-preview", temperature=0)

  warn_deprecated(


In [11]:
from langchain.prompts import ChatPromptTemplate

In [12]:
from langchain.output_parsers import PydanticOutputParser

parser = PydanticOutputParser(pydantic_object=KnowledgeGraph)

In [13]:

prompt = ChatPromptTemplate.from_messages(
    [(
      "system",
      f"""# Knowledge Graph Instructions for GPT-4
## 1. Overview
You are a top-tier algorithm designed for extracting information in structured formats to build a knowledge graph.
- **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.
- The aim is to achieve simplicity and clarity in the knowledge graph, making it accessible for a vast audience.
## 2. Labeling Nodes
- **Consistency**: Ensure you use basic or elementary types for node labels.
  - For example, when you identify an entity representing a person, always label it as **"person"**. Avoid using more specific terms like "mathematician" or "scientist".
- **Node IDs**: Never utilize integers as node IDs. Node IDs should be names or human-readable identifiers found in the text.
## 3. Handling Numerical Data and Dates
- Numerical data, like age or other related information, should be incorporated as attributes or properties of the respective nodes.
- **No Separate Nodes for Dates/Numbers**: Do not create separate nodes for dates or numerical values. Always attach them as attributes or properties of nodes.
- **Property Format**: Properties must be in a key-value format. Only use properties for dates and numbers, string properties should be new nodes.
- **Quotation Marks**: Never use escaped single or double quotes within property values.
- **Naming Convention**: Use camelCase for property keys, e.g., `birthDate`.
## 4. Coreference Resolution
- **Maintain Entity Consistency**: When extracting entities, it's vital to ensure consistency.
If an entity, such as "John Doe", is mentioned multiple times in the text but is referred to by different names or pronouns (e.g., "Joe", "he"), 
always use the most complete identifier for that entity throughout the knowledge graph. In this example, use "John Doe" as the entity ID.  
Remember, the knowledge graph should be coherent and easily understandable, so maintaining consistency in entity references is crucial. 
## 5. Strict Compliance
Adhere to the rules strictly. Non-compliance will result in termination, including poor formatting. """),
        ("human", "Use the given format to extract information from the following input: {input}"),
        ("human", "Mandatory: Make sure to answer in the correct format, specified here: {format_instructions}"),
    ])

In [14]:
chain = prompt | llm #| parser

In [15]:
import json

def extract_kg_from_doc(doc, chain):
    out = chain.invoke({"input": doc, "format_instructions": parser.get_format_instructions()})
    try:
        json_out = json.loads(out.content.strip("```").strip("json"))
        return json_out
    except:
        print("Error Processing: ", out)
    return {"nodes": [], "rels": []}


In [16]:
for doc in docs[21:]:
    print(doc.metadata)
    nodes_rels = extract_kg_from_doc(doc.page_content, chain)

    if nodes_rels["nodes"] != []:
        try:
            conn.upsertVertices("Entity", [(x["id"], {"definition": x["definition"]}) for x in nodes_rels["nodes"]])
        except:
            print(nodes_rels["nodes"])
    
    if nodes_rels["rels"] != []:
        try:
            conn.upsertVertices("Relationship", [(x["source"]+":"+x["type"]+":"+x["target"], {"definition": x["definition"], "short_name": x["type"]}) for x in nodes_rels["rels"]])
            conn.upsertEdges("Entity", "IS_HEAD_OF", "Relationship", [(x["source"], x["source"]+":"+x["type"]+":"+x["target"], {}) for x in nodes_rels["rels"]])
            conn.upsertEdges("Relationship", "HAS_TAIL", "Entity", [(x["source"]+":"+x["type"]+":"+x["target"], x["target"], {}) for x in nodes_rels["rels"]])
        except:
            print(nodes_rels["rels"])

{'source': '../pytigergraph-docs/modules/gds/pages/gds.adoc'}
{'source': '../pytigergraph-docs/modules/gds/pages/models.adoc'}
{'source': '../pytigergraph-docs/modules/gds/pages/nodepiece_transforms.adoc'}
{'source': '../pytigergraph-docs/modules/gds/pages/pyg_transforms.adoc'}
{'source': '../pytigergraph-docs/modules/gds/pages/factory-functions.adoc'}
[{'source': {'id': 'FactoryFunctions', 'type': 'concept'}, 'target': {'id': 'GDS', 'type': 'class'}, 'type': 'belongsTo', 'properties': {}, 'definition': 'Factory Functions are methods of the GDS class.'}, {'source': {'id': 'GDS', 'type': 'class'}, 'target': {'id': 'NeighborLoader', 'type': 'class'}, 'type': 'createsInstance', 'properties': {}, 'definition': 'GDS class can create an instance of NeighborLoader.'}, {'source': {'id': 'GDS', 'type': 'class'}, 'target': {'id': 'EdgeLoader', 'type': 'class'}, 'type': 'createsInstance', 'properties': {}, 'definition': 'GDS class can create an instance of EdgeLoader.'}, {'source': {'id': 'GDS', 