# Using TigerGraph CoPilot for Document Question Answering

In [1]:
import os

from pyTigerGraph import TigerGraphConnection

# Create the connection to the database. Credentials are read from the
# TG_USERNAME / TG_PASSWORD environment variables so that real secrets are
# never saved in the notebook; the previous literal values are kept as the
# fallbacks, so the cell behaves as before when the variables are unset.
conn = TigerGraphConnection(
    host="https://YOUR_DATABASE_HOST",
    username=os.environ.get("TG_USERNAME", "tigergraph"),
    password=os.environ.get("TG_PASSWORD", "tigergraph"),
)

# Then add CoPilot's address to the connection. This address
# is the host's address where the CoPilot container is running.
conn.ai.configureCoPilotHost("http://YOUR_COPILOT_HOST")

## Create a Graph and Ingest Data

In [2]:
# Create an empty graph named pyTigerGraphRAG via a GSQL statement.
conn.gsql("CREATE GRAPH pyTigerGraphRAG()")

'The graph pyTigerGraphRAG is created.'

In [2]:
# Point the connection at the graph created by the CREATE GRAPH statement above.
conn.graphname = "pyTigerGraphRAG"

In [3]:
# Install the SupportAI schema on the graph. The recorded output shows a schema
# change job (add_supportai_schema) adding vertex types such as Document,
# DocumentChunk, Entity, Relationship and Concept, plus their edges.
conn.ai.initializeSupportAI()

{'schema_creation_status': '"Using graph \'pyTigerGraphRAG\'\\nSuccessfully created schema change jobs: [add_supportai_schema].\\nKick off schema change job add_supportai_schema\\nDoing schema change on graph \'pyTigerGraphRAG\' (current version: 0)\\nTrying to add local vertex \'DocumentChunk\' to the graph \'pyTigerGraphRAG\'.\\nTrying to add local vertex \'Document\' to the graph \'pyTigerGraphRAG\'.\\nTrying to add local vertex \'Concept\' to the graph \'pyTigerGraphRAG\'.\\nTrying to add local vertex \'Entity\' to the graph \'pyTigerGraphRAG\'.\\nTrying to add local vertex \'Relationship\' to the graph \'pyTigerGraphRAG\'.\\nTrying to add local vertex \'DocumentCollection\' to the graph \'pyTigerGraphRAG\'.\\nTrying to add local vertex \'Content\' to the graph \'pyTigerGraphRAG\'.\\nTrying to add local edge \'HAS_CONTENT\' and its reverse edge \'reverse_HAS_CONTENT\' to the graph \'pyTigerGraphRAG\'.\\nTrying to add local edge \'IS_CHILD_OF\' and its reverse edge \'reverse_IS_CHIL

In [4]:
# Create the ingestion job for JSON documents stored in S3. AWS credentials are
# taken from the standard AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY environment
# variables so they are not saved in the notebook; the placeholders are used
# only when those variables are unset.
# `res` is indexed for "load_job_id" and "data_source_id" when the job is run.
res = conn.ai.createDocumentIngest(
    data_source="s3",
    data_source_config={
        "aws_access_key": os.environ.get("AWS_ACCESS_KEY_ID", "YOUR_AWS_ACCESS_KEY"),
        "aws_secret_key": os.environ.get("AWS_SECRET_ACCESS_KEY", "YOUR_AWS_SECRET_KEY"),
    },
    # Field names inside each JSON record: "url" is configured as the document
    # id field and "content" as the content field.
    loader_config={"doc_id_field": "url", "content_field": "content"},
    file_format="json",
)

In [7]:
# Run the ingestion job defined in `res` against the pyTigerGraph
# documentation file in S3.
conn.ai.runDocumentIngest(
    res["load_job_id"],
    res["data_source_id"],
    "s3://tg-documentation/pytg_current/pytg_current.jsonl",
)

{'job_name': 'load_documents_content_json_435a3be2c0f840e781906361fdf4d35f',
 'job_id': 'pyTigerGraphRAG.load_documents_content_json_435a3be2c0f840e781906361fdf4d35f.stream.SupportAI_pyTigerGraphRAG_13655a7445234d14a85024943c782103.1712758937368',
 'log_location': '/home/tigergraph/tigergraph/log/kafkaLoader/pyTigerGraphRAG.load_documents_content_json_435a3be2c0f840e781906361fdf4d35f.stream.SupportAI_pyTigerGraphRAG_13655a7445234d14a85024943c782103.1712758937368'}

In [4]:
# NOTE(review): presumably forces CoPilot to process the newly ingested
# documents right away so they are searchable below -- confirm against the
# CoPilot documentation. The recorded run returned {'status': 'success'}.
conn.ai.forceConsistencyUpdate()

{'status': 'success'}

## Comparing Document Search Methods

In [3]:
# The single question reused by every searchDocuments / answerQuestion call
# in the rest of the notebook, so the methods can be compared like for like.
query = "How do I get a count of vertices in Python?"

### HNSW Index Overlap in Graph

In [6]:
# Retrieve context for `query` with the "hnswoverlap" method, searching the
# indices of the four listed vertex types.
conn.ai.searchDocuments(
    query,
    method="hnswoverlap",
    method_parameters={
        "indices": ["Document", "DocumentChunk", "Entity", "Relationship"],
        # NOTE(review): presumably hits per index, graph hops to expand, and
        # the minimum number of times a vertex must be reached -- confirm
        # against the CoPilot documentation.
        "top_k": 2,
        "num_hops": 2,
        "num_seen_min": 2,
    },
)

[{'@@final_retrieval': {'EntityInfo': {'getVertexType': 'Function to retrieve the details of the specified vertex type.',
    'getVertexStats': 'Function to return vertex attribute statistics.',
    'getVertexCount': 'Function to retrieve the number of vertices of the specified type.',
    'pyTigerGraph': 'Python package for connecting to TigerGraph databases.'},
   'RelationshipInfo': {'pyTigerGraph:HAS_FUNCTION:getVertexType': 'Defines the relationship between the package and the function'},
   'DocumentChunkInfo': {'https://docs.tigergraph.com/pytigergraph/current/contributing/_chunk_0': "Contributing to pyTigerGraph\\nDownload the Repository\\nYou can download the repository from GitHub by:\\ngit clone https://github.com/tigergraph/pyTigerGraph.git\\nInstall the Local Version of the Package\\nOnce downloaded, you can install the local version of the package (without GDS support) by:\\npip install '.'\\nWithin the\\npyTigerGraph\\ndirectory, you can run the following command to inst

### Document Chunk Vector Search

In [7]:
# Retrieve context for `query` with the "vdb" method: a vector search on the
# DocumentChunk index with top_k=5 and HyDE switched off.
conn.ai.searchDocuments(
    query,
    method="vdb",
    method_parameters={
        "index": "DocumentChunk",
        "top_k": 5,
        "withHyDE": False,
    },
)

[{'@@final_retrieval': {'https://docs.tigergraph.com/pytigergraph/current/core-functions/schema_chunk_2': 'If the value is\\nFalse\\n, the request will always insert new\\nedges and create the necessary vertices with default values for their attributes.\\nNote that this parameter does not affect vertices.\\nupdateVertexOnly\\n: If\\nTrue\\n, the request will only update existing vertices and not insert new\\nvertices.\\nReturns:\\nThe result of upsert (number of vertices and edges accepted/upserted).\\nEndpoint:\\nPOST /graph/{graph_name}\\nSee\\nUpsert data to graph\\ngetEndpoints()\\ngetEndpoints(builtin: bool = False, dynamic: bool = False, static: bool = False) \\u2192 dict\\nLists the REST++ endpoints and their parameters.\\nParameters:\\nbuiltin\\n: List the TigerGraph-provided REST++ endpoints.\\ndynamic\\n: List endpoints for user-installed queries.\\nstatic\\n: List static endpoints.\\nIf none of the above arguments are specified, all endpoints are listed.\\nEndpoint:\\nGET /e

### Sibling Document Chunk Vector Search

In [8]:
# Retrieve context for `query` with the "sibling" method on the DocumentChunk
# index. In the recorded output each matched chunk is returned together with
# neighbouring chunks, tagged with a signed 'distance' from the match.
conn.ai.searchDocuments(
    query,
    method="sibling",
    method_parameters={
        "index": "DocumentChunk",
        "top_k": 5,
        # NOTE(review): presumably how many chunks after / before each match
        # are pulled in -- confirm against the CoPilot documentation.
        "lookahead": 3,
        "lookback": 3,
        "withHyDE": False,
    },
)

[{'@@sibling_set': {'https://docs.tigergraph.com/pytigergraph/current/core-functions/schema_chunk_2': {'https://docs.tigergraph.com/pytigergraph/current/core-functions/schema_chunk_1': {'distance': '-1',
     'content': 'Otherwise, the request will return immediately after RESTPP processes the POST.\\nnewVertexOnly\\n: If\\nTrue\\n, the request will only insert new vertices and not update existing ones.\\nvertexMustExist\\n: If\\nTrue\\n, the request will only insert an edge if both the\\nFROM\\nand\\nTO\\nvertices\\nof the edge already exist.'},
    'https://docs.tigergraph.com/pytigergraph/current/core-functions/schema_chunk_2': {'distance': '0',
     'content': 'If the value is\\nFalse\\n, the request will always insert new\\nedges and create the necessary vertices with default values for their attributes.\\nNote that this parameter does not affect vertices.\\nupdateVertexOnly\\n: If\\nTrue\\n, the request will only update existing vertices and not insert new\\nvertices.\\nReturns:\

## Comparing LLM Generated Responses

In [5]:
# Ask CoPilot to answer `query`, using the "hnswoverlap" retrieval method with
# the same parameters as the search comparison. The result is indexed by
# "response" and "retrieved" in the cells that follow.
resp = conn.ai.answerQuestion(
    query,
    method="hnswoverlap",
    method_parameters={
        "indices": ["Document", "DocumentChunk", "Entity", "Relationship"],
        "top_k": 2,
        "num_hops": 2,
        "num_seen_min": 2,
    },
)

In [6]:
# Show the generated answer text for the "hnswoverlap" run.
print(resp["response"])

You can get a count of vertices in Python using the `getVertexCount` function from the `pyTigerGraph` package. This function retrieves the number of vertices of the specified type.


In [16]:
# Show the retrieval results returned alongside the answer, for comparison
# with the answer text.
print(resp["retrieved"])

[{'@@final_retrieval': {'EntityInfo': {'pyTigerGraph': 'Python package for connecting to TigerGraph databases.', 'getVertexType': 'Function to retrieve the details of the specified vertex type.', 'getVertexStats': 'Function to return vertex attribute statistics.', 'getVertexCount': 'Function to retrieve the number of vertices of the specified type.'}, 'RelationshipInfo': {'pyTigerGraph:HAS_FUNCTION:getVertexType': 'Defines the relationship between the package and the function'}, 'DocumentChunkInfo': {'https://docs.tigergraph.com/pytigergraph/current/contributing/_chunk_2': 'The docstrings should be formatted as follows:\\ndef getVertexType(self, vertexType: str, force: bool = False) -> dict:\\n    \\"\\"\\"Returns the details of the specified vertex type.\\n\\n    Args:\\n        vertexType:\\n            The name of of the vertex type.\\n        force:\\n            If `True`, forces the retrieval the schema metadata again, otherwise returns a\\n            cached copy of vertex type 

In [17]:
# Ask the same question again, this time retrieving context with the "vdb"
# vector search on the DocumentChunk index. Rebinds `resp`.
resp = conn.ai.answerQuestion(
    query,
    method="vdb",
    method_parameters={
        "index": "DocumentChunk",
        "top_k": 5,
        "withHyDE": False,
    },
)

In [18]:
# Show the generated answer text for the "vdb" run.
# NOTE(review): the visible part of the recorded output embeds the raw
# retrieved chunks inside a code snippet.
print(resp["response"])

To get a count of vertices in Python, you can use the following code:

```python
# Assuming you have a list of vertices
vertices = [{'@@final_retrieval': {'https://docs.tigergraph.com/pytigergraph/current/core-functions/schema_chunk_2': 'If the value is\\nFalse\\n, the request will always insert new\\nedges and create the necessary vertices with default values for their attributes.\\nNote that this parameter does not affect vertices.\\nupdateVertexOnly\\n: If\\nTrue\\n, the request will only update existing vertices and not insert new\\nvertices.\\nReturns:\\nThe result of upsert (number of vertices and edges accepted/upserted).\\nEndpoint:\\nPOST /graph/{graph_name}\\nSee\\nUpsert data to graph\\ngetEndpoints()\\ngetEndpoints(builtin: bool = False, dynamic: bool = False, static: bool = False) \\u2192 dict\\nLists the REST++ endpoints and their parameters.\\nParameters:\\nbuiltin\\n: List the TigerGraph-provided REST++ endpoints.\\ndynamic\\n: List endpoints for user-installed queries.

In [19]:
# Show the retrieval results returned alongside the "vdb" answer.
print(resp["retrieved"])

[{'@@final_retrieval': {'https://docs.tigergraph.com/pytigergraph/current/core-functions/schema_chunk_2': 'If the value is\\nFalse\\n, the request will always insert new\\nedges and create the necessary vertices with default values for their attributes.\\nNote that this parameter does not affect vertices.\\nupdateVertexOnly\\n: If\\nTrue\\n, the request will only update existing vertices and not insert new\\nvertices.\\nReturns:\\nThe result of upsert (number of vertices and edges accepted/upserted).\\nEndpoint:\\nPOST /graph/{graph_name}\\nSee\\nUpsert data to graph\\ngetEndpoints()\\ngetEndpoints(builtin: bool = False, dynamic: bool = False, static: bool = False) \\u2192 dict\\nLists the REST++ endpoints and their parameters.\\nParameters:\\nbuiltin\\n: List the TigerGraph-provided REST++ endpoints.\\ndynamic\\n: List endpoints for user-installed queries.\\nstatic\\n: List static endpoints.\\nIf none of the above arguments are specified, all endpoints are listed.\\nEndpoint:\\nGET /e

In [24]:
# Ask the same question a third time, retrieving context with the "sibling"
# method on the DocumentChunk index. Rebinds `resp`.
resp = conn.ai.answerQuestion(
    query,
    method="sibling",
    method_parameters={
        "index": "DocumentChunk",
        "top_k": 5,
        "lookahead": 3,
        "lookback": 3,
        "withHyDE": False,
    },
)

In [25]:
# Show the generated answer text for the "sibling" run.
# NOTE(review): the visible part of the recorded output embeds the raw
# retrieved sibling set inside a code snippet.
print(resp["response"])

To get a count of vertices in Python, you can use the following code:

```python
data = [{'@@sibling_set': {'https://docs.tigergraph.com/pytigergraph/current/gds/factory-functions_chunk_49': {'https://docs.tigergraph.com/pytigergraph/current/gds/factory-functions_chunk_50': {'distance': '1', 'content': 'If there is\\nonly one batch of data to load, it will give you the batch directly instead\\nof an iterator, which might make more sense in that case.'}, 'https://docs.tigergraph.com/pytigergraph/current/gds/factory-functions_chunk_52': {'distance': '3', 'content': 'Defaults to None.\\nbatch_size (int, optional)\\n: Number of vertices in each batch.\\nDefaults to None.\\nnum_batches (int, optional)\\n: Number of batches to split the vertices.\\nDefaults to 1.\\nshuffle (bool, optional)\\n: Whether to shuffle the vertices before loading data.\\nDefaults to False.\\nfilter_by (str, optional)\\n: A boolean attribute used to indicate which vertices\\ncan be included.'}, 'https://docs.tigergr

In [26]:
# Show the retrieval results returned alongside the "sibling" answer.
print(resp["retrieved"])

[{'@@sibling_set': {'https://docs.tigergraph.com/pytigergraph/current/gds/factory-functions_chunk_49': {'https://docs.tigergraph.com/pytigergraph/current/gds/factory-functions_chunk_50': {'distance': '1', 'content': 'If there is\\nonly one batch of data to load, it will give you the batch directly instead\\nof an iterator, which might make more sense in that case.'}, 'https://docs.tigergraph.com/pytigergraph/current/gds/factory-functions_chunk_52': {'distance': '3', 'content': 'Defaults to None.\\nbatch_size (int, optional)\\n: Number of vertices in each batch.\\nDefaults to None.\\nnum_batches (int, optional)\\n: Number of batches to split the vertices.\\nDefaults to 1.\\nshuffle (bool, optional)\\n: Whether to shuffle the vertices before loading data.\\nDefaults to False.\\nfilter_by (str, optional)\\n: A boolean attribute used to indicate which vertices\\ncan be included.'}, 'https://docs.tigergraph.com/pytigergraph/current/gds/factory-functions_chunk_47': {'distance': '-2', 'conten