In [20]:
from pyArango.connection import Connection
import os
from dotenv import load_dotenv
import logging

class SpokeWrapper:
    """Convenience wrapper around a pyArango connection to a single ArangoDB database.

    Connection settings are read from the environment variables ``ARANGO_HOST``,
    ``ARANGO_DB``, ``ARANGO_USERNAME`` and ``ARANGO_PASSWORD``. A ``.env`` file
    located one directory above the current working directory is loaded first.
    """

    def __init__(self):
        """Load settings, open the connection and select the configured database.

        Raises:
            Exception: any error raised while connecting or selecting the
                database is logged and then re-raised unchanged.
        """
        load_dotenv(os.path.join(os.getcwd(), '..', '.env'))

        self.host = os.getenv('ARANGO_HOST')
        self.db_name = os.getenv('ARANGO_DB')
        self.username = os.getenv('ARANGO_USERNAME')
        self.password = os.getenv('ARANGO_PASSWORD')

        try:
            self.conn = Connection(arangoURL=self.host, username=self.username, password=self.password)
            self.db = self.conn[self.db_name]
            logging.info(f"Connected to database: {self.db_name}")
        except Exception as e:
            logging.error(f"Failed to connect to database: {e}")
            raise

    def list_collections(self):
        """Return the names of the collections known to the selected database as a list."""
        return list(self.db.collections.keys())

    def execute_aql(self, query, bind_vars=None):
        """Run an AQL query and return its raw results as a list.

        Args:
            query: AQL query text.
            bind_vars: optional mapping of bind parameter names to values.

        Returns:
            The raw result rows. Any exception is logged and an empty list is
            returned instead, so callers cannot tell a failed query from one
            that matched nothing without checking the log.
        """
        try:
            return list(self.db.AQLQuery(query, bindVars=bind_vars, rawResults=True))
        except Exception as e:
            logging.error(f"AQL Query Error: {e}")
            return []

    def get_entity(self, collection, key):
        """Fetch the document ``key`` from ``collection``.

        Returns:
            The document, or ``None`` (after logging the error) when the lookup
            raises for any reason.
        """
        try:
            return self.db[collection][key]
        except Exception as e:
            logging.error(f"Failed to retrieve entity: {e}")
            return None

    def get_connected_entities(self, start_id, edge_label=None, edge_collection='Edges'):
        """Return the vertices one OUTBOUND hop away from ``start_id``.

        Args:
            start_id: document id of the start vertex.
            edge_label: when given, only edges whose ``label`` equals this value
                are followed. When ``None`` (the default) no label filter is
                applied and every outbound edge is followed.
            edge_collection: name of the edge collection to traverse.

        Returns:
            A list of ``{entity: <vertex>, edge: <edge>}`` rows as produced by
            ``execute_aql`` (empty when the query fails).
        """
        query_lines = ["FOR v, e IN 1..1 OUTBOUND @start_id @@edge_collection"]
        bind_vars = {
            'start_id': start_id,
            '@edge_collection': edge_collection,
        }
        # Only declare the label bind variable when the query actually uses it:
        # comparing against a null bind value would restrict the result to
        # edges that have no label at all.
        if edge_label is not None:
            query_lines.append("    FILTER e.label == @edge_label")
            bind_vars['edge_label'] = edge_label
        query_lines.append("    RETURN {entity: v, edge: e}")
        return self.execute_aql("\n".join(query_lines), bind_vars=bind_vars)

def test_spoke_wrapper():
    spoke = SpokeWrapper()
    
    logging.info("Listing all collections:")
    logging.info(spoke.list_collections())
    
    logging.info("\nFetching a single entity:")
    entity = spoke.get_entity('Nodes', '100005')  # Adjust the key if needed
    logging.info(entity)
    
    if entity:
        logging.info("\nFetching connected entities:")
        connected = spoke.get_connected_entities(entity._id, "PARTICIPATES_GpPW")
        logging.info(f"Number of connected entities: {len(connected)}")
        for item in connected[:5]:  # Print the first 5 connected entities
            logging.info(f"Entity: {item['entity']}")
            logging.info(f"Edge: {item['edge']}")
            logging.info("---")

if __name__ == "__main__":
    # Configure root logging at INFO level so the logging.info calls made by
    # the walkthrough are actually emitted, then run it.
    logging.basicConfig(level=logging.INFO)
    test_spoke_wrapper()

INFO:root:Connected to database: spoke23_human
INFO:root:Listing all collections:
INFO:root:['_analyzers', '_jobs', 'Nodes', '_apps', '_aqlfunctions', 'Edges', '_appbundles', '_graphs', '_queues', '_frontend']
INFO:root:
Fetching a single entity:
INFO:root:ArangoDoc '_id: Nodes/100005, _key: 100005, _rev: _iYaMlBe---': <store: {'type': 'node', 'id': '100005', 'labels': ['Gene'], 'properties': <store: {'license': 'CC0 1.0', 'identifier': 7054, 'ensembl': 'ENSG00000180176', 'Mate_Version': 'V1', 'chromosome': '11', 'name': 'TH', 'description': 'tyrosine hydroxylase', 'source': 'Entrez Gene', 'chembl_id': 'CHEMBL1969'}>}>
INFO:root:
Fetching connected entities:
INFO:root:Number of connected entities: 48
INFO:root:Entity: {'_key': '35575', '_id': 'Nodes/35575', '_rev': '_iYaKmnC---', 'type': 'node', 'id': '35575', 'labels': ['Pathway'], 'properties': {'identifier': 'WP4220_r120526', 'license': 'CC BY 4.0', 'database': 'WikiPathways', 'Mate_Version': 'V1', 'name': 'Neurotransmitter disorder

In [21]:
import os
from dotenv import load_dotenv
from arango import ArangoClient

# Pull settings from a .env file into the process environment
load_dotenv()

# Host, credentials and database name all come from SPOKE_* environment variables
base_url, username, password, main_db = (
    os.getenv(variable)
    for variable in ('SPOKE_HOST', 'SPOKE_USERNAME', 'SPOKE_PASSWORD', 'SPOKE_DB')
)

# Open the client and bind a handle to the configured database
client = ArangoClient(hosts=base_url)
db = client.db(main_db, username=username, password=password)

def execute_aql(aql_query, bind_vars=None):
    """Run an AQL query through the module-level ``db`` handle and collect the results.

    Args:
        aql_query: AQL query text.
        bind_vars: optional mapping of bind parameter names to values.

    Returns:
        A list holding every item yielded by the query cursor, in order.
    """
    documents = []
    # Iterate the cursor explicitly so every result is drained into the list.
    for document in db.aql.execute(aql_query, bind_vars=bind_vars):
        documents.append(document)
    return documents

# Get available collections
collections = db.collections()
print("Available Collections:")
for collection in collections:
    print(collection['name'])

# Assuming 'Entity' is the collection name for nodes
# Get entity types
aql_query = """
FOR doc IN Entity
    COLLECT entityType = doc.entityType WITH COUNT INTO count
    RETURN {entityType: entityType, count: count}
"""
entity_types = execute_aql(aql_query)
print("\nEntity Types:")
for et in entity_types:
    print(f"{et['entityType']}: {et['count']}")

# Get data elements
aql_query = """
FOR doc IN Entity
    FILTER doc.entityType == 'DATA_ELEMENT'
    LIMIT 5
    RETURN doc
"""
data_elements = execute_aql(aql_query)
print("\nData Elements (first 5):")
for element in data_elements:
    print(element)

# Get connections for a specific entity
entity_uuid = '5afd2304-f739-4f06-b1f8-61ddda331ff6'  # UUID of MEDICATION_DOSAGES
# Entity documents are keyed by enum-style keys (their _id looks like
# 'Entity/DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DOSAGES'), so 'Entity/<uuid>'
# is not a document id and never equals edge._from. The 'contains' edges store
# the endpoint uuids in fromUuid / toUuid, so match on those instead.
aql_query = """
FOR edge IN contains
    FILTER edge.fromUuid == @entity_uuid
    RETURN edge
"""
connections = execute_aql(aql_query, bind_vars={'entity_uuid': entity_uuid})
print("\nConnections for MEDICATION_DOSAGES:")
for connection in connections:
    print(connection)

# Traverse the graph
# A traversal needs a real start vertex, so look the document up by its uuid
# attribute first and start from that document.
aql_query = """
FOR start IN Entity
    FILTER start.uuid == @entity_uuid
    LIMIT 1
    FOR v, e, p IN 1..2 OUTBOUND start contains
        RETURN {
            vertex: v,
            edge: e,
            path: p
        }
"""
traversal_result = execute_aql(aql_query, bind_vars={'entity_uuid': entity_uuid})
print("\nGraph Traversal Result:")
print(traversal_result)

Available Collections:
INCREASEDIN_PiD
INCLUDES_PCiC
ISA_PWiPW
INCLUDES_OiPW
INCLUDES_OiEC
ISIN_ACTiiA
ENCODES_GeP
INCLUDES_PWiEC
DOWNREGULATES_OGdG
DOWNREGULATES_KGdG
TREATS_CtD
DECREASEDIN_PdD
DOWNREGULATES_GPdG
AFFECTS_CamG
LOCALIZES_DlA
CAUSES_OcD
CONSUMES_RcC
CATALYZES_ECcR
ASSOCIATES_DaG
CONTAINS_FcN
CONTAINS_PWcPW
EC
SARSCov2
PRODUCES_RpC
contains
Compound
EXPRESSES_ACTeG
MolecularFunction
_appbundles
PARTICIPATES_GpBP
ISA_DiD
Pathway
_apps
CellType
_jobs
Organism
MEMBEROF_PDmPF
_queues
Disease
ENCODES_OeP
Nutrient
_analyzers
RESEMBLES_DrD
PARTOF_PDpP
DOWNREGULATES_AdG
PharmacologicClass
UPREGULATES_KGuG
Anatomy
AnatomyCellType
ProteinFamily
BINDS_CbP
Symptom
Reaction
_graphs
PARTICIPATES_GpMF
BiologicalProcess
SideEffect
ISA_AiA
Protein
INTERACTS_PiP
Food
ISA_PiEC
CellularComponent
Gene
ProteinDomain
CONTAINS_DcD
UPREGULATES_OGuG
UPREGULATES_GPuG
Entity
UPREGULATES_CuG
PRESENTS_DpS
PARTICIPATES_GpCC
ISIN_ACTiiCT
UPREGULATES_AuG
INTERACTS_CPiP
_aqlfunctions
CAUSES_CcSE
PARTOF_Ap

In [22]:
edge_collections = ['INCLUDES_PCiC', 'ASSOCIATES_DaG', 'CONTAINS_FcC']  # Add more as needed

# Entity documents are keyed by enum-style keys, not by uuid, so 'Entity/<uuid>'
# is never a valid _from/_to value. Resolve the document's real _id once.
entity_ids = execute_aql(
    """
    FOR doc IN Entity
        FILTER doc.uuid == @entity_uuid
        LIMIT 1
        RETURN doc._id
    """,
    bind_vars={'entity_uuid': entity_uuid},
)
entity_id = entity_ids[0] if entity_ids else None

if entity_id is None:
    print(f"\nNo Entity document found with uuid {entity_uuid}")
else:
    # The collection name is passed as a collection bind parameter (@@name)
    # rather than being spliced into the query text.
    aql_query = """
    FOR edge IN @@edge_collection
        FILTER edge._from == @entity_id OR edge._to == @entity_id
        RETURN edge
    """
    for edge_collection in edge_collections:
        connections = execute_aql(
            aql_query,
            bind_vars={'@edge_collection': edge_collection, 'entity_id': entity_id},
        )
        print(f"\nConnections for MEDICATION_DOSAGES in {edge_collection}:")
        for connection in connections:
            print(connection)


# Sample of DATA_TYPE entities
aql_query = """
FOR doc IN Entity
    FILTER doc.entityType == 'DATA_TYPE'
    LIMIT 10
    RETURN doc
"""
data_types = execute_aql(aql_query)
print("\nSample DATA_TYPE entities:")
for dt in data_types:
    print(dt)


# Join every DATA_ELEMENT to the DATA_TYPE documents it points at through 'contains' edges
aql_query = """
FOR elem IN Entity
    FILTER elem.entityType == 'DATA_ELEMENT'
    FOR edge IN contains
        FILTER edge._from == elem._id
        FOR dtype IN Entity
            FILTER dtype._id == edge._to AND dtype.entityType == 'DATA_TYPE'
            RETURN {dataElement: elem.name, dataType: dtype.name, relationship: edge}
"""
element_type_relations = execute_aql(aql_query)
print("\nRelationships between DATA_ELEMENT and DATA_TYPE:")
for relation in element_type_relations:
    print(relation)


# All DATA_SET entities
aql_query = """
FOR doc IN Entity
    FILTER doc.entityType == 'DATA_SET'
    RETURN doc
"""
data_set = execute_aql(aql_query)
print("\nDATA_SET entity:")
print(data_set)


Connections for MEDICATION_DOSAGES in INCLUDES_PCiC:

Connections for MEDICATION_DOSAGES in ASSOCIATES_DaG:

Connections for MEDICATION_DOSAGES in CONTAINS_FcC:

Sample DATA_TYPE entities:
{'_key': 'DATA_TYPE.DATA_TYPE_BLOOD_PRESSURE', '_id': 'Entity/DATA_TYPE.DATA_TYPE_BLOOD_PRESSURE', '_rev': '_emidfX----', 'uuid': 'ec344e6d-4222-4cc3-b102-1d657bd96e97', 'name': 'Blood Pressure', 'enum': 'DATA_TYPE_BLOOD_PRESSURE', 'basicEnum': 'BLOOD_PRESSURE', 'system': True, 'creator': '29abf4b0-8d11-4eed-81d1-9ec501cc2ee7', 'creatorType': 'USER', 'entityType': 'DATA_TYPE', 'candidateId': '4689f863-3977-4f4f-bcca-965d9136cbcb', 'verified': True}
{'_key': 'DATA_TYPE.DATA_TYPE_MEDICATION_DOSAGES', '_id': 'Entity/DATA_TYPE.DATA_TYPE_MEDICATION_DOSAGES', '_rev': '_emidfXa---', 'uuid': '09eeff38-6550-4bab-97c1-47354f1ed83c', 'name': 'Medication Dosages', 'enum': 'DATA_TYPE_MEDICATION_DOSAGES', 'basicEnum': 'MEDICATION_DOSAGES', 'system': True, 'creator': '29abf4b0-8d11-4eed-81d1-9ec501cc2ee7', 'creato

In [23]:
import pprint
from knowledge_engine_sdk import *

# Initialize the SDK
sdk = KnowledgeEngine()

# Pretty printer for structured output
pp = pprint.PrettyPrinter(indent=1)

# Start from known-empty values so that a failed SDK call below cannot leave
# these names undefined (NameError) or silently pointing at results from an
# earlier cell still living in the kernel.
data_elements = []
entity = None

# 1. Get list of Entity Types
try:
    entity_types = sdk.get_entity_types()
    print("Successfully connected to SPOKE database")
    pp.pprint(entity_types)
except Exception as e:
    print(f"Failed to connect to SPOKE database: {e}")

# 2. Get list of entities for a specific entity type (e.g., DATA_ELEMENT)
query_params = GetEntityListParams(entity_type_enum="DATA_ELEMENT")

try:
    data_elements = sdk.get_entities(params=query_params)
    pp.pprint(data_elements)
except Exception as e:
    print(f"Error retrieving data elements: {e}")

# 3. Get a particular entity by uuid, enum, and entity_type_enum
if data_elements:
    entity_uuid = data_elements[0]['uuid']  # Example: fetching the uuid of the first entity
    entity_enum = data_elements[0]['enum']  # Fetching the enum of the first entity
    entity_params = GetEntityParams(uuid=entity_uuid, enum=entity_enum, entity_type_enum="DATA_ELEMENT")

    try:
        entity = sdk.get_entity(params=entity_params)
        pp.pprint(entity)
    except Exception as e:
        print(f"Error retrieving entity: {e}")

    # 4. Traverse a graph starting from this entity (only when step 3 produced one)
    if entity:
        try:
            traversal_params = TraverseParams(
                start_entity_uuid=entity['uuid'],
                direction="out",
                max_depth=1
            )
            traversal = sdk.traverse_graph(params=traversal_params)
            pp.pprint(traversal)
        except Exception as e:
            print(f"Error traversing the graph: {e}")

# 5. Get list of connection types
try:
    connection_types = sdk.get_connection_types()
    pp.pprint(connection_types)
except Exception as e:
    print(f"Error retrieving connection types: {e}")

# 6. Get list of connections for the entity
if entity:
    connection_params = GetConnectionListParams(
        connection_type_name="contains",
        entity_uuid=entity["uuid"],
        direction="out"
    )

    try:
        connections = sdk.get_connections(params=connection_params)
        pp.pprint(connections)
    except Exception as e:
        print(f"Error retrieving connections: {e}")


Successfully connected to SPOKE database
['DATA_SET', 'DATA_TYPE', 'DATA_ELEMENT', 'PH_ID']
[{'_id': 'Entity/DATA_ELEMENT.DATA_ELEMENT_METABOLOMICS_CORRECTED',
  '_key': 'DATA_ELEMENT.DATA_ELEMENT_METABOLOMICS_CORRECTED',
  '_rev': '_emidfZW---',
  'basicEnum': 'METABOLOMICS_CORRECTED',
  'candidateId': '576d7aba-6cf0-4f27-8a19-aa01b38a5ef2',
  'creator': '29abf4b0-8d11-4eed-81d1-9ec501cc2ee7',
  'creatorType': 'USER',
  'entityType': 'DATA_ELEMENT',
  'enum': 'DATA_ELEMENT_METABOLOMICS_CORRECTED',
  'name': 'metabolomics_corrected.csv',
  'system': True,
  'uuid': '53812404-886e-4cd2-8e14-395a0fe8fb6c',
  'verified': False},
 {'_id': 'Entity/DATA_ELEMENT.DATA_ELEMENT_SALIVA',
  '_key': 'DATA_ELEMENT.DATA_ELEMENT_SALIVA',
  '_rev': '_emigPha---',
  'basicEnum': 'SALIVA',
  'candidateId': '576d7aba-6cf0-4f27-8a19-aa01b38a5ef2',
  'creator': '29abf4b0-8d11-4eed-81d1-9ec501cc2ee7',
  'creatorType': 'USER',
  'entityType': 'DATA_ELEMENT',
  'enum': 'DATA_ELEMENT_SALIVA',
  'name': 'saliva.

In [24]:
# All imports first: the original used KnowledgeEngine before importing it,
# which only worked when an earlier cell had already put it in the kernel.
import inspect

import knowledge_engine_sdk
from knowledge_engine_sdk import KnowledgeEngine

# Package-level overview
print(dir(knowledge_engine_sdk))
print(knowledge_engine_sdk.__doc__)
print(dir(KnowledgeEngine))
help(knowledge_engine_sdk)
print(knowledge_engine_sdk.__file__)

# Class-level help
help(KnowledgeEngine)

# Signature of one bound method on a live instance
sdk = KnowledgeEngine()
print(inspect.signature(sdk.get_entities))

# Full source of the class
print(inspect.getsource(KnowledgeEngine))


['Any', 'BaseModel', 'Connection', 'ConnectionDirection', 'ConnectionTypes', 'CreateConnection', 'CreateEntity', 'DeleteConnection', 'DeleteEntity', 'Entity', 'EntityTypes', 'Enum', 'GetConnectionListParams', 'GetConnectionParams', 'GetEntityListParams', 'GetEntityParams', 'GetEntityTypeParams', 'Knowledge', 'KnowledgeEngine', 'List', 'Optional', 'OrderBy', 'Pagination', 'TraverseAlgorithm', 'TraverseParams', 'Union', 'ValidationError', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', 'datetime', 'errors', 'knowledge_engine_sdk', 'models', 'utils']
None
['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_init_db', 'create_candid

In [16]:
from knowledge_engine_sdk import *
import pprint
import pandas as pd

pp = pprint.PrettyPrinter(indent=1)
sdk = KnowledgeEngine()

# All DATA_ELEMENT entities (kept for the lookup below)
data_elements = sdk.get_entities(
    params=GetEntityListParams(entity_type_enum=EntityTypes.DATA_ELEMENT)
)

# All DATA_SET entities, shown in full
data_sets = sdk.get_entities(
    params=GetEntityListParams(entity_type_enum=EntityTypes.DATA_SET)
)
pp.pprint(data_sets)

# Locate the medication-dosages element among the fetched data elements
medication_dosages_entity = None
for candidate in data_elements:
    if candidate['enum'] == 'DATA_ELEMENT_MEDICATION_DOSAGES':
        medication_dosages_entity = candidate
        break

# Use the uuid / basicEnum of the match when there is one; otherwise fall back
# to a lookup by the literal enum with no uuid.
if medication_dosages_entity:
    lookup_uuid = medication_dosages_entity['uuid']
    lookup_enum = medication_dosages_entity['basicEnum']
else:
    lookup_uuid = None
    lookup_enum = "MEDICATION_DOSAGES"

query_params = GetEntityParams(
    uuid=lookup_uuid,
    enum=lookup_enum,
    entity_type_enum=EntityTypes.DATA_ELEMENT,
)

entity = sdk.get_entity(params=query_params)
pp.pprint(entity)

# Last expression of the cell: the entity rendered as a one-column table
pd.DataFrame.from_dict(entity, orient='index')

[{'_id': 'Entity/DATA_SET.DATA_SET_SCIENTIFIC_WELLNESS_COHORT',
  '_key': 'DATA_SET.DATA_SET_SCIENTIFIC_WELLNESS_COHORT',
  '_rev': '_emidfWW---',
  'basicEnum': 'SCIENTIFIC_WELLNESS_COHORT',
  'candidateId': '4689f863-3977-4f4f-bcca-965d9136cbcb',
  'creator': '29abf4b0-8d11-4eed-81d1-9ec501cc2ee7',
  'creatorType': 'USER',
  'entityType': 'DATA_SET',
  'enum': 'DATA_SET_SCIENTIFIC_WELLNESS_COHORT',
  'name': 'Scientific Wellness Cohort',
  'system': True,
  'uuid': '5308b32a-4e51-489f-8adb-7c842152d161',
  'verified': True}]
{'_id': 'Entity/DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DOSAGES',
 '_key': 'DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DOSAGES',
 '_rev': '_emigRCG---',
 'basicEnum': 'MEDICATION_DOSAGES',
 'candidateId': '576d7aba-6cf0-4f27-8a19-aa01b38a5ef2',
 'creator': '29abf4b0-8d11-4eed-81d1-9ec501cc2ee7',
 'creatorType': 'USER',
 'entityType': 'DATA_ELEMENT',
 'enum': 'DATA_ELEMENT_MEDICATION_DOSAGES',
 'name': 'medication_dosages.csv',
 'system': True,
 'uuid': '5afd2304-f739-4f06

Unnamed: 0,0
_key,DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DOSAGES
_id,Entity/DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DO...
_rev,_emigRCG---
uuid,5afd2304-f739-4f06-b1f8-61ddda331ff6
name,medication_dosages.csv
enum,DATA_ELEMENT_MEDICATION_DOSAGES
basicEnum,MEDICATION_DOSAGES
system,True
creator,29abf4b0-8d11-4eed-81d1-9ec501cc2ee7
creatorType,USER


In [21]:
from knowledge_engine_sdk import TraverseAlgorithm

# Connections

# Get list of connection types
connection_types = sdk.get_connection_types()
# pp.pprint(connection_types)

# Get list of connections for an entity
query_params = GetConnectionListParams(
    connection_type_name = "contains",
    entity_uuid = entity["uuid"],
    direction = "out"
)

connections = sdk.get_connections(params=query_params)
# pp.pprint(connections)

# Get a particular connection between 2 entities
query_params = GetConnectionParams(
    from_entity_uuid = data_sets[0]["uuid"],
    to_entity_uuid = data_elements[0]["uuid"]
)

connection = sdk.get_connection(params=query_params)
pp.pprint(connection)

# A bare expression is only rendered when it is the last statement of a cell;
# in the middle of the cell its value is discarded, so display it explicitly.
# `display` is provided by the IPython/Jupyter kernel.
display(pd.DataFrame.from_dict(connections).transpose())

# Graphs

# Traverse a graph
query_params = TraverseParams(
    start_entity_uuid = entity["uuid"],
    direction = "out",
    max_depth = 1,
    min_depth = 0,
    algorithm = TraverseAlgorithm.BREADTH  # Changed from BREADTH_FIRST to BREADTH
)

traversal = sdk.traverse_graph(params=query_params)
pp.pprint(traversal)

[{'_from': 'Entity/DATA_SET.DATA_SET_SCIENTIFIC_WELLNESS_COHORT',
  '_id': 'contains/DATA_SET.DATA_SET_SCIENTIFIC_WELLNESS_COHORT.DATA_ELEMENT.DATA_ELEMENT_METABOLOMICS_CORRECTED',
  '_key': 'DATA_SET.DATA_SET_SCIENTIFIC_WELLNESS_COHORT.DATA_ELEMENT.DATA_ELEMENT_METABOLOMICS_CORRECTED',
  '_rev': '_emidfaC---',
  '_to': 'Entity/DATA_ELEMENT.DATA_ELEMENT_METABOLOMICS_CORRECTED',
  'candidateId': '576d7aba-6cf0-4f27-8a19-aa01b38a5ef2',
  'fromUuid': '5308b32a-4e51-489f-8adb-7c842152d161',
  'toUuid': '53812404-886e-4cd2-8e14-395a0fe8fb6c',
  'verified': False}]
{'paths': [{'edges': [],
            'vertices': [{'_id': 'Entity/DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DOSAGES',
                          '_key': 'DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DOSAGES',
                          '_rev': '_emigRCG---',
                          'basicEnum': 'MEDICATION_DOSAGES',
                          'candidateId': '576d7aba-6cf0-4f27-8a19-aa01b38a5ef2',
                          'creator': '29abf4b0-8

In [25]:
from knowledge_engine_sdk import *
import pprint
import pandas as pd

class KnowledgeEngineWrapper:
    """Small facade over a ``KnowledgeEngine`` SDK instance for entity lookups."""

    def __init__(self):
        """Create the underlying SDK client and keep it on ``self.sdk``."""
        self.sdk = KnowledgeEngine()

    def get_entity_types(self):
        """Return whatever the SDK reports as its entity types."""
        return self.sdk.get_entity_types()

    def get_entities(self, entity_type_enum):
        """Return the SDK's entity list for the given entity type."""
        return self.sdk.get_entities(
            params=GetEntityListParams(entity_type_enum=entity_type_enum)
        )

    def get_entity(self, enum, entity_type_enum):
        """Fetch one entity, identified by its ``enum`` value within an entity type.

        The entity list for the type is scanned for the first item whose
        ``'enum'`` equals ``enum``; its uuid is then used for the SDK lookup.

        Raises:
            ValueError: when no usable match is found in the list.
        """
        match = None
        for candidate in self.get_entities(entity_type_enum):
            if candidate['enum'] == enum:
                match = candidate
                break

        if not match:
            raise ValueError(f"No entity found with enum '{enum}' and type '{entity_type_enum}'")

        lookup = GetEntityParams(
            uuid=match['uuid'],
            enum=enum,
            entity_type_enum=entity_type_enum,
        )
        return self.sdk.get_entity(params=lookup)

# Usage: one wrapper and one pretty-printer shared by the steps below
ke_wrapper = KnowledgeEngineWrapper()
pp = pprint.PrettyPrinter(indent=1)

# Entity types known to the engine
entity_types = ke_wrapper.get_entity_types()
print("Entity Types:")
pp.pprint(entity_types)

# Entity lists per type; only the first two of each are shown
data_elements = ke_wrapper.get_entities(EntityTypes.DATA_ELEMENT)
data_sets = ke_wrapper.get_entities(EntityTypes.DATA_SET)

print("\nData Elements (first 2):")
pp.pprint(data_elements[:2])
print("\nData Sets (first 2):")
pp.pprint(data_sets[:2])

# One specific entity, shown as a dict and then as a one-column table.
# A ValueError raised anywhere in this step is reported instead of propagating.
try:
    entity = ke_wrapper.get_entity("DATA_ELEMENT_MEDICATION_DOSAGES", EntityTypes.DATA_ELEMENT)
    print("\nSpecific Entity:")
    pp.pprint(entity)

    df = pd.DataFrame.from_dict(entity, orient='index')
    print("\nEntity as DataFrame:")
    print(df)
except ValueError as e:
    print(f"Error: {e}")

Entity Types:
['DATA_SET', 'DATA_TYPE', 'DATA_ELEMENT', 'PH_ID']

Data Elements (first 2):
[{'_id': 'Entity/DATA_ELEMENT.DATA_ELEMENT_METABOLOMICS_CORRECTED',
  '_key': 'DATA_ELEMENT.DATA_ELEMENT_METABOLOMICS_CORRECTED',
  '_rev': '_emidfZW---',
  'basicEnum': 'METABOLOMICS_CORRECTED',
  'candidateId': '576d7aba-6cf0-4f27-8a19-aa01b38a5ef2',
  'creator': '29abf4b0-8d11-4eed-81d1-9ec501cc2ee7',
  'creatorType': 'USER',
  'entityType': 'DATA_ELEMENT',
  'enum': 'DATA_ELEMENT_METABOLOMICS_CORRECTED',
  'name': 'metabolomics_corrected.csv',
  'system': True,
  'uuid': '53812404-886e-4cd2-8e14-395a0fe8fb6c',
  'verified': False},
 {'_id': 'Entity/DATA_ELEMENT.DATA_ELEMENT_SALIVA',
  '_key': 'DATA_ELEMENT.DATA_ELEMENT_SALIVA',
  '_rev': '_emigPha---',
  'basicEnum': 'SALIVA',
  'candidateId': '576d7aba-6cf0-4f27-8a19-aa01b38a5ef2',
  'creator': '29abf4b0-8d11-4eed-81d1-9ec501cc2ee7',
  'creatorType': 'USER',
  'entityType': 'DATA_ELEMENT',
  'enum': 'DATA_ELEMENT_SALIVA',
  'name': 'saliva.c

In [26]:
# --- Connections ---

# Connection types offered by the SDK (fetched, not printed)
connection_types = sdk.get_connection_types()

# Outgoing "contains" connections of the current entity
connections = sdk.get_connections(
    params=GetConnectionListParams(
        connection_type_name="contains",
        entity_uuid=entity["uuid"],
        direction="out",
    )
)

# The single connection between the first data set and the first data element
query_params = GetConnectionParams(
    from_entity_uuid=data_sets[0]["uuid"],
    to_entity_uuid=data_elements[0]["uuid"],
)
connection = sdk.get_connection(params=query_params)
pp.pprint(connection)

# Last expression of the cell: one column per connection, one row per field
pd.DataFrame.from_dict(connections).T

[{'_from': 'Entity/DATA_SET.DATA_SET_SCIENTIFIC_WELLNESS_COHORT',
  '_id': 'contains/DATA_SET.DATA_SET_SCIENTIFIC_WELLNESS_COHORT.DATA_ELEMENT.DATA_ELEMENT_METABOLOMICS_CORRECTED',
  '_key': 'DATA_SET.DATA_SET_SCIENTIFIC_WELLNESS_COHORT.DATA_ELEMENT.DATA_ELEMENT_METABOLOMICS_CORRECTED',
  '_rev': '_emidfaC---',
  '_to': 'Entity/DATA_ELEMENT.DATA_ELEMENT_METABOLOMICS_CORRECTED',
  'candidateId': '576d7aba-6cf0-4f27-8a19-aa01b38a5ef2',
  'fromUuid': '5308b32a-4e51-489f-8adb-7c842152d161',
  'toUuid': '53812404-886e-4cd2-8e14-395a0fe8fb6c',
  'verified': False}]


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,14,15,16,17,18,19,20,21,22,23
_key,DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DOSAGES.D...,DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DOSAGES.D...,DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DOSAGES.D...,DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DOSAGES.D...,DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DOSAGES.P...,DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DOSAGES.P...,DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DOSAGES.P...,DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DOSAGES.P...,DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DOSAGES.P...,DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DOSAGES.P...,...,DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DOSAGES.P...,DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DOSAGES.P...,DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DOSAGES.P...,DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DOSAGES.P...,DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DOSAGES.P...,DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DOSAGES.P...,DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DOSAGES.P...,DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DOSAGES.P...,DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DOSAGES.P...,DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DOSAGES.P...
_id,contains/DATA_ELEMENT.DATA_ELEMENT_MEDICATION_...,contains/DATA_ELEMENT.DATA_ELEMENT_MEDICATION_...,contains/DATA_ELEMENT.DATA_ELEMENT_MEDICATION_...,contains/DATA_ELEMENT.DATA_ELEMENT_MEDICATION_...,contains/DATA_ELEMENT.DATA_ELEMENT_MEDICATION_...,contains/DATA_ELEMENT.DATA_ELEMENT_MEDICATION_...,contains/DATA_ELEMENT.DATA_ELEMENT_MEDICATION_...,contains/DATA_ELEMENT.DATA_ELEMENT_MEDICATION_...,contains/DATA_ELEMENT.DATA_ELEMENT_MEDICATION_...,contains/DATA_ELEMENT.DATA_ELEMENT_MEDICATION_...,...,contains/DATA_ELEMENT.DATA_ELEMENT_MEDICATION_...,contains/DATA_ELEMENT.DATA_ELEMENT_MEDICATION_...,contains/DATA_ELEMENT.DATA_ELEMENT_MEDICATION_...,contains/DATA_ELEMENT.DATA_ELEMENT_MEDICATION_...,contains/DATA_ELEMENT.DATA_ELEMENT_MEDICATION_...,contains/DATA_ELEMENT.DATA_ELEMENT_MEDICATION_...,contains/DATA_ELEMENT.DATA_ELEMENT_MEDICATION_...,contains/DATA_ELEMENT.DATA_ELEMENT_MEDICATION_...,contains/DATA_ELEMENT.DATA_ELEMENT_MEDICATION_...,contains/DATA_ELEMENT.DATA_ELEMENT_MEDICATION_...
_from,Entity/DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DO...,Entity/DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DO...,Entity/DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DO...,Entity/DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DO...,Entity/DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DO...,Entity/DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DO...,Entity/DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DO...,Entity/DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DO...,Entity/DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DO...,Entity/DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DO...,...,Entity/DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DO...,Entity/DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DO...,Entity/DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DO...,Entity/DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DO...,Entity/DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DO...,Entity/DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DO...,Entity/DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DO...,Entity/DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DO...,Entity/DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DO...,Entity/DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DO...
_to,Entity/DATA_TYPE.DATA_TYPE_MEDICATION_ID,Entity/DATA_TYPE.DATA_TYPE_NAME,Entity/DATA_TYPE.DATA_TYPE_AMOUNT,Entity/DATA_TYPE.DATA_TYPE_UNIT,Entity/p1c5b,Entity/p6da5,Entity/p841a,Entity/p883f,Entity/pb742,Entity/p8051,...,Entity/pff00,Entity/pd184,Entity/p99bc,Entity/p1e35,Entity/p344b,Entity/p8f11,Entity/p5191,Entity/p7d72,Entity/p3604,Entity/p9c05
_rev,_emigRDy---,_emigREq---,_emigRFi---,_emigRGa---,_emigRHG---,_emigRJO---,_emigRLa---,_emigRNu---,_emigRP6---,_emigRSG---,...,_emigRcm---,_emigRey---,_emigRg6---,_emigRjG---,_emigRlW---,_emigRne---,_emigRpm---,_emigRri---,_emigRtu---,_emigRvy---
fromUuid,5afd2304-f739-4f06-b1f8-61ddda331ff6,5afd2304-f739-4f06-b1f8-61ddda331ff6,5afd2304-f739-4f06-b1f8-61ddda331ff6,5afd2304-f739-4f06-b1f8-61ddda331ff6,5afd2304-f739-4f06-b1f8-61ddda331ff6,5afd2304-f739-4f06-b1f8-61ddda331ff6,5afd2304-f739-4f06-b1f8-61ddda331ff6,5afd2304-f739-4f06-b1f8-61ddda331ff6,5afd2304-f739-4f06-b1f8-61ddda331ff6,5afd2304-f739-4f06-b1f8-61ddda331ff6,...,5afd2304-f739-4f06-b1f8-61ddda331ff6,5afd2304-f739-4f06-b1f8-61ddda331ff6,5afd2304-f739-4f06-b1f8-61ddda331ff6,5afd2304-f739-4f06-b1f8-61ddda331ff6,5afd2304-f739-4f06-b1f8-61ddda331ff6,5afd2304-f739-4f06-b1f8-61ddda331ff6,5afd2304-f739-4f06-b1f8-61ddda331ff6,5afd2304-f739-4f06-b1f8-61ddda331ff6,5afd2304-f739-4f06-b1f8-61ddda331ff6,5afd2304-f739-4f06-b1f8-61ddda331ff6
toUuid,d7192137-a103-457e-9075-39770e53de33,f2b0f9a7-c0cc-4830-8fe9-20ec9adcf89f,d9c1ade2-7f06-4fb8-9841-9084710f6474,c5bf4059-1034-4af1-b0e2-6f55fbca8a70,39f8794a-2bf9-4581-aac3-232e0716a187,f90d3397-b1c0-4a3c-9834-595cc472a286,c87d5c44-c761-4cbc-ab22-120514aec384,674e9d1c-f541-40ee-9868-c7aad5e5ffbd,1620fa3a-579b-4f3b-a361-9cc412212fdc,d154153f-100f-4614-9953-389f5e40b5f4,...,38ea99d3-315c-4def-87be-608f1a7db4cf,c8874fb6-1479-4e5b-af24-002fc3616f15,37bf7edf-5c80-4ee0-b279-612885803a05,050b7187-1a6e-468c-b51a-2c1f4d5ae62f,6cf65c5b-60c4-4831-b005-1ae6a8bd5ebd,6c843458-75ec-431f-b05f-baace41b9b40,0db09a8b-41da-4c4d-ba83-2cf20d1fd0ec,4c6e80f3-3cde-4b4d-9898-42dc113978e6,1e8ac239-140a-45ed-925b-048b0840c22e,38a5a625-3a7d-4b32-be86-db4d45b6e56b
candidateId,576d7aba-6cf0-4f27-8a19-aa01b38a5ef2,576d7aba-6cf0-4f27-8a19-aa01b38a5ef2,576d7aba-6cf0-4f27-8a19-aa01b38a5ef2,576d7aba-6cf0-4f27-8a19-aa01b38a5ef2,576d7aba-6cf0-4f27-8a19-aa01b38a5ef2,576d7aba-6cf0-4f27-8a19-aa01b38a5ef2,576d7aba-6cf0-4f27-8a19-aa01b38a5ef2,576d7aba-6cf0-4f27-8a19-aa01b38a5ef2,576d7aba-6cf0-4f27-8a19-aa01b38a5ef2,576d7aba-6cf0-4f27-8a19-aa01b38a5ef2,...,576d7aba-6cf0-4f27-8a19-aa01b38a5ef2,576d7aba-6cf0-4f27-8a19-aa01b38a5ef2,576d7aba-6cf0-4f27-8a19-aa01b38a5ef2,576d7aba-6cf0-4f27-8a19-aa01b38a5ef2,576d7aba-6cf0-4f27-8a19-aa01b38a5ef2,576d7aba-6cf0-4f27-8a19-aa01b38a5ef2,576d7aba-6cf0-4f27-8a19-aa01b38a5ef2,576d7aba-6cf0-4f27-8a19-aa01b38a5ef2,576d7aba-6cf0-4f27-8a19-aa01b38a5ef2,576d7aba-6cf0-4f27-8a19-aa01b38a5ef2
verified,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [29]:
from knowledge_engine_sdk.models import TraverseParams, ConnectionDirection, TraverseAlgorithm

# List every ConnectionDirection member with its value before picking one
print("Available ConnectionDirection options:")
for direction in ConnectionDirection:
    print(f"- {direction.name}: {direction.value}")

# Depth-first traversal from the current entity along outbound connections,
# covering depths 0 through 1.
traverse_options = dict(
    start_entity_uuid=entity["uuid"],
    direction=ConnectionDirection.OUTBOUND,
    max_depth=1,
    min_depth=0,
    algorithm=TraverseAlgorithm.DEPTH,
)
query_params = TraverseParams(**traverse_options)

traversal = sdk.traverse_graph(params=query_params)
pp.pprint(traversal)

Available ConnectionDirection options:
- INBOUND: in
- OUTBOUND: out
{'paths': [{'edges': [],
            'vertices': [{'_id': 'Entity/DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DOSAGES',
                          '_key': 'DATA_ELEMENT.DATA_ELEMENT_MEDICATION_DOSAGES',
                          '_rev': '_emigRCG---',
                          'basicEnum': 'MEDICATION_DOSAGES',
                          'candidateId': '576d7aba-6cf0-4f27-8a19-aa01b38a5ef2',
                          'creator': '29abf4b0-8d11-4eed-81d1-9ec501cc2ee7',
                          'creatorType': 'USER',
                          'entityType': 'DATA_ELEMENT',
                          'enum': 'DATA_ELEMENT_MEDICATION_DOSAGES',
                          'name': 'medication_dosages.csv',
                          'system': True,
                          'uuid': '5afd2304-f739-4f06-b1f8-61ddda331ff6',
                          'verified': False}]},
           {'edges': [{'_from': 'Entity/DATA_ELEMENT.DATA_ELEMENT_MEDI

In [31]:
from knowledge_engine_sdk.models import GetEntityParams, EntityTypes, CreateEntity, CreateConnection, GetConnectionParams, DeleteConnection, DeleteEntity

# Candidate id attached to everything this cell creates
CANDIDATE_ID = "184e1f33-4b1b-47b6-9f5d-f7f6ef9d1fb5"

# Get entity
query_params = GetEntityParams(
    enum="MET_O95544",
    entity_type_enum=EntityTypes.DATA_TYPE,
    uuid=None  # Add this line to provide a value for the required uuid field
)

entity = sdk.get_entity(params=query_params)
pp.pprint(entity)

# Stop before writing anything when the source entity is missing: otherwise the
# new entity would be created first and the cell would then fail on
# entity["uuid"], leaving an unconnected entity behind.
if entity is None:
    raise LookupError("Source entity MET_O95544 (DATA_TYPE) was not found; nothing was created")

# Create new entity
new_data = CreateEntity(
    name="NAD",
    isSystem=True,
    entityType=EntityTypes.DATA_TYPE,
    candidateId=CANDIDATE_ID,
    payload={
        "type": "protein",
        "class": "kinase",
        "ec_number": "2.7.1.23",
        "pathway": "O95544"
    }
)

new_entity = sdk.create_entity(data=new_data)
pp.pprint(new_entity)

# Create new connection
new_connection_data = CreateConnection(
    fromUuid=entity["uuid"],
    toUuid=new_entity["uuid"],
    connectionType="contains",
    candidateId=CANDIDATE_ID,
    payload={
        "my_data": "here is the data"
    }
)

sdk.create_connection(data=new_connection_data)

# Get new connection
get_connection_params = GetConnectionParams(
    from_entity_uuid=entity["uuid"],
    to_entity_uuid=new_entity["uuid"]
)
new_connection = sdk.get_connection(params=get_connection_params)
pp.pprint(new_connection)

# Delete connection (commented out)
# delete_connection_data = DeleteConnection(
#     fromUuid=entity["uuid"],
#     toUuid=new_entity["uuid"],
#     connectionType="contains"
# )
# sdk.delete_connection(data=delete_connection_data)

# Delete entity (commented out)
# delete_entity_data = DeleteEntity(uuid=new_entity["uuid"])
# sdk.delete_entity(data=delete_entity_data)

# Get the newly created entity
get_new_entity_params = GetEntityParams(
    enum="NAD",
    entity_type_enum=EntityTypes.DATA_TYPE,
    uuid=None  # Add this line to provide a value for the required uuid field
)

new_entity_retrieved = sdk.get_entity(params=get_new_entity_params)
pp.pprint(new_entity_retrieved)

{'_id': 'Entity/DATA_TYPE.DATA_TYPE_MET_O95544',
 '_key': 'DATA_TYPE.DATA_TYPE_MET_O95544',
 '_rev': '_emigb4i---',
 'basicEnum': 'MET_O95544',
 'candidateId': '576d7aba-6cf0-4f27-8a19-aa01b38a5ef2',
 'creator': '29abf4b0-8d11-4eed-81d1-9ec501cc2ee7',
 'creatorType': 'USER',
 'entityType': 'DATA_TYPE',
 'enum': 'DATA_TYPE_MET_O95544',
 'name': 'MET_O95544',
 'system': True,
 'uuid': '9377e839-e633-483c-a7e5-b34ae5131c52',
 'verified': False}
{'_id': 'Entity/2064000',
 '_key': '2064000',
 '_rev': '_erW8PC----',
 'basicEnum': 'NAD',
 'candidateId': '184e1f33-4b1b-47b6-9f5d-f7f6ef9d1fb5',
 'creator': 'SYSTEM',
 'creatorType': 'SYSTEM',
 'entityType': 'DATA_TYPE',
 'enum': 'DATA_TYPE_NAD',
 'name': 'NAD',
 'payload': {'class': 'kinase',
             'ec_number': '2.7.1.23',
             'pathway': 'O95544',
             'type': 'protein'},
 'system': True,
 'uuid': 'f23622ac-5a1c-4630-bacd-f5af9fe75df4',
 'verified': False}
[{'_from': 'Entity/DATA_TYPE.DATA_TYPE_MET_O95544',
  '_id': 'cont

In [33]:
from knowledge_engine_sdk.models import GetEntityParams, EntityTypes

# Build lookup parameters for a PH_ID entity identified by its enum value.
query_params = GetEntityParams(
    enum="P841A",
    entity_type_enum=EntityTypes.PH_ID,
    uuid=None  # passed explicitly as None; reportedly the model requires the uuid field to be supplied — TODO confirm
)

# Ask the SDK for the entity matching query_params.
phid1 = sdk.get_entity(params=query_params)

# NOTE(review): the recorded output of this cell is None — presumably no
# matching entity was found; verify the enum / entity type.
pp.pprint(phid1)

None


In [38]:
import inspect
import knowledge_engine_sdk
from enum import Enum

def print_class_info(cls):
    """Print a summary of a class to stdout.

    The report has five parts, in this order: a banner with the class name,
    the class docstring, the constructor signature, the functions defined on
    the class (each with its signature and a docstring cut to 100 characters
    followed by "..."), and the remaining non-dunder attributes with their
    types.

    Args:
        cls: The class to describe.

    Returns:
        None. Everything is written with ``print``.
    """
    print(f"\n{'='*50}\n{cls.__name__}\n{'='*50}")
    print("Docstring:")
    print(inspect.getdoc(cls))
    print("\nSignature:")
    try:
        print(inspect.signature(cls))
    except (ValueError, TypeError):
        # inspect.signature raises ValueError when no signature can be
        # provided and TypeError when the object type is unsupported.
        print("No signature available")
    print("\nMethods:")
    for name, method in inspect.getmembers(cls, inspect.isfunction):
        print(f"  {name}:")
        try:
            print(f"    Signature: {inspect.signature(method)}")
        except ValueError:
            print("    Signature: Not available")
        doc = inspect.getdoc(method)
        if doc:
            print(f"    Docstring: {doc[:100]}..." if len(doc) > 100 else f"    Docstring: {doc}")
    print("\nAttributes:")
    for name, value in inspect.getmembers(cls):
        if name.startswith('__'):
            continue
        # Functions looked up on the class are plain functions, not bound
        # methods, so inspect.ismethod alone would let every regular method
        # through and list it a second time here. Exclude both kinds.
        if inspect.isfunction(value) or inspect.ismethod(value):
            continue
        print(f"  {name}: {type(value)}")

def print_enum_info(enum):
    """Print an Enum class's name and every member with its value.

    Args:
        enum: The Enum class to describe. Its ``__members__`` mapping is
            walked in order, one "  NAME: value" line per entry.
    """
    rule = '=' * 50
    print(f"\n{rule}\n{enum.__name__}\n{rule}")
    print("Values:")
    members = enum.__members__
    for member_name in members:
        print(f"  {member_name}: {members[member_name].value}")

def print_function_info(func):
    """Print a function's name, signature and docstring to stdout.

    Args:
        func: The function to describe. If ``inspect.signature`` raises
            ValueError, "No signature available" is printed instead.
    """
    rule = '-' * 50
    print(f"\n{rule}\n{func.__name__}\n{rule}")
    print("Signature:")
    try:
        # Render inside the try so a ValueError from either the lookup or
        # the string conversion falls through to the fallback message.
        rendered = str(inspect.signature(func))
    except ValueError:
        rendered = "No signature available"
    print(rendered)
    print("\nDocstring:")
    print(inspect.getdoc(func))

def print_all_info(module):
    """Print an inventory of a module to stdout.

    Output order: the module name, every name from ``dir(module)``, a report
    for each class (Enum subclasses go to ``print_enum_info``, everything
    else to ``print_class_info``), a report for each function, the
    non-dunder non-callable attributes with their types (plus the value for
    int/float/str/bool), and finally the module's ``__version__`` if it has
    one.

    Args:
        module: The module object to inspect.
    """
    print(f"Module: {module.__name__}")

    print("\nAll attributes:")
    for attr_name in dir(module):
        print(attr_name)

    print("\nClasses:")
    for _, klass in inspect.getmembers(module, inspect.isclass):
        # Enums get the member/value listing; other classes the full report.
        describe = print_enum_info if issubclass(klass, Enum) else print_class_info
        describe(klass)

    print("\nFunctions:")
    for _, function in inspect.getmembers(module, inspect.isfunction):
        print_function_info(function)

    print("\nVariables:")
    for var_name in dir(module):
        if var_name.startswith('__'):
            continue
        value = getattr(module, var_name)
        if callable(value):
            continue
        print(f"  {var_name}: {type(value)}")
        # Only simple scalars are echoed; containers and objects show type only.
        if isinstance(value, (int, float, str, bool)):
            print(f"    Value: {value}")

    # Version line: fall back to a fixed message when __version__ is absent.
    try:
        version_line = f"\nKnowledge Engine SDK Version: {module.__version__}"
    except AttributeError:
        version_line = "\nKnowledge Engine SDK Version: Not available"
    print(version_line)

# Dump the full inventory of the SDK package.
print_all_info(knowledge_engine_sdk)

# Follow-up pass over the KnowledgeEngine class, if the package exposes one:
# print each non-dunder function with its signature and its full docstring.
ke_class = getattr(knowledge_engine_sdk, 'KnowledgeEngine', None)
if ke_class:
    print("\nDetailed KnowledgeEngine Methods:")
    for name, method in inspect.getmembers(ke_class, inspect.isfunction):
        if name.startswith('__'):
            continue
        print(f"\n{name}:")
        try:
            signature_line = f"Signature: {inspect.signature(method)}"
        except ValueError:
            signature_line = "Signature: Not available"
        print(signature_line)
        doc = inspect.getdoc(method)
        if doc:
            print(f"Docstring: {doc}")

Module: knowledge_engine_sdk

All attributes:
Any
BaseModel
Connection
ConnectionDirection
ConnectionTypes
CreateConnection
CreateEntity
DeleteConnection
DeleteEntity
Entity
EntityTypes
Enum
GetConnectionListParams
GetConnectionParams
GetEntityListParams
GetEntityParams
GetEntityTypeParams
Knowledge
KnowledgeEngine
List
Optional
OrderBy
Pagination
TraverseAlgorithm
TraverseParams
Union
ValidationError
__builtins__
__cached__
__doc__
__file__
__loader__
__name__
__package__
__path__
__spec__
datetime
errors
knowledge_engine_sdk
models
utils

Classes:

BaseModel
Docstring:
Usage docs: https://docs.pydantic.dev/2.8/concepts/models/

A base class for creating Pydantic models.

Attributes:
    __class_vars__: The names of classvars defined on the model.
    __private_attributes__: Metadata about the private attributes of the model.
    __signature__: The signature for instantiating the model.

    __pydantic_complete__: Whether model building is completed, or if there are still undefined fi