In [1]:
# Display the result of every top-level expression in a cell (not only the last one),
# so intermediate values show up without explicit print() calls.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [2]:
import os
from nn_rag import Knowledge, Controller

### Vector parameters (environment variables)
    MILVUS_EMBEDDING_NAME
    MILVUS_EMBEDDING_DEVICE
    MILVUS_EMBEDDING_BATCH_SIZE
    MILVUS_EMBEDDING_DIM
    MILVUS_RESPONSE_LIMIT
    MILVUS_INDEX_METRIC
    MILVUS_DOC_REF

### Set parameters as environment variables

In [3]:
# Publish every demo setting to the process environment in a single call.
# Keys keep their original grouping and order; all values are strings.
os.environ.update({
    # Data
    'HADRON_KNOWLEDGE_SOURCE_URI': './hadron/source/euaiact.pdf',
    'HADRON_KNOWLEDGE_EMBED_URI': 'milvus://localhost:19530/rai',
    'HADRON_KNOWLEDGE_QUERY_URI': 'milvus://localhost:19530/rai',

    # Vector Db
    'MILVUS_DOC_REF': 'eu_ai_act_2024',
    'MILVUS_EMBEDDING_DEVICE': 'cpu',
    'MILVUS_RESPONSE_LIMIT': '4',

    # Parameters
    'HADRON_NUM_SENTENCE_CHUNK_SIZE': '5',
})

### Instantiate capability

In [4]:
# Build the Knowledge component for the task named 'demo' through its from_env factory.
# NOTE(review): has_contract=False presumably means no pre-existing contract is required — confirm.
kn = Knowledge.from_env('demo', has_contract=False)

In [5]:
# Attach a free-text description to the component.
kn.set_description('a reusable component to embed documentation to build an AI knowledge base for a RAG')

In [6]:
# Wire up the connectors. Each URI is given as a ${...} placeholder naming one of the
# environment variables set earlier, rather than as a literal value.

# Source: the document to read.
kn.set_source_uri('${HADRON_KNOWLEDGE_SOURCE_URI}')

# Persist: where the component output is written.
kn.set_persist_uri('${HADRON_KNOWLEDGE_EMBED_URI}')

# Extra connector registered under the name 'query'.
kn.add_connector_uri('query', '${HADRON_KNOWLEDGE_QUERY_URI}')

<nn_rag.components.knowledge.Knowledge at 0x7fa495fc8d30>

<nn_rag.components.knowledge.Knowledge at 0x7fa495fc8d30>

### Document

In [7]:
# Load the source document through the source connector configured above.
# NOTE(review): the saved output for this cell is a ModuleNotFoundError for
# 'ds_core.handlers.knowledge_handlers'; none of the later cells were executed
# in the saved run — confirm the handler package is installed before re-running.
doc = kn.load_source_canonical()

ModuleNotFoundError: The module 'ds_core.handlers.knowledge_handlers' could not be found

In [None]:
# Tidy the text in two passes over the 'text' field of the document.

# Pass 1: newlines become single spaces.
doc = kn.tools.pattern_replace(
    doc, 'text', '\n', ' ',
    intent_order=-1,
)

# Pass 2: double spaces become single spaces.
# NOTE(review): a single pass may leave runs of 3+ spaces only partly collapsed — confirm
# whether pattern_replace accepts a regex such as ' +' before changing the pattern.
doc = kn.tools.pattern_replace(
    doc, 'text', '  ', ' ',
    intent_order=-1,
)

### Sentences

In [None]:
# Profile the tidied document text; the result feeds the chunking step below.
# NOTE(review): presumably produces one entry per sentence (per the section heading) — confirm.
sentences = kn.tools.text_profiler(doc, intent_order=-1)

### Chunking

In [None]:
# Group sentences into chunks. The chunk size is passed as a ${...} placeholder naming
# the HADRON_NUM_SENTENCE_CHUNK_SIZE environment variable set earlier ('5').
chunks = kn.tools.sentence_chunks(
    sentences,
    num_sentence_chunk_size='${HADRON_NUM_SENTENCE_CHUNK_SIZE}',
    intent_order=-1,
)

### Embedding

In [None]:
# TODO: the embedding step was left unfinished. The previous cell content was a dangling
# `kn.` attribute access, which is a SyntaxError and stops the cell from running.
# Add the embedding call that consumes `chunks` here once the intended method is chosen.

### Run capability

In [None]:
# Run the Knowledge component's pipeline end to end.
# NOTE(review): presumably replays the intent recorded by the tool calls above — confirm.
kn.run_component_pipeline()

In [None]:
# Remove the canonical held at the persist connector.
# NOTE(review): an identical cell appears again just before ctr.run_controller();
# presumably both clear earlier output so each run starts clean — confirm both are needed.
kn.remove_canonical(kn.CONNECTOR_PERSIST)

### Controller

In [None]:
# Create the controller from environment settings, then record the use case it serves.
ctr = Controller.from_env(has_contract=False)
ctr.set_use_case(
    title='Rag Demo',
    domain='General',
    overview='A pipeline that allows the collection of documentation to embed for a RAG catalog.',
    situation='HUB requirement for better access to documentation',
    opportunity='Improve accessibility through a RAG',
    actions='build a catalog of embedded documents',
)

In [None]:
# Register the Knowledge task with the controller; task_name 'demo' matches the name
# passed to Knowledge.from_env above.
# NOTE(review): intent_level 'knowledge_demo' is not set in any visible cell (the tool
# calls above pass only intent_order) — confirm it matches the stored intent level.
ctr.register.knowledge(task_name='demo', intent_level='knowledge_demo')

In [None]:
# Remove the canonical held at the persist connector before the controller run.
# NOTE(review): duplicates the cell that follows kn.run_component_pipeline() — confirm
# this second removal is intentional.
kn.remove_canonical(kn.CONNECTOR_PERSIST)

In [None]:
# Run the controller.
# NOTE(review): presumably executes the 'demo' knowledge task registered above — confirm.
ctr.run_controller()

### Reports

#### Controller

In [None]:
# Report the controller's use case (the fields supplied to set_use_case).
ctr.report_use_case()

In [None]:
# Report the controller's intent.
# NOTE(review): presumably lists the tasks added via ctr.register — confirm.
ctr.report_intent()

#### Knowledge

In [None]:
# Report the Knowledge component's task details.
# NOTE(review): presumably includes the description set via set_description — confirm.
kn.report_task()

In [None]:
# Report the Knowledge component's intent.
# NOTE(review): presumably lists the kn.tools calls made above — confirm.
kn.report_intent()

In [None]:
# Report the Knowledge component's connectors (source, persist and 'query' were set above).
kn.report_connectors()