In [1]:
# Show the result of every top-level expression in a cell, not only the last
# one, so values can be inspected without wrapping them in print().
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [2]:
import pyarrow as pa
import pyarrow.compute as pc
from nn_rag import Knowledge

### Instantiate capability

In [3]:
# Create the Knowledge component that every later cell works through.
# NOTE(review): `from_memory` presumably keeps the component's configuration
# in memory only rather than persisting it — confirm against nn_rag.
kn = Knowledge.from_memory()

In [4]:
# Relative path of the PDF to ingest.
uri = "source/Gen AI Best Practices.pdf"
# Register the path as the component's source and load it into a table.
# The recorded output shows shape (1, 1) with a single 'text' column.
tbl = kn.set_source_uri(uri).load_source_canonical()

In [5]:
# Quick look at what was loaded: dimensions, column names, and the opening
# 300 characters of the first row's text.
tbl.shape
tbl.column_names
tbl.column('text')[0].as_py()[:300]

(1, 1)

['text']

'Best Practices in Generative AI\nResponsible use and development \nin the modern workplace\n© Responsible AI Institute 2024  \nAll Rights Reserved | Do Not Use Without Permission\n\x0cExecutive Summary\nGenerative AI, a technology capable of producing realistic content in the form of text, images,\nsound, and'

### Tidy the text

In [6]:
# Flatten the extracted text: turn each newline into a space, then replace
# each double space with a single one. Order matters, because the first pass
# can itself create double spaces (e.g. a space followed by a newline).
# NOTE(review): one pass of '  ' -> ' ' may leave runs of three or more spaces
# only partly collapsed — confirm how correlate_replace matches.
for old_text, new_text in (('\n', ' '), ('  ', ' ')):
    tbl = kn.tools.correlate_replace(tbl, 'text', old_text, new_text, intent_order=-1)

### Profiling

In [13]:
# Profile the tidied text. The recorded output has one row per sentence with
# its position plus character, word and token counts.
# NOTE(review): unlike the other tool calls, no intent_order is passed here —
# confirm that is intended.
profile = kn.tools.text_profiler(tbl)
profile.shape
# Render only the first four rows of the profile.
kn.table_report(profile, head=4)

(392, 5)

Unnamed: 0,sentence,sentence_num,char_count,word_count,token_count
0,"Best Practices in Generative AI Responsible use and development in the modern workplace © Responsible AI Institute 2024 All Rights Reserved | Do Not Use Without Permission Executive Summary Generative AI, a technology capable of producing realistic content in the form of text, images, sound, and more, presents signiﬁcant opportunities and challenges for businesses today.",0,375,56,93.75
1,"With generative AI (GenAI) applications ranging from customer service automation to content creation, the recent explosive adoption of LLM technologies like ChatGPT underscores the potential transformative scale of AI impact, both positive and negative.",1,253,34,63.25
2,"Potential risks and harms from generative AI impact human rights, privacy, security, labor, fairness, sustainability, and more.",2,127,17,31.75
3,"Without investing effort to comprehensively address these issues across the enterprise, businesses are exposed to the risks of compliance penalties, consumer harm, loss of trust, damages, and more.",3,197,28,49.25


### Chunking

In [8]:
# Intended to build text chunks from the sentence profile.
# NOTE(review): the recorded run of this cell failed with
# "AttributeError: 'KnowledgeIntent' object has no attribute 'sentence_chunk'",
# so `chunks` was never created and the later cells that use it cannot run.
# Confirm the chunking method name exposed by the installed nn_rag version.
chunks = kn.tools.sentence_chunk(profile, intent_order=-1)

AttributeError: 'KnowledgeIntent' object has no attribute 'sentence_chunk'

In [None]:
# Inspect the dimensions and column names of the chunking result.
chunks.shape
chunks.column_names

### Embedding

In [None]:
# Embed the chunks with the 'all-mpnet-base-v2' embedding on the CPU, using a
# batch size of 32.
embedding = kn.tools.chunk_embedding(
    chunks,
    batch_size=32,
    embedding_name='all-mpnet-base-v2',
    device='cpu',
    intent_order=-1,
)

In [None]:
# Check what kind of object the embedding step returned and its dimensions.
type(embedding)
embedding.shape

In [None]:
# NOTE(review): presumably replays the intents registered above (the calls
# made with intent_order) against the configured source — confirm.
kn.run_component_pipeline()

### Reports

In [None]:
# NOTE(review): presumably lists the intent actions recorded on the component — confirm.
kn.report_intent()

In [None]:
# NOTE(review): presumably lists the component's configured connectors, such as the source URI — confirm.
kn.report_connectors()

In [None]:
# NOTE(review): presumably summarises the component's task metadata — confirm.
kn.report_task()