In [1]:
# saves you having to use print as all exposed variables are printed in the cell
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [2]:
# suppress warning message
import warnings
warnings.filterwarnings('ignore')

In [3]:
import os
import pyarrow as pa
import pyarrow.compute as pc
from nn_rag import Knowledge, Controller

### Set parameters as environment variables

In [4]:
# Data
os.environ['HADRON_KNOWLEDGE_SOURCE_URI'] = './hadron/source/Gen AI Best Practices.pdf'
os.environ['HADRON_KNOWLEDGE_EMBED_URI'] = 'chroma:///'


### Instantiate capability

In [5]:
kn = Knowledge.from_memory()

In [6]:
tbl = kn.set_source_uri("${HADRON_KNOWLEDGE_SOURCE_URI}").load_source_canonical()
kn.set_persist_uri('${HADRON_KNOWLEDGE_EMBED_URI}')

### Remove end of line from the document

In [7]:
doc = kn.tools.filter_replace_str(tbl, pattern='\n', replacement='')

### Break the text_doc into sentences with scores

In [8]:
sentences = kn.tools.text_to_sentences(doc, include_score=True, words_max=3)

building sentences:   0%|          | 0/11099 [00:00<?, ?it/s]

sentence stats:   0%|          | 0/332 [00:00<?, ?it/s]

sentence scores:   0%|          | 0/331 [00:00<?, ?it/s]

In [9]:
pc.min(sentences['char_count']).as_py()
pc.mean(sentences['char_count']).as_py()
pc.max(sentences['char_count']).as_py()

1

206.0512048192771

993

### Remove short sentences

In [10]:
small_sentences = pc.filter(sentences, pc.less(sentences['word_count'], 3)).column('index').to_pylist()
print(f"Short sentences for {small_sentences}")

Short sentences for [201, 228, 232, 260, 271, 275]


In [11]:
sentences = kn.tools.filter_on_mask(sentences, indices=small_sentences)
pc.min(sentences['word_count']).as_py()

3

### Use pyarrow to get the thrashold similarity scores

In [12]:
high_similarity = pc.filter(sentences, pc.greater(sentences['score'], 0.9)).column('index').to_pylist()
print(f"Threshold silimarity for {high_similarity}")

Threshold silimarity for []


### join the similar sentences

In [13]:
similar = kn.tools.filter_on_join(sentences, indices=high_similarity)

In [14]:
pc.min(sentences['char_count']).as_py()
pc.mean(sentences['char_count']).as_py()
pc.max(sentences['char_count']).as_py()

31

209.7085889570552

993

### chunk the sentences

In [15]:
chunks = kn.tools.filter_on_join(similar, chunk_size=1000)
sentences.shape
chunks.shape

(326, 7)

(67, 7)

### Save embedding

In [16]:
kn.save_persist_canonical(chunks)

### Query

In [17]:
import textwrap

def print_wrapped(text, wrap_length=80):
    wrapped_text = textwrap.fill(text, wrap_length)
    print(wrapped_text)

In [18]:
import random

questions = [
    "What are the main risks associated with generative AI?",
    "How can businesses mitigate the risks of using generative AI?",
    "What is the role of a cross-functional team in GenAI strategy?",
    "What are the legal considerations for implementing generative AI?",
    "How should organizations ensure data quality in GenAI models?",
    "What is the importance of ongoing enhancement and monitoring in GenAI practices?"
]

query = random.choice(questions)

### Model Answers
#### What are the main risks associated with generative AI?
Generative AI poses several risks including the creation of misleading content such as deepfakes, biased outputs due to biased training data, factually inaccurate outputs (hallucinations), security vulnerabilities (data breaches, model theft), and a lack of transparency in decision-making processes. These risks can lead to significant harm including privacy violations, intellectual property concerns, and increased carbon footprints.

#### How can businesses mitigate the risks of using generative AI?
Businesses can mitigate these risks by implementing Responsible AI (RAI) frameworks, which include gathering cross-functional teams, tracking legal requirements, ensuring high-quality and diverse training data, maintaining transparency, and engaging in continuous monitoring and enhancement of AI systems. Establishing governance structures and upholding legal and ethical standards are also crucial.

#### What is the role of a cross-functional team in GenAI strategy?
A cross-functional team centralizes AI expertise and capabilities across different departments, facilitating knowledge sharing, collaboration, and standardization. This team should oversee AI governance, resource pooling, and upskilling initiatives. They also play a key role in developing a long-term AI roadmap, promoting innovation, and ensuring alignment with company objectives and Responsible AI standards.

#### What are the legal considerations for implementing generative AI?
Legal considerations for implementing generative AI include ensuring compliance with intellectual property laws, privacy regulations, and monitoring the evolving legal landscape for new AI-specific rulings. Organizations must document and manage legal requirements, maintain robust data privacy controls, and continuously review contracts and licensing agreements related to AI models and their outputs.

#### How should organizations ensure data quality in GenAI models?
Organizations should ensure data quality by implementing stringent data acquisition, selection, and management processes. This includes verifying the provenance of training data, ensuring it is relevant to defined use cases, and maintaining high standards for data quality and diversity. Tracking metrics related to fairness, bias, and interpretability throughout the data lifecycle is also essential.

#### What is the importance of ongoing enhancement and monitoring in GenAI practices?
Ongoing enhancement and monitoring are crucial for adapting to the rapid evolution of GenAI technology. Continuous improvement ensures that AI systems remain effective, secure, and aligned with Responsible AI principles. Regular monitoring helps identify and mitigate new risks, maintain compliance with regulations, and uphold the quality and reliability of AI outputs.


In [19]:
print(f"Query: {query}\n")

answer = kn.load_persist_canonical(query=query)

for i in range(answer.num_rows):
    s = answer.slice(i,1)
    print(f"Id: {s.column('id')[0]}.as_py()")
    print(f"Distance: {s.column('distance')[0].as_py()}")
    print(f"Answer: {print_wrapped(s.column('source')[0].as_py())}\n")



Query: How can businesses mitigate the risks of using generative AI?

Id: general_24.as_py()
Distance: 1.3120352029800415
Regularly communicating with stakeholders regarding AIconsiderations and
incorporating feedback from employees, users, and experts is crucial toreﬁne
upskilling plans, keeping efforts in sync with rapid technological progress and
evolvingstandards of responsible AI. Bolster role-relevant and application-
specific trainingOrganization should develop or procure responsible AI training
resources for speciﬁc roles anddetermine what kinds of training should be
mandatory or encouraged.●Training programs should include risk, compliance, and
RAI training across variousfunctions. Training materials should include case
studies and should be engaging,accessible, and effective.●For example, HR
training should cover appropriate uses, risks, and requirements speciﬁcto
relevant use cases. Technical users should be trained in model
assessmentframeworks, research reviews, and analyt