In [1]:
# Display the value of every top-level expression in a cell (not only the last one),
# so intermediate results are shown without wrapping them in print().
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [2]:
# Suppress every Python warning for the rest of the session.
# NOTE(review): a blanket 'ignore' also hides deprecation and runtime warnings raised by
# the libraries used below — consider narrowing it by category or module.
import warnings
warnings.filterwarnings('ignore')

In [3]:
import os
import pyarrow as pa
import pyarrow.compute as pc
from nn_rag import Knowledge, Controller

### Set parameters as environment variables

In [4]:
# Data locations, published as environment variables and referenced later through
# "${...}" placeholders.
# Alternative source document: './hadron/source/Gen AI Best Practices.pdf'
# NOTE(review): the embed URI below carries no collection name, and the persist step
# further down is recorded failing with a ValueError about an empty collection name —
# confirm the URI form the Chroma handler expects.
os.environ.update({
    'HADRON_KNOWLEDGE_SOURCE_URI': "./hadron/source/Human-Nutrition-2020-Edition.pdf",
    'HADRON_KNOWLEDGE_EMBED_URI': 'chroma:///',
})


### Instantiate capability

In [5]:
# Create the Knowledge capability via its from_memory() constructor; `kn` is the handle
# used by the cells below.
kn = Knowledge.from_memory()

In [6]:
# Point the capability at the source document and load it into `tbl`.
# The "${...}" placeholders name the environment variables set earlier;
# presumably the library resolves them when the URI is used — TODO confirm.
tbl = kn.set_source_uri("${HADRON_KNOWLEDGE_SOURCE_URI}").load_source_canonical()
# Register the persist target that kn.save_persist_canonical() writes to later.
kn.set_persist_uri('${HADRON_KNOWLEDGE_EMBED_URI}')

### Remove end-of-line characters from the document

In [7]:
# Replace every match of '\n' in the loaded table with an empty string.
# NOTE(review): an empty replacement may fuse words that were separated only by a line
# break — confirm against the source text, or replace with a single space instead.
doc = kn.tools.filter_replace_str(tbl, pattern='\n', replacement='')

### Break the text_doc into sentences with scores

In [8]:
# Split the cleaned document into one row per sentence.
# NOTE(review): include_score=False — the 'score' column in the displayed rows is 0, so
# the `score > 0.9` filter used later selects nothing; confirm whether scoring should be
# enabled here.
# words_max=3 presumably caps the per-row 'words' list at three entries — TODO confirm.
sentences = kn.tools.text_to_sentences(doc, include_score=False, words_max=3)

building sentences:   0%|          | 0/164508 [00:00<?, ?it/s]

sentence stats:   0%|          | 0/7503 [00:00<?, ?it/s]

building sentences:   0%|          | 0/83372 [00:00<?, ?it/s]

sentence stats:   0%|          | 0/4096 [00:00<?, ?it/s]

Unnamed: 0,index,char_count,word_count,token_count,score,words,text
0,0,554,77,138,0,['human' 'nutrition' 'edition'],"Human Nutrition: 2020 Edition Human Nutrition: 2020 Edition UNIVERSITY OF HAWAI‘I AT MĀNOA FOOD SCIENCE AND HUMAN NUTRITION PROGRAM ALAN TITCHENAL, SKYLAR HARA, NOEMI ARCEO CAACBAY, WILLIAM MEINKE-LAU, YA-YUN YANG, MARIE KAINOA FIALKOWSKI REVILLA, JENNIFER DRAPER, GEMADY LANGFELDER, CHERYL GIBBY, CHYNA NICOLE CHUN, AND ALLISON CALABRESE Human Nutrition: 2020 Edition by University of Hawai‘i at Mānoa Food Science and Human Nutrition Program is licensed under a Creative Commons Attribution 4.0 International License, except where otherwise noted."
1,1,368,54,92,0,['human' 'nutrition' 'program'],Contents Preface University of Hawai‘i at Mānoa Food Science and Human Nutrition Program and Human Nutrition Program xxv About the Contributors University of Hawai‘i at Mānoa Food Science and Human Nutrition Program and Human Nutrition Program xxvi Acknowledgements University of Hawai‘i at Mānoa Food Science and Human Nutrition Program and Human Nutrition Program xl
2,2,18,2,4,0,['part' 'i.' 'chapter'],Part I. Chapter 1.
3,3,914,139,228,0,['nutrition' 'human' 'program'],Basic Concepts in Nutrition Introduction University of Hawai‘i at Mānoa Food Science and Human Nutrition Program and Human Nutrition Program 3 Food Quality University of Hawai‘i at Mānoa Food Science and Human Nutrition Program and Human Nutrition Program 14 Units of Measure University of Hawai‘i at Mānoa Food Science and Human Nutrition Program and Human Nutrition Program 18 Lifestyles and Nutrition University of Hawai‘i at Mānoa Food Science and Human Nutrition Program and Human Nutrition Program 21 Achieving a Healthy Diet University of Hawai‘i at Mānoa Food Science and Human Nutrition Program and Human Nutrition Program 30 Research and the Scientific Method University of Hawai‘i at Mānoa Food Science and Human Nutrition Program and Human Nutrition Program 34 Types of Scientific Studies University of Hawai‘i at Mānoa Food Science and Human Nutrition Program and Human Nutrition Program 41 Part II.
4,4,10,2,2,0,['chapter'],Chapter 2.
5,5,652,98,163,0,['human' 'nutrition' 'program'],"The Human Body Introduction University of Hawai‘i at Mānoa Food Science and Human Nutrition Program and Human Nutrition Program 55 Basic Biology, Anatomy, and Physiology University of Hawai‘i at Mānoa Food Science and Human Nutrition Program and Human Nutrition Program 62 The Digestive System University of Hawai‘i at Mānoa Food Science and Human Nutrition Program and Human Nutrition Program 68 The Cardiovascular System University of Hawai‘i at Mānoa Food Science and Human Nutrition Program and Human Nutrition Program 82 Central Nervous System University of Hawai‘i at Mānoa Food Science and Human Nutrition Program and Human Nutrition Program 94"
6,6,250,38,62,0,['human' 'nutrition' 'program'],The Respiratory System University of Hawai‘i at Mānoa Food Science and Human Nutrition Program and Human Nutrition Program 99 The Endocrine System University of Hawai‘i at Mānoa Food Science and Human Nutrition Program and Human Nutrition Program 106
7,7,246,38,62,0,['human' 'nutrition' 'program'],The Urinary System University of Hawai‘i at Mānoa Food Science and Human Nutrition Program and Human Nutrition Program 110 The Muscular System University of Hawai‘i at Mānoa Food Science and Human Nutrition Program and Human Nutrition Program 117
8,8,123,19,31,0,['human' 'nutrition' 'program'],The Skeletal System University of Hawai‘i at Mānoa Food Science and Human Nutrition Program and Human Nutrition Program 120
9,9,315,48,79,0,['human' 'nutrition' 'program'],"The Immune System University of Hawai‘i at Mānoa Food Science and Human Nutrition Program and Human Nutrition Program 129 Indicators of Health: Body Mass Index, Body Fat Content, and Fat Distribution University of Hawai‘i at Mānoa Food Science and Human Nutrition Program and Human Nutrition Program 133 Part III."


In [9]:
# Shortest, mean and longest sentence length (in characters) before any joining.
char_counts = sentences['char_count']
pc.min(char_counts).as_py()
pc.mean(char_counts).as_py()
pc.max(char_counts).as_py()

1

116.02060522458832

2390

### Use pyarrow to get the threshold similarity scores

In [10]:
# Collect the 'index' values of the sentences whose similarity score exceeds the threshold.
SIMILARITY_THRESHOLD = 0.9  # scores above this mark a sentence as a join candidate
above_threshold = pc.greater(sentences['score'], SIMILARITY_THRESHOLD)
high_similarity = pc.filter(sentences, above_threshold).column('index').to_pylist()
print(f"Threshold similarity for {high_similarity}")

Threshold silimarity for []


### Join the similar sentences

In [11]:
# Join the sentences at the listed indices (the rows that passed the score threshold).
# presumably each listed row is merged with a neighbouring row — TODO confirm against
# kn.tools.filter_on_join.
joined = kn.tools.filter_on_join(sentences, indices=high_similarity)

In [12]:
# Sentence-length statistics AFTER the join, for comparison with the pre-join figures.
# Measured on `joined` rather than `sentences` so the numbers reflect the join itself.
# assumes `joined` keeps the 'char_count' column of `sentences` — TODO confirm
joined_char_counts = joined['char_count']
pc.min(joined_char_counts).as_py()
pc.mean(joined_char_counts).as_py()
pc.max(joined_char_counts).as_py()

1

116.02060522458832

2390

In [13]:
# Persist the joined sentences to the target registered with kn.set_persist_uri().
# NOTE(review): the recorded run fails here with a ValueError about an empty collection
# name; the embed URI is 'chroma:///' — presumably it must also name a collection,
# TODO confirm.
kn.save_persist_canonical(joined)

ValueError: Expected collection name that (1) contains 3-63 characters, (2) starts and ends with an alphanumeric character, (3) otherwise contains only alphanumeric characters, underscores or hyphens (-), (4) contains no two consecutive periods (..) and (5) is not a valid IPv4 address, got 

### Query

In [None]:
import textwrap

def print_wrapped(text, wrap_length=80):
    """Print ``text`` re-flowed so that no line exceeds ``wrap_length`` characters.

    The wrapped text is written to stdout; nothing is returned.
    """
    print(textwrap.fill(text, width=wrap_length))

In [None]:
import random

# Generative-AI questions generated with GPT4; each one has a matching entry in the
# "Model Answers" section that follows.
# NOTE(review): these questions fit the alternative 'Gen AI Best Practices.pdf' source,
# not the nutrition document configured above — confirm which source is intended.
gpt4_questions = [
    "What are the main risks associated with generative AI?",
    "How can businesses mitigate the risks of using generative AI?",
    "What is the role of a cross-functional team in GenAI strategy?",
    "What are the legal considerations for implementing generative AI?",
    "How should organizations ensure data quality in GenAI models?",
    "What is the importance of ongoing enhancement and monitoring in GenAI practices?",
]

# Hand-written questions (none yet).
manual_questions = []

# Pick one question at random from the combined pool.
question_pool = gpt4_questions + manual_questions
query = random.choice(question_pool)

### Model Answers
#### What are the main risks associated with generative AI?
Generative AI poses several risks including the creation of misleading content such as deepfakes, biased outputs due to biased training data, factually inaccurate outputs (hallucinations), security vulnerabilities (data breaches, model theft), and a lack of transparency in decision-making processes. These risks can lead to significant harm including privacy violations, intellectual property concerns, and increased carbon footprints.

#### How can businesses mitigate the risks of using generative AI?
Businesses can mitigate these risks by implementing Responsible AI (RAI) frameworks, which include gathering cross-functional teams, tracking legal requirements, ensuring high-quality and diverse training data, maintaining transparency, and engaging in continuous monitoring and enhancement of AI systems. Establishing governance structures and upholding legal and ethical standards are also crucial.

#### What is the role of a cross-functional team in GenAI strategy?
A cross-functional team centralizes AI expertise and capabilities across different departments, facilitating knowledge sharing, collaboration, and standardization. This team should oversee AI governance, resource pooling, and upskilling initiatives. They also play a key role in developing a long-term AI roadmap, promoting innovation, and ensuring alignment with company objectives and Responsible AI standards.

#### What are the legal considerations for implementing generative AI?
Legal considerations for implementing generative AI include ensuring compliance with intellectual property laws, privacy regulations, and monitoring the evolving legal landscape for new AI-specific rulings. Organizations must document and manage legal requirements, maintain robust data privacy controls, and continuously review contracts and licensing agreements related to AI models and their outputs.

#### How should organizations ensure data quality in GenAI models?
Organizations should ensure data quality by implementing stringent data acquisition, selection, and management processes. This includes verifying the provenance of training data, ensuring it is relevant to defined use cases, and maintaining high standards for data quality and diversity. Tracking metrics related to fairness, bias, and interpretability throughout the data lifecycle is also essential.

#### What is the importance of ongoing enhancement and monitoring in GenAI practices?
Ongoing enhancement and monitoring are crucial for adapting to the rapid evolution of GenAI technology. Continuous improvement ensures that AI systems remain effective, secure, and aligned with Responsible AI principles. Regular monitoring helps identify and mitigate new risks, maintain compliance with regulations, and uphold the quality and reliability of AI outputs.


In [None]:
# Run the chosen query against the persisted knowledge and print every returned row.
print(f"Query: {query}\n")

answer = kn.load_canonical('query', query=query)

for i in range(answer.num_rows):
    s = answer.slice(i, 1)
    # .as_py() belongs inside the braces; outside them the literal text ".as_py()" is printed.
    print(f"Id: {s.column('id')[0].as_py()}")
    print(f"Distance: {s.column('distance')[0].as_py()}")
    # print_wrapped() prints its text and returns None, so it is called on its own line
    # rather than interpolated (which would print "Answer: None").
    print("Answer:")
    print_wrapped(s.column('source')[0].as_py())
    print()

