In [1]:
# Make IPython display the value of every top-level expression in a cell,
# not only the last one, so results show up without explicit print() calls.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [2]:
# Suppress every warning message from this point on so that warnings do not
# clutter the cell output.
import warnings
warnings.filterwarnings('ignore')

In [3]:
import os
import pyarrow as pa
import pyarrow.compute as pc
from nn_rag import Knowledge, Retrieval

### Chroma Vector Params

        URI example in-memory
            uri = "chromadb:///<collection>?reference=<name>"

        params:
            collection: The name of the collection
            reference: a prefix name to reference the document vector

        Environment:
            CHROMA_EMBEDDING_QUANTIZE
            CHROMA_QUERY_SEARCH_LIMIT



### Instantiate capability

In [4]:
# Create the Knowledge component through its from_memory() factory; `kn` is the
# object every preprocessing and persist step below is called on.
kn = Knowledge.from_memory()

In [5]:
# Register the PDF as the source and load it into `tbl` in one fluent call
# (kept as a single chained expression so nothing extra is displayed).
source_pdf = "./hadron/source/llama-Responsible-Use-Guide.pdf"
tbl = kn.set_source_uri(source_pdf).load_source_canonical()

# Register the Chroma URI that save_persist_canonical() will write to later.
# NOTE(review): the URI notes above use the "chromadb://" scheme while this
# call uses "chroma://" — confirm which one the connector expects.
kn.set_persist_uri('chroma:///')

### Clean text

In [6]:
# "Clean text" step: run replace_on_pattern on the loaded table with its
# default arguments (the patterns applied are defined by nn_rag, not here).
doc = kn.tools.replace_on_pattern(tbl)

### Sentences

In [7]:
# Split the cleaned text into sentences. The 'score' column read by the
# threshold step is presumably produced by include_score=True — confirm.
sentences = kn.tools.text_to_sentences(
    doc,
    include_score=True,
    disable_progress_bar=True,
)

# Summarise sentence length (in characters) to help choose a chunk size.
sentence_sizes = sentences['char_count']
print(f"Max Sentence size {pc.max(sentence_sizes)}")
print(f"Mean Sentence size {pc.mean(sentence_sizes)}")
print(f"Min Sentence size {pc.min(sentence_sizes)}")

Calculating scores:   0%|          | 0/258 [00:00<?, ?it/s]

Max Sentence size 777
Mean Sentence size 151.13127413127413
Min Sentence size 1


### Threshold similarity scores

In [29]:
# Collect the 'index' values of the sentences whose 'score' exceeds 0.95.
similarity_mask = pc.greater(sentences['score'], 0.95)
high_similarity = pc.filter(sentences, similarity_mask).column('index').to_pylist()
print(f"Threshold similarity for {high_similarity}")

# Join the sentences at those indices (join semantics are defined by
# nn_rag's filter_on_join), then report the upper tail of the resulting sizes.
sentence_sim = kn.tools.filter_on_join(sentences, indices=high_similarity)

# Each label is kept next to its quantile so the two cannot drift apart.
percentiles = {'90%': 0.9, '95%': 0.95, '98%': 0.98, '99%': 0.99}
percentile_values = pc.quantile(sentence_sim['char_count'], q=list(percentiles.values()))
print(f"Percentile Sentence size {list(zip(percentiles, pc.round(percentile_values, 0).to_pylist()))}")


Threshold silimarity for [3, 4, 15, 16, 21, 22, 23, 30, 31, 32, 33, 36, 41, 42, 45, 46, 64, 71, 72, 82, 83, 103, 117, 121, 127, 135, 136, 137, 163, 164, 173, 196, 213, 216, 240, 251]
Percentile Sentence size [('90%', 343.0), ('95%', 544.0), ('98%', 694.0), ('99%', 722.0)]


### Sentence Chunking

In [31]:
# Break the joined sentences into chunks, passing chunk_size=768.
sentence_chunks = kn.tools.text_to_chunks(
    sentence_sim,
    chunk_size=768,
    disable_progress_bar=True,
)

# NOTE(review): the labels say "Sentence" but the sizes reported here are
# those of the chunks produced above.
chunk_sizes = sentence_chunks['char_count']
print(f"Max Sentence size {pc.max(chunk_sizes)}")
print(f"Mean Sentence size {pc.round(pc.mean(chunk_sizes),0)}")
print(f"Min Sentence size {pc.min(chunk_sizes)}")

Max Sentence size 768
Mean Sentence size 174.0
Min Sentence size 1


### Sentence Join

In [38]:
# Join the chunks again, this time passing chunk_size=768 instead of indices.
sentence_join = kn.tools.filter_on_join(
    sentence_chunks,
    chunk_size=768,
    disable_progress_bar=True,
)

# Report the lower tail of the sizes to see how small the results can get.
percentile_values = pc.quantile(sentence_join['char_count'], q=[0.001, 0.01, 0.25, 0.50])
rounded_sizes = pc.round(percentile_values,0).to_pylist()
size_by_percentile = list(zip(['0.1%', '1%', '25%', '50%'], rounded_sizes))
print(f"Percentile Sentence size {size_by_percentile}")

Percentile Sentence size [('0.1%', 198.0), ('1%', 218.0), ('25%', 585.0), ('50%', 670.0)]


### Embedding

In [11]:
# Persist the joined text through the persist URI registered earlier
# ('chroma:///'). Per the section heading this is where embedding happens —
# NOTE(review): confirm that save_persist_canonical() performs the embedding.
kn.save_persist_canonical(sentence_join)

----------------
## Chroma Vector DB

### Query

In [12]:
import textwrap

def print_wrapped(text, wrap_length=80):
    """Return ``text`` re-flowed so that no line exceeds ``wrap_length`` characters.

    Despite its name, this helper does not print anything: it hands back the
    wrapped string and leaves displaying it to the caller.

    Args:
        text: The text to wrap.
        wrap_length: Maximum line width in characters (default 80).

    Returns:
        The wrapped text as a single string with newline-separated lines.
    """
    return textwrap.fill(text, width=wrap_length)

In [13]:
import random

# Candidate questions about the guide; each has a matching entry in the
# "Model Answers" section.
# NOTE(review): the leading "N. " prefix is part of each string, so it is sent
# to the retriever as part of the query text — confirm that is intended.
questions = [
    "1. What are the core principles of responsible AI mentioned in the guide?",
    "2. How does Meta's open approach contribute to AI innovation?",
    "3. What are the stages of responsible LLM product development according to the guide?",
    "4. What are some examples of product-specific fine-tuning for LLMs?",
    "5. What considerations should be taken into account when defining content policies for LLMs?",
    "6. What are the benefits of democratizing access to large language models, as stated in the guide?"
]

# Pick one question at random. No seed is set, so the chosen query (and every
# result below) can differ from one run to the next.
query = random.choice(questions)

### Model Answers
1. **Core principles of responsible AI:**
   The guide outlines core principles of responsible AI, which include fairness and inclusion, robustness and safety, privacy and security, and transparency and control. Additionally, it emphasizes the importance of governance and accountability mechanisms to ensure these principles are upheld throughout the development and deployment of AI systems.

2. **Meta's open approach and AI innovation:**
   Meta's open approach to AI innovation involves open-sourcing code and datasets, contributing to the AI community's infrastructure, and making large language models available for research. This approach fosters a vibrant AI-innovation ecosystem, driving breakthroughs in various sectors and enabling exploratory research and large-scale production deployment. It also draws upon the collective wisdom and diversity of the AI community to improve and democratize AI technology.

3. **Stages of responsible LLM product development:**
   The guide identifies four stages of responsible LLM product development: determining the use case, fine-tuning for the product, addressing input- and output-level risks, and building transparency and reporting mechanisms in user interactions. Each stage involves specific considerations and mitigation strategies to ensure the safe and effective deployment of LLM-powered products.

4. **Examples of product-specific fine-tuning:**
   Product-specific fine-tuning examples provided in the guide include text summarization, question answering, and sentiment analysis. For instance, a pretrained language model can be fine-tuned on a dataset of long-form documents and summaries for text summarization, on a Q&A dataset for answering questions, and on labeled text reviews for sentiment analysis. These examples demonstrate how fine-tuning can tailor a model's capabilities to specific use cases, enhancing performance and applicability.

5. **Considerations for defining content policies:**
   When defining content policies for LLMs, developers should consider the intended use and audience of their product, legal and safety limitations, and the needs of specific user communities. Content policies should outline allowable content and safety limitations, which will guide data annotation and safety fine-tuning. It is also important to address potential biases in human feedback and data annotation processes to ensure fairness and objectivity.

6. **Benefits of democratizing access to large language models:**
   Democratizing access to large language models, as discussed in the guide, reduces barriers to entry for small businesses and fosters innovation across various sectors. By making these models widely available, small organizations can leverage advanced AI technology without incurring prohibitive costs, leading to economic growth and a more level playing field. This approach also promotes collaboration and collective improvement of AI models, ensuring that advancements benefit a broader range of users and applications.


In [14]:
# Create the Retrieval component and point its source at the same 'chroma:///'
# URI that the Knowledge component was given as its persist target.
rag = Retrieval.from_memory()
# The object returned by set_source_uri() is displayed as this cell's output.
rag.set_source_uri('chroma:///')

<nn_rag.components.retrieval.Retrieval at 0x7fd167c54ac0>

In [15]:
# Show which question was picked so the results can be read against it.
print(f"Query: {query}\n")

# Similarity search with limit=5; the report below shows five rows, each with
# an id, a distance and the source text.
answer = rag.tools.query_similarity(query, limit=5)
rag.table_report(answer)


Query: 3. What are the stages of responsible LLM product development according to the guide?



Unnamed: 0,id,distance,source
0,general_58,1.2972,"The final stage is to evaluate the fine-tuned model on a test set to measure its performance on the specific task and against safety benchmarks, according to the use case. This includes analyzing the model’s strengths and weaknesses based on evaluation results, gathering more data to further enhance performance and safety, and iterating until satisfied with the model’s performance using holdout test datasets. There are many complementary types of evaluations that are useful for measuring risks i"
1,general_25,1.3252,"In addition to performing a variety of pretraining data-level investigations to help understand the potential capabilities and limitations of our models, we applied considerable safety mitigations to the fine-tuned versions of the model through supervised fine-tuning, reinforcement learning from human feedback (RLHF), and iterative red teaming (these steps are covered further in the section - Fine-tune for product)."
2,general_56,1.3732,"n set, and hyperparameters are adjusted as necessary. Fine-tuning an LLM for safety can involve a number of techniques, many of which the research paper on Llama 2 describes in greater depth. These techniques can include: • Supervised Fine-Tuning (SFT): Supervised fine- tuning using data annotated across helpfulness and safety. • Reinforcement Learning from Human Feedback (RLHF) or AI Feedback (RLAIF):"
3,general_77,1.3739,"Classifiers: The more effective, but also more difficult, approach is to develop classifiers that detect and filter outputs based on the meaning conveyed by the words chosen. Classifiers, when properly trained on known examples of a particular sentiment or type of semantic content, can become highly effective at identifying novel instances in which that sentiment or meaning is expressed. Mitigating risks at the output level Based on the downstream use case, you can apply several approaches for d"
4,general_75,1.3821,"hus, the safety benefits of such restrictions or modifications should be weighed against those costs, until more robust solutions are developed. Alongside prompts, it might be beneficial to provide instructive sample inputs and outputs that illustrate the desired responsible behavior. 15 JULY 2023 unreasonably restrict the usage of your model."


In [16]:
# Repeat the query text so this cell's output can be read on its own.
print(f"Query: {query}\n")

# Same query, this time through query_reranker; the report below carries a
# cross-encoder_score column in place of the distance.
answer = rag.tools.query_reranker(query)
# NOTE(review): headers='distance' with drop=True presumably removes the
# distance column from the report (the output has none) — confirm against
# table_report().
rag.table_report(answer, headers='distance', drop=True)


Query: 3. What are the stages of responsible LLM product development according to the guide?



Unnamed: 0,cross-encoder_score,source,id
0,0.4792,"The fine-tuned LLM itself can be used to create synthetic ranking data for reward model training. Given a model input, response pairs and relevant guidelines, the LLM predicts which response would best follow the guidelines. The synthetic reward modeling data are then used to augment the reward model’s training data. THE RESPONSIBLE FINE-TUNING FLOW training progress is monitored using a validation set, and hyperparameters are adjusted as necessary. Fine-tuning an LLM for safety can involve a nu",general_55
1,0.4636,"st datasets. There are many complementary types of evaluations that are useful for measuring risks in models, including automatic benchmarks, manual annotations by human raters, and evaluations using an LLM itself as a rater. The Holistic Evaluation of Language Models discusses some of the commonly used automatic benchmarks. Evaluation strategies and processes to improve performance can include: • Automatic evaluation leverages automatic benchmarks and classifiers to judge the output with respec",general_59
2,0.3886,"standards. Additionally, the needs of specific user communities should be considered as you design content policies, such as the development of age-appropriate product experiences. Having these policies in place will dictate the data needed, annotation requirements, and goals for safety fine-tuning, including the types of mitigation steps that will be implemented. These policies will be used for labeling data in later stages when using RLHF and in additional product layers, such as making enfor",general_42
3,0.3864,"To yield a product that is optimized for your target use cases, it’s essential to have a consistent set of goals and outcomes that guide each stage of the process. From the data-collection stage to user feedback, be sure to keep your overall goal in mind. •",general_103
4,0.346,"Information on pretraining data, model architecture and parameters, and pretrained evaluations are contained in the Llama 2 research paper. The paper also describes in further detail the steps to develop the fine-tuned versions, including detailed safety alignment efforts and evaluation results. Additional information is included in the model card accompanying the release.",general_26


### Tidy up

In [17]:
# Tidy up: call remove_embedding() on the retrieval component (presumably this
# deletes the vectors stored under 'chroma:///' — confirm against nn_rag).
rag.remove_embedding()

###### 