In [1]:
!pip install transformers accelerate einops Xformers langchain InstructorEmbedding sentence-transformers chromadb

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import locale

# Make `locale.getpreferredencoding` always report UTF-8.
# NOTE(review): presumably the usual Colab workaround for `!` shell commands
# failing with "A UTF-8 locale is required" — confirm it is still needed.
# The replacement accepts the optional `do_setlocale` argument of the real
# function, so callers such as `locale.getpreferredencoding(False)` keep
# working instead of raising TypeError.
locale.getpreferredencoding = lambda do_setlocale=True: "UTF-8"

!pip install unstructured pandoc

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
# Reference: https://huggingface.co/databricks/dolly-v2-3b
from langchain import PromptTemplate, LLMChain
from langchain.llms import HuggingFacePipeline
import torch

from transformers import pipeline, AutoTokenizer

# Hugging Face model id of the LLM. The Dolly v2 family also comes as
# dolly-v2-7b and dolly-v2-12b; the smaller checkpoints give faster inference.
model_name = "databricks/dolly-v2-3b"

tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left")

# Keyword arguments forwarded unchanged to `pipeline(...)`, grouped in one place.
pipeline_options = {
    "model": model_name,
    "tokenizer": tokenizer,
    "torch_dtype": torch.bfloat16,
    "trust_remote_code": True,
    "device_map": "auto",
    "return_full_text": True,
    "max_new_tokens": 256,
    "top_p": 0.95,
    "top_k": 50,
    "model_kwargs": {"temperature": 0.001, "max_length": 1024},
}
generate_text = pipeline("text-generation", **pipeline_options)

# Wrap the transformers pipeline so LangChain can use it as an LLM.
hf_pipeline = HuggingFacePipeline(pipeline=generate_text)

# Template for an instruction with no input: the prompt is the instruction itself.
prompt = PromptTemplate(template="{instruction}", input_variables=["instruction"])

llm_chain = LLMChain(prompt=prompt, llm=hf_pipeline)

# Test LLM Chain

In [None]:
# Smoke-test the chain with a single question; the value returned by
# `llm_chain.run` is the last expression, so it is shown as the cell output.
question = 'Who was Dolly the sheep?'
llm_chain.run(question)

# Use an agent to summarize conversation memory

In [None]:
from transformers import load_tool
# Load the 'summarization' tool through transformers' `load_tool`.
summarizer = load_tool('summarization')

In [None]:
# Sample text for the summarization cells: an excerpt of the dolly-v2-12b model card.
corpus = """dolly-v2-12b Model Card
Summary
Databricks’ dolly-v2-12b, an instruction-following large language model trained on the Databricks machine learning platform that is licensed for commercial use. Based on pythia-12b, Dolly is trained on ~15k instruction/response fine tuning records databricks-dolly-15k generated by Databricks employees in capability domains from the InstructGPT paper, including brainstorming, classification, closed QA, generation, information extraction, open QA and summarization. dolly-v2-12b is not a state-of-the-art model, but does exhibit surprisingly high quality instruction following behavior not characteristic of the foundation model on which it is based.

Dolly v2 is also available in these smaller models sizes:

dolly-v2-7b, a 6.9 billion parameter based on pythia-6.9b
dolly-v2-3b, a 2.8 billion parameter based on pythia-2.8b
Please refer to the dolly GitHub repo for tips on running inference for various GPU configurations.

Owner: Databricks, Inc.

Model Overview
dolly-v2-12b is a 12 billion parameter causal language model created by Databricks that is derived from EleutherAI’s Pythia-12b and fine-tuned on a ~15K record instruction corpus generated by Databricks employees and released under a permissive license (CC-BY-SA)
"""

In [None]:
# Run the summarization tool on the model-card text and display the result.
summary = summarizer(corpus)
summary

In [None]:
# Build a second summarization pipeline from the knkarthick/MEETING_SUMMARY checkpoint.
# NOTE(review): `hf_summary` is not used by any later cell visible in this notebook.
hf_summary = pipeline("summarization", model="knkarthick/MEETING_SUMMARY")

# Prepare Documents


In [None]:
!pip install pypdf

In [None]:
import os
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter

from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import DirectoryLoader

In [None]:
# Load the PDF files in /content/docs/ that match the glob "./*.pdf",
# reading each one with PyPDFLoader.
# NOTE(review): the absolute path looks Colab-specific — confirm before running elsewhere.
loader = DirectoryLoader('/content/docs/', glob="./*.pdf", loader_cls=PyPDFLoader)

# `documents` is the list returned by the loader; later cells index and embed it.
documents = loader.load()

In [None]:
# Verify we were able to load the docs by displaying the first two entries.
documents[0:2]

In [None]:
# Recursive splitter configured with chunk_size=1000 and chunk_overlap=200
# (presumably character counts under the default length function — confirm).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

# Download HuggingFace Embeddings
Check the [MTEB English Leaderboard](https://huggingface.co/spaces/mteb/leaderboard) to pick an embedding model with good performance.

In [None]:
# Choose one of the top performers from the MTEB English Leaderboard

from langchain.embeddings import HuggingFaceEmbeddings, SentenceTransformerEmbeddings

# top #2 when task = Retrieval June 2023 for under ~500 MB
# NOTE: this reassigns `model_name`, which an earlier cell set to the Dolly LLM id.
model_name = "intfloat/e5-base-v2" 

# Embedding wrapper for the model above; it is passed to the vector store below.
hf = HuggingFaceEmbeddings(model_name=model_name)

# Make a Vector Database

In [None]:
# Embed and store the texts
# Supplying a persist_directory will store the embeddings on disk
persist_directory = 'db'

## Here are the new embeddings being used
embedding = hf

# Split the loaded documents into overlapping chunks before indexing them.
# `text_splitter` was configured in an earlier cell but never applied, so the
# original code embedded each loaded document whole.
chunks = text_splitter.split_documents(documents)

vectordb = Chroma.from_documents(documents=chunks,
                                 embedding=embedding,
                                 persist_directory=persist_directory)

# and a retriever

In [None]:
# Retriever view over the vector store; called with no arguments, so the
# library's default search settings apply.
retriever = vectordb.as_retriever()

In [None]:
# Ask the retriever for the documents it considers relevant to this question.
docs = retriever.get_relevant_documents("What is the AI BILL OF RIGHTS?")

# Test the basic retriever

In [None]:
# Display the first document returned by the retriever.
docs[0]

In [None]:
# Same query as the earlier retrieval cell; running it rebinds `docs`.
docs = retriever.get_relevant_documents("What is the AI BILL OF RIGHTS?")

In [None]:
# Second retrieval check with a different question; overwrites `docs`.
docs = retriever.get_relevant_documents("What does AI RMF mean?")

In [None]:
# Display the first document returned for the most recent query.
docs[0]

# Make a proper Question Retrieval chain

In [None]:
# Prompt text installed on the chain below; {context} and {question} are the
# placeholders it expects to be filled in.
ethicist_template = '''
You are an AI Ethicists. 
Use the following pieces of context to answer the users question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Always answer with unbiased ethical and safe advise.
----------------
{context}

Question: {question}
Helpful Answer:'''

# use map_reduce to prevent token mismatch errors
qa_chain = RetrievalQA.from_chain_type(
    llm=hf_pipeline,
    chain_type="map_reduce",
    retriever=retriever,
    return_source_documents=True,
    verbose=True,
)

# Replace the prompt template of the combine-documents LLM chain with the text above.
qa_chain.combine_documents_chain.llm_chain.prompt.template = ethicist_template

In [None]:
def trim_string(input_string):
    """Return the text that precedes the first "### Human:" marker.

    The argument is converted with ``str()`` first. When the marker does not
    occur, the whole converted string is returned unchanged.
    """
    # partition() yields the full string as the head when the marker is absent.
    head, _marker, _rest = str(input_string).partition("### Human:")
    return head

In [None]:
## Cite sources

import textwrap

def wrap_text_preserve_newlines(text, width=110):
    """Wrap ``text`` to ``width`` columns while keeping its existing line breaks.

    Each newline-separated line is filled on its own with ``textwrap.fill`` and
    the results are joined back with newline characters. The length of the
    wrapped text is printed before the text is returned.
    """
    wrapped_text = '\n'.join(
        textwrap.fill(segment, width=width) for segment in text.split('\n')
    )
    print(f'Wrapped Text is: {len(wrapped_text)} chars long')
    return wrapped_text

def process_llm_response(llm_response):
    """Print the answer text followed by the source of each supporting document.

    Args:
        llm_response: Mapping with a 'result' entry (the answer text) and a
            'source_documents' entry (an iterable of objects whose ``metadata``
            mapping has a 'source' key).

    The answer is trimmed at the "### Human:" marker before it is wrapped.
    Wrapping first can break the line at the space inside the marker, after
    which the marker is no longer found and the trailing text gets printed.
    """
    answer = trim_string(llm_response['result'])
    answer = wrap_text_preserve_newlines(answer)
    print(answer)
    print('\n\nSources:')
    for source in llm_response["source_documents"]:
        print(source.metadata['source'])

In [None]:
# question from AI Bill of Rights
# Run the retrieval QA chain on the query, then print the answer and its sources.
query = "What is the AI Bill of Rights?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# Question from NIST.AI.100-1
# Run the retrieval QA chain on the query, then print the answer and its sources.
# The query previously misspelled the key term ("resilliency"); it is the text
# that gets embedded for retrieval, so the spelling is corrected here.
query = "What is resilience in AI systems?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

# Notes for next version:
## Fix Token indices sequence length Issue
Reference: 
[Token indices sequence length Issue](https://stackoverflow.com/questions/68850172/token-indices-sequence-length-issue)
