In [1]:

pip install langchain langchain-openai langchain-community python-dotenv unstructured faiss-cpu pdfminer tiktoken pymupdf

Note: you may need to restart the kernel to use updated packages.


In [2]:
## import libraries
import os
import getpass

## set up openai api key
os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API Key:")

In [13]:
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings

## set up openai chat model
openai_embedding_model = OpenAIEmbeddings()

In [14]:
from langchain_core.prompts import ChatPromptTemplate

## set up system template
system_template = """You are an AI assistant specialized in answering questions about the AI Bill of Rights. 
Your responses should be based on the provided context. If the information isn't in the context, say you don't know.
Always strive for accuracy and clarity in your answers."""

## set up human template
human_template = """Context: {context}

Question: {question}

Please provide a concise and accurate answer based on the given context."""

chat_prompt = ChatPromptTemplate.from_messages([
    ("system", system_template),
    ("human", human_template)
])

In [15]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

# Load the PDF
loader = PyPDFLoader("/Users/joenoss/Downloads/Blueprint-for-an-AI-Bill-of-Rights.pdf")
pages = loader.load_and_split()

# Create FAISS index
faiss_index = FAISS.from_documents(pages, OpenAIEmbeddings())

# Perform similarity search
docs = faiss_index.similarity_search("How will the community be engaged?", k=2)

# Print results
for doc in docs:
    print(str(doc.metadata["page"]) + ":", doc.page_content[:300])


55: In discussion of technical and governance interventions that that are needed to protect against the harms of 
these technologies, various panelists emphasized that transparency is important but is not enough to achieve accountability. Some panelists discussed their individual views on additional sys
55: APPENDIX
Panelists discussed the benefits of AI-enabled systems and their potential to build better and more 
innovative infrastructure. They individually noted that while AI technologies may be new, the process of 
technological diffusion is not, and that it was critical to have thoughtful and resp


In [30]:
pip install --quiet langchain_experimental langchain_openai

Note: you may need to restart the kernel to use updated packages.


In [16]:

# Set up semantic chunker for text splitting
from langchain_experimental.text_splitter import SemanticChunker
from langchain_openai.embeddings import OpenAIEmbeddings

# Create the semantic chunker
text_splitter = SemanticChunker(
    OpenAIEmbeddings(),
    breakpoint_threshold_type="percentile"
)


In [17]:
from langchain_community.vectorstores import Qdrant
from langchain_openai import OpenAIEmbeddings

# Use the same embedding model as in the semantic chunker
embedding_model = OpenAIEmbeddings()

# Create Qdrant vector store using the loaded and split pages
qdrant_vectorstore = Qdrant.from_documents(
    documents=pages,
    embedding=embedding_model,
    location=":memory:"
)

In [18]:
qdrant_retriever = qdrant_vectorstore.as_retriever()

In [19]:
from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI

# Create a ChatOpenAI instance with the GPT-4 model
gpt4_chat_model = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# Create the retrieval QA chain
retrieval_augmented_qa_chain = RetrievalQA.from_chain_type(
    llm=gpt4_chat_model,  # Use the GPT-4 model here
    chain_type="stuff",
    retriever=qdrant_retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": chat_prompt}
)

# The final_chain line is not necessary if you're using the retrieval_augmented_qa_chain directly
# If you want to keep it for consistency, you can do:
final_chain = retrieval_augmented_qa_chain

In [20]:
response = retrieval_augmented_qa_chain.invoke({"query" : "How do we define the riskiness of AI?"})

In [21]:
import pprint
pprint.pp(response)

{'query': 'How do we define the riskiness of AI?',
 'result': 'The context does not provide a specific definition for the '
           'riskiness of AI. It discusses various frameworks and principles '
           'aimed at managing risks associated with AI, such as the NIST AI '
           'Risk Management Framework, which focuses on trustworthiness '
           'considerations like accuracy, explainability, reliability, and '
           'privacy. However, a precise definition of "riskiness" itself is '
           'not included.',
 'source_documents': [Document(metadata={'source': '/Users/joenoss/Downloads/Blueprint-for-an-AI-Bill-of-Rights.pdf', 'page': 44, '_id': 'ce7213426ee64f0aaae5cdb3d908be89', '_collection_name': 'e44fae33793847efb8d68251c051177e'}, page_content='accuracy), and enable human users to understand, appropriately trust, and effectively manage the emerging \ngeneration of artificially intelligent partners.95 The National Science Foundation’s program on Fairness in \nA

In [22]:
import pprint

response2 = retrieval_augmented_qa_chain.invoke({"query": "What are some of the principles for ensuring safety in relation to AI?"})
pprint.pp(response2)

{'query': 'What are some of the principles for ensuring safety in relation to '
          'AI?',
 'result': 'Some principles for ensuring safety in relation to AI, as outlined '
           'in the context, include:\n'
           '\n'
           '1. **Lawful and Respectful**: AI systems must adhere to legal '
           'standards and respect national values.\n'
           '2. **Purposeful and Performance-Driven**: AI should have clear '
           'objectives and be driven by performance metrics.\n'
           '3. **Accurate, Reliable, and Effective**: AI systems must produce '
           'accurate and reliable outcomes.\n'
           '4. **Safe, Secure, and Resilient**: AI should be designed to be '
           'safe from vulnerabilities and resilient to failures.\n'
           '5. **Understandable**: The functioning of AI systems should be '
           'comprehensible to users.\n'
           '6. **Responsible and Traceable**: There should be accountability '
           'in the develop

In [23]:
from langchain_community.vectorstores import Qdrant
from langchain_openai import OpenAIEmbeddings
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams

# Load the PDFs
loader1 = PyPDFLoader("/Users/joenoss/Downloads/Blueprint-for-an-AI-Bill-of-Rights.pdf")
loader2 = PyPDFLoader("/Users/joenoss/Downloads/NIST.AI.600-1.pdf")
pages1 = loader1.load_and_split()
pages2 = loader2.load_and_split()

# Combine pages from both PDFs
all_pages = pages1 + pages2

# Create the Qdrant client with in-memory storage
qdrant_client = QdrantClient(":memory:")

# Create the collection
qdrant_client.create_collection(
    collection_name="ai_documents",
    vectors_config=VectorParams(size=1536, distance=Distance.COSINE)
)

# Create the OpenAI embeddings instance
embedding_model = OpenAIEmbeddings()

# Create the vector store
qdrant_vector_store = Qdrant(
    client=qdrant_client,
    collection_name="ai_documents",
    embeddings=embedding_model,  # Use 'embeddings' instead of 'embedding_function'
)

# Add documents to the vector store
qdrant_vector_store.add_documents(all_pages)

  qdrant_vector_store = Qdrant(


['42967d1c5e8d436681fd7fe82ef8fe72',
 'be6093b03ad44c949a5415380ef3e264',
 '033a2305072a48c59208700f6b06e7d9',
 '539b7e2a684b4742b5c02dd977bcc607',
 '914d39561c774dd580b1c2a52915ee74',
 '086b1048dde54307991cbf8c4e47dfef',
 'b72adfafeb1149bab191188bf5f4bc86',
 '478d0e631c9448c7ac53e373426e365a',
 'd3a1c6628c9b42c791a8ee10aeb396bc',
 'd7d7790101d04ccdb50be77476caecbf',
 '3599414751554243b435bd66c5b0a1d3',
 '507e7f1f845b434f9432ec81c9b27020',
 '1e6484fdbf68426eb4b31bfb2cd607d8',
 'be239b0154704f369299a5c40bb20ce8',
 'b59125432a7949f5805c3402607e479e',
 'ae5b095fecdf4d74bc0bb034ec4224c2',
 '3a457d4d52884167b01185a102877f8e',
 '16e30cd50d054a2ab10041b44e878a0b',
 'bfd3a5e89e6d4b4b895f1a5b349a8f8d',
 '84e135efc69f405eb1f8f67b06f0e9c0',
 '06888bbde10f483581de72e3c7625e41',
 '07ebb8444d964e0bb0c91f1f2afbf1b9',
 'c978848a6cd14f00bf5b00c605b1b225',
 '083f37b9339f4ad696397a4c8fccd9e1',
 'e91094220dda493e8e30683c4e55b9d5',
 '4046c774b4884db0b966eb7ca515c479',
 '64aa8185444a4d2fafeb4faf5d9a3546',
 

In [24]:
retriever = qdrant_vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 2},
    score_threshold=0.7
)

In [25]:
retrieved_documents = retriever.invoke("What are some of the principles for ensuring safety in relation to AI?")
for doc in retrieved_documents:
  print(doc)

page_content='SAFE AND EFFECTIVE 
SYSTEMS 
HOW THESE PRINCIPLES CAN MOVE INTO PRACTICE
Real-life examples of how these principles can become reality, through laws, policies, and practical 
technical and sociotechnical approaches to protecting rights, opportunities, and access. 
Some U.S government agencies have developed specific frameworks for ethical use of AI 
systems. The Department of Energy (DOE) has activated the AI Advancement Council that oversees coordina -
tion and advises on implementation of the DOE AI Strategy and addresses issues and/or escalations on the 
ethical use and development of AI systems.20 The Department of Defense has adopted Artificial Intelligence 
Ethical Principles, and tenets for Responsible Artificial Intelligence specifically tailored to its national 
security and defense activities.21 Similarl y, the U.S. Intelligence Community (IC) has developed the Principles 
of Artificial Intelligence Ethics for the Intelligence Community to guide personnel on whe

In [26]:
from operator import itemgetter
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# Use the ChatOpenAI model we set up earlier
primary_qa_llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# Use the chat prompt we defined earlier
system_template = """You are an AI assistant specialized in answering questions about the AI Regulations, Policies, and Frameworks. 
Your responses should be based on the provided context. If the information isn't in the context, say you don't know.
Always strive for accuracy and clarity in your answers."""

human_template = """Context: {context}

Question: {question}

Please provide a concise and accurate answer based on the given context."""

chat_prompt = ChatPromptTemplate.from_messages([
    ("system", system_template),
    ("human", human_template)
])

# Set up the retrieval augmented QA chain
retrieval_augmented_qa_chain = (
    {
        "context": itemgetter("question") | retriever, 
        "question": itemgetter("question")
    }
    | RunnablePassthrough.assign(context=lambda x: "\n\n".join([doc.page_content for doc in x["context"]]))
    | {
        "response": chat_prompt | primary_qa_llm | StrOutputParser(), 
        "context": itemgetter("context")
    }
)

In [27]:
question = "How do we define the riskiness of AI?"

result = retrieval_augmented_qa_chain.invoke({"question" : question})

pprint.pp([result])

[{'response': 'The riskiness of AI is defined by the unique and exacerbated '
              'risks associated with the development and use of Generative AI '
              '(GAI). These risks can vary based on characteristics such as '
              'the GAI model or system architecture, training mechanisms, data '
              'types used, and the context of application. Organizations may '
              'tailor their measurement of GAI risks according to these '
              'characteristics and allocate risk management resources based on '
              'the severity and likelihood of negative impacts. Risks can be '
              'categorized into technical/model risks, misuse by humans, and '
              'ecosystem/societal risks, with some risks being cross-cutting '
              'between these categories. Additionally, some GAI risks are '
              'unknown or difficult to estimate due to the complexity and '
              'scale of GAI systems, as well as challenges i

In [29]:
# Reload the documents for testing purposes
from langchain.document_loaders import PyPDFLoader
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams
from langchain_community.vectorstores import Qdrant
from langchain_openai import OpenAIEmbeddings

# Load the PDFs
test_loader1 = PyPDFLoader("/Users/joenoss/Downloads/Blueprint-for-an-AI-Bill-of-Rights.pdf")
test_loader2 = PyPDFLoader("/Users/joenoss/Downloads/NIST.AI.600-1.pdf")
test_pages1 = test_loader1.load_and_split()
test_pages2 = test_loader2.load_and_split()

# Combine pages from both PDFs
test_all_pages = test_pages1 + test_pages2

# Create the Qdrant client with in-memory storage for testing
test_qdrant_client = QdrantClient(":memory:")

# Create the collection for testing
test_qdrant_client.create_collection(
    collection_name="test_ai_documents",
    vectors_config=VectorParams(size=1536, distance=Distance.COSINE)
)

# Create the OpenAI embeddings instance
test_embedding_model = OpenAIEmbeddings()

# Create the vector store for testing
test_qdrant_vector_store = Qdrant(
    client=test_qdrant_client,
    collection_name="test_ai_documents",
    embeddings=test_embedding_model,
)

# Add documents to the test vector store
test_qdrant_vector_store.add_documents(test_all_pages)

# Create a new retriever for testing
test_retriever = test_qdrant_vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 2},
    score_threshold=0.7
)

print(f"Loaded {len(test_all_pages)} test document pages")



Loaded 155 test document pages


In [32]:
!pip install -U -q --user langchain langchain-openai langchain_core langchain-community langchainhub openai langchain-qdrant qdrant-client pymupdf pandas ragas

[0m[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
ragas 0.1.20 requires langchain-core<0.3, but you have langchain-core 0.3.6 which is incompatible.[0m[31m
[0m