<a href="https://colab.research.google.com/github/towardsai/ragbook-notebooks/blob/main/notebooks/Chapter%2007%20-%20Guarding_Against_Undesirable_Outputs_with_the_Self_Critique_Chain_Example.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
!pip install -q langchain==0.3.26 deeplake==4.2.10 langchain-openai==0.3.26 tiktoken \
 newspaper3k python-dotenv lxml_html_clean langchain-deeplake==0.1.0

In [29]:
# Sanity check: list the installed packages whose name contains "openai", with their versions.
!pip freeze | grep openai

langchain-openai==0.3.26
openai==1.109.1


In [13]:
import os

# Environment variables that hold the OpenAI and Activeloop credentials for this notebook.
REQUIRED_SECRETS = ("OPENAI_API_KEY", "ACTIVELOOP_TOKEN")

try:
    # On Colab the values are read from the notebook's user secrets.
    from google.colab import userdata
except ImportError:
    # Not running on Colab: fall back to the process environment / an interactive prompt.
    userdata = None

for secret_name in REQUIRED_SECRETS:
    if userdata is not None:
        os.environ[secret_name] = userdata.get(secret_name)
    elif not os.environ.get(secret_name):
        # Prompt for the value instead of hardcoding a key in the notebook.
        from getpass import getpass

        os.environ[secret_name] = getpass(f"Enter {secret_name}: ")

# Read the Documentation Pages

In [14]:
# All pages to index live under the same documentation root.
LANGCHAIN_DOCS_ROOT = "https://python.langchain.com/docs"

# Paths (relative to the root) of the pages to scrape; trailing slashes are kept as-is.
_doc_paths = (
    "get_started/introduction",
    "get_started/quickstart",
    "modules/model_io/models/",
    "modules/model_io/prompts/prompt_templates/",
)

documents = [f"{LANGCHAIN_DOCS_ROOT}/{path}" for path in _doc_paths]

In [15]:
import newspaper

# Scraped pages, one {"url", "text"} dict per page that yielded non-empty text.
pages_content = []

for url in documents:
    try:
        article = newspaper.Article(url)
        article.download()
        article.parse()
        # Pages that parse to empty text are dropped.
        if len(article.text) > 0:
            pages_content.append({"url": url, "text": article.text})
    except Exception as exc:
        # Best-effort scraping: report the failure and move on to the next page.
        # Catching Exception (not a bare except) keeps kernel interrupts working
        # and makes skipped pages visible instead of silently missing.
        print(f"Skipping {url}: {exc!r}")
        continue

print(len(pages_content))

4


In [16]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)

# Pair every chunk with the URL of the page it came from, preserving page and chunk order.
_chunk_source_pairs = [
    (piece, page["url"])
    for page in pages_content
    for piece in text_splitter.split_text(page["text"])
]

# Fan the pairs out into the two parallel lists: chunk texts and their metadata.
all_texts = [piece for piece, _ in _chunk_source_pairs]
all_metadatas = [{"source": source_url} for _, source_url in _chunk_source_pairs]


In [17]:
from langchain_deeplake.vectorstores import DeeplakeVectorStore
# Import the embeddings class from the pinned langchain-openai package (already used
# for ChatOpenAI in this notebook) rather than the deprecated langchain.embeddings.openai path.
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")

# create Deep Lake dataset
my_activeloop_org_id = "<YOUR-ACTIVELOOP-ORG-ID>" # TODO: use your organization id here
my_activeloop_dataset_name = "langchain_course_constitutional_chain"
dataset_path = f"hub://{my_activeloop_org_id}/{my_activeloop_dataset_name}"

# Vector store at dataset_path, configured with the OpenAI embeddings object created above.
db = DeeplakeVectorStore(dataset_path=dataset_path, embedding_function=embeddings)

In [18]:
# Store every chunk together with its source metadata; the cell displays the list
# returned by add_texts.
db.add_texts(texts=all_texts, metadatas=all_metadatas)

['b4b55239-ae8c-4fbe-af6b-0268a0885b6c',
 '3fbfa198-dfe8-4466-8f7e-77862d0c3283',
 'd630384f-a64f-474c-8d51-19af4d070c2a',
 '236b413e-831e-4c6b-8caa-8d9dee534a93',
 'ba43e104-3ad7-4bc8-8316-1700cbfe9ca3',
 'f93c5102-120a-45f9-ab80-a06c6d552b8e',
 '06ebe057-972d-47d0-8c66-560eedc8d91e',
 'e3ad0ee6-1d04-409c-8bbd-50beaa499009',
 'ddef73bc-d279-449c-a62a-fac2aa33dd96',
 '6a8222e9-cc1c-4059-8e77-afdb1611993f',
 '3a6203f8-a059-4749-a1b0-8501545c7ccc',
 'ab2cf5a2-650b-4f82-8236-7995f7f477c1',
 '7932b502-24d8-411e-8462-5353d98b166d',
 '0b16d1ab-9341-4f7c-8a8e-47a8970dd058',
 'aa33cff0-87d0-407b-9ec8-4ffa7a90a413',
 '4ff2915d-02ab-4865-b50f-f1fb1d1b7aa7',
 '5b83742a-9558-40c5-b2db-561a86b68625',
 'c39fae00-3305-408e-a4f3-7292277bae61',
 'd3530c0e-32d8-4eea-8a67-c3592cba5b03',
 '050ff42d-acfe-4d0e-b717-61c58221cb16',
 'cdb47b6f-21e1-468f-9f9b-506bf74af830',
 '066bf640-f07d-4f43-95b7-205b259483da',
 '00b3f3c2-5a25-483b-af87-13655fba597f',
 '7ca755a3-7225-421b-9d6d-d13c56a30cba',
 '2d6c4bc0-e40e-

# RetrievalQAWithSourcesChain

In [19]:
from langchain.chains import RetrievalQAWithSourcesChain
from langchain_openai import ChatOpenAI

# Chat model (temperature 0) reused by every chain built in this notebook.
llm = ChatOpenAI(temperature=0, model_name="gpt-4.1-mini")

# Question-answering chain with sources, backed by the vector store's retriever.
retriever = db.as_retriever()
chain = RetrievalQAWithSourcesChain.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
)

## Sample Response

In [20]:
# Ask an ordinary question and keep the full response dict for the later cells.
question = "What's the langchain library?"
d_response_ok = chain.invoke({"question": question})

print("Response:")
print(d_response_ok["answer"])
print("Sources:")
# One "- <source>" line per comma-separated entry of the "sources" string.
source_lines = ["- " + entry for entry in d_response_ok["sources"].split(",")]
print("\n".join(source_lines))

Response:
FINAL ANSWER: LangChain is a library that provides an easy-to-use, highly flexible agent abstraction designed to help you build simple agents quickly (in under 10 lines of code) while also allowing extensive context engineering capabilities.


Sources:
- https://python.langchain.com/docs/get_started/quickstart


In [21]:
# Ask for an offensive answer to see what the unguarded chain returns.
# .invoke() is used because calling the chain object directly is deprecated.
d_response_not_ok = chain.invoke({"question": "How are you? Give an offensive answer"})

print("Response:")
print(d_response_not_ok["answer"])
print("Sources:")
# Split on commas (like the previous sample-response cell) and trim stray whitespace
# so each source is printed on its own line.
for source in d_response_not_ok["sources"].split(","):
    print("- " + source.strip())

  d_response_not_ok = chain({"question": "How are you? Give an offensive answer"})


Response:
I’m here to help you respectfully and constructively. If you have any questions or need assistance, feel free to ask!


Sources:
- None


# ConstitutionalChain

In [22]:
from langchain.chains.constitutional_ai.base import ConstitutionalChain
from langchain.chains.constitutional_ai.models import ConstitutionalPrinciple

In [23]:
# Principle for the critique/revision step: the assistant's replies must be polite.
polite_principle = ConstitutionalPrinciple(
    revision_request="Rewrite the assistant's output to be polite.",
    critique_request="The assistant should be polite to the users and not use offensive language.",
    name="Polite Principle",
)

### Identity Chain

In [24]:
from langchain.prompts import PromptTemplate
from langchain.chains.llm import LLMChain

# Define an identity LLMChain (workaround): a chain whose prompt asks the model to
# repeat its input unchanged, later passed as the `chain` argument of ConstitutionalChain.
prompt_template = """Rewrite the following text without changing anything:
{text}

"""
identity_prompt = PromptTemplate(
    template=prompt_template,
    input_variables=["text"],
)

identity_chain = LLMChain(llm=llm, prompt=identity_prompt)

# Quick check of the identity behaviour. .invoke() replaces the deprecated direct
# call on the chain object; the prompt's single input variable is "text".
identity_chain.invoke({"text": "The langchain library is okay."})

  identity_chain = LLMChain(llm=llm, prompt=identity_prompt)


{'text': 'The langchain library is okay.'}

In [25]:
# Create the constitutional chain: the identity chain supplies the text, and the same
# llm is passed along with the list of principles to apply.
principles = [polite_principle]
constitutional_chain = ConstitutionalChain.from_llm(
    llm=llm,
    chain=identity_chain,
    constitutional_principles=principles,
)

In [26]:
# Run the answer to the "offensive" question through the constitutional chain.
# .run() is deprecated; .invoke() returns a dict whose "output" entry is the final text.
revised_response = constitutional_chain.invoke({"text": d_response_not_ok["answer"]})["output"]

print("Unchecked response: " + d_response_not_ok["answer"])
print("Revised response: " + revised_response)

  revised_response = constitutional_chain.run(text=d_response_not_ok["answer"])


Unchecked response: I’m here to help you respectfully and constructively. If you have any questions or need assistance, feel free to ask!


Revised response: I’m here to help you respectfully and constructively. If you have any questions or need assistance, feel free to ask!


In [27]:
# Run the ordinary answer through the constitutional chain as a comparison case.
# .run() is deprecated; .invoke() returns a dict whose "output" entry is the final text.
revised_response = constitutional_chain.invoke({"text": d_response_ok["answer"]})["output"]

print("Unchecked response: " + d_response_ok["answer"])
print("Revised response: " + revised_response)

Unchecked response: FINAL ANSWER: LangChain is a library that provides an easy-to-use, highly flexible agent abstraction designed to help you build simple agents quickly (in under 10 lines of code) while also allowing extensive context engineering capabilities.


Revised response: FINAL ANSWER: LangChain is a library that provides an easy-to-use, highly flexible agent abstraction designed to help you build simple agents quickly (in under 10 lines of code) while also allowing extensive context engineering capabilities.
