In [1]:
# importing necessary dependencies
! pip install chromadb langchain openai

Collecting chromadb
  Downloading chromadb-0.5.23-py3-none-any.whl.metadata (6.8 kB)
Collecting build>=1.0.3 (from chromadb)
  Downloading build-1.2.2.post1-py3-none-any.whl.metadata (6.5 kB)
Collecting chroma-hnswlib==0.7.6 (from chromadb)
  Downloading chroma_hnswlib-0.7.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (252 bytes)
Collecting fastapi>=0.95.2 (from chromadb)
  Downloading fastapi-0.115.6-py3-none-any.whl.metadata (27 kB)
Collecting uvicorn>=0.18.3 (from uvicorn[standard]>=0.18.3->chromadb)
  Downloading uvicorn-0.32.1-py3-none-any.whl.metadata (6.6 kB)
Collecting posthog>=2.4.0 (from chromadb)
  Downloading posthog-3.7.4-py2.py3-none-any.whl.metadata (2.0 kB)
Collecting onnxruntime>=1.14.1 (from chromadb)
  Downloading onnxruntime-1.20.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.5 kB)
Collecting opentelemetry-exporter-otlp-proto-grpc>=1.2.0 (from chromadb)
  Downloading opentelemetry_exporter_otlp_proto_grpc-1.28.2-py3

In [14]:
%%capture
!pip install openai==1.55.3 httpx==0.27.2 --force-reinstall --quiet

In [15]:
from openai import OpenAI
from langchain_text_splitters import RecursiveCharacterTextSplitter
import chromadb
from google.colab import userdata
from IPython.display import Markdown, display, update_display

In [16]:
texts = [
    {'title': 'Batman Begins','file': 'Batman Begins.txt'},
    {'title': 'The Dark Knight','file': 'The Dark Knight.txt'},
    {'title': 'The Dark Knight Rises','file': 'The Dark Knight Rises.txt'}
]

In [17]:
client = chromadb.PersistentClient('./vector_db')


# chromadb.PersistentClient allows you to create a client that stores info in the database even for mulitple notebook refershes

collection = client.get_or_create_collection(
    name='Batman_RAG_agent'
)

In [18]:
text_splitter = RecursiveCharacterTextSplitter(
    separators = ['\n','\n\n','.','!','?'],
    chunk_size = 1000,
    chunk_overlap = 300
)

In [10]:
for idx, text in enumerate(texts):
  with open(text['file'],'r') as file: # text['file'] gives you access to the file name of each batman movie text
    content = file.read()
  text['chunks'] = text_splitter.create_documents([content])

  for chunk_idx, chunk in enumerate(text['chunks']):
    collection.add(
        documents=[chunk.page_content],
        ids=[f'{chunk_idx}--{idx}'],
        metadatas=[{'title':text['title'],'chunk_idx': chunk_idx}]
    )


/root/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx.tar.gz: 100%|██████████| 79.3M/79.3M [00:01<00:00, 50.0MiB/s]


In [None]:
collection.peek()

{'ids': ['0--0', '1--0', '0--1', '1--1', '0--2', '1--2', '2--2'],
 'embeddings': array([[-0.05867662,  0.01271318, -0.08357023, ...,  0.01704335,
          0.04836129,  0.06133787],
        [ 0.0320247 ,  0.04294771, -0.11116017, ...,  0.05619197,
          0.04352525, -0.00279528],
        [-0.00021628, -0.02464094, -0.09224912, ...,  0.01816081,
          0.04613294, -0.02756295],
        ...,
        [ 0.01141753, -0.02377283, -0.07031257, ..., -0.04812436,
         -0.0276113 ,  0.07476775],
        [ 0.01792043,  0.05597024, -0.10335071, ...,  0.01165169,
          0.02038735,  0.00683545],
        [-0.01217323, -0.00039762, -0.05953692, ..., -0.0174361 ,
          0.01967476,  0.02806446]]),
 'documents': ["\ufeffBatman Begins (2005)\nPlot Overview: In Batman Begins, we witness the origins of Gotham's masked vigilante, Batman. The film begins with Bruce Wayne, a billionaire haunted by his parents' murder, leaving Gotham to seek purpose and redemption. After encountering Ducard, a

In [11]:
def formatted_prompt(user_prompt,result_str):
  return f'''
  Instructions:
  You are supposed to answer question asked by the user stated under the user query section.
  Use the search results from a database provided under the search results section to answer the
  query of the user. If you cannot answer the question based on the search results provided
  just say that you do not know the answer

  User query:
  {user_prompt}

  Search results:
  {result_str}

  Agent Response:
  '''

In [19]:
# creating the ai client
ai_client = OpenAI(api_key=userdata.get('rag_key'))

TypeError: Client.__init__() got an unexpected keyword argument 'proxies'

In [None]:
def get_chat_completion(user_prompt, system_prompt, model="gpt-4"):
    completion = ai_client.chat.completions.create(
        model=model,
        messages=[{"role": "system", "content": system_prompt},{"role": "user", "content": user_prompt}],
    )
    return completion.choices[0].message.content



system_prompt = "You are a helpful RAG search assistant who uses results from a search engine to answer user queries."

In [None]:
def get_RAG_completion(query,n_results=3):
  search_results = collection.query(
      query_texts=[query],
      n_results = n_results
  )
  result_str = ''
  for result in search_results['documents'][0]:
    result_str += result

  print('\n**** Modified RAG prompt ****\n')
  modified_prompt = formatted_prompt(query,result_str)
  print(modified_prompt)
  print('\n********\n')
  return get_chat_completion(user_prompt = modified_prompt,system_prompt=system_prompt,model='gpt-4')

In [None]:
get_RAG_completion(query='What is the role of Joker in the dark knight',n_results=3)


**** Modified RAG prompt ****


  Instructions:
  You are supposed to answer question asked by the user stated under the user query section.
  Use the search results from a database provided under the search results section to answer the
  query of the user. If you cannot answer the question based on the search results provided
  just say that you do not know the answer

  User query:
  What is the role of Joker in the dark knight

  Search results:
  Themes: The movie explores the thin line between order and chaos, justice and revenge, and how far individuals will go to achieve their ideals. Batman is forced to confront the limits of his code against killing, while the Joker represents the destructive power of moral nihilism. The film also deals with the corruptibility of humanity, showing how even the purest individuals can be pushed to the dark side.
Significance: The Dark Knight is acclaimed for its dark, philosophical undertones and for Heath Ledger’s iconic portrayal of the Joke

'In "The Dark Knight", the Joker, portrayed by Heath Ledger, is Batman\'s most chaotic and cunning adversary. He emerges with a mission to unleash anarchy in Gotham. His acts of terror push Batman, District Attorney Harvey Dent, and Police Commissioner Jim Gordon, into a fragile alliance. The Joker systematically manipulates them, challenging their morality and pushing each to the limits of their moral boundaries. He represents the destructive power of moral nihilism and embodies chaos and unpredictability. Furthermore, he orchestrates a series of horrific events including Harvey Dent’s tragic transformation into the vengeful Two-Face.'