### **Setup**

In [1]:
! pip install --quiet llama-index pymongo llama-index-vector-stores-mongodb

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m670.0/670.0 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.4/15.4 MB[0m [31m30.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m47.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m307.7/307.7 kB[0m [31m20.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m320.6/320.6 kB[0m [31m17.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m141.9/141.9 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m31.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━

In [18]:
import os
from google.colab import userdata

# Both credentials are read from Colab's per-user secret store instead of
# being written into the notebook. The OpenAI key is exported as an
# environment variable; the MongoDB connection string stays in MONGO_URI.
os.environ.update({"OPENAI_API_KEY": userdata.get("openai-api-key-2")})
MONGO_URI = userdata.get("MONGO_URI")

### **OpenAI Embeddings**

In [19]:
from llama_index.embeddings.openai import OpenAIEmbedding

# OpenAI embedding model handed to the index further down; the client is
# configured with a batch size of 16.
embed_model = OpenAIEmbedding(embed_batch_size=16, model="text-embedding-3-small")

### **MongoDB Atlas Vector Store**

In [20]:
import pymongo
from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
from llama_index.core.indices import VectorStoreIndex

# Client for the MongoDB deployment identified by the MONGO_URI secret.
mongodb_client = pymongo.MongoClient(host=MONGO_URI)

In [21]:
# Vector-store wrapper around the "oppenheimer_wiki_chunks" collection of the
# "oppenheimer" database. Searches go through the Atlas index named
# "vector_index"; vectors are read from each document's "embedding" field.
# NOTE(review): this notebook never inserts documents, so the collection is
# presumably populated elsewhere — confirm.
store = MongoDBAtlasVectorSearch(
    db_name="oppenheimer",
    collection_name="oppenheimer_wiki_chunks",
    embedding_key="embedding",
    index_name="vector_index",
    mongodb_client=mongodb_client,
)

### **Loading Index**

In [22]:
from llama_index.core import VectorStoreIndex

# Build the index object directly on top of the existing vector store,
# using the OpenAI embedding model configured above.
index_loaded = VectorStoreIndex.from_vector_store(store, embed_model=embed_model)

### **Query Engine**

In [23]:
from llama_index.llms.openai import OpenAI

# Chat model that writes the final answers: temperature 0 and at most
# 512 tokens per response.
llm = OpenAI(max_tokens=512, temperature=0, model="gpt-3.5-turbo-0125")

In [24]:
# Query engine over the loaded index: retrieves the 3 most similar chunks
# per question and returns streaming responses from the LLM.
query_engine = index_loaded.as_query_engine(
    similarity_top_k=3,
    streaming=True,
    llm=llm,
)

In [25]:
# Smoke test: ask one question and echo the answer piece by piece as the
# response generator produces it.
streaming_response = query_engine.query(
    "What's the name of the actor that played Lewis Strauss?"
)
for chunk in streaming_response.response_gen:
  print(chunk, end="")

Robert Downey Jr.

### **Gradio Demo**

In [26]:
! pip install --quiet gradio

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.3/12.3 MB[0m [31m82.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.0/92.0 kB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m315.9/315.9 kB[0m [31m29.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m142.5/142.5 kB[0m [31m14.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.7/8.7 MB[0m [31m88.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.2/47.2 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.8/60.8 kB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.9/129.9 kB[0m [31m16.4 M

In [27]:
import gradio as gr

def generate(query):
  """Stream the query engine's answer to `query` for the Gradio UI.

  This is a generator. Every time the query engine produces a new token the
  full answer accumulated so far is yielded, so each yielded value replaces
  the previous one in the output component.

  Args:
    query: The question text entered by the user.

  Yields:
    str: The answer text accumulated so far, or a short prompt asking for a
      question when `query` is empty or only whitespace.
  """
  # A blank submission would otherwise still be sent to the query engine;
  # answer it locally instead of making a pointless remote call.
  if not query or not query.strip():
    yield "Please enter a question."
    return

  answer_so_far = ""
  streaming_response = query_engine.query(query)
  for token in streaming_response.response_gen:
    answer_so_far += token
    yield answer_so_far

# Layout of the demo page: an introduction, a question/answer row, a
# submit/clear row, and a list of clickable example questions.
with gr.Blocks() as demo:
  # Introductory text (grammar fixed: "relevant to our query", "inject them").
  gr.Markdown("""
  # Retrieval Augmented Generation with GPT 3.5 Turbo and MongoDB Atlas Vector Search: Question Answering demo
  ### This demo uses the GPT 3.5 Turbo LLM and MongoDB Atlas Vector Search for fast and performant Retrieval Augmented Generation (RAG).
  ### The context is the new Oppenheimer movie's entire wikipedia page. The movie came out very recently in July, 2023, so the GPT 3.5 turbo model is not aware of it.
  Retrieval Augmented Generation (RAG) enables us to retrieve just the few small chunks of the document that are relevant to our query and inject them into our prompt.
  The model is then able to answer questions by incorporating knowledge from the newly provided document. RAG can be used with thousands of documents, but this demo is limited to just one txt file.
  """)

  gr.Markdown("## Enter your question")
  # Left column: editable question box. Right column: read-only answer box.
  with gr.Row():
    with gr.Column():
      ques = gr.Textbox(label="Question", placeholder="Enter text here", lines=2)
    with gr.Column():
      ans = gr.Textbox(label="Answer", lines=4, interactive=False)
  # Submit button on the left; the clear button is given both textboxes.
  with gr.Row():
    with gr.Column():
      btn = gr.Button("Submit")
    with gr.Column():
      clear = gr.ClearButton([ques, ans])

  # `generate` is a generator, so the answer box is updated with every
  # value it yields.
  btn.click(fn=generate, inputs=[ques], outputs=[ans])
  # Example questions wired to the question box.
  examples = gr.Examples(
    examples=[
      "Who portrayed J. Robert Oppenheimer in the new Oppenheimer movie?",
      "In the plot of the movie, why did Lewis Strauss resent Robert Oppenheimer?",
      "What happened while Oppenheimer was a student at the University of Cambridge?",
      "How much money did the Oppenheimer movie make at the US and global box office?",
      "What score did the Oppenheimer movie get on Rotten Tomatoes and Metacritic?"
    ],
    inputs=[ques],
  )

# Turn on request queuing, then start the app. As the recorded output shows,
# in Colab this also creates a temporary public share link.
demo.queue()
demo.launch()

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://515420f82297228991.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




### **More Queries**

In [28]:
# Casting question; the answer is printed as it streams in.
streaming_response = query_engine.query(
    "Which character did Matthias Schweighöfer play in the movie?"
)
streaming_response.print_response_stream()

Werner Heisenberg

In [29]:
# Character question; the answer is printed as it streams in.
streaming_response = query_engine.query(
    "Who is Boris Pash?"
)
streaming_response.print_response_stream()

Boris Pash is a U.S. Army military intelligence officer and commander of the Alsos Mission.

In [30]:
# Plot question; the answer is printed as it streams in.
streaming_response = query_engine.query(
    "Why did Lewis Strauss have a grudge against J. Robert Oppenheimer?"
)
streaming_response.print_response_stream()

Lewis Strauss had a grudge against J. Robert Oppenheimer because Oppenheimer publicly humiliated him by dismissing his concerns about exporting radioisotopes and recommending negotiations with the Soviet Union after they successfully detonated their own bomb. Additionally, Strauss believed that Oppenheimer denigrated him during a conversation Oppenheimer had with Einstein in 1947.

In [None]:
# Plot question; `streaming_response` is kept because the next cell reads
# its source nodes.
streaming_response = query_engine.query(
    "What happened while Oppenheimer was a student at Cambridge?"
)
streaming_response.print_response_stream()

Oppenheimer grappled with anxiety and homesickness while studying under experimental physicist Patrick Blackett at the Cavendish Laboratory in the University of Cambridge. Upset with Blackett's attitude, Oppenheimer left him a poisoned apple but later retrieved it. Niels Bohr recommended that Oppenheimer study theoretical physics at the University of Göttingen.

In [None]:
# Print the text of every source node attached to the last response, with a
# dashed rule after each one.
sources = streaming_response.source_nodes
separator = "\n\n------------------------------------------\n\n"
for node in sources:
  # One call per node; sep="\n" reproduces the line break that separate
  # print() calls would put between the text and the rule.
  print(node.text, separator, sep="\n")

Its simultaneous release with Warner Bros.'s Barbie led to the Barbenheimer cultural phenomenon, which encouraged audiences to see both films as a double feature. The film grossed over $955 million worldwide, becoming the third-highest-grossing film of 2023, the highest-grossing World War II-related film, the highest-grossing biographical film, and the second-highest-grossing R-rated film. It received critical acclaim and numerous accolades, including five Golden Globe Awards, eight Critic's Choice Awards and received 13 British Academy Film Awards, including Best Film and was named one of the top-ten films of 2023 by the National Board of Review and the American Film Institute.

Plot
In 1926, 22-year-old doctoral student J. Robert Oppenheimer grapples with anxiety and homesickness while studying under experimental physicist Patrick Blackett at the Cavendish Laboratory in the University of Cambridge. Upset with Blackett's attitude, Oppenheimer leaves him a poisoned apple but later retr