In [1]:
import os
import sys
sys.path.append('../..')

from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) # read local .env file

# Fail fast if the Gemini API key did not make it into the environment
if not os.environ.get('GOOGLE_API_KEY'):
    raise ValueError("GOOGLE_API_KEY not found in environment variables. Please check your .env file.")
print("Gemini API key loaded successfully!")

Gemini API key loaded successfully!


In [11]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings

In [12]:
# Load the source PDF (replace the path below with the path to your own PDF).
# PyPDFLoader reads PDF files, not plain-text files.
loader = PyPDFLoader("./nparray.pdf")
documents = loader.load()


In [13]:

# Gemini embedding client used to vectorize the document chunks
embedding = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
)


In [14]:

# Split documents into overlapping chunks so each embedded piece stays small
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(documents)

# Embed the chunks and store them in a Chroma collection under docs/chroma.
# Reuses the `embedding` client created in the previous cell instead of
# constructing an identical second one.
vectordb = Chroma.from_documents(
    documents=splits,
    embedding=embedding,
    persist_directory="docs/chroma"
)
# No explicit vectordb.persist(): the call is deprecated (it produced the
# deprecation warning captured in this cell's output) because Chroma >= 0.4
# writes to persist_directory automatically.
print(f"Stored {vectordb._collection.count()} documents in the Chroma database.")

Stored 2 documents in the Chroma database.


  vectordb.persist()


In [15]:
# Number of chunks currently held in the Chroma collection
stored_chunk_count = vectordb._collection.count()
print(stored_chunk_count)

2


In [30]:
from IPython.display import display, Markdown
import textwrap

def to_markdown(text):
    """Render ``text`` as a Markdown block quote for notebook display.

    Bullet characters ('•') are turned into Markdown list markers, and every
    line -- blank lines included -- is prefixed with '> '.
    """
    bulleted = text.replace('•', '  *')
    quoted = ''.join('> ' + line for line in bulleted.splitlines(keepends=True))
    return Markdown(quoted)

# Show the chain's answer as a rendered Markdown block quote
answer_markdown = to_markdown(result["result"])
display(answer_markdown)

> Based on the context provided, an `np array` refers to an array created using the **NumPy** library in Python, where `np` is the conventional alias for NumPy (as seen in `import numpy as np`).
> 
> From the examples, we can see that an `np array` is a data structure that can be:
> 
> *   **1-Dimensional (1-D):** Like a list of numbers.
>     *   `np.arange(11)` creates a 1-D array with numbers from 0 to 10.
>     *   `np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])` creates a 1-D array from a list.
> 
> *   **Multi-Dimensional (2-D):** Like a grid or table.
>     *   `np.random.random((10,10))` creates a 10x10 array.
>     *   A 1-D array can be converted into a 2-D array using the `.reshape()` method.
> 
> You can perform various operations on these arrays, such as:
> *   Finding the minimum and maximum values (`.min()`, `.max()`).
> *   Calculating the mean value (`.mean()`).
> *   Modifying elements based on a condition (e.g., replacing odd numbers with -1 or negating elements between 3 and 8).

In [24]:
from langchain_google_genai import ChatGoogleGenerativeAI
# Chat model shared by every chain built in this notebook
llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-pro",
    temperature=0,
)

In [25]:
from langchain.chains import RetrievalQA

In [26]:
# Default RetrievalQA chain: answers queries using chunks retrieved from vectordb
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=vectordb.as_retriever())

In [27]:
# Define the query here: in the cells shown, `question` is otherwise first
# assigned further down, so a fresh top-to-bottom run would stop with a NameError.
# The text matches the 'query' value recorded in this notebook's saved output.
question = "what is np array"
# .invoke() is the supported call style; calling the chain object directly is deprecated.
result = qa_chain.invoke({"query": question})

In [None]:
# Keep `result` as the full response dict (other cells index result["result"]);
# bind the answer text to its own name and show it as the cell output.
# Rebinding `result` to the string made this cell fail when run a second time.
answer_text = result["result"]
answer_text

'Based on the context provided, an `np array` refers to an array created using the **NumPy** library in Python, where `np` is the conventional alias for NumPy (as seen in `import numpy as np`).\n\nFrom the examples, we can see that an `np array` is a data structure that can be:\n\n*   **1-Dimensional (1-D):** Like a list of numbers.\n    *   `np.arange(11)` creates a 1-D array with numbers from 0 to 10.\n    *   `np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])` creates a 1-D array from a list.\n\n*   **Multi-Dimensional (2-D):** Like a grid or table.\n    *   `np.random.random((10,10))` creates a 10x10 array.\n    *   A 1-D array can be converted into a 2-D array using the `.reshape()` method.\n\nYou can perform various operations on these arrays, such as:\n*   Finding the minimum and maximum values (`.min()`, `.max()`).\n*   Calculating the mean value (`.mean()`).\n*   Modifying elements based on a condition (e.g., replacing odd numbers with -1 or negating elements between 3 and 8).'

In [29]:
# Dump the raw chain response for inspection
print(result)

{'query': 'what is np array', 'result': 'Based on the context provided, an `np array` refers to an array created using the **NumPy** library in Python, where `np` is the conventional alias for NumPy (as seen in `import numpy as np`).\n\nFrom the examples, we can see that an `np array` is a data structure that can be:\n\n*   **1-Dimensional (1-D):** Like a list of numbers.\n    *   `np.arange(11)` creates a 1-D array with numbers from 0 to 10.\n    *   `np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])` creates a 1-D array from a list.\n\n*   **Multi-Dimensional (2-D):** Like a grid or table.\n    *   `np.random.random((10,10))` creates a 10x10 array.\n    *   A 1-D array can be converted into a 2-D array using the `.reshape()` method.\n\nYou can perform various operations on these arrays, such as:\n*   Finding the minimum and maximum values (`.min()`, `.max()`).\n*   Calculating the mean value (`.mean()`).\n*   Modifying elements based on a condition (e.g., replacing odd numbers with -1 or nega

In [31]:
from langchain.prompts import PromptTemplate

# Build prompt
# The template has two placeholders, {context} and {question}; the rest is
# fixed instruction text sent to the model verbatim.
template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible. Always say "thanks for asking!" at the end of the answer. 
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

In [32]:
# Retrieval QA chain that answers with the custom prompt and also returns
# the retrieved source documents alongside the answer
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    return_source_documents=True,
    retriever=vectordb.as_retriever(),
)

In [33]:
# Query passed to the map_reduce chain in the cells below
question = "What are Learning algorithms?"

In [34]:
# Same retrieval setup, but combining the retrieved chunks with the "map_reduce" chain type
qa_chain_mr = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="map_reduce",
    retriever=vectordb.as_retriever(),
)

In [35]:
# Run the map_reduce chain on the current question.
# .invoke() replaces the deprecated direct call on the chain object.
result = qa_chain_mr.invoke({"query": question})

In [36]:
# Render the map_reduce answer as a Markdown block quote
mr_answer_markdown = to_markdown(result["result"])
display(mr_answer_markdown)

> Based on the provided text, there is no information about Learning algorithms.

In [37]:
# Add conversation memory so follow-up questions can refer to earlier turns

In [58]:
# Build prompt
# Rebinds `template` and QA_CHAIN_PROMPT, which an earlier cell already defines;
# the only textual difference here is the final line ("Helpful answer:").
from langchain.prompts import PromptTemplate
template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible. Always say "thanks for asking!" at the end of the answer. 
{context}
Question: {question}
Helpful answer:"""
QA_CHAIN_PROMPT = PromptTemplate(input_variables=["context", "question"],template=template,)

# Build the retrieval chain (rebinds `qa_chain`; nothing is executed against it here).
# NOTE(review): in the cells shown below, this `qa_chain` is never called and
# this `question` is reassigned before it is used -- confirm whether this cell
# is still needed.
from langchain.chains import RetrievalQA
question = "Is probability a class topic?"
qa_chain = RetrievalQA.from_chain_type(llm,
                                       retriever=vectordb.as_retriever(),
                                       return_source_documents=True,
                                       chain_type_kwargs={"prompt": QA_CHAIN_PROMPT})



In [59]:
from langchain.memory import ConversationBufferMemory
# Conversation buffer exposed to the chain under the "chat_history" key,
# with return_messages enabled
memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")

In [60]:
from langchain.chains import ConversationalRetrievalChain
# Conversational chain: retrieval over vectordb combined with the chat memory
retriever = vectordb.as_retriever()
qa = ConversationalRetrievalChain.from_llm(llm=llm, memory=memory, retriever=retriever)

In [67]:
# First conversational turn; the chain records it in `memory`.
question = "Give me example code for numpy?"
# .invoke() replaces the deprecated direct call on the chain object.
result = qa.invoke({"question": question})

In [68]:
# Last expression of the cell, so the notebook renders the returned Markdown object
to_markdown(result["answer"])

> Based on the context provided, here are several examples of code for numpy.
> 
> **1. Negate all elements in a 1D array that are between 3 and 8**
> ```python
> import numpy as np
> 
> Z = np.arange(11) 
> Z[(3 < Z) & (Z <= 8)] *= -1
> ```
> 
> **2. Create a 10x10 array with random values and find the minimum and maximum values**
> ```python
> import numpy as np
> 
> Z = np.random.random((10,10)) 
> Zmin, Zmax = Z.min(), Z.max() 
> print(Zmin, Zmax)
> ```
> 
> **3. Convert a 1-D array into a 2-D array with 3 rows**
> ```python
> import numpy as np 
> 
> exercise_2 = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8]) 
> print("The original array: \n", exercise_2, "\n") 
> 
> print("Method 1: \n", exercise_2.reshape(3,-1), "\n") 
> print("Method 2: \n", exercise_2.reshape(3,3))
> ```
> 
> **4. Replace all odd numbers in an array with -1**
> ```python
> import numpy as np 
> 
> exercise_1 = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) 
> exercise_1[exercise_1 % 2 == 1] = -1 
> print(exercise_1)
> ```
> 
> **5. Generate a 1-D array of 10 random integers between 30 and 40 (inclusive)**
> ```python
> import numpy as np 
> 
> exercise_4 = np.random.randint(30, 41, size = (10)) 
> print(exercise_4)
> ```
> 
> **6. Create a random vector of size 30 and find the mean value**
> ```python
> import numpy as np
> 
> Z = np.random.random(30) 
> m = Z.mean() 
> print(m)
> ```

In [63]:
# Follow-up turn: "it" is only resolvable through the chat history kept in `memory`.
question = "it belongs to python?"
# .invoke() replaces the deprecated direct call on the chain object.
result = qa.invoke({"question": question})

In [71]:
# Render the conversational chain's latest answer as a Markdown block quote
follow_up_markdown = to_markdown(result["answer"])
display(follow_up_markdown)

> Based on the context provided, here are several examples of code for numpy.
> 
> **1. Negate all elements in a 1D array that are between 3 and 8**
> ```python
> import numpy as np
> 
> Z = np.arange(11) 
> Z[(3 < Z) & (Z <= 8)] *= -1
> ```
> 
> **2. Create a 10x10 array with random values and find the minimum and maximum values**
> ```python
> import numpy as np
> 
> Z = np.random.random((10,10)) 
> Zmin, Zmax = Z.min(), Z.max() 
> print(Zmin, Zmax)
> ```
> 
> **3. Convert a 1-D array into a 2-D array with 3 rows**
> ```python
> import numpy as np 
> 
> exercise_2 = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8]) 
> print("The original array: \n", exercise_2, "\n") 
> 
> print("Method 1: \n", exercise_2.reshape(3,-1), "\n") 
> print("Method 2: \n", exercise_2.reshape(3,3))
> ```
> 
> **4. Replace all odd numbers in an array with -1**
> ```python
> import numpy as np 
> 
> exercise_1 = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) 
> exercise_1[exercise_1 % 2 == 1] = -1 
> print(exercise_1)
> ```
> 
> **5. Generate a 1-D array of 10 random integers between 30 and 40 (inclusive)**
> ```python
> import numpy as np 
> 
> exercise_4 = np.random.randint(30, 41, size = (10)) 
> print(exercise_4)
> ```
> 
> **6. Create a random vector of size 30 and find the mean value**
> ```python
> import numpy as np
> 
> Z = np.random.random(30) 
> m = Z.mean() 
> print(m)
> ```

### ✅ Why `result["answer"]` works in your code:

When you run this:

```python
result = qa({"question": question})
```

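Calling the chain — directly, which runs the `__call__()` method of the `ConversationalRetrievalChain` object, or through `.invoke()` — **automatically returns a dictionary**. It always contains the `"answer"` key (alongside the inputs, such as `"question"` and `"chat_history"`), and it contains `"source_documents"` only if the chain was created with `return_source_documents=True`.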

Even though **you didn't manually define the structure of the output**, LangChain handles this internally.

---

### ✅ Where does the `"answer"` key come from?

LangChain's `ConversationalRetrievalChain` uses this process:

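1. **Takes the input** (your question + the chat history held in `memory`).
2. **Rewrites a follow-up question** into a standalone question using the `llm` and the chat history (skipped when the history is empty).
3. **Fetches context** using `retriever` from `vectordb`.
4. **Builds a prompt** using the context and question.
5. **Sends it to the `llm`** (e.g., OpenAI, Gemini, etc.).
6. **Returns a dictionary** (`"source_documents"` is present only when `return_source_documents=True`), usually: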

   ```python
   {
       "answer": "Generated answer string",
       "source_documents": [...],
       ...
   }
   ```

Even if you don't see a key called `"answer"` explicitly written, the internal chain is designed to return that.

---

### 🧠 Summary

* The `"answer"` key is returned by the `ConversationalRetrievalChain`, **internally defined by LangChain**.
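* As built in this notebook, `qa` does **not** use `QA_CHAIN_PROMPT`: `ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory)` falls back to LangChain's default question-answering prompt. To apply your template, pass `combine_docs_chain_kwargs={"prompt": QA_CHAIN_PROMPT}` to `from_llm`. Either way, the chain is a more complex wrapper that supports **conversation memory** and **context-aware answering**.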
* You can print the entire `result` dictionary to see what else it returns:

  ```python
  print(result)
  ```
