In [7]:
! pip install --upgrade 'deeplake[enterprise]'

Collecting deeplake[enterprise]
  Downloading deeplake-3.8.19.tar.gz (585 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m585.8/585.8 kB[0m [31m225.4 kB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Collecting lz4 (from deeplake[enterprise])
  Downloading lz4-4.3.3-cp311-cp311-macosx_10_9_x86_64.whl.metadata (3.7 kB)
Collecting libdeeplake==0.0.99 (from deeplake[enterprise])
  Downloading libdeeplake-0.0.99-cp311-cp311-macosx_10_12_x86_64.whl.metadata (346 bytes)
Downloading libdeeplake-0.0.99-cp311-cp311-macosx_10_12_x86_64.whl (11.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.6/11.6 MB[0m [31m392.3 kB/s[0m eta [36m0:00:00[0m00:01[0m00:02[0m
[?25hDownloading lz4-4.3.3-cp311-cp311-macosx_10_9_x86_64.whl (254 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m 

In [2]:
'''
    Deep Lake provides storage for embeddings and their corresponding metadata in the context of LLM apps. 
    It enables hybrid searches on these embeddings and their attributes for efficient data retrieval. 
    It also integrates with LangChain, facilitating the development and deployment of applications.

    Deep Lake provides several advantages over the typical vector store:
    1. It’s multimodal, which means that it can be used to store items of diverse modalities, such as texts, images, audio, and video, along with their vector representations. 
    2. It’s serverless, which means that we can create and manage cloud datasets without creating and managing a database instance. This aspect gives a great speedup to new projects.
    3. Last, it’s possible to easily create a data loader out of the data loaded into a Deep Lake dataset. 
       It is convenient for fine-tuning machine learning models using common frameworks like PyTorch and TensorFlow.

'''

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores.deeplake import DeepLake
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA

from langchain.agents import initialize_agent, Tool
from langchain.agents import AgentType


In [3]:
# instantiate the LLM and embeddings models
llm = OpenAI(model="gpt-3.5-turbo-instruct", temperature=0)
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")

In [4]:
# create our documents
texts = [
    "Napoleon Bonaparte was born in 15 August 1769",
    "Louis XIV was born in 5 September 1638"
]
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.create_documents(texts)


In [5]:
my_activeloop_org_id = "srishtysuman2919" 
my_activeloop_dataset_name = "langchain_course_from_zero_to_hero"
dataset_path = "https:/app.activeloop.ai/srishtysuman2919/langchain_course_from_zero_to_hero"
db = DeepLake(dataset_path=dataset_path, embedding_function=embeddings)

# # add documents to our Deep Lake dataset
db.add_documents(docs)


Using embedding function is deprecated and will be removed in the future. Please use embedding instead.
Creating 2 embeddings in 1 batches of size 2:: 100%|██████████| 1/1 [00:10<00:00, 10.89s/it]

Dataset(path='https:/app.activeloop.ai/srishtysuman2919/langchain_course_from_zero_to_hero', tensors=['text', 'metadata', 'embedding', 'id'])

  tensor      htype      shape     dtype  compression
  -------    -------    -------   -------  ------- 
   text       text      (2, 1)      str     None   
 metadata     json      (2, 1)      str     None   
 embedding  embedding  (2, 1536)  float32   None   
    id        text      (2, 1)      str     None   





['7c74a188-c65b-11ee-9b1a-acde48001122',
 '7c74a25a-c65b-11ee-9b1a-acde48001122']

In [6]:
# let's create a RetrievalQA chain
retrieval_qa = RetrievalQA.from_chain_type(
	llm=llm,
	chain_type="stuff",
	retriever=db.as_retriever()
)

In [7]:
# Next, let's create an agent that uses the RetrievalQA chain as a tool
tools = [
    Tool(
        name="Retrieval QA System",
        func=retrieval_qa.run,
        description="Useful for answering questions."
    ),
]

In [8]:
agent = initialize_agent(
	tools,
	llm,
	agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
	verbose=True,
    max_iterations=6
)

In [9]:
# Finally, we can use the agent to ask a question:
response = agent.run("When was Napoleone born?")
print(response)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m I should use the Retrieval QA System to answer this question
Action: Retrieval QA System
Action Input: When was Napoleone born?[0m
Observation: [36;1m[1;3m
Napoleon Bonaparte was born in 15 August 1769.[0m
Thought:[32;1m[1;3m I now know the final answer
Final Answer: Napoleon Bonaparte was born in 15 August 1769.[0m

[1m> Finished chain.[0m
Napoleon Bonaparte was born in 15 August 1769.


In [10]:
from dotenv import load_dotenv
load_dotenv('/Users/srishtysuman/.env')
print(load_dotenv('/Users/srishtysuman/.env'))

True


In [11]:
# We should see something similar to the following printed output. 
# Here, the agent used the “Retrieval QA System” tool with the query “When was Napoleone born?” 
# which is then run on our new Deep Lake dataset, returning the most similar document 
# (i.e., the document containing the date of birth of Napoleon). This document is eventually used to generate the final output.



# Let’s add an example of reloading an existing vector store and adding more data
# load the existing Deep Lake dataset and specify the embedding function
db = DeepLake(dataset_path=dataset_path, embedding_function=embeddings)


Using embedding function is deprecated and will be removed in the future. Please use embedding instead.


Deep Lake Dataset in https:/app.activeloop.ai/srishtysuman2919/langchain_course_from_zero_to_hero already exists, loading from the storage


In [12]:
# create new documents
texts = [
    "Lady Gaga was born in 28 March 1986",
    "Michael Jeffrey Jordan was born in 17 February 1963"
]
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.create_documents(texts)

# add documents to our Deep Lake dataset
db.add_documents(docs)


Creating 2 embeddings in 1 batches of size 2:: 100%|██████████| 1/1 [00:00<00:00,  1.02it/s]

Dataset(path='https:/app.activeloop.ai/srishtysuman2919/langchain_course_from_zero_to_hero', tensors=['embedding', 'id', 'metadata', 'text'])

  tensor      htype      shape     dtype  compression
  -------    -------    -------   -------  ------- 
 embedding  embedding  (4, 1536)  float32   None   
    id        text      (4, 1)      str     None   
 metadata     json      (4, 1)      str     None   
   text       text      (4, 1)      str     None   





['c894c610-c65b-11ee-9b1a-acde48001122',
 'c894c6ec-c65b-11ee-9b1a-acde48001122']

In [13]:
# We then recreate our previous agent and ask a question that can be answered only by the last documents added
# instantiate the wrapper class for GPT3
llm = OpenAI(model="gpt-3.5-turbo-instruct", temperature=0)


In [14]:
# create a retriever from the db
retrieval_qa = RetrievalQA.from_chain_type(
	llm=llm, chain_type="stuff", retriever=db.as_retriever()
)

# instantiate a tool that uses the retriever
tools = [
    Tool(
        name="Retrieval QA System",
        func=retrieval_qa.run,
        description="Useful for answering questions."
    ),
]

# create an agent that uses the tool
agent = initialize_agent(
	tools,
	llm,
	agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
	verbose=True
)

# Let’s now test our agent with a new question
response = agent.run("When was Michael Jordan born?")
print(response)

# The LLM successfully retrieves accurate information by using the power of Deep Lake as a vector store and the OpenAI language model.



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m I should use the Retrieval QA System to find the answer.
Action: Retrieval QA System
Action Input: "When was Michael Jordan born?"[0m
Observation: [36;1m[1;3m Michael Jordan was born on 17 February 1963.[0m
Thought:[32;1m[1;3m I now know the final answer.
Final Answer: Michael Jordan was born on 17 February 1963.[0m

[1m> Finished chain.[0m
Michael Jordan was born on 17 February 1963.
