## Working Test

In [None]:
import os
from dotenv import load_dotenv
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader
from langchain.chains.question_answering import load_qa_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI

load_dotenv()

# Loading The PDF File and Splitting it into Pages
loader = PyPDFLoader("2205.15868v1-CogVideo-Large-scale Pretraining for Text-to-Video.pdf")
pages = loader.load_and_split()

# Chunking the Pages into fixed size chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200, length_function=len)
documents = text_splitter.split_documents(pages)

# Converting the documents into embeddings and storing them in a FAISS Vector Store
embedding = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
vectordb = FAISS.from_documents(documents, embedding=embedding)
store_name = loader.source[:-4]

query = ("What is the main idea of the paper? WHat are the math formulas used in this paper")

docs = vectordb.similarity_search(query=query, k=5)
llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro")
chain = load_qa_chain(llm=llm, chain_type="stuff")
response = chain.run(input_documents=docs, question=query)
print(response)

In [None]:
# # CHAINS
# https://python.langchain.com/v0.1/docs/modules/chains/

# # DOCUMENT LOADERS
# https://python.langchain.com/v0.2/docs/integrations/document_loaders/


## Testing Class

In [101]:
import os
import logging
from langchain.chains import RetrievalQA
from langchain_core.documents import Document
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import (PyPDFLoader, 
                                                  Docx2txtLoader, 
                                                  UnstructuredExcelLoader, 
                                                  UnstructuredPowerPointLoader,
                                                  TextLoader,
                                                  CSVLoader,
                                                  YoutubeLoader)
from langchain.text_splitter import RecursiveCharacterTextSplitter


files = [
"files/2205.15868v1-CogVideo-Large-scale Pretraining for Text-to-Video.pdf",
'files/IM-Report.docx',
"files/UNIT-2.pptx",
"files/Hperverge OT list final.xlsx",
"files/hey.txt",
"files/test.md",
]
file_path = files[2]
print(file_path)



logging.basicConfig(level=logging.INFO)



class Model:
    def __init__(self, llm, embeddings):
        self.llm = llm
        self.embeddings = embeddings


class ScanDocuments:
    def __init__(self):
        pass
    
    def upload_single_file(self, path):
        if path.endswith(".pdf"):
            loader = PyPDFLoader(path)
        elif path.endswith(".docx"):
            loader = Docx2txtLoader(path)
        elif path.endswith(".xlsx"):
            loader = UnstructuredExcelLoader(path, encoding = 'UTF-8')
        elif path.endswith(".pptx"):
            loader = UnstructuredPowerPointLoader(path, encoding = 'UTF-8')
        elif path.endswith(".txt") or path.endswith(".md"):
            loader = TextLoader(path, encoding = 'UTF-8')
        elif path.endswith(".csv"):
            loader = CSVLoader(path, encoding = 'UTF-8')
        data = loader.load_and_split()
        return data
    
    def upload_url(self, url):
        if "www.youtube.com" in url:
            loader =  YoutubeLoader.from_youtube_url(url,add_video_info=True)
        data = loader.load_and_split()
        return data
        
    def upload_zip_file(self):
        pass
    def process_document(self, data):
        doc_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200, length_function=len)
        documents = doc_splitter.split_documents(data)
        return documents


class VectorStore:
    def __init__(self, model, search_num_docs):
        self.model = model
        self.search_num_docs = search_num_docs
        self.vectordb = self.initialize_vector_store()
        self.retriever = self.vectordb.as_retriever(search_kwargs={"k": self.search_num_docs})
        self.qa_chain = self.setup_qa_chain()

    def initialize_vector_store(self):
        """Initialize or load the FAISS vector database."""
        if not os.path.exists("./faiss_vectordb"):
            logging.info("Creating FAISS Vector Store")
            vectordb = FAISS.from_documents(documents=[Document(page_content="")],
                                            embedding=self.model.embeddings)
            vectordb.save_local("faiss_vectordb")
        else:
            logging.info("Loading FAISS Vector Store")
            vectordb = FAISS.load_local("faiss_vectordb",
                                         embeddings=self.model.embeddings, 
                                         allow_dangerous_deserialization=True)
        return vectordb

    def setup_qa_chain(self):
        """Set up the QA chain using the retriever."""
        return RetrievalQA.from_chain_type(
            llm=self.model.llm,
            chain_type="stuff",
            retriever=self.retriever,
            return_source_documents=True
        )

    def add_documents(self, documents):
        """Add documents to the vector database."""
        self.vectordb.add_documents(documents)
        logging.info(f"Added {len(documents)} documents to the vector store.")

class Search:
    def __init__(self, Model):
        self.model = Model
        self.wikipeadia = None
        self.duckduckgo = None
        self.jinai = None
        self.googleserper = None
        # self.bing = None
        # self.brave = None
        self.base_engine = self.duckduckgo

    def search_base_engine(self, query):
        results = self.base_engine.search(query)
        return results

    def search_multi_engines(self, query):
        wikipedia_results = self.wikipeadia.run(query)
        duckduckgo_results = self.duckduckgo.run(query)
        jina_ai_results = self.jinai.run(query)
        google_serper_results = self.googleserper.run(query)
        
        multi_engine_results = {
            "wikipedia": wikipedia_results,
            "duckduckgo": duckduckgo_results,
            "jina_ai": jina_ai_results,
            "google_serper": google_serper_results
        }
        return multi_engine_results

    def search_best_sites_for_query(self, query):
        prompt = f"""You are a helpful Assistant, You would be Provided a Query by the user, You Have to take the 
        query, extract the main idea behind the query and provide the a sentence which would say:

        Best Sites to Inquire more about query

        For Example:

        User: What is the temperature in Japan?
        Answer: sites to find about weather conditions of countries?:

        Now Create the Reverse Query for the given Query by the user:
        User:{query}
        Answer:
        """
        search_query = self.model.llm.invoke(prompt)
        sites = "https://s.jina.ai/"+search_query
        return sites        

# ------------------------------------------------------------------------------
# ------------------------------------------------------------------------------
# Google Drive: [https://python.langchain.com/v0.2/docs/integrations/document_loaders/google_drive]
# BibTeX: [https://python.langchain.com/v0.2/docs/integrations/document_loaders/bibtex]
# ArxivLoader, []


files/UNIT-2.pptx


<langchain_community.vectorstores.faiss.FAISS at 0x17a80c94260>

In [137]:
searcher = Search(model)
response = searcher.search_best_sites_for_query("Time in Paris").content
response.split("Answer:")[-1].split("\n")[0]

'Answer: sites to find out the current time in different cities/timezones? \n'

## Sample test

In [None]:
url = "https://www.youtube.com/watch?v=0AW6tWTRLeU"
uploader = ScanDocuments()
data = uploader.upload_url(url)
documents = uploader.process_document(data)

# -----------------------------------------------------------------------
# -----------------------------------------------------------------------
embedding = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro")
model = Model(llm, embedding)

# -----------------------------------------------------------------------
# -----------------------------------------------------------------------
vector_store = VectorStore().create_vector_store(model.embeddings)
vector_store.add_documents(documents)
retriever = vector_store.as_retriever()


In [None]:
query = "What does Shayne say about Garlic Naan"

qa_chain = RetrievalQA.from_chain_type(
    llm = model.llm,
    chain_type = "stuff",
    retriever = retriever,
    return_source_documents = True,
)
qa_chain.invoke(query)

In [None]:
from langchain_core.documents import Document

base_description_doc = documents[0]

title = base_description_doc.metadata.get("title")
description = base_description_doc.metadata.get("description")
author = base_description_doc.metadata.get("author")
date = base_description_doc.metadata.get("date")
view_count = base_description_doc.metadata.get("view_count")

summary_doc = Document(
    metadata = base_description_doc.metadata,
    page_content = f"""This is a Youtube video Titled: {title}.
    This video was created by the Channel {author} on {date}. 
    The video has {view_count} views. 
    The Description of the video is: {description}""",
)
summary_doc

new_documents = [summary_doc] + documents
new_documents

## Search

### Jina-ai

In [None]:
import requests
from langchain_core.documents import Document
from langchain_community.vectorstores import FAISS
# -----------------------------------------------------------------------
# -----------------------------------------------------------------------
jina_search_url = "https://s.jina.ai/"
search_query = "Tell me About Stable Diffusion CEO and Founder"
response = requests.get(jina_search_url+search_query)

uploader = ScanDocuments()  
text = Document(metadata={'source': '0AW6tWTRLeU'},page_content=str(response.text))
documents = uploader.process_document([text])
documents
# -----------------------------------------------------------------------
# -----------------------------------------------------------------------
embedding = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro")
model = Model(llm, embedding)

# -----------------------------------------------------------------------
# -----------------------------------------------------------------------
# vectordb = FAISS.from_documents(documents, embedding=model.embeddings)
vectordb.add_documents(documents)
# vector_store = VectorStore().create_vector_store()
# vector_store.add_documents(documents)
retriever = vectordb.as_retriever()
# -----------------------------------------------------------------------
# -----------------------------------------------------------------------
qa_chain = RetrievalQA.from_chain_type(
    llm = model.llm,
    chain_type = "stuff",
    retriever = retriever,
    return_source_documents = True,
)
qa_chain.invoke(search_query)


### DuckDuckGo

In [None]:
from langchain_community.tools import DuckDuckGoSearchRun

search = DuckDuckGoSearchRun(max_results=5)
query = "Tell me About Stable Diffusion CEO and Founder, Who is the new CEO of Stable Diffusion"
results = search.invoke(query)
# -----------------------------------------------------------------------
# -----------------------------------------------------------------------
uploader = ScanDocuments()  
text = Document(metadata={'source': '0AW6tWTRLeU'},page_content=str(results))
documents = uploader.process_document([text])
documents
# -----------------------------------------------------------------------
# -----------------------------------------------------------------------
embedding = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro")
model = Model(llm, embedding)
# -----------------------------------------------------------------------
# -----------------------------------------------------------------------
vectordb.add_documents(documents)
retriever = vectordb.as_retriever()
# -----------------------------------------------------------------------
# -----------------------------------------------------------------------
qa_chain = RetrievalQA.from_chain_type(
    llm = model.llm,
    chain_type = "stuff",
    retriever = retriever,
    return_source_documents = True,
)
qa_chain.invoke(query)


### Wikipedia

In [None]:
from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper
wikipedia = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
results = wikipedia.run("Tell me About Stable Diffusion CEO and Founder")
results
# -----------------------------------------------------------------------
# -----------------------------------------------------------------------
uploader = ScanDocuments()  
text = Document(metadata={'source': '0AW6tWTRLeU'},page_content=str(results))
documents = uploader.process_document([text])
documents
# -----------------------------------------------------------------------
# -----------------------------------------------------------------------
embedding = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro")
model = Model(llm, embedding)
# -----------------------------------------------------------------------
# -----------------------------------------------------------------------
vectordb.add_documents(documents)
retriever = vectordb.as_retriever()
# -----------------------------------------------------------------------
# -----------------------------------------------------------------------
query = "Tell me About Stable Diffusion CEO and Founder"
qa_chain = RetrievalQA.from_chain_type(
    llm = model.llm,
    chain_type = "stuff",
    retriever = retriever,
    return_source_documents = True,
)
qa_chain.invoke(query)

### Serper Search

In [None]:
from langchain_community.utilities import GoogleSerperAPIWrapper
from dotenv import load_dotenv

load_dotenv() 

search = GoogleSerperAPIWrapper()
search.run("What are the Trending News in Paris Today")


### Brave

In [None]:
from langchain_community.tools import BraveSearch
tool = BraveSearch.from_api_key(search_kwargs={"count": 3})

### SearchXGN

In [None]:
from langchain_community.utilities import SearxSearchWrapper
searx = SearxSearchWrapper(searx_host="http://localhost:8888",
                                        unsecure=True)
searx.run("What are the Trending News in Paris Today")

In [None]:
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI 

# documents =
llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro")
embedding = GoogleGenerativeAIEmbeddings(model="models/embedding-001")


vectordb = Chroma.from_documents(documents, 
                                 embedding=embedding,
                                 persist_directory="chroma"
                                 )

retriever = vectordb.as_retriever(
    search_type = "similarity",
    search_kwargs = {'k':3})

qa_chain = RetrievalQA.from_chain_type(
    llm = llm,
    chain_type = "stuff",
    retriever = vectordb.as_retriever(),
    # return_source_documents = True,
)

query = "What is video about?"
docs = retriever.get_relevant_documents(query)
qa_chain.invoke(query, documents = docs)

In [None]:
# -----------------------------------------------------------------------------------------
# -----------------------------------------------------------------------------------------
# from langchain.vectorstores import Chroma
# from langchain.chains import RetrievalQA

# vectordb  = FAISS.from_documents(documents, embedding=embedding)
# qa_chain = RetrievalQA.from_chain_type(
#     llm = llm,
#     chain_type = "stuff",
#     retriever = vectordb.as_retriever(),
# )

# query = "What is the main idea of the paper? WHat are the math formulas used in this paper"	
# results = qa_chain.invoke(query)
# vectordb.save_local("vectordb")


# vectordb.delete_collection()
# qa_chain.combine_documents_chain_.llm_chain.prompt.messages[0].prompt.template

In [None]:
from langchain_community.document_loaders import YoutubeLoader

youtube_url = "https://www.youtube.com/watch?v=QsYGlZkevEg"
loader = YoutubeLoader.from_youtube_url(youtube_url,add_video_info=True)
# transcript = loader.load()
# transcript


In [None]:
import nltk
nltk.download('punkt_tab')
nltk.download('averaged_perceptron_tagger_eng')

## Parallel Chains

In [73]:
from langchain_community.utilities import DuckDuckGoSearchAPIWrapper, WikipediaAPIWrapper, GoogleSerperAPIWrapper
from langchain_core.runnables import RunnableParallel, RunnableMap
from dotenv import load_dotenv
from rich import print
# from langchain_openai import ChatOpenAI  # Assuming you want to use this for summarization

load_dotenv() 

# Initialize search wrappers for each search engine
duckduckgo_search = DuckDuckGoSearchAPIWrapper()
wikipedia_search = WikipediaAPIWrapper()
serperapi_search = GoogleSerperAPIWrapper()

# Create a parallel runnable for the searches
search_chain = RunnableParallel(
    duckduckgo=lambda query: duckduckgo_search.run(query),
    wikipedia=lambda query: wikipedia_search.run(query),
    serperapi=lambda query: serperapi_search.run(query),
)

# Initialize the model for summarization
summarization_model = ChatGoogleGenerativeAI(model="gemini-1.5-pro")
# ChatOpenAI(model="gpt-3.5-turbo")  # Adjust as needed

# Define a function to summarize the results
def summarize_results(results):
    prompt = f"""
    You have received the following search results:

    DuckDuckGo: {results['duckduckgo']}
    Wikipedia: {results['wikipedia']}
    Google Serper: {results['serperapi']}

    Please summarize the most accurate information from these results.
    """
    summary = summarization_model.run(prompt)
    return summary

# Combine the search and summarization into a single chain
combined_chain = RunnableMap(
    search_results=search_chain,
    summary=summarize_results
)

# Run the combined chain with a query
query = "What is the capital of France?"
try:
    results = combined_chain.invoke({"query": query})

    # Print the results from each search engine and the summary
    print("DuckDuckGo Results:", results["search_results"]["duckduckgo"])
    print("Wikipedia Results:", results["search_results"]["wikipedia"])
    print("Google Serper Results:", results["search_results"]["serperapi"])
    print("Summary of Most Accurate Information:", results["summary"])
except Exception as e:
    print("An error occurred during the search or summarization:", str(e))

An error occurred during the search or summarization: https://links.duckduckgo.com/d.js TypeError: argument 'params': 'dict' object cannot be converted to 'PyString'


In [90]:
from rich import print, Pretty

text = "hello"
print(text)

ImportError: cannot import name 'Pretty' from 'rich' (c:\Users\Nilay Kumar\Desktop\langchain_app\venv\Lib\site-packages\rich\__init__.py)

In [112]:
from langchain_community.utilities import DuckDuckGoSearchAPIWrapper, WikipediaAPIWrapper, GoogleSerperAPIWrapper
from langchain_core.runnables import RunnableParallel, RunnableMap
from dotenv import load_dotenv
# from langchain_openai import ChatOpenAI  # Assuming you want to use this for summarization
from rich import print
load_dotenv() 

# Initialize search wrappers for each search engine
duckduckgo_search = DuckDuckGoSearchAPIWrapper()
wikipedia_search = WikipediaAPIWrapper()
serperapi_search = GoogleSerperAPIWrapper()

# Create a parallel runnable for the searches
search_chain = RunnableParallel(
    duckduckgo=duckduckgo_search.run,
    wikipedia=wikipedia_search.run,
    serperapi=serperapi_search.run,
)

# Initialize the model for summarization
summarization_model = ChatGoogleGenerativeAI(model="gemini-1.5-pro")

# Define a function to summarize the results
def summarize_results(results, query):
    print(results)
    prompt = f"""
    You have received the following search results for the query: {query}

    DuckDuckGo: {results['duckduckgo']}
    Wikipedia: {results['wikipedia']}
    Google Serper: {results['serperapi']}

    Your Job is to provide only the most accurate information from these results for the given query.
    """
    summary = summarization_model.invoke(prompt)
    return summary

query = "Giive me the Recipe for Chicken Tikka Masala and also the steps by step instructions"
first_chain_result = search_chain.invoke(query)
second_chain_result = summarize_results(first_chain_result, query)
print(second_chain_result)

In [None]:
from langchain_community.utilities import DuckDuckGoSearchAPIWrapper, WikipediaAPIWrapper, GoogleSerperAPIWrapper
from langchain_core.runnables import RunnableParallel, RunnableMap
from dotenv import load_dotenv
# from langchain_openai import ChatOpenAI  # Assuming you want to use this for summarization
from rich import print
load_dotenv() 

# Initialize search wrappers for each search engine
duckduckgo_search = DuckDuckGoSearchAPIWrapper()
wikipedia_search = WikipediaAPIWrapper()
serperapi_search = GoogleSerperAPIWrapper()

# Create a parallel runnable for the searches
search_chain = RunnableParallel(
    duckduckgo=duckduckgo_search.run,
    wikipedia=wikipedia_search.run,
    serperapi=serperapi_search.run,
)

# Initialize the model for summarization
summarization_model = ChatGoogleGenerativeAI(model="gemini-1.5-pro")

# Define a function to summarize the results
def summarize_results(results, query):
    print(results)
    prompt = f"""
    You have received the following search results for the query: {query}

    DuckDuckGo: {results['duckduckgo']}
    Wikipedia: {results['wikipedia']}
    Google Serper: {results['serperapi']}

    Your Job is to provide only the most accurate information from these results for the given query.
    """
    summary = summarization_model.invoke(prompt)
    return summary

query = "What are the Harmful effects of Smoking"
first_chain_result = search_chain.invoke(query)


second_chain_result = summarize_results(first_chain_result, query)
print(second_chain_result)

## Agent

In [5]:
from langchain.prompts import PromptTemplate 
from langchain.agents import initialize_agent                                   
from langchain.memory import ConversationBufferMemory
from langchain.agents import AgentType
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_community.tools.tavily_search import TavilySearchResults
from dotenv import load_dotenv
from langchain.agents.agent import AgentExecutor
from langchain.agents import initialize_agent, create_react_agent
from langchain import hub

#  AgentExecutor.from_agent_and_tools
load_dotenv()
llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro")
search = TavilySearchResults()
tools = [search]
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

prompt = hub.pull("hwchase17/react")

agent = create_react_agent(
    llm=llm,
    tools=tools,
    prompt=prompt,
)

agent_executor = AgentExecutor.from_agent_and_tools(
    agent=agent,
    tools=tools,
    llm=llm,
    max_iterations=4,
    verbose=True,
    memory=memory,
    handle_parsing_errors=True,
    return_intermediate_steps=False,
    # agent_kwargs={
        # 'prefix': PREFIX, 
    #     'format_instructions': FORMAT_INSTRUCTIONS,
    #     'suffix': SUFFIX
    # }
)

agent_executor.invoke({"input": "Who won the most golds in 2024 olympics?"})
# while True:
#     query = input("Query: ")
#     if query == "exit":
#         break
#     print("User: ", query)
#     response = agent.invoke({"input": query})
#     print("AI: ", response.get("output"))

Please use the `langsmith sdk` instead:
  pip install langsmith
Use the `pull_prompt` method.
  res_dict = client.pull_repo(owner_repo_commit)




[1m> Entering new AgentExecutor chain...[0m


Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..


[32;1m[1;3mThought: I need to search for the gold medal count of the 2024 Olympics. 
Action: tavily_search_results_json
Action Input: 2024 Olympics gold medal count [0m[36;1m[1;3m[{'url': 'https://www.sportingnews.com/uk/olympics/news/olympics-winner-2024-final-medal-standings-usa/10d8066d05bb7d20c197104f', 'content': "SN's 2024 OLYMPICS HQ Updated medal count ... Here's a look at the 10 countries with the most gold medals at the Paris Olympics: Country: Gold medals: Overall medals: USA: 40: 126: China: 40: 91 ..."}, {'url': 'https://www.sportingnews.com/us/olympics/news/usa-medal-count-2024-olympics-gold-silver-bronze/8311c9b2059e5d6257b1d81c', 'content': 'The Sporting News is tracking of all Olympic medals won by the United States at the 2024 Paris Olympics. USA medal count 2024: Updated tracker of Olympic gold, silver, bronze medals for United ...'}, {'url': 'https://www.bbc.co.uk/sport/olympics/paris-2024/medals', 'content': 'Find out the medal table from the Paris Olympics 20

{'input': 'Who won the most golds in 2024 olympics?',
 'chat_history': [HumanMessage(content='Who won the most golds in 2024 olympics?'),
  AIMessage(content='The USA and China tied for the most gold medals at the 2024 Olympics.')],
 'output': 'The USA and China tied for the most gold medals at the 2024 Olympics.'}

In [19]:
from langchain.agents import Tool
from langchain.chains import RetrievalQA
from langchain_community.vectorstores import FAISS

uploader = ScanDocuments()
data = uploader.upload_single_file("files/2205.15868v1-CogVideo-Large-scale Pretraining for Text-to-Video.pdf")
documents = uploader.process_document(data)


embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro")
model = Model(llm, embeddings)

vector_store = VectorStore(model, search_num_docs=10)
vector_store.add_documents(documents)

vectordb_search_tool =Tool(
        name = "Vector Database Search Tool",
        func=vector_store.qa_chain.invoke,
        description="""Useful for when you want to search information regarding a document uploaded by the User. 
        If the User Query seems vague try to search for the same in vector Store and ask The user if the Query is regarding the paper in the Vector Store?
        """
    )

# vectordb_search_tool.invoke("What is the main idea of the paper? WHat are the math formulas used in this paper")

In [59]:

# FAISS.

<langchain_community.vectorstores.faiss.FAISS at 0x17a7b2b6ff0>

In [23]:
search_engine_tool = TavilySearchResults()

tools = [search_engine_tool, vectordb_search_tool]

memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
prompt = hub.pull("hwchase17/react")

agent = create_react_agent(
    llm=llm,
    tools=tools,
    prompt=prompt,
)

agent_executor = AgentExecutor.from_agent_and_tools(
    agent=agent,
    tools=tools,
    llm=llm,
    max_iterations=15,
    verbose=True,
    memory=memory,
    handle_parsing_errors=True,
    return_intermediate_steps=False,
    # agent_kwargs={
        # 'prefix': PREFIX, 
    #     'format_instructions': FORMAT_INSTRUCTIONS,
    #     'suffix': SUFFIX
    # }
)

agent_executor.invoke({"input": "Can you Explain in Detail The idea behind the paper CogVideo"})

Please use the `langsmith sdk` instead:
  pip install langsmith
Use the `pull_prompt` method.
  res_dict = client.pull_repo(owner_repo_commit)




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: The question is asking for an explanation of a research paper. This is likely best answered from a vector database of research papers.
Action: Vector Database Search Tool
Action Input: {"query": "Explain in Detail The idea behind the paper CogVideo"}[0m[33;1m[1;3m{'query': '{"query": "Explain in Detail The idea behind the paper CogVideo"}', 'result': "CogVideo aims to tackle the challenging task of **text-to-video generation**, essentially teaching machines to create videos from text descriptions.  Here's a breakdown of its key ideas:\n\n**1. Building on Existing Knowledge:**\n\n* Instead of starting from scratch, CogVideo leverages the power of a pre-trained text-to-image model called CogView2. This allows it to inherit a strong understanding of visual concepts and their relationship to language.\n\n**2. Multi-Frame-Rate Hierarchical Training:**\n\n* Videos involve motion, which is challenging to capture from tex

{'input': 'Can you Explain in Detail The idea behind the paper CogVideo',
 'chat_history': [HumanMessage(content='Can you Explain in Detail The idea behind the paper CogVideo'),
  AIMessage(content='CogVideo is a text-to-video generation model that builds on the pre-trained text-to-image model CogView2. It uses a multi-frame-rate hierarchical training approach to generate videos from text descriptions. The model first generates keyframes sequentially based on the text prompt and a specified frame rate. Then, it recursively interpolates intermediate frames at different frame rates to create smoother and more coherent motion. CogVideo is trained on a massive dataset of 5.4 million text-video pairs, which helps it learn complex relationships between language and dynamic visual content.')],
 'output': 'CogVideo is a text-to-video generation model that builds on the pre-trained text-to-image model CogView2. It uses a multi-frame-rate hierarchical training approach to generate videos from te