## Working Test

In [None]:
import os
from dotenv import load_dotenv
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader
from langchain.chains.question_answering import load_qa_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI

load_dotenv()

# Load the PDF as one Document per page. `load()` is used instead of
# `load_and_split()` because the latter already runs a default text splitter,
# which would chunk the text twice (once there, once below).
loader = PyPDFLoader("2205.15868v1-CogVideo-Large-scale Pretraining for Text-to-Video.pdf")
pages = loader.load()

# Chunk the pages into fixed-size, overlapping pieces.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200, length_function=len)
documents = text_splitter.split_documents(pages)

# Embed the chunks and index them in an in-memory FAISS vector store.
embedding = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
vectordb = FAISS.from_documents(documents, embedding=embedding)
# Source path without its extension (same value as the former `[:-4]` slice for ".pdf").
store_name = os.path.splitext(loader.source)[0]

query = ("What is the main idea of the paper? WHat are the math formulas used in this paper")

# Retrieve the five most similar chunks and let the LLM answer from them.
docs = vectordb.similarity_search(query=query, k=5)
llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro")
chain = load_qa_chain(llm=llm, chain_type="stuff")
# `invoke` replaces the deprecated `run`; the answer text is under "output_text".
response = chain.invoke({"input_documents": docs, "question": query})["output_text"]
print(response)

In [None]:
# # CHAINS
# https://python.langchain.com/v0.1/docs/modules/chains/

# # DOCUMENT LOADERS
# https://python.langchain.com/v0.2/docs/integrations/document_loaders/


## YouTube Loader

In [None]:
# The project helpers are imported here so the cell also runs on a fresh
# kernel (Restart & Run All) rather than relying on a later cell's imports.
from helper_classes.model import Model
from helper_classes.scan_documents import ScanDocuments
from helper_classes.vector_store import VectorStore

# Fetch the video through the project loader and turn it into documents.
url = "https://www.youtube.com/watch?v=0AW6tWTRLeU"
uploader = ScanDocuments()
data = uploader.upload_url(url)
documents = uploader.process_document(data)

# Bundle the chat model and the embedding model in the project's Model wrapper.
embedding = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro")
model = Model(llm, embedding)

# Build the vector store, index the documents and expose a retriever.
# NOTE(review): the agent cell constructs the helper as
# `VectorStore(model, search_num_docs=10)`; confirm that the zero-argument
# constructor and `create_vector_store(...)` used here still exist.
vector_store = VectorStore().create_vector_store(model.embeddings)
vector_store.add_documents(documents)
retriever = vector_store.as_retriever()


In [None]:
# RetrievalQA is imported in this cell so it does not depend on hidden kernel state.
from langchain.chains import RetrievalQA

query = "What does Shayne say about Garlic Naan"

# "stuff" chain: every retrieved chunk is placed into a single prompt.
# `model` and `retriever` are expected to come from the setup cell above.
qa_chain = RetrievalQA.from_chain_type(
    llm=model.llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)
# Last expression of the cell, so the result is displayed.
qa_chain.invoke(query)

In [None]:
from langchain_core.documents import Document

# Use the first processed document as the source of the video-level metadata.
base_description_doc = documents[0]
video_metadata = base_description_doc.metadata

# Pull the fields used in the summary text; `.get` leaves absent ones as None.
title, description, author, date, view_count = (
    video_metadata.get(field)
    for field in ("title", "description", "author", "date", "view_count")
)

# Human-readable description of the video, built from the metadata fields.
summary_text = f"""This is a Youtube video Titled: {title}.
    This video was created by the Channel {author} on {date}. 
    The video has {view_count} views. 
    The Description of the video is: {description}"""

# The summary document reuses the metadata mapping of the base document.
summary_doc = Document(metadata=video_metadata, page_content=summary_text)
summary_doc

# Put the summary in front of the existing documents.
new_documents = [summary_doc, *documents]
new_documents

## Search

### Jina-ai

In [None]:
import requests
from urllib.parse import quote

from langchain.chains import RetrievalQA
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document

from helper_classes.model import Model
from helper_classes.scan_documents import ScanDocuments

# --- Search: the query is appended to the Jina search endpoint as a path ---
jina_search_url = "https://s.jina.ai/"
search_query = "Tell me About Stable Diffusion CEO and Founder"
# Percent-encode the query, bound the wait with a timeout, and fail loudly on
# an HTTP error instead of indexing an error page as if it were search results.
response = requests.get(jina_search_url + quote(search_query), timeout=30)
response.raise_for_status()

# --- Wrap the raw response text in a Document and chunk it ---
uploader = ScanDocuments()
# NOTE(review): the 'source' value is the video id used in the YouTube cell;
# it looks copy-pasted — confirm what source these search results should carry.
text = Document(metadata={'source': '0AW6tWTRLeU'}, page_content=str(response.text))
documents = uploader.process_document([text])

# --- Models ---
embedding = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro")
model = Model(llm, embedding)

# --- Index ---
# NOTE(review): `vectordb` is the FAISS store created in the PDF cell, so the
# search chunks are added next to the paper's chunks. To query the search
# results in isolation, build a fresh store instead:
#   vectordb = FAISS.from_documents(documents, embedding=model.embeddings)
vectordb.add_documents(documents)
retriever = vectordb.as_retriever()

# --- Question answering over the retrieved chunks ---
qa_chain = RetrievalQA.from_chain_type(
    llm=model.llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)
qa_chain.invoke(search_query)


### DuckDuckGo

In [None]:
from langchain.chains import RetrievalQA
from langchain_community.tools import DuckDuckGoSearchRun
from langchain_core.documents import Document

from helper_classes.model import Model
from helper_classes.scan_documents import ScanDocuments

# --- Search ---
# NOTE(review): confirm that `max_results` is actually honoured by
# DuckDuckGoSearchRun; it may need to be set on the underlying API wrapper.
search = DuckDuckGoSearchRun(max_results=5)
query = "Tell me About Stable Diffusion CEO and Founder, Who is the new CEO of Stable Diffusion"
results = search.invoke(query)

# --- Wrap the search output in a Document and chunk it ---
uploader = ScanDocuments()
# NOTE(review): the 'source' value is the video id used in the YouTube cell;
# it looks copy-pasted — confirm what source these search results should carry.
text = Document(metadata={'source': '0AW6tWTRLeU'}, page_content=str(results))
documents = uploader.process_document([text])

# --- Models ---
embedding = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro")
model = Model(llm, embedding)

# --- Index ---
# `vectordb` is the FAISS store created in the PDF cell; the new chunks are
# appended to whatever it already contains.
vectordb.add_documents(documents)
retriever = vectordb.as_retriever()

# --- Question answering over the retrieved chunks ---
qa_chain = RetrievalQA.from_chain_type(
    llm=model.llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)
qa_chain.invoke(query)


### Wikipedia

In [None]:
from langchain.chains import RetrievalQA
from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper
from langchain_core.documents import Document

from helper_classes.model import Model
from helper_classes.scan_documents import ScanDocuments

# The same text is used for the Wikipedia lookup and for the final question,
# so it is defined once.
query = "Tell me About Stable Diffusion CEO and Founder"

# --- Search ---
wikipedia = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
results = wikipedia.invoke(query)

# --- Wrap the search output in a Document and chunk it ---
uploader = ScanDocuments()
# NOTE(review): the 'source' value is the video id used in the YouTube cell;
# it looks copy-pasted — confirm what source these search results should carry.
text = Document(metadata={'source': '0AW6tWTRLeU'}, page_content=str(results))
documents = uploader.process_document([text])

# --- Models ---
embedding = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro")
model = Model(llm, embedding)

# --- Index ---
# `vectordb` is the FAISS store created in the PDF cell; the new chunks are
# appended to whatever it already contains.
vectordb.add_documents(documents)
retriever = vectordb.as_retriever()

# --- Question answering over the retrieved chunks ---
qa_chain = RetrievalQA.from_chain_type(
    llm=model.llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)
qa_chain.invoke(query)

### Serper Search

In [None]:
from langchain_community.utilities import GoogleSerperAPIWrapper
from dotenv import load_dotenv

# Load variables from the local .env file before the wrapper is created
# (presumably where the Serper API key lives — confirm).
load_dotenv()

serper_query = "What are the Trending News in Paris Today"
search = GoogleSerperAPIWrapper()
# Last expression of the cell, so the search output is displayed.
search.run(serper_query)


### Brave

In [None]:
import os

from langchain_community.tools import BraveSearch

# `from_api_key` requires the API key as its first argument; calling it with
# only `search_kwargs` raises a TypeError. The key is read from the environment
# so it is never hard-coded in the notebook.
# The variable name BRAVE_SEARCH_API_KEY is a convention chosen here; rename it
# to match the entry in your .env file.
tool = BraveSearch.from_api_key(
    api_key=os.environ["BRAVE_SEARCH_API_KEY"],
    search_kwargs={"count": 3},
)

In [None]:
import nltk
# NLTK resources fetched by this notebook, downloaded one after the other.
tokenizer_resource, tagger_resource = "punkt_tab", "averaged_perceptron_tagger_eng"
nltk.download(tokenizer_resource)
nltk.download(tagger_resource)

## Parallel Chains

In [None]:
from langchain_community.utilities import DuckDuckGoSearchAPIWrapper, WikipediaAPIWrapper, GoogleSerperAPIWrapper
from langchain_core.runnables import RunnableParallel, RunnableMap
from dotenv import load_dotenv
from rich import print
# from langchain_openai import ChatOpenAI  # Assuming you want to use this for summarization

# Load variables from the local .env file before any client is constructed.
load_dotenv()

# One wrapper per search backend.
duckduckgo_search = DuckDuckGoSearchAPIWrapper()
wikipedia_search = WikipediaAPIWrapper()
serperapi_search = GoogleSerperAPIWrapper()

# Fan one input out to all three backends; each step hands its input
# unchanged to the wrapper's `run`, and the result is keyed by backend name.
search_chain = RunnableParallel(
    {
        "duckduckgo": lambda text: duckduckgo_search.run(text),
        "wikipedia": lambda text: wikipedia_search.run(text),
        "serperapi": lambda text: serperapi_search.run(text),
    }
)

# Chat model used to condense the combined search output.
# Alternative left by the author: ChatOpenAI(model="gpt-3.5-turbo")
summarization_model = ChatGoogleGenerativeAI(model="gemini-1.5-pro")

# Define a function to summarize the results
def summarize_results(results):
    """Ask the summarization model to condense the three search outputs.

    Args:
        results: Mapping with the keys 'duckduckgo', 'wikipedia' and
            'serperapi', each holding that backend's search output.

    Returns:
        Whatever `summarization_model.invoke` returns for the prompt.

    Raises:
        KeyError: If one of the three expected keys is missing.
    """
    prompt = f"""
    You have received the following search results:

    DuckDuckGo: {results['duckduckgo']}
    Wikipedia: {results['wikipedia']}
    Google Serper: {results['serperapi']}

    Please summarize the most accurate information from these results.
    """
    # Chat models are called through `invoke`; they have no `run` method, so
    # the previous `.run(prompt)` call failed with AttributeError.
    summary = summarization_model.invoke(prompt)
    return summary

# Combine the search and summarization into a single chain.
# The summary step must run AFTER the searches and receive their output, so the
# search chain is piped into a second map. A single RunnableMap would run both
# entries in parallel on the raw input, and `summarize_results` would then be
# handed the query instead of the search results.
combined_chain = search_chain | RunnableMap(
    search_results=lambda search_results: search_results,  # pass the raw results through
    summary=summarize_results,
)

# Run the combined chain with a query
query = "What is the capital of France?"
try:
    # The search steps forward their input straight to each wrapper's `run`,
    # so the chain is invoked with the plain query string, not a dict.
    results = combined_chain.invoke(query)

    # Print the results from each search engine and the summary
    print("DuckDuckGo Results:", results["search_results"]["duckduckgo"])
    print("Wikipedia Results:", results["search_results"]["wikipedia"])
    print("Google Serper Results:", results["search_results"]["serperapi"])
    # Show the message text when the summary is a chat message object,
    # otherwise print the value as it is.
    summary = results["summary"]
    print("Summary of Most Accurate Information:", getattr(summary, "content", summary))
except Exception as e:
    print("An error occurred during the search or summarization:", str(e))

In [None]:
from langchain_community.utilities import DuckDuckGoSearchAPIWrapper, WikipediaAPIWrapper, GoogleSerperAPIWrapper
from langchain_core.runnables import RunnableParallel, RunnableMap
from dotenv import load_dotenv
# from langchain_openai import ChatOpenAI  # Assuming you want to use this for summarization
from rich import print
# Load variables from the local .env file before any client is constructed.
load_dotenv()

# One wrapper per search backend.
duckduckgo_search = DuckDuckGoSearchAPIWrapper()
wikipedia_search = WikipediaAPIWrapper()
serperapi_search = GoogleSerperAPIWrapper()

# Map each backend name to its wrapper's bound `run` method; invoking the
# chain sends the same input to all three.
search_chain = RunnableParallel(
    {
        "duckduckgo": duckduckgo_search.run,
        "wikipedia": wikipedia_search.run,
        "serperapi": serperapi_search.run,
    }
)

# Chat model that turns the combined search output into one answer.
summarization_model = ChatGoogleGenerativeAI(model="gemini-1.5-pro")

# Summarization step: build one prompt from all search outputs and ask the model.
def summarize_results(results: dict, query: str):
    """Have the summarization model answer `query` from the search outputs.

    Args:
        results: Mapping with the keys 'duckduckgo', 'wikipedia' and
            'serperapi', each holding that backend's search output.
        query: The question the searches were run for.

    Returns:
        Whatever `summarization_model.invoke` returns for the prompt.
    """
    # Echo the raw search output before it is summarized.
    print(results)
    instructions = f"""
    You have received the following search results for the query: {query}

    DuckDuckGo: {results['duckduckgo']}
    Wikipedia: {results['wikipedia']}
    Google Serper: {results['serperapi']}

    Your Job is to provide only the most accurate information from these results for the given query.
    """
    return summarization_model.invoke(instructions)

query = "Giive me the Recipe for Chicken Tikka Masala and also the steps by step instructions"

# Step 1: send the query to every search backend.
first_chain_result = search_chain.invoke(query)
# Step 2: hand the search output and the query to the summarizer, then show the answer.
second_chain_result = summarize_results(results=first_chain_result, query=query)
print(second_chain_result)

In [None]:
from langchain_community.utilities import DuckDuckGoSearchAPIWrapper, WikipediaAPIWrapper, GoogleSerperAPIWrapper
from langchain_core.runnables import RunnableParallel, RunnableMap
from dotenv import load_dotenv
# from langchain_openai import ChatOpenAI  # Assuming you want to use this for summarization
from rich import print
# Load variables from the local .env file first.
load_dotenv()

# Search backends queried side by side.
duckduckgo_search = DuckDuckGoSearchAPIWrapper()
wikipedia_search = WikipediaAPIWrapper()
serperapi_search = GoogleSerperAPIWrapper()

# Each key names a backend and maps to that wrapper's bound `run` method.
search_steps = {
    "duckduckgo": duckduckgo_search.run,
    "wikipedia": wikipedia_search.run,
    "serperapi": serperapi_search.run,
}
search_chain = RunnableParallel(search_steps)

# Chat model used for the summarization step.
summarization_model = ChatGoogleGenerativeAI(model="gemini-1.5-pro")

# Summarizer: one prompt containing every backend's output plus the question.
def summarize_results(results: dict, query: str):
    """Ask the summarization model for the most accurate answer to `query`.

    Args:
        results: Mapping with the keys 'duckduckgo', 'wikipedia' and
            'serperapi', each holding that backend's search output.
        query: The question the searches were run for.

    Returns:
        Whatever `summarization_model.invoke` returns for the prompt.
    """
    # Show the raw search output first.
    print(results)
    summary_request = f"""
    You have received the following search results for the query: {query}

    DuckDuckGo: {results['duckduckgo']}
    Wikipedia: {results['wikipedia']}
    Google Serper: {results['serperapi']}

    Your Job is to provide only the most accurate information from these results for the given query.
    """
    model_reply = summarization_model.invoke(summary_request)
    return model_reply

query = "What are the Harmful effects of Smoking"

# Search all backends, then summarize what came back and print the answer.
first_chain_result = search_chain.invoke(query)
second_chain_result = summarize_results(results=first_chain_result, query=query)
print(second_chain_result)

## Agent

In [23]:
import warnings
from langchain import hub
from dotenv import load_dotenv
from langchain_groq import ChatGroq

from langchain.agents import create_react_agent
from langchain.agents.agent import AgentExecutor
from langchain.memory import ConversationBufferMemory
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings

from helper_classes.model import Model
from helper_classes.search import Search
from helper_classes.vector_store import VectorStore
from helper_classes.scan_documents import ScanDocuments
from helper_classes.converser import Converser

# Suppress all warnings for the rest of the session, then load the .env file.
warnings.filterwarnings("ignore")
load_dotenv()

# Settings a reader might want to change.
PDF_PATH = "files/2205.15868v1-CogVideo-Large-scale Pretraining for Text-to-Video.pdf"
EMBEDDING_MODEL_NAME = "models/embedding-001"
CHAT_MODEL_NAME = "mixtral-8x7b-32768"
SEARCH_NUM_DOCS = 10

# DOCUMENTS: load the PDF through the project loader and process it.
uploader = ScanDocuments()
data = uploader.upload_single_file(PDF_PATH)
documents = uploader.process_document(data)

# LLMS AND EMBEDDINGS: Google embeddings with a Groq-hosted chat model.
# Alternative left by the author: ChatGoogleGenerativeAI(model="gemini-1.5-pro")
embeddings = GoogleGenerativeAIEmbeddings(model=EMBEDDING_MODEL_NAME)
llm = ChatGroq(model=CHAT_MODEL_NAME)
model = Model(llm, embeddings)

# VECTOR STORE: project wrapper built from the model bundle, then filled
# with the processed documents.
vector_store = VectorStore(model, search_num_docs=SEARCH_NUM_DOCS)
vector_store.add_documents(documents)


# TOOLS: web search plus the search tool exposed by the project vector store.
search_engine_tool = TavilySearchResults()
tools = [search_engine_tool, vector_store.vectordb_search_tool]

# MEMORY + PROMPT
# The plain "hwchase17/react" prompt has no {chat_history} placeholder, so the
# stored conversation never reached the model. The chat variant of the prompt
# includes that slot. It is a string template, so the memory returns the
# history as formatted text (return_messages=False) instead of message objects.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=False)
prompt = hub.pull("hwchase17/react-chat")

# AGENT
agent = create_react_agent(llm=llm, tools=tools, prompt=prompt)
# `llm` is not passed here: the executor takes the agent, which already wraps
# the model; the former `llm=llm` keyword was not an executor option.
agent_executor = AgentExecutor.from_agent_and_tools(
    agent=agent,
    tools=tools,
    max_iterations=15,
    verbose=True,
    memory=memory,
    handle_parsing_errors=True,
    return_intermediate_steps=False,
)

INFO:root:Loading FAISS Vector Store
INFO:root:Added 65 documents to the vector store.


In [22]:
# Wrap the agent executor in the project's Converser helper
# (imported from helper_classes.converser) and start it.
# NOTE(review): `converse()` presumably runs an interactive chat loop — confirm
# against the helper before running this cell unattended.
converser = Converser(agent_executor)
converser.converse()

In [None]:
# Rebuild the executor from the existing `agent`, `tools` and `memory`.
# The former `llm=llm` keyword is dropped: it is not an executor option, and
# the model is already wrapped by `agent`.
agent_executor = AgentExecutor.from_agent_and_tools(
    agent=agent,
    tools=tools,
    max_iterations=15,
    verbose=True,
    memory=memory,
    handle_parsing_errors=True,
    return_intermediate_steps=False,
    # Prompt customisation left disabled by the author:
    # agent_kwargs={
    #     'prefix': PREFIX,
    #     'format_instructions': FORMAT_INSTRUCTIONS,
    #     'suffix': SUFFIX
    # }
)

# agent_executor.invoke({"input": "Can you Explain in Detail The idea behind the paper CogVideo"})
# llm.invoke("What is the main idea of the paper? WHat are the math formulas used in this paper")
# vector_store.vectordb_search_tool.invoke("What is the main idea of the paper? WHat are the math formulas used in this paper")
# vector_store.qa_chain.invoke("What is the main idea of the paper? WHat are the math formulas used in this paper")