## Working Test

In [None]:
import os
from dotenv import load_dotenv
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader
from langchain.chains.question_answering import load_qa_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI

# Read API keys (e.g. for the Google Generative AI clients) from a local .env file.
load_dotenv()

# Load the PDF as page-level Documents.
# `load()` is used rather than `load_and_split()`: the latter already runs a
# default text splitter, so its output would be chunked a second time by the
# explicit splitter below.
loader = PyPDFLoader("2205.15868v1-CogVideo-Large-scale Pretraining for Text-to-Video.pdf")
pages = loader.load()

# Chunk the pages into ~1000-character pieces with 200 characters of overlap.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200, length_function=len)
documents = text_splitter.split_documents(pages)

# Embed the chunks and index them in an in-memory FAISS vector store.
embedding = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
vectordb = FAISS.from_documents(documents, embedding=embedding)
# Source path without its extension; splitext is robust to extensions that are
# not exactly four characters long (the previous `[:-4]` slice was not).
# NOTE(review): not used in the visible cells — presumably meant as the name
# under which to persist the index; confirm or remove.
store_name = os.path.splitext(loader.source)[0]

query = ("What is the main idea of the paper? WHat are the math formulas used in this paper")

# Retrieve the 5 most similar chunks and "stuff" them into a single prompt.
docs = vectordb.similarity_search(query=query, k=5)
llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro")
chain = load_qa_chain(llm=llm, chain_type="stuff")
# `Chain.run` is deprecated; `invoke` returns a dict whose answer is under "output_text".
response = chain.invoke({"input_documents": docs, "question": query})["output_text"]
print(response)

In [None]:
# # CHAINS
# https://python.langchain.com/v0.1/docs/modules/chains/

# # DOCUMENT LOADERS
# https://python.langchain.com/v0.2/docs/integrations/document_loaders/


In [None]:
import os
import zipfile
import tempfile
from helper_classes.model import Model
from helper_classes.vector_store import VectorStore
from helper_classes.scan_documents import ScanDocuments
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI

# Ingest a zip archive through the project's ScanDocuments helper and turn it
# into documents. "files.zip" is a relative path.
# NOTE(review): presumably resolved against the notebook's working directory — confirm.
# This rebinds the notebook-level `documents` created in the first cell.
uploader = ScanDocuments()
data = uploader.upload_zip_file("files.zip")
documents = uploader.process_document(data)

# Bundle the Gemini chat model and the Google embedding model in the project's Model wrapper.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro")
model = Model(llm,embeddings)

# Build the project's VectorStore wrapper and add the processed documents to it.
# This rebinds `vectordb`, which the first cell bound to a raw FAISS store.
# NOTE(review): search_num_docs=10 presumably sets how many documents a search
# returns — confirm in helper_classes.vector_store.
vectordb = VectorStore(model, search_num_docs=10)
vectordb.add_documents(documents)

## Web Search

In [None]:
import requests
from bs4 import BeautifulSoup

from langchain_groq import ChatGroq
from langchain_community.tools import BraveSearch
from langchain_community.tools import WikipediaQueryRun
from langchain_community.tools import DuckDuckGoSearchRun
from langchain_community.utilities import WikipediaAPIWrapper
from langchain_community.utilities import GoogleSerperAPIWrapper

from helper_classes.model import Model
from helper_classes.search import Search
from helper_classes.converser import Converser
from helper_classes.vector_store import VectorStore
from helper_classes.scan_documents import ScanDocuments

from urllib.parse import quote

# Imported here so this cell does not depend on an earlier cell having been run.
from langchain_google_genai import GoogleGenerativeAIEmbeddings

search_query = "What is the Current time in Jaipur"

# Fetch search results as text from the Jina search endpoint.
# - The query is percent-encoded so characters such as "?", "#" or "&" stay
#   part of the search text instead of being parsed as URL syntax.
# - A timeout prevents the cell from hanging indefinitely.
# - raise_for_status() stops an HTTP error page from being embedded and
#   indexed as if it were search content.
jinai_search = requests.get("https://s.jina.ai/" + quote(search_query, safe=""), timeout=60)
jinai_search.raise_for_status()

# Alternative search back-ends, instantiated for experimentation.
duckduckgo_search = DuckDuckGoSearchRun(max_results=5)
wikipedia_search = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
google_serper_search = GoogleSerperAPIWrapper()
# brave_search = BraveSearch.from_api_key(search_kwargs={"count": 3})

# NOTE(review): `results` is not consumed by the rest of this cell.
results = duckduckgo_search.invoke(search_query)

# Strip any markup from the response body, keeping only the text.
text = jinai_search.content
soup = BeautifulSoup(text, 'html.parser')
stripped_text = soup.get_text()

# Turn the raw text into documents via the project's ScanDocuments helper.
uploader = ScanDocuments()
data = uploader.upload_from_raw_text(stripped_text)
documents = uploader.process_document(data)

# Pair the Groq-hosted chat model with Google embeddings in the project's Model wrapper.
embedding = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
# llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro")
llm = ChatGroq(model="mixtral-8x7b-32768")
model = Model(llm, embedding)

# Index the documents and answer the original query against them.
# The final expression is the cell's displayed output.
vectordb = VectorStore(model, search_num_docs=10)
vectordb.add_documents(documents)
vectordb.qa_chain.invoke(search_query)

In [None]:
from langchain_groq import ChatGroq
from helper_classes.model import Model
from tools.weather_time_tool import WeatherTimeTool
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI

# Select the chat model; the Groq alternative is kept commented out for quick switching.
llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro")
# llm = ChatGroq(model="mixtral-8x7b-32768")
embedding = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
# Rebinds the notebook-level `llm`, `embedding` and `model` set by earlier cells.
model = Model(llm, embedding)



In [None]:
import nltk
# Download the NLTK resources 'punkt_tab' and 'averaged_perceptron_tagger_eng'.
# NOTE(review): presumably required by a document-parsing dependency — confirm which one needs them.
nltk.download('punkt_tab')
nltk.download('averaged_perceptron_tagger_eng')

## Parallel Chains

In [None]:
from langchain_community.utilities import DuckDuckGoSearchAPIWrapper, WikipediaAPIWrapper, GoogleSerperAPIWrapper
from langchain_core.runnables import RunnableParallel, RunnableMap
from dotenv import load_dotenv
# from langchain_openai import ChatOpenAI  # Assuming you want to use this for summarization
from rich import print
# Pull API keys from the local .env file before any client is constructed.
load_dotenv()

# One wrapper per search back-end.
duckduckgo_search = DuckDuckGoSearchAPIWrapper()
wikipedia_search = WikipediaAPIWrapper()
serperapi_search = GoogleSerperAPIWrapper()

# Fan a single query out to all three back-ends at once; the result is a dict
# keyed by the names given here.
search_steps = {
    "duckduckgo": duckduckgo_search.run,
    "wikipedia": wikipedia_search.run,
    "serperapi": serperapi_search.run,
}
search_chain = RunnableParallel(search_steps)

# Chat model used to condense the combined search results.
summarization_model = ChatGoogleGenerativeAI(model="gemini-1.5-pro")

# Define a function to summarize the results
def summarize_results(results, query, model=None):
    """Ask a chat model to distil the combined search results for ``query``.

    Args:
        results: Mapping with the keys ``"duckduckgo"``, ``"wikipedia"`` and
            ``"serperapi"``, each holding that engine's search output.
        query: The search query the results were produced for.
        model: Optional object exposing ``invoke(prompt)``. When omitted, the
            notebook-level ``summarization_model`` is used.

    Returns:
        Whatever ``model.invoke(prompt)`` returns.

    Raises:
        KeyError: If any of the three expected engine keys is missing.
    """
    # Fail early with a readable message instead of a bare KeyError raised
    # from the middle of the f-string below.
    expected_keys = ("duckduckgo", "wikipedia", "serperapi")
    missing = [key for key in expected_keys if key not in results]
    if missing:
        raise KeyError(f"search results are missing engine(s): {', '.join(missing)}")

    # Only fall back to the notebook global when no override is supplied, so
    # the function can be exercised without the real LLM.
    active_model = summarization_model if model is None else model

    # Echo the raw results so the reader can compare them with the summary.
    print(results)
    prompt = f"""
    You have received the following search results for the query: {query}

    DuckDuckGo: {results['duckduckgo']}
    Wikipedia: {results['wikipedia']}
    Google Serper: {results['serperapi']}

    Your Job is to provide only the most accurate information from these results for the given query.
    """
    summary = active_model.invoke(prompt)
    return summary

# Rebinds the notebook-level `query` used by the first cell.
query = "What are the Harmful effects of Smoking"
# Run the three searches through the parallel chain; summarize_results reads
# the result by the keys "duckduckgo", "wikipedia" and "serperapi".
first_chain_result = search_chain.invoke(query)


# Condense the combined results with the summarization model.
# `print` here is rich's print, imported at the top of this cell.
second_chain_result = summarize_results(first_chain_result, query)
print(second_chain_result)

## Agent

In [None]:
import warnings
import warnings
from langchain import hub
from dotenv import load_dotenv
from langchain_groq import ChatGroq
from pydantic import PydanticDeprecatedSince20

from langchain.agents import create_react_agent
from langchain.agents.agent import AgentExecutor
from langchain.memory import ConversationBufferMemory, ConversationBufferWindowMemory
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings

from helper_classes.model import Model
from helper_classes.search import Search
from helper_classes.converser import Converser
from helper_classes.vector_store import VectorStore
from helper_classes.scan_documents import ScanDocuments

from tools.calculator_tool import CalculatorTool
from tools.weather_time_tool import WeatherTimeTool


# Pull API keys from the local .env file and silence pydantic's v2 deprecation warnings.
load_dotenv()
warnings.filterwarnings("ignore", category=PydanticDeprecatedSince20)


# DOCUMENTS
# Load the paper through the project's ScanDocuments helper.
uploader = ScanDocuments()
data = uploader.upload_single_file("files/2205.15868v1-CogVideo-Large-scale Pretraining for Text-to-Video.pdf")
documents = uploader.process_document(data)


# LLMS AND EMBEDDINGS
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro")
# llm = ChatGroq(model="mixtral-8x7b-32768")
model = Model(llm, embeddings)


# VECTOR STORE
vector_store = VectorStore(model, search_num_docs=10)
vector_store.add_documents(documents)


# TOOLS
weather_time_tool = WeatherTimeTool(model)
search_engine_tool = TavilySearchResults()
calculator_tool = CalculatorTool()


tools = [
    search_engine_tool, # Tool to search the web
    vector_store.vectordb_search_tool, # Tool To Search the Vector Data Base
    weather_time_tool.tool, # Tool to get the weather and time of a location
    calculator_tool.tool, # Tool to perform calculations
    ]


# MEMORY AND PROMPT
# The plain "hwchase17/react" prompt has no {chat_history} placeholder, so a
# memory keyed on "chat_history" is recorded but never shown to the model.
# The "react-chat" variant includes that placeholder. It is a string prompt,
# so the memory is rendered as text (return_messages=False) rather than as a
# list of message objects.
# NOTE(review): memory.chat_memory.messages is unaffected, but memory.buffer is
# now a string — confirm Converser does not rely on a message list there.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=False)
prompt = hub.pull("hwchase17/react-chat")

# AGENT
agent = create_react_agent(llm=llm, tools=tools, prompt=prompt)
# `llm` is intentionally not passed here: AgentExecutor has no such field (the
# model is already bound inside `agent`), so the argument had no effect.
agent_executor = AgentExecutor.from_agent_and_tools(
    agent=agent,
    tools=tools,
    max_iterations=5,
    verbose=True,
    memory=memory,
    handle_parsing_errors=True,
    return_intermediate_steps=False,
)

In [None]:
# IPython-only introspection (trailing `?`): displays the docstring of the factory method.
# Development aid — not valid plain Python; remove before sharing the notebook.
AgentExecutor.from_agent_and_tools?

In [None]:
# IPython-only introspection (trailing `?`): displays the docstring of the memory class.
# Development aid — not valid plain Python; remove before sharing the notebook.
ConversationBufferMemory?

In [None]:
# Hand the agent executor to the project's Converser helper and start it.
# NOTE(review): converse() presumably runs an interactive chat loop that blocks
# the cell until the user ends it — confirm in helper_classes.converser.
converser = Converser(agent_executor)
converser.converse()

In [None]:
import requests
from bs4 import BeautifulSoup

from urllib.parse import quote

# Single source of truth for the question: used both for the web search and
# for the retrieval QA call at the end of the cell.
flight_query = "Tell me about the flight from jaipur to chennai on 27th august?"

# Percent-encode the query before placing it in the URL path. Unencoded, the
# trailing "?" is parsed as the start of an (empty) query string and is lost
# from the search text.
url = "https://s.jina.ai/" + quote(flight_query, safe="")
# The timeout keeps the cell from hanging; raise_for_status() stops an HTTP
# error page from being indexed as if it were search content.
response = requests.get(url, timeout=60)
response.raise_for_status()
text = response.text


# Turn the fetched text into documents and index them.
# NOTE(review): this rebinds the notebook-level `documents` and `vector_store`
# from the agent cell, and relies on `model` defined in an earlier cell.
uploader = ScanDocuments()
data = uploader.upload_from_raw_text(text)
documents = uploader.process_document(data)
vector_store = VectorStore(model, search_num_docs=10)
vector_store.add_documents(documents)
# The final expression is the cell's displayed output.
vector_store.qa_chain.invoke(flight_query)

In [None]:
# Rebuild the executor around the same agent, tools and memory, with a larger
# step budget (15 iterations instead of 5) for longer reasoning chains.
# `llm` is intentionally not passed: AgentExecutor has no such field (the model
# is already bound inside `agent`), so the argument had no effect.
agent_executor = AgentExecutor.from_agent_and_tools(
    agent=agent,
    tools=tools,
    max_iterations=15,
    verbose=True,
    memory=memory,
    handle_parsing_errors=True,
    return_intermediate_steps=False,
)

# agent_executor.invoke({"input": "Can you Explain in Detail The idea behind the paper CogVideo"})
# llm.invoke("What is the main idea of the paper? WHat are the math formulas used in this paper")
# vector_store.vectordb_search_tool.invoke("What is the main idea of the paper? WHat are the math formulas used in this paper")
# vector_store.qa_chain.invoke("What is the main idea of the paper? WHat are the math formulas used in this paper")