## Working Test

In [None]:
import os
from dotenv import load_dotenv
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader
from langchain.chains.question_answering import load_qa_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI

load_dotenv()

# Load the PDF as one Document per page. `load()` is used rather than
# `load_and_split()` so the text is chunked exactly once, by the splitter
# configured below, instead of being pre-chunked with default settings first.
loader = PyPDFLoader("2205.15868v1-CogVideo-Large-scale Pretraining for Text-to-Video.pdf")
pages = loader.load()

# Chunk the pages into fixed-size, overlapping pieces.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200, length_function=len)
documents = text_splitter.split_documents(pages)

# Embed the chunks and index them in an in-memory FAISS vector store.
embedding = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
vectordb = FAISS.from_documents(documents, embedding=embedding)
# Source path without its extension (does not assume a 4-character suffix).
store_name = os.path.splitext(loader.source)[0]

query = "What is the main idea of the paper? WHat are the math formulas used in this paper"

# Retrieve the 5 most similar chunks and let the LLM answer from them.
docs = vectordb.similarity_search(query=query, k=5)
llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro")
chain = load_qa_chain(llm=llm, chain_type="stuff")
# `Chain.run` is deprecated; `invoke` returns a dict whose "output_text"
# entry holds the answer string that `run` used to return.
response = chain.invoke({"input_documents": docs, "question": query})["output_text"]
print(response)

In [None]:
# # CHAINS
# https://python.langchain.com/v0.1/docs/modules/chains/

# # DOCUMENT LOADERS
# https://python.langchain.com/v0.2/docs/integrations/document_loaders/


In [None]:
import os
import zipfile
import tempfile
from helper_classes.model import Model
from helper_classes.vector_store import VectorStore
from helper_classes.scan_documents import ScanDocuments
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI

# Read "files.zip" through the project's ScanDocuments helper and convert the
# result into documents.
# NOTE(review): ScanDocuments, Model and VectorStore are project helpers whose
# implementations are not visible here — confirm what `data` and `documents` contain.
uploader = ScanDocuments()
data = uploader.upload_zip_file("files.zip")
documents = uploader.process_document(data)

# Bundle the chat model and the embedding model into the project's Model wrapper.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro")
model = Model(llm,embeddings)

# Build the vector store around that model and index the documents.
# NOTE: this rebinds the global `vectordb`, which the first cell binds to a FAISS store.
vectordb = VectorStore(model, search_num_docs=10)
vectordb.add_documents(documents)

## Web Search

In [None]:
import requests
from bs4 import BeautifulSoup

from langchain_groq import ChatGroq
from langchain_community.tools import BraveSearch
from langchain_community.tools import WikipediaQueryRun
from langchain_community.tools import DuckDuckGoSearchRun
from langchain_community.utilities import WikipediaAPIWrapper
from langchain_community.utilities import GoogleSerperAPIWrapper

from helper_classes.model import Model
from helper_classes.search import Search
from helper_classes.converser import Converser
from helper_classes.vector_store import VectorStore
from helper_classes.scan_documents import ScanDocuments

from urllib.parse import quote

search_query = "What is the Current time in Jaipur"

# Percent-encode the query before placing it in the URL path: characters such
# as "?", "#" or "/" in free text would otherwise be read as URL structure and
# truncate or reroute the request. `requests` has no default timeout, so one is
# set explicitly, and HTTP errors are raised instead of indexing an error page.
jinai_search = requests.get("https://s.jina.ai/" + quote(search_query, safe=""), timeout=60)
jinai_search.raise_for_status()
duckduckgo_search = DuckDuckGoSearchRun(max_results=5)
wikipedia_search = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
google_serper_search = GoogleSerperAPIWrapper()
# brave_search = BraveSearch.from_api_key(search_kwargs={"count": 3})

results = duckduckgo_search.invoke(search_query)

# Reduce the Jina response body to plain text.
text = jinai_search.content
soup = BeautifulSoup(text, 'html.parser')
stripped_text = soup.get_text()

# Turn the raw text into documents via the project's ScanDocuments helper.
uploader = ScanDocuments()
data = uploader.upload_from_raw_text(stripped_text)
documents = uploader.process_document(data)

# Chat model + embeddings wrapped in the project's Model helper.
embedding = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
# llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro")
llm = ChatGroq(model="mixtral-8x7b-32768")
model = Model(llm, embedding)

# Index the documents and answer the query from them; the last expression is
# the cell's displayed output.
vectordb = VectorStore(model, search_num_docs=10)
vectordb.add_documents(documents)
vectordb.qa_chain.invoke(search_query)

In [276]:
import ast
import requests
import itertools
from bs4 import BeautifulSoup
from deep_translator import GoogleTranslator

def _parse_location_dictionary(raw_output):
    """Extract the first ``{...}`` dictionary literal from an LLM reply.

    The reply may wrap the dictionary in a Markdown code fence, precede or
    follow it with chatter, or spread it over several lines, so the text
    between the first ``{`` and the next ``}`` is parsed rather than a fixed
    line of the reply. The expected dictionary is flat, so the first closing
    brace ends it.

    Args:
        raw_output: The text content returned by the model.

    Returns:
        The parsed dictionary.

    Raises:
        ValueError: If no brace-delimited span is present, or the span is not
            a valid Python/JSON-style dictionary literal.
    """
    start = raw_output.find("{")
    end = raw_output.find("}", start + 1) if start != -1 else -1
    if start == -1 or end == -1:
        raise ValueError(f"No dictionary found in model output: {raw_output!r}")

    try:
        # literal_eval only evaluates literals, so model output is never executed.
        location_dictionary = ast.literal_eval(raw_output[start:end + 1])
    except (ValueError, SyntaxError) as error:
        raise ValueError(
            f"Could not parse location dictionary from model output: {raw_output!r}"
        ) from error

    if not isinstance(location_dictionary, dict):
        raise ValueError(f"Model output is not a dictionary: {raw_output!r}")
    return location_dictionary


def find_date_and_time(query, model):
    """Ask the LLM to classify a weather/time query and extract its location.

    Args:
        query: The user's free-text question.
        model: An object exposing ``llm.invoke(prompt)`` whose result has a
            ``content`` string attribute.

    Returns:
        The dictionary parsed from the model's reply; the prompt requests the
        keys 'country', 'sub-location' and 'information-type'.

    Raises:
        ValueError: If the reply does not contain a parsable dictionary.
    """
    prompt = f"""You are a Helpful assistant who will be asked to tell the weather and time for a given GeoGraphical Location.
    You have to extract the geographical location from the user query and also what type of query it is.
    You have to return three pieces of information from the query.
        (1)The Country for the Geographical Location.
        (2)The City or State or Prefecture(Japan only) for the Geographical Location.
        (3)Weather the User Query is asking for the Weather or Time/Day.

         
    If the User Query mentions the weather/time for a Country, dont return the City or State or Prefecture and return an empty string.
    If the User Query mentions the City/State/Prefecture,  Find the country from which these Cities/States/Prefectures are from and return the Country and the City/State/Prefecture.

    RULES FOR RETURNING THE COUNTRY
    Once the country has been successfull identified, match the country name from the following and return it as mentioned in here.
    {{
    'afghanistan','albania','algeria','angola','argentina','armenia','australia',
    'austria','azerbaijan','bahamas','bahrain','bangladesh','barbados','belarus',
    'belgium','belize','benin','bermuda','bhutan','biot','bolivia','bosnia-herzegovina',
    'botswana','brazil','brunei','bulgaria','burkina-faso','burundi','cambodia','cameroon',
    'canada','cape-verde','central-african-republic','chad','chile','china','colombia',
    'comoros','congo','congo-demrep','cook-islands','costa-rica','cote-divoire','croatia',
    'cuba','cyprus','czech-republic','denmark','djibouti','dominican-republic','ecuador',
    'egypt','el-salvador','equatorial-guinea','eritrea','estonia','ethiopia','falkland',
    'faroe','fiji','finland','france','gabon','gambia','georgia','germany','ghana','gibraltar',
    'greece','greenland','guadeloupe','guatemala','guinea','guinea-bissau','guyana','haiti',
    'honduras','hong-kong','hungary','iceland','india','indonesia','iran','iraq','ireland',
    'isle-of-man','israel','italy','jamaica','japan','jordan','kazakhstan','kazakstan','kenya',
    'kerguelen','kiribati','kuwait','kyrgyzstan','laos','latvia','lebanon','lesotho','liberia',
    'libya','lithuania','luxembourg','madagascar','malawi','malaysia','maldives','mali','malta',
    'marshall-islands','mauritania','mauritius','mexico','micronesia','moldova','monaco','mongolia',
    'montenegro','morocco','mozambique','myanmar','namibia','nauru','nepal','netherlands','new-zealand',
    'nicaragua','niger','nigeria','niue','north-korea','norway','oman','pakistan','palau','panama',
    'papua-new-guinea','paraguay','peru','philippines','pitcairn','poland','portugal','puerto-rico',
    'qatar','republic-of-macedonia','reunion','romania','russia','rwanda','samoa','sao-tome-and-principe',
    'saudi-arabia','senegal','serbia','seychelles','sierra-leone','singapore','slovakia','slovenia',
    'solomon-islands','somalia','south-africa','south-georgia-sandwich','south-korea','south-sudan',
    'spain','sri-lanka','sudan','suriname','swaziland','sweden','switzerland','syria','taiwan',
    'tajikistan','tanzania','thailand','timor-leste','togo','tokelau','tonga','trinidad-and-tobago',
    'tunisia','turkey','turkmenistan','tuvalu','uganda','uk','ukraine','united-arab-emirates','uruguay',
    'usa','uzbekistan','vanuatu','vatican-city-state','venezuela','vietnam','western-sahara','yemen','zambia',
    'zimbabwe'
    }}

    Example: 
    (1)If the Country identified is United States, return 'usa' as the country name.
    (2)If the Country identified is United Kingdom, return 'uk' as the country name.
    (3)If the country identified is South Korea, return 'south-korea' as the country name.

    
    RULE FOR RETURNING THE CITY/STATE/PREFECTURE
    Once the City/State/Prefecture has been identified, the name should be 
    (1)lowercased
    (2)If it contains spaces, replace the spaces with a hyphen.

    Example:
    (1)If the City identified is New York, return 'new-york' as the city name.
    (2)If the City identified is Los Angeles, return 'los-angeles' as the city name.
    (3)If the City identified is San Francisco, return 'san-francisco' as the city name.

    RULE FOR RETURNING THE INFORMATION TYPE:
    If the User Query mentions anything related to weather, return 'weather' as the information type.
    If the User Query mentions anything related to time, return 'worldclock' as the information type.
    
    Example:
    (1)If the Query is "What is the time in New York right now right here", return 'worldclock' as the information type.
    (2)if the Query is "Yo wagwan, What would be the weather in United States", return 'weather' as the information type.


    RETURN FORMAT
    Once the Country, City/State/Prefecture, information-type has been identified, return all three in the following format.
    {{'country': country, 'sub-location': city/state/prefecture, 'information-type': weather/worldclock}}
    Example:
    (1)"What is the time in New York right now right here",
    {{'country':'usa', 'sub-location':'new-york', 'information-type':'worldclock'}}
    (2)"Yo boss, What would be the weather in United States"
    {{'country':'usa', 'sub-location':'', 'information-type':'weather'}}
    (3)"What would be the time in the other side of the world that is Tokyo"
    {{'country':'japan', 'sub-location':'tokyo', 'information-type':'worldclock'}}
    (4)"Is it cloudy right now in United Emirates"
    {{'country':'united-arab-emirates', 'sub-location':'', 'information-type':'weather'}}


    Now Return the Country, the City/State/Prefecture in the dictionary format for the given query.
    Query: {query}
    Output: 
    """ 
    output = model.llm.invoke(prompt)
    return _parse_location_dictionary(output.content)


def fetch_info(location_dictionary):
    """Route a parsed location dictionary to the matching scraper.

    Args:
        location_dictionary: Dictionary with an 'information-type' key whose
            value is 'worldclock' or 'weather', plus the location keys the
            scrapers read.

    Returns:
        The string produced by ``fetch_time_info`` or ``fetch_weather_info``.

    Raises:
        KeyError: If 'information-type' is missing.
        ValueError: If 'information-type' is neither 'worldclock' nor
            'weather' (previously this case silently returned ``None``).
    """
    information_type = location_dictionary['information-type']
    if information_type == 'worldclock':
        return fetch_time_info(location_dictionary)
    if information_type == 'weather':
        return fetch_weather_info(location_dictionary)
    raise ValueError(
        f"Unsupported information-type {information_type!r}; expected 'worldclock' or 'weather'"
    )
    
def fetch_time_info(location_dictionary):
    """Scrape the current time for a location from timeanddate.com.

    Args:
        location_dictionary: Dictionary with 'information-type', 'country'
            and 'sub-location' keys, used verbatim as URL path segments.

    Returns:
        The string "<time>, <zone title>, <date>" passed through
        GoogleTranslator with an English target, or 'No time data' when the
        page lacks the expected elements (for example when 'sub-location' is
        empty and the page is not a single-city clock page).
    """
    url = f"https://www.timeanddate.com/{location_dictionary['information-type']}/{location_dictionary['country']}/{location_dictionary['sub-location']}"
    # requests applies no timeout by default; set one so a stalled server
    # cannot hang the caller indefinitely.
    response = requests.get(url, timeout=10)
    soup = BeautifulSoup(response.content, 'html.parser')

    data = soup.find("div", id="qlook")
    if data is None:
        # Mirror fetch_weather_info: degrade to a placeholder instead of
        # raising AttributeError on a page without the quick-look block.
        return 'No time data'

    time_tag = data.find("span", class_="h1")
    zone_tag = data.find("span", id="cta")
    zone_link = zone_tag.find("a") if zone_tag is not None else None
    date_tag = data.find("span", id="ctdat")
    if time_tag is None or zone_link is None or date_tag is None:
        return 'No time data'

    current_time = time_tag.get_text()
    standard_time = zone_link.get("title")
    date = date_tag.get_text()
    full_info = f"{current_time}, {standard_time}, {date}"

    translated_info = GoogleTranslator(target='english').translate(full_info)
    return translated_info

def fetch_weather_info(location_dictionary):
    """Scrape the current weather summary for a location from timeanddate.com.

    Args:
        location_dictionary: Dictionary with 'information-type', 'country'
            and 'sub-location' keys, used verbatim as URL path segments.

    Returns:
        A comma-separated string starting with "Current Temperature: ...",
        followed by the quick-look paragraph fragments and the detail-table
        rows. Missing page sections contribute placeholders or nothing rather
        than raising.
    """
    url = f"https://www.timeanddate.com/{location_dictionary['information-type']}/{location_dictionary['country']}/{location_dictionary['sub-location']}"
    # requests applies no timeout by default; set one so a stalled server
    # cannot hang the caller indefinitely.
    response = requests.get(url, timeout=10)

    soup = BeautifulSoup(response.content, 'html.parser')

    temp_weather_table = soup.find("div", id="qlook")
    sub_info_table = soup.find("div", class_="bk-focus__info")

    # Guard both the container and the temperature element: either may be absent.
    temperature_tag = temp_weather_table.find("div", class_="h2") if temp_weather_table is not None else None
    temperature = temperature_tag.get_text().replace("\xa0", "") if temperature_tag is not None else 'No temperature data'

    temp_weather_rows = temp_weather_table.find_all("p") if temp_weather_table is not None else []
    # Paragraph text runs several readings together; splitting on "°C"
    # separates them, and the non-breaking space that preceded each unit is
    # turned back into the unit. Empty fragments (text ending in "°C") are
    # dropped so the result has no blank entries.
    fragments = itertools.chain.from_iterable(row.get_text().split("°C") for row in temp_weather_rows)
    temp_weather_info = [fragment.replace("\xa0", "°C") for fragment in fragments if fragment.strip()]

    sub_info_rows = sub_info_table.find_all("tr") if sub_info_table is not None else []
    # The first three table rows are skipped.
    # NOTE(review): presumably location/time/report header rows — adjust the slice if the page layout changes.
    sub_info = [row.get_text().replace("\xa0", "") for row in sub_info_rows][3:]

    final_weather_info = ", ".join(["Current Temperature: " + temperature] + temp_weather_info + sub_info)

    return final_weather_info

In [292]:
# duckduckgo_search = DuckDuckGoSearchRun(max_results=5)
# wikipedia_search = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
# google_serper_search = GoogleSerperAPIWrapper()

# Build the model wrapper used for location extraction.
# NOTE: `embedding`, `ChatGroq` and `Model` are not defined in this cell; they
# come from earlier cells, so this cell only runs after those have executed.
# llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro")
llm = ChatGroq(model="mixtral-8x7b-32768")
model = Model(llm, embedding)

# Extract the location/information type from the query, then fetch the data.
query = "Hey cutiepie, tell me is it sunny in Washington-DC Right now?"
location_dictionary = find_date_and_time(query, model)
# Despite its name, `time_info` holds whatever fetch_info returns — weather
# text when the query is classified as 'weather'.
time_info = fetch_info(location_dictionary)
time_info


INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


'Current Temperature: 19°C, Passing clouds., Feels Like: 19°C, Forecast: 30 / 15°C, Wind: 9 km/h ↑ from Southwest, Visibility: 16km, Pressure: 1025 mbar, Humidity: 71%, Dew Point: 14°C'

In [None]:
import nltk
# Download the NLTK resources 'punkt_tab' and 'averaged_perceptron_tagger_eng'.
# NOTE(review): nothing visible in this notebook calls nltk directly; presumably
# a dependency or project helper needs these resources — confirm.
nltk.download('punkt_tab')
nltk.download('averaged_perceptron_tagger_eng')

## Parallel Chains

In [None]:
from langchain_community.utilities import DuckDuckGoSearchAPIWrapper, WikipediaAPIWrapper, GoogleSerperAPIWrapper
from langchain_core.runnables import RunnableParallel, RunnableMap
from dotenv import load_dotenv
# from langchain_openai import ChatOpenAI  # Assuming you want to use this for summarization
from rich import print
load_dotenv() 

# Initialize search wrappers for each search engine
# NOTE: `duckduckgo_search` and `wikipedia_search` are rebound here to API
# wrapper objects; the web-search cell binds the same names to tool objects.
duckduckgo_search = DuckDuckGoSearchAPIWrapper()
wikipedia_search = WikipediaAPIWrapper()
serperapi_search = GoogleSerperAPIWrapper()

# Create a parallel runnable for the searches
# Each keyword becomes a key of the result mapping; the same input is passed
# to every wrapper's `run` method.
search_chain = RunnableParallel(
    duckduckgo=duckduckgo_search.run,
    wikipedia=wikipedia_search.run,
    serperapi=serperapi_search.run,
)

# Initialize the model for summarization
# NOTE: ChatGoogleGenerativeAI is not imported in this cell; it relies on an
# import made in an earlier cell.
summarization_model = ChatGoogleGenerativeAI(model="gemini-1.5-pro")

# Define a function to summarize the results
def summarize_results(results, query):
    """Ask the summarization model to distil the combined search results.

    Args:
        results: Mapping with 'duckduckgo', 'wikipedia' and 'serperapi'
            entries holding each engine's output.
        query: The search query the results were produced for.

    Returns:
        Whatever ``summarization_model.invoke`` returns for the built prompt.
    """
    # Echo the raw results before they are folded into the prompt.
    print(results)
    prompt = f"""
    You have received the following search results for the query: {query}

    DuckDuckGo: {results['duckduckgo']}
    Wikipedia: {results['wikipedia']}
    Google Serper: {results['serperapi']}

    Your Job is to provide only the most accurate information from these results for the given query.
    """
    return summarization_model.invoke(prompt)

# Run the three searches on the same query; the result is keyed by the names
# given to RunnableParallel ('duckduckgo', 'wikipedia', 'serperapi').
query = "What are the Harmful effects of Smoking"
first_chain_result = search_chain.invoke(query)


# Condense the combined search output into a single answer and show it.
second_chain_result = summarize_results(first_chain_result, query)
print(second_chain_result)

## Agent

In [None]:
import warnings
from langchain import hub
from dotenv import load_dotenv
from langchain_groq import ChatGroq

from langchain.agents import create_react_agent
from langchain.agents.agent import AgentExecutor
from langchain.memory import ConversationBufferMemory
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings

from helper_classes.model import Model
from helper_classes.search import Search
from helper_classes.vector_store import VectorStore
from helper_classes.scan_documents import ScanDocuments
from helper_classes.converser import Converser

warnings.filterwarnings("ignore")

load_dotenv()  

# DOCUMENTS
# Load a single PDF through the project's ScanDocuments helper and convert it
# into documents.
uploader = ScanDocuments()
data = uploader.upload_single_file("files/2205.15868v1-CogVideo-Large-scale Pretraining for Text-to-Video.pdf")
documents = uploader.process_document(data)


# LLMS AND EMBEDDINGS
# Google embeddings paired with a Groq-hosted chat model (the Gemini chat
# model is left commented out as an alternative).
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
# llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro")
llm = ChatGroq(model="mixtral-8x7b-32768")
model = Model(llm, embeddings)


# VECTOR STORE
vector_store = VectorStore(model, search_num_docs=10)
vector_store.add_documents(documents)


# TOOLS
# The agent can call a Tavily web search and the vector store's search tool.
search_engine_tool = TavilySearchResults()
tools = [search_engine_tool, vector_store.vectordb_search_tool]


# Conversation memory exposed under the "chat_history" key.
# NOTE(review): confirm that the pulled "hwchase17/react" prompt actually has a
# `chat_history` input — if it does not, the memory is stored but never shown
# to the model.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
prompt = hub.pull("hwchase17/react")

# AGENT
agent = create_react_agent(llm=llm, tools=tools, prompt=prompt)
# NOTE(review): `llm=llm` is forwarded to the AgentExecutor constructor as an
# extra keyword — verify that the installed LangChain version accepts it.
agent_executor = AgentExecutor.from_agent_and_tools(
    agent=agent,
    tools=tools,
    llm=llm,
    max_iterations=15,
    verbose=True,
    memory=memory,
    handle_parsing_errors=True,
    return_intermediate_steps=False,
)

In [None]:
# Wrap the agent executor in the project's Converser helper and start it.
# NOTE(review): Converser is not visible here; `converse()` presumably runs an
# interactive loop — confirm before executing in a non-interactive run.
converser = Converser(agent_executor)
converser.converse()

In [None]:
# Rebuild the executor with the same arguments as the agent-setup cell; it
# reuses the existing `agent`, `tools`, `llm` and `memory` globals. The
# commented-out `agent_kwargs` show where custom prompt parts would be supplied.
agent_executor = AgentExecutor.from_agent_and_tools(
    agent=agent,
    tools=tools,
    llm=llm,
    max_iterations=15,
    verbose=True,
    memory=memory,
    handle_parsing_errors=True,
    return_intermediate_steps=False,
    # agent_kwargs={
        # 'prefix': PREFIX, 
    #     'format_instructions': FORMAT_INSTRUCTIONS,
    #     'suffix': SUFFIX
    # }
)

# agent_executor.invoke({"input": "Can you Explain in Detail The idea behind the paper CogVideo"})
# llm.invoke("What is the main idea of the paper? WHat are the math formulas used in this paper")
# vector_store.vectordb_search_tool.invoke("What is the main idea of the paper? WHat are the math formulas used in this paper")
# vector_store.qa_chain.invoke("What is the main idea of the paper? WHat are the math formulas used in this paper")