## SEARCHING WEB FOR ACCURATE RESPONSE

In this tutorial, we use the RAG (retrieval-augmented generation) technique with the Tavily API, which connects our LLM to the web so it can retrieve accurate, up-to-date information and thereby mitigate hallucinations.

In [None]:
import os
import gradio as gr
from dotenv import load_dotenv
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain_community.retrievers import TavilySearchAPIRetriever
from langchain_openai import ChatOpenAI
from langchain_huggingface import ChatHuggingFace, HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

In [None]:
# setting up api keys

# Load variables from a local .env file (if any) into the process environment.
load_dotenv()

# Re-export each key only when it is actually present. Assigning the result of
# os.getenv() directly would raise TypeError for a missing key, because
# os.environ only accepts string values and os.getenv() returns None.
for key_name in ('TAVILY_API_KEY', 'OPENAI_API_KEY', 'HF_TOKEN'):
    key_value = os.getenv(key_name)
    if key_value:
        os.environ[key_name] = key_value
    else:
        # Do not abort: not every key is needed for every model choice below.
        print(f"Warning: {key_name} is not set; features that need it will fail.")

In [None]:
# model ids

# Hugging Face model id, used when building the local text-generation pipeline.
LLAMA = "meta-llama/Llama-3.2-1B-Instruct"
# OpenAI model name, passed to ChatOpenAI when building the conversation chain.
GPT = "gpt-4o-mini"

In [None]:
# building pipeline with huggingface for llama

# Load tokenizer and weights for the LLAMA model id (the previous `MODEL` name
# was never defined and raised NameError on a fresh kernel).
tokenizer = AutoTokenizer.from_pretrained(LLAMA)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token_id = tokenizer.eos_token_id
model = AutoModelForCausalLM.from_pretrained(LLAMA)

# Hand the already-loaded model object to the pipeline so the weights are not
# loaded a second time from the model id string.
pipe = pipeline(
    "text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512
)

In [None]:
# Chat model used to answer questions (OpenAI by default).
llm = ChatOpenAI(
    model_name=GPT,
    temperature=0.7,
)
# Alternative: switch to the local Hugging Face pipeline instead.
#llm = HuggingFacePipeline(pipeline=pipe)

# Conversation memory, stored under the 'chat_history' key as message objects.
memory = ConversationBufferMemory(
    memory_key='chat_history',
    return_messages=True,
)

# using tavily api to search web for the up-to-date information
# NOTE(review): k=60 requests many results per query; confirm the Tavily API
# accepts this value and that the retrieved text fits the model's context.
retriever = TavilySearchAPIRetriever(k=60)

# Tie the model, web retriever and memory together into one conversational chain.
conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
)

In [None]:
# creating interface using gradio

def chat(question, history):
    """Answer one user message through the conversational retrieval chain.

    Args:
        question: The latest user message.
        history: Chat history supplied by the caller; not used in this
            function, which forwards only ``question`` to the chain.

    Returns:
        The value stored under the ``"answer"`` key of the chain's result.
    """
    return conversation_chain.invoke({"question": question})["answer"]

# Build the Gradio chat UI around `chat` and launch it immediately.
# Note: `view` holds the return value of `.launch()`, not the ChatInterface object.
view = gr.ChatInterface(chat, type="messages").launch()