## Imports

In [1]:
import os
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment so that
# later os.getenv(...) lookups can see them. Kept as the last expression of the
# cell so its return value is displayed.
load_dotenv()

True

In [2]:
from huggingface_hub import InferenceClient
from langchain_core.runnables import RunnableLambda
from langchain_classic.chains import RetrievalQA
from langchain_core.prompts import PromptTemplate
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Embedding model handed to the FAISS loader in a later cell.
# NOTE(review): presumably the same model the saved index was built with - confirm.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
embedding_model = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

  embedding_model = HuggingFaceEmbeddings(


In [4]:
# Saved FAISS index directory, resolved relative to the notebook's working directory.
INDEX_DIR = "../indexes/artificial_intelligence"

# SECURITY: allow_dangerous_deserialization=True opts in to unpickling the stored
# index metadata, which can execute arbitrary code. Only load index directories
# that you created yourself or otherwise trust.
db = FAISS.load_local(INDEX_DIR, embedding_model, allow_dangerous_deserialization=True)

## Custom grounded prompt

In [5]:
# Grounded-answer prompt: the model is told to answer only from the supplied
# context and to admit when it does not know. The {context} and {question}
# placeholders are the two variables declared on the PromptTemplate built from
# this string.
CUSTOM_PROMPT_TEMPLATE = """
Use the pieces of information provided in the context to answer the user's question.
If you do not know the answer, say that you do not know.
Do not make up an answer.
Do not use information outside the given context.

Context:
{context}

Question:
{question}

Answer directly. No small talk.
"""

In [6]:
# Prompt object with the two placeholders the template text expects.
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=CUSTOM_PROMPT_TEMPLATE,
)

## Hugging Face InferenceClient

In [7]:
# Chat model served through the Hugging Face inference API.
HF_MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"

# The access token comes from the HF_TOKEN environment variable (None if unset);
# it is read inline so the secret is never bound to a notebook variable.
client = InferenceClient(model=HF_MODEL_ID, token=os.environ.get("HF_TOKEN"))

In [8]:
def hf_llm_call(prompt: str, *, max_tokens: int = 512, temperature: float = 0.2,
                stop=None, **kwargs) -> str:
    """Send one user message to the Hugging Face chat endpoint and return the reply text.

    Args:
        prompt: The prompt text, or any object exposing ``to_string()`` (for
            example a LangChain prompt object), which is converted to text
            before being sent.
        max_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature passed to the endpoint.
        stop: Optional stop sequence (a string) or sequences (an iterable of
            strings). Forwarded only when non-empty, so a default call sends
            exactly the same request as before this parameter existed.
        **kwargs: Any other keyword arguments are accepted and ignored so that
            callers which bind extra options do not break the call.

    Returns:
        The message content of the first choice, or ``""`` when the endpoint
        returns no text content, so the declared ``str`` return type always holds.
    """
    # Accept both plain strings and prompt objects without rebinding the argument.
    prompt_text = prompt.to_string() if hasattr(prompt, "to_string") else prompt

    request = {
        "messages": [{"role": "user", "content": prompt_text}],
        "max_tokens": max_tokens,
        "temperature": temperature,
    }
    if stop:
        # The endpoint expects a list of stop strings; normalise a lone string.
        request["stop"] = [stop] if isinstance(stop, str) else list(stop)

    response = client.chat_completion(**request)
    content = response.choices[0].message.content
    return content if content is not None else ""

In [9]:
# Wrap the plain Python function in a RunnableLambda so it can be passed as the
# `llm` argument when the QA chain is built.
llm = RunnableLambda(hf_llm_call)

## Create the RAG chain

In [10]:
# Number of chunks fetched from the vector store for each question.
TOP_K = 16
retriever = db.as_retriever(search_kwargs={"k": TOP_K})

# Retrieval + generation chain using the custom grounded prompt; the retrieved
# documents are returned alongside the answer so the sources can be listed.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs={"prompt": prompt},
    return_source_documents=True,
)

In [11]:
# Question to run through the chain; the chain reads it from the "query" key.
query = "What are the recent research trends in Artificial Intelligence?"
chain_inputs = {"query": query}
response = qa_chain.invoke(chain_inputs)

In [12]:
# The generated answer is read from the "result" key of the chain output.
answer_text = response["result"]
print("ANSWER:\n", answer_text)

ANSWER:
 The recent research trends in Artificial Intelligence are indicated by the dramatic increase in the number of publications on AI in information engineering from 2014 to 2024, as shown in Figure 1. The number of papers increased from 7 in 2014 to 200 in 2024, a nearly 20-fold increase, with an explosive growth phase in 2019-2024 and a deepening research focus.


In [13]:
# One line per retrieved document: the "source" entry of its metadata
# (None is printed when a document has no such entry).
print("\nSOURCES:")
for source_doc in response["source_documents"]:
    source_name = source_doc.metadata.get("source")
    print(source_name)


SOURCES:
AI computer science 4.pdf
AI computer science 1.pdf
AI computer science 1.pdf
AI computer science 4.pdf
3727353.3727478.pdf
AI computer science 5.pdf
AI computer science 5.pdf
AI computer science 5.pdf
AI computer science 5.pdf
AI computer science 5.pdf
AI computer science 5.pdf
AI computer science 4.pdf
AI computer science 4.pdf
AI computer science 5.pdf
AI computer science 5.pdf
AI computer science 5.pdf
