### Export all dependencies using in this notebook

In [7]:
import os
from pinecone import Pinecone
from dotenv import load_dotenv
from llama_index.llms.gemini import Gemini
from llama_index.vector_stores.pinecone import PineconeVectorStore
from llama_index.embeddings.gemini import GeminiEmbedding
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core import VectorStoreIndex, Settings, SimpleDirectoryReader, PromptTemplate, get_response_synthesizer, Document
import pandas as pd

In [2]:
load_dotenv()

True

In [3]:
pinecone_client = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
pinecone_index = pinecone_client.Index("lorawan-rag")

In [4]:
llm = Gemini(api_key=os.getenv("GOOGLE_API_KEY"), model="models/gemini-1.5-flash")
embed_model = GeminiEmbedding(model_name="models/embedding-001")
Settings.llm = llm
Settings.embed_model = embed_model
Settings.chunk_size = 768

In [9]:
import nest_asyncio
nest_asyncio.apply()

documents = SimpleDirectoryReader(input_dir="./data/articles", recursive=True).load_data()
for d in documents:
    d.excluded_embed_metadata_keys = ["file_path", "file_type", "file_size", "last_modified_date"]
    d.excluded_llm_metadata_keys = ["file_path", "file_type", "file_size", "last_modified_date"]

In [10]:
documents[0]

Document(id_='056aace1-7597-4958-b1cb-d15a77a22b6e', embedding=None, metadata={'file_path': '/Users/Jorge/intership/lorawan-chatbot-rag-llm/data/articles/1-what-is-lorawan.md', 'file_name': '1-what-is-lorawan.md', 'file_size': 7078, 'creation_date': '2024-08-02', 'last_modified_date': '2024-08-02'}, excluded_embed_metadata_keys=['file_path', 'file_type', 'file_size', 'last_modified_date'], excluded_llm_metadata_keys=['file_path', 'file_type', 'file_size', 'last_modified_date'], relationships={}, text='\n\nWhat are LoRa and LoRaWAN?\n\nWelcome to the first chapter of The Things Fundamentals on LoRaWAN. In this section, you’ll learn why LoRaWAN is so awesome, hear about some great LoRaWAN use cases, and learn the difference between LoRa and LoRaWAN.', mimetype='text/plain', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n')

In [11]:
df = pd.DataFrame([d.metadata for d in documents])
df.head(20)

Unnamed: 0,file_path,file_name,file_size,creation_date,last_modified_date
0,/Users/Jorge/intership/lorawan-chatbot-rag-llm...,1-what-is-lorawan.md,7078,2024-08-02,2024-08-02
1,/Users/Jorge/intership/lorawan-chatbot-rag-llm...,1-what-is-lorawan.md,7078,2024-08-02,2024-08-02
2,/Users/Jorge/intership/lorawan-chatbot-rag-llm...,1-what-is-lorawan.md,7078,2024-08-02,2024-08-02
3,/Users/Jorge/intership/lorawan-chatbot-rag-llm...,1-what-is-lorawan.md,7078,2024-08-02,2024-08-02
4,/Users/Jorge/intership/lorawan-chatbot-rag-llm...,1-what-is-lorawan.md,7078,2024-08-02,2024-08-02
5,/Users/Jorge/intership/lorawan-chatbot-rag-llm...,1-what-is-lorawan.md,7078,2024-08-02,2024-08-02
6,/Users/Jorge/intership/lorawan-chatbot-rag-llm...,1-what-is-lorawan.md,7078,2024-08-02,2024-08-02
7,/Users/Jorge/intership/lorawan-chatbot-rag-llm...,1-what-is-lorawan.md,7078,2024-08-02,2024-08-02
8,/Users/Jorge/intership/lorawan-chatbot-rag-llm...,10-adaptive-data-rate.md,3708,2024-08-02,2024-08-02
9,/Users/Jorge/intership/lorawan-chatbot-rag-llm...,10-adaptive-data-rate.md,3708,2024-08-02,2024-08-02


In [12]:
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)

In [13]:
pipeline = IngestionPipeline(
    transformations=[SentenceSplitter(chunk_size=768, chunk_overlap=15), embed_model],
    vector_store=vector_store,
)
pipeline.run(documents=documents)

Upserted vectors: 100%|██████████| 648/648 [00:05<00:00, 118.17it/s]


[TextNode(id_='af57ce5e-4b65-4305-9933-e7b407d9ba38', embedding=[0.04405704, -0.036303032, -0.017672429, -0.03175469, 0.058192916, 0.0055493326, -0.01782808, 0.018981304, 0.035656992, 0.03873263, 0.043827917, 0.03734909, -0.08133292, -0.016846241, -0.005989481, -0.083714634, 0.05957442, 0.03080101, 0.006789348, 0.0013408107, -0.030987944, -0.016526744, 0.023739595, -0.0033317881, 0.002218888, 0.04799894, -0.0033085183, -0.017440652, -0.022129854, 0.05428385, -0.013358625, -0.0009762259, -0.034260675, 0.024907116, 0.013114852, -0.02157839, -0.014050344, 0.04116639, -0.014403325, 0.0005632061, 0.024600985, -0.03102043, -0.044416077, 0.01375851, -0.029643402, -0.01085584, -0.018854631, 0.008636527, 0.035570614, -0.08132082, 0.054773685, -0.028442256, 0.016452884, -0.017158434, -0.019497398, -0.024605097, 0.044946145, -0.04360759, -0.002294879, -0.021667393, -0.016280683, 0.029593294, 0.008471722, 0.028156538, -0.0023933987, -0.059900768, -0.0469232, 0.0144814, 0.0038171653, 0.019017624, -

In [14]:
index = VectorStoreIndex.from_vector_store(vector_store=vector_store)
retriever = VectorIndexRetriever(index=index, similarity_top_k=5)
query_engine = RetrieverQueryEngine(retriever=retriever)

In [16]:
prompt_template = """Text: {context}
    Question: {question}
    you are a chatbot designed to assist the users.
    Answer only the questions based on the text provided. If the text doesn't contain the answer,
    reply that the answer is not available.
    keep the answers precise to the question"""

qa_template = PromptTemplate(template=prompt_template)
chain_type_kwargs = { "prompt" : qa_template }
response_synthesizer = get_response_synthesizer(
    llm=llm, text_qa_template=qa_template, response_mode="compact"
)
query_engine = RetrieverQueryEngine(
    retriever=retriever,
    response_synthesizer=response_synthesizer,
)

In [17]:
response = query_engine.query("What is LoRaWAN?")
print(response)

UnauthorizedException: (401)
Reason: Unauthorized
HTTP response headers: HTTPHeaderDict({'Date': 'Mon, 05 Aug 2024 22:07:29 GMT', 'Content-Type': 'text/plain', 'Content-Length': '12', 'Connection': 'keep-alive', 'x-pinecone-auth-rejected-reason': 'Malformed domain', 'www-authenticate': 'Malformed domain', 'server': 'envoy'})
HTTP response body: Unauthorized
