### Export all dependencies using in this notebook

In [2]:
import os
from pinecone import Pinecone
from dotenv import load_dotenv
from llama_index.llms.gemini import Gemini
from llama_index.vector_stores.pinecone import PineconeVectorStore
from llama_index.embeddings.gemini import GeminiEmbedding
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core import VectorStoreIndex, Settings, SimpleDirectoryReader, PromptTemplate, get_response_synthesizer, Document
import pandas as pd

  from tqdm.autonotebook import tqdm


In [3]:
load_dotenv()

True

In [4]:
pinecone_client = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
pinecone_index = pinecone_client.Index("lorawan-rag-app")

In [5]:
llm = Gemini(api_key=os.getenv("GOOGLE_API_KEY"), model="models/gemini-1.5-flash")
embed_model = GeminiEmbedding(model_name="models/embedding-001")
Settings.llm = llm
Settings.embed_model = embed_model
Settings.chunk_size = 768

In [6]:
import nest_asyncio
nest_asyncio.apply()

documents = SimpleDirectoryReader(input_dir="../data/articles", recursive=True).load_data()

In [7]:
documents[0]

Document(id_='2df92acb-548b-45ce-a853-8e16dabff3f6', embedding=None, metadata={'file_path': '/Users/Jorge/intership/lorawan-chatbot-rag-llm/notebooks/../data/articles/1-what-is-lorawan.md', 'file_name': '1-what-is-lorawan.md', 'file_size': 7078, 'creation_date': '2024-08-02', 'last_modified_date': '2024-08-02'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, text='\n\nWhat are LoRa and LoRaWAN?\n\nWelcome to the first chapter of The Things Fundamentals on LoRaWAN. In this section, you’ll learn why LoRaWAN is so awesome, hear about some great LoRaWAN use cases, and learn the difference between LoRa and LoRaWAN.', mimetype='text/plain', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metad

In [8]:
df = pd.DataFrame([d.metadata for d in documents])
df.head(20)

Unnamed: 0,file_path,file_name,file_size,creation_date,last_modified_date
0,/Users/Jorge/intership/lorawan-chatbot-rag-llm...,1-what-is-lorawan.md,7078,2024-08-02,2024-08-02
1,/Users/Jorge/intership/lorawan-chatbot-rag-llm...,1-what-is-lorawan.md,7078,2024-08-02,2024-08-02
2,/Users/Jorge/intership/lorawan-chatbot-rag-llm...,1-what-is-lorawan.md,7078,2024-08-02,2024-08-02
3,/Users/Jorge/intership/lorawan-chatbot-rag-llm...,1-what-is-lorawan.md,7078,2024-08-02,2024-08-02
4,/Users/Jorge/intership/lorawan-chatbot-rag-llm...,1-what-is-lorawan.md,7078,2024-08-02,2024-08-02
5,/Users/Jorge/intership/lorawan-chatbot-rag-llm...,1-what-is-lorawan.md,7078,2024-08-02,2024-08-02
6,/Users/Jorge/intership/lorawan-chatbot-rag-llm...,1-what-is-lorawan.md,7078,2024-08-02,2024-08-02
7,/Users/Jorge/intership/lorawan-chatbot-rag-llm...,1-what-is-lorawan.md,7078,2024-08-02,2024-08-02
8,/Users/Jorge/intership/lorawan-chatbot-rag-llm...,10-adaptive-data-rate.md,3708,2024-08-02,2024-08-02
9,/Users/Jorge/intership/lorawan-chatbot-rag-llm...,10-adaptive-data-rate.md,3708,2024-08-02,2024-08-02


In [9]:
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)

In [10]:
pipeline = IngestionPipeline(
    transformations=[SentenceSplitter(chunk_size=768, chunk_overlap=20), embed_model],
    vector_store=vector_store,
)
pipeline.run(documents=documents)

Upserted vectors: 100%|██████████| 338/338 [00:03<00:00, 94.61it/s] 


[TextNode(id_='aedec1ff-c8a6-4a85-91aa-a3eaabe7a00f', embedding=[0.045997035, -0.0416846, -0.018313983, -0.023120388, 0.05881126, 0.0031233605, -0.0060542566, 0.015311598, 0.025461841, 0.0389607, 0.023915997, 0.01876894, -0.08906889, -0.024515716, -0.007628807, -0.07802315, 0.063768454, 0.035091665, -0.010453027, -0.0009125741, -0.038347304, -0.023735315, 0.023413932, 0.005440682, 0.011789792, 0.046025373, -0.018153265, -0.01718932, -0.024854014, 0.05400826, -0.0063062357, 0.010419227, -0.02954922, 0.005476679, 0.010528334, -0.011441502, -0.008746806, 0.055051934, -0.002694691, -0.006326267, 0.019841379, -0.029696185, -0.05499229, 0.0079074, -0.040513042, -0.02014552, -0.027093029, 0.0060940175, 0.034724757, -0.080886886, 0.051289894, -0.030787777, 0.024208462, -0.02084023, -0.013691403, -0.018901723, 0.048855443, -0.0305717, -0.011128024, -0.023675097, -0.02554846, 0.031034932, 0.0052405153, 0.038620386, -0.005057901, -0.060310673, -0.05151496, 0.015559717, 0.0129731875, 0.017188873, 

In [10]:
index = VectorStoreIndex.from_vector_store(vector_store=vector_store)
retriever = VectorIndexRetriever(index=index, similarity_top_k=5)

In [11]:
prompt_template = (
    "You are an advanced AI assistant specialized in providing detailed and accurate information "
    "by leveraging both general knowledge and real-time data retrieval. Below is the context related "
    "to the question, followed by the specific question itself.\n"
    "Context:\n"
    "#####################################\n"
    "{context_str}\n"
    "#####################################\n"
    "Based on the above context, provide a comprehensive and well-structured answer to the following question:\n"
    "Question: {query_str}\n"
    "Answer:"
)

qa_template = PromptTemplate(template=prompt_template)
chain_type_kwargs = {"prompt": qa_template}
response_synthesizer = get_response_synthesizer(
    llm=llm, text_qa_template=qa_template, response_mode="compact"
)
query_engine = RetrieverQueryEngine(
    retriever=retriever,
    response_synthesizer=response_synthesizer,
)

In [13]:
response = query_engine.query("What is LoRa?")
print(response)

LoRa is a **long-range, low-power wireless communication technology** that operates in the unlicensed ISM bands. It is a **modulation technique** used in the LoRaWAN protocol, which stands for **Long Range Wide Area Network**. 

LoRa is known for its ability to achieve **long-range communication** (multiple kilometers) while consuming **minimal power**, making it suitable for applications where battery life is crucial. It also offers **low bandwidth** (between 250bit/s and 11kbit/s in Europe), making it ideal for transmitting small amounts of data. 

LoRa's **low cost** and **secure communication** features make it a popular choice for various applications, including **Internet of Things (IoT)** deployments. 

