# Practicing RAG

In [1]:
import langchain
from langchain_community.document_loaders import PyMuPDFLoader

  from .autonotebook import tqdm as notebook_tqdm


## Loading the PDF

In [2]:
file_path = "dataset/rag/Law.pdf"
loader = PyMuPDFLoader(file_path)

In [3]:
docs = loader.load()

In [4]:
docs[0].page_content

'1 \n \nTHE INDIAN PENAL CODE \n___________ \nARRANGEMENT OF SECTIONS \n__________ \nCHAPTER I \nINTRODUCTION \nPREAMBLE \nSECTIONS \n1. \nTitle and extent of operation of the Code. \n2. \nPunishment of offences committed within India. \n3. \nPunishment of offences committed beyond, but which by law may be tried within, India. \n4. \nExtension of Code to extra-territorial offences. \n5. \nCertain laws not to be affected by this Act. \nCHAPTER II \nGENERAL EXPLANATIONS \n6. \nDefinitions in the Code to be understood subject to exceptions. \n7. \nSense of expression once explained. \n8. \nGender. \n9. \nNumber. \n10. “Man”. “Woman”. \n11. “Person”. \n12. “Public”. \n13. [Omitted.]. \n14. “Servant of Government”. \n15. [Repealed.]. \n16. [Repealed.]. \n17. “Government”. \n18. “India”. \n19. “Judge”. \n20. “Court of Justice”. \n21. “Public servant”. \n22. “Moveable property”. \n23. “Wrongful gain”. \n“Wrongful loss”. \nGaining wrongfully/ Losing wrongfully. \n24. “Dishonestly”. \n25. “Frau

## Chunking

### Trying Sliding Window by increasing the Chunk Size and Chunk overlap


In [5]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

slider = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=50
)

slide_chunks = slider.split_documents(docs)

In [6]:
slide_chunks[0].page_content

'1 \n \nTHE INDIAN PENAL CODE \n___________ \nARRANGEMENT OF SECTIONS \n__________ \nCHAPTER I'

In [7]:
slide_chunks[1].page_content

'ARRANGEMENT OF SECTIONS \n__________ \nCHAPTER I \nINTRODUCTION \nPREAMBLE \nSECTIONS \n1.'

## Saving into Vector DB - Milvus

In [8]:
from langchain_milvus import Milvus
from langchain_huggingface import HuggingFaceEmbeddings


In [9]:
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [10]:
embeddings.embed_documents

<bound method HuggingFaceEmbeddings.embed_documents of HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, query_encode_kwargs={}, multi_process=False, show_progress=False)>

In [11]:
URI = "./milvus_example.db"

vector_store = Milvus(
    embedding_function=embeddings,
    connection_args={"uri": URI},
    index_params={"index_type": "FLAT", "metric_type": "L2"},
)

ConnectionConfigException: <ConnectionConfigException: (code=1, message=milvus-lite is required for local database connections. Please install it with: pip install pymilvus[milvus_lite])>

In [12]:
from pymilvus import connections
print("Milvus Lite installed successfully!")

Milvus Lite installed successfully!


In [14]:
URI = "milvus_example.db"

vector_store = Milvus(
    embedding_function=embeddings,
    connection_args={"uri": URI},
    index_params={"index_type": "FLAT", "metric_type": "L2"},
)

ConnectionConfigException: <ConnectionConfigException: (code=1, message=milvus-lite is required for local database connections. Please install it with: pip install pymilvus[milvus_lite])>