In [1]:
import re
import fitz 

In [2]:
class Node:
    """One element of a doubly linked list.

    Attributes:
        data: The payload stored in this node.
        prev: The preceding node, or ``None`` when there is none.
        next: The following node, or ``None`` when there is none.
    """

    def __init__(self, data):
        # A freshly created node is detached: both links start out empty.
        self.next = None
        self.prev = None
        self.data = data

In [3]:
class DoublyLinkedList:
    """Doubly linked list that keeps payloads in insertion order.

    Besides the printing helpers ``traverse_forward`` / ``traverse_backward``,
    the list supports ``iter(lst)`` and ``reversed(lst)`` so callers can read
    the stored data without going through stdout.

    Attributes:
        head: First node of the list, or ``None`` when the list is empty.
        tail: Last node of the list, or ``None`` when the list is empty.
    """

    def __init__(self):
        self.head = None
        self.tail = None

    def __iter__(self):
        """Yield each node's ``data`` from ``head`` to ``tail``."""
        current = self.head
        while current:
            yield current.data
            current = current.next

    def __reversed__(self):
        """Yield each node's ``data`` from ``tail`` back to ``head``."""
        current = self.tail
        while current:
            yield current.data
            current = current.prev

    def append(self, data):
        """Wrap ``data`` in a new ``Node`` and attach it after the current tail."""
        new_node = Node(data)
        if self.tail:
            self.tail.next = new_node
            new_node.prev = self.tail
            self.tail = new_node
        else:
            # Empty list: the single node is both head and tail.
            self.head = self.tail = new_node

    @staticmethod
    def _print_node(data):
        """Print one payload framed by the start/end banner lines."""
        print("----------- Node Start -----------")
        print(data)
        print("------------ Node End ------------\n")

    def traverse_forward(self):
        """Print every payload from head to tail."""
        for data in self:
            self._print_node(data)

    def traverse_backward(self):
        """Print every payload from tail to head."""
        for data in reversed(self):
            self._print_node(data)


In [4]:
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: Path of the PDF file, passed straight to ``fitz.open``.

    Returns:
        A single string made of each page's ``get_text()`` result joined
        without any separator (an empty string when there are no pages).
    """
    # The context manager closes the document even if a page fails to extract,
    # so the file handle is not left open after the call.
    with fitz.open(pdf_path) as doc:
        return "".join(page.get_text() for page in doc)


In [5]:
def split_into_paragraphs(text):
    """Split ``text`` into trimmed, non-empty paragraphs.

    A paragraph boundary is a newline, optional whitespace, then another
    newline. Chunks that are empty after stripping are dropped.

    Args:
        text: The string to split.

    Returns:
        List of paragraph strings with surrounding whitespace removed.
    """
    kept = []
    for chunk in re.split(r"\n\s*\n", text):
        trimmed = chunk.strip()
        if trimmed:
            kept.append(trimmed)
    return kept


In [6]:
def pdf_to_doubly_linked_list(pdf_path):
    """Build a ``DoublyLinkedList`` holding one paragraph of the PDF per node.

    Args:
        pdf_path: Path of the PDF, forwarded to ``extract_text_from_pdf``.

    Returns:
        A ``DoublyLinkedList`` whose nodes hold the paragraphs in the order
        ``split_into_paragraphs`` returned them.
    """
    chunks = split_into_paragraphs(extract_text_from_pdf(pdf_path))

    linked = DoublyLinkedList()
    for chunk in chunks:
        linked.append(chunk)
    return linked


In [7]:
# Demo cell: load one PDF into a doubly linked list and print it in both directions.
if __name__ == "__main__":
    # NOTE(review): absolute, machine-specific Windows path — this cell only
    # works where this exact file exists; consider a configurable data directory.
    pdf_path = r"C:\Users\SNEHA\Downloads\CompanyLease Vehicle FAQ.pdf"

    # One node per paragraph of the PDF's extracted text.
    doubly_linked_list = pdf_to_doubly_linked_list(pdf_path)

    # Print every paragraph from head to tail ...
    print("Traversing forward:")
    doubly_linked_list.traverse_forward()

    # ... and then again from tail to head.
    print("\nTraversing backward:")
    doubly_linked_list.traverse_backward()


Traversing forward:
----------- Node Start -----------
Reliance Policy: RJ-15-V1-05 
        Company Lease Vehicle Policy
------------ Node End ------------

----------- Node Start -----------
Reliance Jio-Internal                                                   Page 1 of 5
------------ Node End ------------

----------- Node Start -----------
Frequently Asked Questions: Company Lease Vehicle Policy
------------ Node End ------------

----------- Node Start -----------
1. What is my eligibility to apply for the vehicle? 
As per Policy, EMI value shall be limited up to 50% of the Residual Choice Pay at the time of 
application & CIBIL score should be greater than 700. 
2. Whom should I contact, to buy a vehicle under the new CLV plan and how do I apply for the 
vehicle? 
An employee needs to touch base with Vehicle Leasing Company for the necessary information and 
also to obtain quotes for the vehicle selected.  
As a second step, you are required to apply through ESS as per the quot

In [8]:
from qdrant_client import QdrantClient
from qdrant_client.models import PointStruct, VectorParams, Distance
from sentence_transformers import SentenceTransformer

  from .autonotebook import tqdm as notebook_tqdm


In [9]:
def store_paragraphs_in_qdrant(
    paragraphs,
    collection_name="pdf_paragraphs",
    host="localhost",
    port=6333,
    model_name='all-MiniLM-L6-v2',
):
    """Embed paragraphs and upsert them into a Qdrant collection.

    The collection is (re)created with ``recreate_collection`` on every call,
    using cosine distance and a vector size taken from the embedding model, so
    the size always matches ``model_name``.
    NOTE(review): ``recreate_collection`` presumably discards an existing
    collection of the same name — confirm against the installed qdrant-client.

    Args:
        paragraphs: Iterable of paragraph strings; it is materialised into a
            list, so generators are accepted.
        collection_name: Name of the Qdrant collection to write to.
        host: Qdrant server host.
        port: Qdrant server port.
        model_name: SentenceTransformer model used to embed the paragraphs.

    Returns:
        None. Each stored point has ``id`` equal to the paragraph's position
        and a payload of ``{"text": ..., "position": ...}``.
    """
    # Materialise once: the paragraphs are both encoded and zipped below.
    paragraphs = list(paragraphs)

    client = QdrantClient(host=host, port=port)  # Or use cloud API key and URL
    model = SentenceTransformer(model_name)

    client.recreate_collection(
        collection_name=collection_name,
        vectors_config=VectorParams(
            # Ask the model for its embedding width instead of hard-coding 384.
            size=model.get_sentence_embedding_dimension(),
            distance=Distance.COSINE,
        ),
    )

    if not paragraphs:
        # Nothing to embed; skip the upsert rather than sending an empty batch.
        print("No paragraphs to store.")
        return

    embeddings = model.encode(paragraphs)
    points = [
        PointStruct(
            id=position,
            # Hand Qdrant a plain list of floats rather than the raw embedding
            # row (assumes encode() yields rows exposing .tolist(), e.g. NumPy).
            vector=embedding.tolist(),
            payload={"text": text, "position": position},
        )
        for position, (text, embedding) in enumerate(zip(paragraphs, embeddings))
    ]
    client.upsert(collection_name=collection_name, points=points)

    print("Paragraphs stored successfully!")
