Point to the correct directory

In [4]:
import sys
import os
from dotenv import load_dotenv

# Get the absolute path to the project root (the parent of notebooks folder)
project_root = os.path.abspath(os.path.join(os.getcwd(), "..", ".."))

# Add it to sys.path so Python can find 'src'
if project_root not in sys.path:
    sys.path.insert(0, project_root)

# Now you can import
from src.services.document_operations import DocumentOperations

In [5]:
from src.config import BASE_URL

load_dotenv()

True

Upload Doc

In [7]:
doc_ops = DocumentOperations(
    base_url= BASE_URL,
    client= os.getenv("CLIENT_ID"), # Client ID
    api_key= os.getenv("API_KEY") # API Key
)

doc_id = doc_ops.upload_doc(r"C:\Users\mpalacios\OneDrive - Sompo\Documents\Project Repo\trial_project\data\input\HDFC-Life-Guaranteed-Income-Insurance-Plan-Policy-Bond.pdf")
print(doc_id)

603fa735-c0e0-4f72-ae09-688f223a7201


Get Doc

In [4]:
doc_content = doc_ops.get_doc(doc_id)


Get Doc content as plain text

In [5]:
doc_content_text = doc_ops.extract_text(doc_content)
print(doc_content_text)

HDFC Life Guaranteed Income Insurance Plan – Terms and Conditions  
                      An individual non -participating, non -linked savings life insurance plan  
 Page 1 of 37   
Part A  
<<01 August  2025>> 
<<Policyholder’s Name>>   
<<Policyholder’s Address>>  
<<Policyholder’s Contact Number>>  
 
Dear <<Policyholder’s Name>>,  
 
Sub: Your Policy no. <<>>  
We are glad to inform you that your proposal has been accepted and the HDFC Life Guaranteed Income 
Insurance Plan  (“Policy”) being this Policy, has been issued. We have made every effort to design your Policy 
in a simple format. We have highlighted items of importance so that you may recognise them easily.  
 
Policy document:  
As an evidence of the insurance contract  between HDFC Life Insurance Company Limited and you, the Policy 
is enclosed herewith. Please preserve this document safely and also inform your Nominees about the same. A 
copy of your proposal form submitted by you is enclosed for your information and r

Get Doc content as a paginated JSON

In [6]:
doc_content_json = doc_ops.extract_text_json(doc_content)
print(doc_content_json)

[
  {
    "page_number": 1,
    "page_content": "HDFC Life Guaranteed Income Insurance Plan – Terms and Conditions  \n                      An individual non -participating, non -linked savings life insurance plan  \n Page 1 of 37   \nPart A  \n<<01 August  2025>> \n<<Policyholder’s Name>>   \n<<Policyholder’s Address>>  \n<<Policyholder’s Contact Number>>  \n \nDear <<Policyholder’s Name>>,  \n \nSub: Your Policy no. <<>>  \nWe are glad to inform you that your proposal has been accepted and the HDFC Life Guaranteed Income \nInsurance Plan  (“Policy”) being this Policy, has been issued. We have made every effort to design your Policy \nin a simple format. We have highlighted items of importance so that you may recognise them easily.  \n \nPolicy document:  \nAs an evidence of the insurance contract  between HDFC Life Insurance Company Limited and you, the Policy \nis enclosed herewith. Please preserve this document safely and also inform your Nominees about the same. A \ncopy of your p

In [7]:
doc_tables_json = doc_ops.extract_tables(doc_content)
print(doc_tables_json)

[{'page_number': 4, 'table': [['Name of the Policy', 'HDFC Life Guaranteed Income Insurance Plan', None, None], ['UIN No. of the Product', 'Guaranteed', None, None], ['Policy Number', 'Income\nInsurance Plan', None, None], ['Date of the Proposal', '', None, None], ['Policy Commencement Date', '', None, None], ['Risk Commencement Date', '', None, None], ['Basic Sum Assured/Sum Assured on\nMaturity', '', None, None], ['Auto Vesting if Life Assured is minor', '', None, None], ['Deferment Period:', '', None, None], ['Policy Term :', '', None, None], ['Policy Maturity Date:', '', None, None], ['Premium Amount:', '', None, None], ['Frequency of Premium Payment:', '', None, None], [']\nDue Date of Premium Payment:', '', None, None], ['Premium Payment Term:', '', None, None], ['Date of Last Installment Premium:', '', None, None], ['Guaranteed Income (GI):', '', None, None], ['Payout Timing:', '', None, None], ['GI Installment:', '', None, None], ['Payout Frequency:', '', None, None], ['Payout 

In [None]:
results = doc_ops.semantic_search(
    query="Theft Coverage",
    document_ids=[str(doc_id)]
)

print(results)

For Excels

In [None]:
excel_id = doc_ops.upload_doc(r"C:\Users\mpalacios\OneDrive - Sompo\Documents\Project Repo\trial_project\data\input\Inventory-Records-Sample-Data.xlsx")
excel_content = doc_ops.get_doc(excel_id)
print(excel_content)

b'PK\x03\x04\x14\x00\x06\x00\x08\x00\x00\x00!\x00b\xee\x9dh^\x01\x00\x00\x90\x04\x00\x00\x13\x00\x08\x02[Content_Types].xml \xa2\x04\x02(\xa0\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0

In [9]:
excel_wb = doc_ops.load_workbook_from_bytes(excel_content)
excel_sheet = doc_ops.get_sheet(excel_wb, sheet_name= "Inventory Records Data")
excel_df = doc_ops.convert_sheet_to_dataframe(excel_sheet)

excel_df.head()

Unnamed: 0,None,None.1,None.2,None.3,None.4,None.5,None.6,None.7,None.8
0,,Excel Sample Data,,,,,,,
1,,,,,,,,,
2,,Inventory Records Data,,,,,,,
3,,,,,,,,,
4,,Product ID,Product Name,Opening \nStock,Purchase/\nStock in,Number of \nUnits Sold,Hand-In-\nStock,Cost Price \nPer Unit (USD),Cost Price\nTotal (USD)
