In [4]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.documents import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_chroma import Chroma
from dotenv import load_dotenv
from typing import List
import time
import os

load_dotenv()
base_url = os.getenv("BASE_URL")
api_key= os.getenv("API_KEY")
google_api_key = os.getenv("GOOGLE_API_KEY")

In [5]:
embeddings = GoogleGenerativeAIEmbeddings(
    google_api_key=google_api_key,
    model='models/embedding-001'
)

In [3]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=600,
    length_function=len,
    add_start_index=True
)

In [4]:
PATH="./DataCo Global Policy Dataset"

In [5]:
def load_pdf(filename: str) -> List[Document]:
    """Load all PDF files from a given directory."""
    PATH="./DataCo Global Policy Dataset"
    documents = []
    if filename.endswith(".pdf"):
        loader = PyPDFLoader(os.path.join(PATH, filename))
        documents.extend(loader.load())
    return documents 

In [6]:
def load_pdfs_from_directory(directory: str) -> List[Document]:
    for pdf_file in os.listdir(directory):
        documents = []
        if not pdf_file.endswith(".pdf"):
            continue
        documents.extend(load_pdf(pdf_file))
        name,_ = os.path.splitext(pdf_file)
        # Split the documents into chunks
        collection=Chroma(collection_name=name.replace(' ','-'),persist_directory='./chroma_db',embedding_function=embeddings)
        chunk_documents = text_splitter.split_documents(documents)
        collection.add_documents(chunk_documents)
        print(f"Added {len(documents)} documents to collection '{name}'")

In [None]:
# load_pdfs_from_directory(PATH)

Added 5 documents to collection 'Anti-Counterfeit and Product Authenticity Policy'
Added 2 documents to collection 'Circular Economy'
Added 2 documents to collection 'COC'
Added 4 documents to collection 'Communication and Crisis Management Policy for DataCo Global'
Added 2 documents to collection 'Continuous Improvement'
Added 2 documents to collection 'Cost Reduction'
Added 2 documents to collection 'Data Security'
Added 4 documents to collection 'DataCo Global Capacity Planning Policy'
Added 3 documents to collection 'Dataco Global Change Management Policy for Supply Chain Processes'
Added 4 documents to collection 'DataCo Global Contract Management and Negotiation Policy'
Added 4 documents to collection 'Dataco Global Order Management Policy'
Added 4 documents to collection 'Dataco Global Transportation and Logistics Policy'
Added 4 documents to collection 'DataCo Global Warehouse and Storage Policy'
Added 4 documents to collection 'Dataco Global_ Demand Forecasting and Planning Po

In [None]:
from langchain_chroma import Chroma

def retriever_tool(query:str,collection_name:str) -> str:
    db=Chroma(collection_name=collection_name,persist_directory='./chroma_db',embedding_function=embeddings)
    results=db.search(query,search_type='similarity',k=3)
    content=''
    for result in results:
        content+=result.page_content
    return content

In [7]:
result=retriever_tool("Tell me the purpose of data global adoption policy",collection_name="Circular-Economy")

In [21]:
print(result)

Dataco Global Technology Adoption Policy
Purpose
This policy establishes clear standards for the adoption and management of emerging technologies
(including Internet of Things (IoT), Blockchain, and related innovations) within Dataco Global. It
ensures all technology use aligns with company objectives, emphasizes security, safeguards data,
and supports operational excellence through measurable controls and responsibilities.
Scope
This policy applies to all employees, contractors, partners, and third-party service providers
involved in the selection, deployment, management, and operation of IoT, Blockchain, and similar
technologies within Dataco Globalʼs business processes.
1. Technology Evaluation and Approval
All new technology projects must undergo a standardized evaluation, including risk
assessment and Return on Investment (ROI) analysis, before approval.
A minimum of two formal pilot tests, each covering at least 10% of the relevant operationalScope
This policy applies to all empl