In [3]:
import os
import glob
from dotenv import load_dotenv, find_dotenv
import gradio as gr

from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import CharacterTextSplitter


In [4]:
# Load environment variables from the .env file located by find_dotenv().
# override=True lets values from the file replace variables that are already
# set in the process environment.
load_dotenv(find_dotenv(), override=True)

True

In [None]:
# Each sub-folder of knowledge-base/ is treated as one document category: the
# folder name becomes the "doc_type" metadata tag of every document loaded from it.
#
# Only directories are kept (a stray file at the top level would otherwise be
# handed to DirectoryLoader as if it were a folder), and the list is sorted so
# that the document order -- and therefore positional look-ups such as
# documents[3] in later cells -- does not depend on filesystem enumeration order.
folders = sorted(path for path in glob.glob("knowledge-base/*") if os.path.isdir(path))

text_loader_kwargs = {'encoding': 'utf-8'}
# If that doesn't work, some Windows users might need to uncomment the next line instead
# text_loader_kwargs={'autodetect_encoding': True}

documents = []
for folder in folders:
    doc_type = os.path.basename(folder)
    loader = DirectoryLoader(folder, glob="**/*.md", loader_cls=TextLoader, loader_kwargs=text_loader_kwargs)
    # Order the files of each folder by their source path for the same
    # reproducibility reason as the folder sort above.
    folder_docs = sorted(loader.load(), key=lambda loaded: str(loaded.metadata.get("source", "")))
    for doc in folder_docs:
        # Tag every document with its category so it can be filtered on later.
        doc.metadata["doc_type"] = doc_type
    documents.extend(folder_docs)


In [6]:
# Sanity check: how many documents were loaded from the knowledge base.
len(documents)

31

In [10]:
# Spot-check the raw text of a single loaded document.
documents[3].page_content

"# Contract with Apex Reinsurance for Rellm: AI-Powered Enterprise Reinsurance Solution\n\n## Terms\n\n1. **Parties Involved**: This contract (“Agreement”) is entered into between Insurellm, Inc. (“Provider”) and Apex Reinsurance (“Client”) on this [Date].\n\n2. **Scope of Services**: Provider agrees to deliver the Rellm solution, which includes AI-driven analytics, seamless integrations, risk assessment modules, customizable dashboards, regulatory compliance tools, and client and broker portals as described in the product summary.\n\n3. **Payment Terms**: Client shall pay the Provider the sum of $10,000 per month for the duration of this agreement. Payments are due on the first day of each month and will be processed via electronic funds transfer.\n\n4. **Contract Duration**: This Agreement shall commence on [Start Date] and shall remain in effect for a period of twelve (12) months unless terminated earlier in accordance with the terms set forth herein.\n\n## Renewal\n\n1. **Automatic

In [11]:
# Break every loaded document into overlapping chunks.
# The requested size is not a hard cap: the recorded run of this cell logged
# "Created a chunk of size 1088, which is longer than the specified 1000".
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200

text_splitter = CharacterTextSplitter(
    chunk_overlap=CHUNK_OVERLAP,
    chunk_size=CHUNK_SIZE,
)
chunks = text_splitter.split_documents(documents)

Created a chunk of size 1088, which is longer than the specified 1000


In [12]:
# Sanity check: how many chunks the splitter produced.
len(chunks)

123

In [15]:
# Spot-check the text of a single chunk.
chunks[120].page_content

'### Regulatory Compliance Tools\nRellm includes built-in compliance tracking features to help organizations meet local and international regulatory standards. This ensures that reinsurance practices remain transparent and accountable.\n\n### Client and Broker Portals\nRellm offers dedicated portals for both clients and brokers, facilitating real-time communication and documentation sharing. This strengthens partnerships and drives operational excellence across the board.\n\n## Pricing\n\nInsurellm offers flexible pricing plans for Rellm to cater to various business needs:\n\n- **Basic Plan**: $5,000/month\n  - Includes access to core features and standard integrations.\n  \n- **Professional Plan**: $10,000/month\n  - Includes all features, advanced integrations, and priority customer support.\n  \n- **Enterprise Plan**: Custom pricing\n  - Tailored solutions with personalized features, extensive integrations, and dedicated account management.'

In [16]:
# Collect the distinct "doc_type" tags present on the chunks.
doc_types = {chunk.metadata['doc_type'] for chunk in chunks}
# A set of strings has no stable iteration order between kernel runs, so sort
# before joining to keep the printed summary reproducible.
print(f"Document types found: {', '.join(sorted(doc_types))}")

Document types found: products, employees, company, contracts


In [20]:
# Keyword scan: print every chunk whose text mentions "CEO", each followed by
# a dashed rule so the individual hits are easy to tell apart.
separator = "-" * 40
for chunk in chunks:
    if 'CEO' not in chunk.page_content:
        continue
    print(chunk)
    print(separator)

page_content='3. **Regular Updates:** Insurellm will offer ongoing updates and enhancements to the Homellm platform, including new features and security improvements.

4. **Feedback Implementation:** Insurellm will actively solicit feedback from GreenValley Insurance to ensure Homellm continues to meet their evolving needs.

---

**Signatures:**

_________________________________  
**[Name]**  
**Title**: CEO  
**Insurellm, Inc.**

_________________________________  
**[Name]**  
**Title**: COO  
**GreenValley Insurance, LLC**  

---

This agreement represents the complete understanding of both parties regarding the use of the Homellm product and supersedes any prior agreements or communications.' metadata={'source': 'knowledge-base\\contracts\\Contract with GreenValley Insurance for Homellm.md', 'doc_type': 'contracts'}
----------------------------------------
page_content='## Support

1. **Customer Support**: Velocity Auto Solutions will have access to Insurellm’s customer support te