In [1]:
# Access-control map: canonical role name -> the document "department"
# metadata values that role is allowed to retrieve.
ROLE_HIERARCHY = {
    # C-Level is granted all five department values used in this map.
    "C-Level": [
        "engineering",
        "finance",
        "hr",
        "marketing",
        "general",
    ],
    # HR is the only departmental role that also gets "general".
    "HR": ["hr", "general"],
    # The remaining roles are confined to their own department.
    "Finance": ["finance"],
    "Engineering": ["engineering"],
    "Marketing": ["marketing"],
}

In [2]:
# Lookup table from a separator-free, lower-cased spelling to the canonical
# role name. Built once at definition time instead of on every call.
_ROLE_ALIASES = {
    "clevel": "C-Level",
    "engineering": "Engineering",
    "finance": "Finance",
    "hr": "HR",
    "marketing": "Marketing",
    "general": "General",
}


def normalize_role(role: str):
    """Map a user-supplied role string to its canonical role name.

    Matching ignores case, surrounding whitespace, and any spaces, hyphens
    or underscores inside the name, so "C-Level", "c level", "C_LEVEL" and
    "clevel" all resolve to "C-Level".

    Args:
        role: Role name as typed or stored by the caller.

    Returns:
        The canonical role name, or None when the input is not a string or
        does not match any known role. Callers are expected to treat None as
        "invalid role".

    NOTE(review): "general" resolves to "General", but the ROLE_HIERARCHY
    visible in this notebook has no "General" entry, so that role is still
    rejected downstream -- confirm whether that is intended.
    """
    # Non-string input (e.g. None from a missing field) is simply "unknown"
    # rather than an AttributeError on .lower().
    if not isinstance(role, str):
        return None

    # Treat hyphens/underscores like whitespace, then drop all whitespace so
    # every separator variant collapses to the same lookup key.
    key = "".join(role.lower().replace("-", " ").replace("_", " ").split())

    return _ROLE_ALIASES.get(key)


In [3]:
import re

def normalize_query(query: str):
    """Return a lower-cased, punctuation-free, single-spaced copy of `query`.

    Every character that is neither a word character nor whitespace is
    deleted (not replaced by a space), then each run of whitespace is
    collapsed to one space and the ends are trimmed.
    """
    without_punctuation = re.sub(r"[^\w\s]", "", query.lower())
    return re.sub(r"\s+", " ", without_punctuation).strip()


In [4]:
def build_role_filter(user_role: str):
    """Build a metadata filter limiting results to the role's departments.

    Args:
        user_role: Role name in any spelling accepted by normalize_role.

    Returns:
        A dict of the form {"department": {"$in": [...]}} whose list is the
        ROLE_HIERARCHY entry for the normalized role.

    Raises:
        ValueError: If the normalized role is not a key of ROLE_HIERARCHY.
    """
    canonical_role = normalize_role(user_role)

    if canonical_role in ROLE_HIERARCHY:
        departments = ROLE_HIERARCHY[canonical_role]
        return {"department": {"$in": departments}}

    raise ValueError(f"Invalid role: {user_role}")

In [5]:
def role_based_search(query, user_role, top_k=5, oversample=2):
    """Semantic search whose hits are restricted to the caller's departments.

    The query is normalized and embedded, the `top_k * oversample` nearest
    chunks are fetched from the collection, and only chunks whose
    "department" metadata is permitted for `user_role` are kept.

    Because the department check runs after the nearest-neighbour lookup,
    fewer than `top_k` results (possibly none) are returned when the closest
    chunks belong to departments the role may not read.

    NOTE(review): passing the build_role_filter dict to the collection query
    as a metadata filter would always yield up to `top_k` permitted chunks,
    but it would also make out-of-scope questions return the role's own
    (unrelated) documents instead of nothing -- decide which is wanted.

    Args:
        query: Free-text question.
        user_role: Role name in any spelling accepted by normalize_role.
        top_k: Maximum number of results to return.
        oversample: Multiplier applied to `top_k` when fetching candidates,
            leaving headroom for chunks dropped by the department check.

    Returns:
        {"documents": [[...]], "metadatas": [[...]]} -- one inner list each,
        index-aligned, with at most `top_k` entries.

    Raises:
        ValueError: If `user_role` is not a known role (raised by
            build_role_filter before any embedding work is done).
    """
    # Resolve permissions first, via the shared validator, so an unknown role
    # fails fast with a clear message instead of a KeyError after encoding.
    allowed_departments = build_role_filter(user_role)["department"]["$in"]

    clean_query = normalize_query(query)
    query_embedding = model.encode([clean_query]).tolist()

    results = collection.query(
        query_embeddings=query_embedding,
        n_results=top_k * oversample,
    )

    # Results are batched per query; exactly one query was sent, hence [0].
    candidates = zip(results["documents"][0], results["metadatas"][0])

    # Deny by default: a hit with no metadata or no "department" is dropped.
    permitted = [
        (doc, meta)
        for doc, meta in candidates
        if meta is not None and meta.get("department") in allowed_departments
    ][:top_k]

    return {
        "documents": [[doc for doc, _ in permitted]],
        "metadatas": [[meta for _, meta in permitted]],
    }

In [6]:
from sentence_transformers import SentenceTransformer

# Embedding model whose .encode() role_based_search uses to vectorize queries.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
model = SentenceTransformer(EMBEDDING_MODEL_NAME)

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
import os

import chromadb

# Location of the persisted Chroma store. Set the CHROMA_DB_PATH environment
# variable to run the notebook on another machine; the fallback keeps the
# original hard-coded location so existing runs behave the same.
CHROMA_DB_PATH = os.environ.get(
    "CHROMA_DB_PATH",
    r"C:\Users\prath\OneDrive\Desktop\Infosys-project\chroma_db",
)
COLLECTION_NAME = "company_documents"

client = chromadb.PersistentClient(path=CHROMA_DB_PATH)

# Open the collection that role_based_search queries.
collection = client.get_collection(COLLECTION_NAME)

# Sanity check that the store was found and is populated.
print("Count:", collection.count())

Count: 271


In [8]:
# Engineering role, engineering topic: show the start of each returned chunk.
results = role_based_search("microservices scalability", "Engineering", top_k=3)

for snippet in (doc[:120] for doc in results["documents"][0]):
    print(snippet, "...\n")

FinSolve Technologies Engineering Document 2.4.1 Horizontal Scaling * Kubernetes Horizontal Pod Autoscaler (HPA) automat ...

FinSolve Technologies Engineering Document 2.3.3 Microservices * **Authentication Service**: Manages user identity, auth ...

FinSolve Technologies Engineering Document 2.5.4 Data Consistency * Event sourcing patterns for critical financial trans ...



In [9]:
# Finance role asking about an HR topic: report how many chunks come back.
results = role_based_search("employee leave policy", "Finance", top_k=3)

returned_documents = results["documents"][0]
print("Results returned:", len(returned_documents))


Results returned: 0


In [10]:
# C-Level role: list the department of every chunk that was returned.
results = role_based_search("employee benefits and payroll", "C-Level", top_k=3)

for department in (meta["department"] for meta in results["metadatas"][0]):
    print(department)


general
general
general


In [11]:
# Each case is (role, query, expected_min). An expected_min of 0 means the
# search must return nothing; any other value is a lower bound on the hits.
test_cases = [
    ("Finance", "leave policies", 0),
    ("HR", "leave policies", 1),
    ("Engineering", "leave policies", 0),
    ("C-Level", "employee handbook policies", 1),
]

for role, query, expected_min in test_cases:
    res = role_based_search(query, role)
    actual = len(res["documents"][0])

    # Zero expectation is strict equality; positive expectation is a minimum.
    passed = (actual == 0) if expected_min == 0 else (actual >= expected_min)

    print(f"Role: {role}")
    print(f"Query: {query}")
    print(f"Results: {actual}")
    print("PASS" if passed else "FAIL")
    print("-" * 50)


Role: Finance
Query: leave policies
Results: 0
PASS
--------------------------------------------------
Role: HR
Query: leave policies
Results: 5
PASS
--------------------------------------------------
Role: Engineering
Query: leave policies
Results: 0
PASS
--------------------------------------------------
Role: C-Level
Query: employee handbook policies
Results: 5
PASS
--------------------------------------------------
