Modern AI Pro: Building Auth Levels on RAG Queries

In [106]:
# Sample corpus for XYZ Company. The two lists below are parallel:
# metadata[i] describes documents[i], so keep them in the same order.
documents = [
    (
        "XYZ Company is a leading provider of innovative technology solutions "
        "with a focus on AI and ML. The company operates in over 30 countries "
        "and employs over 5,000 professionals."
    ),
    (
        "Internally, XYZ Company emphasizes employee development, with over 20% "
        "of the workforce participating in ongoing training each year. "
        "Recent internal reports also highlight a push towards sustainability goals."
    ),
    (
        "XYZ is focusing on reducing its carbon footprint by 30% over the next "
        "five years as part of its sustainability push. "
        "This is a confidential strategy document outlining key initiatives."
    ),
    # NOTE(review): presumably a planted dummy secret used to demonstrate
    # access filtering; never store real credentials in a notebook.
    "Office server password is highly secured and that is Password@133",
]

# Per-document metadata: required clearance (auth_level), category and year.
metadata = [
    dict(auth_level=1, category="Public Summary", year="2023"),
    dict(auth_level=2, category="Internal Report", year="2023"),
    dict(auth_level=3, category="Confidential Strategy", year="2023"),
    dict(auth_level=3, category="Confidential Strategy", year="2023"),
]


In [None]:
# Embed every document and materialise the result as a float32 NumPy array,
# the dtype used when the vectors are later handed to the FAISS index.
documents_vectors = np.array(model.encode(documents)).astype('float32')

<class 'numpy.ndarray'>


In [108]:
# Flat L2 index sized to the embedding dimensionality (columns of the matrix).
index = faiss.IndexFlatL2(documents_vectors.shape[1])
index.add(documents_vectors)

# Search results are row positions that get used to look up `documents` and
# `metadata`, so fail fast if the three collections ever drift out of step.
assert index.ntotal == len(documents) == len(metadata), (
    "index, documents and metadata must contain the same number of entries"
)

In [109]:
def vector_search_with_rbac(query_text, user_auth_level, k=3):
    """Return up to ``k`` nearest documents the caller is cleared to read.

    The query is embedded with the module-level ``model`` and searched
    against the module-level FAISS ``index``. A hit is kept only when its
    ``metadata[...]["auth_level"]`` is less than or equal to
    ``user_auth_level``.

    Args:
        query_text: Text to embed and search for.
        user_auth_level: Non-negative integer clearance of the caller.
        k: Maximum number of authorised results to return (default 3).

    Returns:
        A list of at most ``k`` dicts with keys ``"documents"`` (the text)
        and ``"metadata"`` (its metadata dict), in the order returned by
        the index search.

    Raises:
        ValueError: If ``user_auth_level`` is not a non-negative integer,
            or ``k`` is not a positive integer.
    """
    if not isinstance(user_auth_level, int) or user_auth_level < 0:
        raise ValueError("Invalid user_auth_level. Must be a non-negative integer")
    if not isinstance(k, int) or k < 1:
        raise ValueError("Invalid k. Must be a positive integer")

    total = index.ntotal
    if total == 0:
        # Nothing indexed, so there is nothing to search or leak.
        return []

    query_vector = model.encode([query_text]).astype('float32')

    # Filtering happens after the similarity search, so a fixed top-k window
    # can be filled with documents the caller may not see. Widen the window
    # until k authorised hits are found or the whole index has been scanned.
    fetch = min(k, total)
    while True:
        _, neighbours = index.search(query_vector, fetch)

        results = []
        for raw_idx in neighbours[0]:
            idx = int(raw_idx)
            # FAISS pads missing neighbours with -1; without this guard the
            # negative index would silently resolve to the last document.
            if idx < 0:
                continue
            if metadata[idx]["auth_level"] <= user_auth_level:
                results.append({
                    "documents": documents[idx],
                    "metadata": metadata[idx],
                })

        if len(results) >= k or fetch >= total:
            return results[:k]
        fetch = min(fetch * 2, total)

In [110]:
# Run one query at each clearance level to compare what each user can retrieve.
# NOTE(review): "Comapny" looks like a typo for "Company"; left unchanged here
# because editing the query text changes its embedding.
query_text = "Comapny server password"

for user_label, user_auth_level in (
    ("A", 1),  # Public user
    ("B", 2),  # Internal user
    ("C", 3),  # Confidential user
):
    results = vector_search_with_rbac(query_text, user_auth_level)
    print(f"Results for User {user_label} (auth_level {user_auth_level}):", results)


Results for User A (auth_level 1): [{'documents': 'XYZ Company is a leading provider of innovative technology solutions with a focus on AI and ML. The company operates in over 30 countries and employs over 5,000 professionals.', 'metadata': {'auth_level': 1, 'category': 'Public Summary', 'year': '2023'}}]
Results for User B (auth_level 2): [{'documents': 'XYZ Company is a leading provider of innovative technology solutions with a focus on AI and ML. The company operates in over 30 countries and employs over 5,000 professionals.', 'metadata': {'auth_level': 1, 'category': 'Public Summary', 'year': '2023'}}, {'documents': 'Internally, XYZ Company emphasizes employee development, with over 20% of the workforce participating in ongoing training each year. Recent internal reports also highlight a push towards sustainability goals.', 'metadata': {'auth_level': 2, 'category': 'Internal Report', 'year': '2023'}}]
Results for User C (auth_level 3): [{'documents': 'Office server password is high