In [20]:
import sys

# Put the parent and grandparent directories on the import path
# (presumably where the project-local `utils` module lives — confirm).
sys.path.extend(['..', '../..'])

In [21]:
from os.path import join as pjoin

from qdrant_client import QdrantClient

In [22]:
import os

# Name of the data sub-directory to ingest; also used as the Qdrant collection name.
collection_type = 'general'

# Location of the project checkout. Override with the RBAC_PROJECT_ROOT
# environment variable to run the notebook on another machine; the default is
# the path the notebook was originally authored with.
project_root = os.environ.get(
    'RBAC_PROJECT_ROOT',
    '/home/quamer23nasim38/Role-Based-Access-Control-of-Qdrant-Vector-Database',
)
root = pjoin(project_root, 'data')
data_path = pjoin(root, collection_type)

# Text splitting parameters (passed to RecursiveCharacterTextSplitter below).
chunk_size = 500
chunk_overlap = 50
# Maximum number of rows upserted by create_new_collection.
batch_size = 4000
# Dimensionality of the vectors stored in the collection.
vector_size = 300

# Key handed to generate_jwt() together with the token claims.
# Prefer supplying it through the QDRANT_API_KEY environment variable; the
# literal fallback only keeps the demo runnable and should not be a real secret.
api = os.environ.get('QDRANT_API_KEY', 'jhvfegfeboihf313fekfgejbv') # 'your_api_key'
host = 'localhost'
port = 6333
url = f'http://{host}:{port}'

collection_name = collection_type

embedding_model_path = pjoin(project_root, 'embedding_model', 'cc.en.300.bin')

In [23]:
from langchain_community.document_loaders import DirectoryLoader
# from langchain_community.document_loaders import TextLoader
from langchain_community.document_loaders.pdf import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter configuration: character-count based chunks with a small overlap
text_splitter = RecursiveCharacterTextSplitter(
    length_function=len,
    is_separator_regex=False,
    chunk_overlap=chunk_overlap,
    chunk_size=chunk_size,
)

# Load the files under data_path with PyPDFLoader and split them in one step
loader = DirectoryLoader(data_path, loader_cls=PyPDFLoader)
docs = loader.load_and_split(text_splitter=text_splitter)

In [24]:
import fasttext as ft

# Load the fastText model from disk; it is used below to embed the document
# chunks and, later, the text search queries.
embed_model = ft.load_model(embedding_model_path)



In [25]:
from utils import generate_embeddings_from_fastext_model
# Embed the chunks. create_new_collection later reads the 'id', 'payload' and
# 'embeddings' columns of the returned frame.
df = generate_embeddings_from_fastext_model(docs, embed_model)

  0%|          | 0/16 [00:00<?, ?it/s]

# 1. Without any token, though RBAC is enabled

In [7]:
from qdrant_client import QdrantClient

# No api_key is passed, so the request carries no credentials.
client = QdrantClient(url=url)

# Expected to fail: the recorded run shows the server answering 401 (Unauthorized).
client.get_collections()

UnexpectedResponse: Unexpected Response: 401 (Unauthorized)
Raw response content:
b'Must provide an API key or an Authorization bearer token'

# 2. Global Read-Only Access

With global read-only access, the user can only read the resources in the cluster. They cannot create, update, or delete resources. This essentially means that the user can read all the collections available, so be careful when granting this permission.

In [8]:
from utils import generate_jwt
from utils import create_new_collection

In [9]:
import time

# Claims for a global read-only token that expires one hour from now
current_time = int(time.time())
payload = dict(access="r", exp=current_time + 3600)

jwt = generate_jwt(api, payload)

In [10]:
# Connect using the global read-only token
client = QdrantClient(url=url, api_key=jwt)

# Listing collections succeeds with this token (see the recorded output)
client.get_collections()



CollectionsResponse(collections=[])

In [11]:
client = QdrantClient(url=url, api_key=jwt)

# Attempt to delete the collection with the read-only token.
# Expected to fail: the recorded run shows 403 (Forbidden), manage access is required.
client.delete_collection(collection_name=collection_name)

UnexpectedResponse: Unexpected Response: 403 (Forbidden)
Raw response content:
b'{"status":{"error":"Forbidden: Global manage access is required"},"time":0.000023168}'

# 3. Global Manage Access

With Global Manage Access, the user can read, create, update, and delete collections in the cluster. This essentially means that the user can perform all the operations on all the collections available, so be extremely careful when granting this permission. Only grant this permission to Admins.

In [37]:
import time

current_time = int(time.time())

# Global manage access ("m"); the token stays valid for 1 hour
payload = {"access": "m"}
payload["exp"] = current_time + 3600

jwt = generate_jwt(api, payload)

In [38]:
from utils import create_new_collection

In [26]:
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance, Batch

def create_new_collection(url, jwt, collection_name, df, vector_size, batch_size, delete_prev = False, create_from_scratch = False):

    '''
    Optionally (re)create a collection in the Qdrant Vector Database and
    upsert the first batch of embeddings into it.

    Steps:
    1. Open a connection to Qdrant at `url`, authenticating with `jwt`.
    2. If `delete_prev` is True, delete the collection `collection_name`.
    3. If `create_from_scratch` is True, create the collection with
       `vector_size`-dimensional vectors and cosine distance.
    4. Upsert at most the first `batch_size` rows of `df`.
    5. Close the connection (also when any of the steps above raises).

    Args:
    url: URL of the Qdrant Vector Database
    jwt: JWT token passed to the client as its API key
    collection_name: Name of the collection
    df: Dataframe with the columns 'id', 'payload' and 'embeddings'
    vector_size: Size of the vectors, used only when the collection is created
    batch_size: Maximum number of rows of `df` that are upserted
    delete_prev: Delete the existing collection first (default False)
    create_from_scratch: Create the collection before upserting (default False)

    Returns:
    None. The client is closed before the function returns.
    '''

    # Create a QdrantClient object
    client = QdrantClient(url=url, api_key = jwt)

    # try/finally guarantees the connection is released even if the server
    # rejects one of the calls (e.g. 403 for a token without enough access)
    try:
        # Leave delete_prev False to keep the existing collection and only
        # add/update points in it
        if delete_prev:
            client.delete_collection(collection_name=collection_name)

        # Leave create_from_scratch False when the collection already exists
        if create_from_scratch:
            client.create_collection(
                collection_name=collection_name,
                vectors_config=VectorParams(size=vector_size, distance=Distance.COSINE),
            )

        # Only the first batch (at most batch_size rows) is written.
        # All three columns are converted to plain lists so that ids,
        # payloads and vectors are sliced and passed consistently.
        client.upsert(
            collection_name=collection_name,
            points=Batch(
                ids=df['id'].to_list()[:batch_size],
                payloads=df['payload'].to_list()[:batch_size],
                vectors=df['embeddings'].to_list()[:batch_size],
            ),
        )
    finally:
        # Close the QdrantClient
        client.close()

    print(f"Collection {collection_name} created and updated with the embeddings")

In [28]:
# Both flags are off: nothing is deleted or created, and the first batch_size
# rows of df are upserted into the collection, which must already exist.
create_new_collection(url, jwt, collection_name, df, vector_size, batch_size, delete_prev = False, create_from_scratch = False)

Collection general created and updated with the embeddings




# 4. Collection Specific Access

With this access, we can limit the access of the user to a specific collection only. This is the most secure way to grant access to the user. We can also limit the access to the types of documents in that collection. Let's see how we can do this.

In our Qdrant Vector Database, we have two collections, 'general' and 'financial'.

## 4.1 Read-Only Access

In [7]:
from utils import generate_jwt
from utils import create_new_collection

In [8]:
import time

current_time = int(time.time())

# Per-collection grants carried by the token.
# To grant access to the financial collection instead, enable its entry and
# comment out the 'general' one; keep both entries to grant access to both.
collection_grants = [
    {"collection": "general", "access": "r"},
    # {"collection": 'financial', "access": "r"},
]

# The token expires 1 hour from now
payload = {"exp": current_time + 3600, "access": collection_grants}

jwt = generate_jwt(api, payload)

In [9]:
# Connect with the token that only grants read access to 'general'
client = QdrantClient(url=url, api_key=jwt)

# The response is stored but not displayed in this cell
collection = client.get_collections()



In [21]:
import numpy as np
from qdrant_client.models import PointStruct

# 100 random vectors, turned into one point per row with a dummy payload
vectors = np.random.rand(100, vector_size)
random_points = [
    PointStruct(id=idx, vector=row.tolist(), payload={"color": "red", "rand_number": idx % 10})
    for idx, row in enumerate(vectors)
]

# Write attempt with the read-only token (the recorded run shows 403 Forbidden)
client.upsert(collection_name="general", points=random_points)

UnexpectedResponse: Unexpected Response: 403 (Forbidden)
Raw response content:
b'{"status":{"error":"Forbidden: Write access to collection general is required"},"time":0.000079842}'

In [23]:
# Random query vector: this cell only checks that reading 'general' is allowed,
# so the similarity scores themselves are not meaningful.
query_vector = np.random.rand(vector_size)
hits = client.search(
   collection_name="general",
   query_vector=query_vector,
   limit=5  # Return 5 closest points
)
hits

[ScoredPoint(id=25, version=0, score=0.09321264, payload={'metadata': {'page': 9, 'source': '/home/quamer23nasim38/Role-Based-Access-Control-of-Qdrant-Vector-Database/data/general/avengers-endgame-script-pdf.pdf'}, 'page_content': 'him, you put this on, and hide. (drops to his knees) I’m fine! Tony COLLAPSES.  Rhodey catches him.8'}, vector=None, shard_key=None),
 ScoredPoint(id=246, version=0, score=0.06581362, payload={'metadata': {'page': 91, 'source': '/home/quamer23nasim38/Role-Based-Access-Control-of-Qdrant-Vector-Database/data/general/avengers-endgame-script-pdf.pdf'}, 'page_content': 'TONY I got it!  There’s another way to  re-take the Tesseract, and acquire  new particles.   (to Steve) Military installation, Garden  State. Steve eyes Tony, BEGINNING TO UNDERSTAND.90'}, vector=None, shard_key=None),
 ScoredPoint(id=217, version=0, score=0.061686475, payload={'metadata': {'page': 80, 'source': '/home/quamer23nasim38/Role-Based-Access-Control-of-Qdrant-Vector-Database/data/genera

In [24]:
# Same search against 'financial', which the token does not cover.
# Expected to fail: the recorded run shows 403 (Forbidden).
query_vector = np.random.rand(vector_size)
hits = client.search(
   collection_name="financial",
   query_vector=query_vector,
   limit=5  # Return 5 closest points
)
hits

UnexpectedResponse: Unexpected Response: 403 (Forbidden)
Raw response content:
b'{"status":{"error":"Forbidden: Access to collection financial is required"},"time":7.61e-6}'

## 4.2 Read-Write Access

Here we will grant the user read-write access to the 'general' collection and, in addition, read-only access to the 'financial' collection.

In [8]:
from utils import generate_jwt
from utils import create_new_collection

In [31]:
import time

current_time = int(time.time())

payload = {
  "exp": current_time + 3600, # 1 hour
  "access": [
    # Read-write access to the 'general' collection
    {
      "collection": "general",
      "access": "rw"
    },
    # Read-only access to the 'financial' collection.
    # Remove either dictionary to drop access to that collection.
    {
      "collection": 'financial',
      "access": "r"
    }
  ]
}

jwt = generate_jwt(api, payload)

In [10]:
# Connect with the token granting 'rw' on general and 'r' on financial
client = QdrantClient(url=url, api_key=jwt)

collection = client.get_collections()
collection



CollectionsResponse(collections=[CollectionDescription(name='financial'), CollectionDescription(name='general')])

In [29]:
import numpy as np
from qdrant_client.models import PointStruct

vectors = np.random.rand(100, vector_size)

# One point per random vector; ids are the row positions 0..99
points = list(
    PointStruct(id=i, vector=vec.tolist(), payload={"color": "red", "rand_number": i % 10})
    for i, vec in enumerate(vectors)
)

# 'general' is granted "rw", so this write is accepted (see the recorded output)
client.upsert(collection_name="general", points=points)

UpdateResult(operation_id=1, status=<UpdateStatus.COMPLETED: 'completed'>)

In [30]:
import numpy as np
from qdrant_client.models import PointStruct

# Same write as above, but against 'financial', which is only granted "r".
# Expected to fail: the recorded run shows 403 (Forbidden).
vectors = np.random.rand(100, vector_size)
client.upsert(
   collection_name="financial",
   points=[
      PointStruct(
            id=idx,
            vector=vector.tolist(),
            payload={"color": "red", "rand_number": idx % 10}
      )
      for idx, vector in enumerate(vectors)
   ]
)

UnexpectedResponse: Unexpected Response: 403 (Forbidden)
Raw response content:
b'{"status":{"error":"Forbidden: Write access to collection financial is required"},"time":0.000062905}'

In [35]:
# Embed a text query with the fastText model instead of using a random vector
x = 'take the security of Qdrant code'
query_vector = embed_model.get_sentence_vector(x).tolist()

hits = client.search(
   collection_name="general",
   query_vector=query_vector,
   limit=20  # Return the 20 closest points
)
hits

[ScoredPoint(id=10, version=2, score=0.8121032, payload={'metadata': {'page': 1, 'source': '/home/quamer23nasim38/Role-Based-Access-Control-of-Qdrant-Vector-Database/data/general/security_policy.pdf'}, 'page_content': 'within the Qdrant Cloud platform, and only the necessary ports are opened on each server. All outbound connections pass through the stateless access control rules, whilst inbound connections from the internet must pass through a secure, highly-available load balancer layer, and the stateless access control firewall rules before then being routed to each server. Software security We take the security of the Qdrant code very seriously. The database is built using the Rust language - a static'}, vector=None, shard_key=None),
 ScoredPoint(id=5, version=2, score=0.7998578, payload={'metadata': {'page': 0, 'source': '/home/quamer23nasim38/Role-Based-Access-Control-of-Qdrant-Vector-Database/data/general/security_policy.pdf'}, 'page_content': 'into account the impact of company 

# 5. Document Specific Access

In [40]:
# Inspect the first chunk to see its metadata fields ('source', 'page');
# these are the fields used for the payload-based restriction below.
docs[0]

Document(page_content='Qdrant\nSecurity\nPolicy\nWe\nunderstand\nhow\ncritical\ndata\nprotection\nis,\nand\nwe\ntake\nthe\nsecurity\nof\nQdrant\ncode,\nsoftware,\nand\ncloud\nplatform\nvery\nseriously.\nIf\nyou\nbelieve\nyou\nhave\nfound\na\nsecurity\nvulnerability\nin\nQdrant,\nwe\nencourage\nyou\nto\nlet\nus\nknow\nimmediately.\nWe\nwill\ninvestigate\nall\nlegitimate\nreports\nand\ndo\nour\nbest\nto\nfix\nthe\nproblem\nfast.\nNeed\nto\nreport\na\nvulnerability?\nWe\nwould\nlike\nto\nkeep\nQdrant\nsafe\nand\nsecure\nfor\neveryone.\nPlease\nreport\nany\nissues\nor\nvulnerabilities\nto', metadata={'source': '/home/quamer23nasim38/Role-Based-Access-Control-of-Qdrant-Vector-Database/data/general/security_policy.pdf', 'page': 0})

In [73]:
import time

current_time = int(time.time())

# Payload constraint attached to the grant. In the recorded search output only
# chunks from this source file and page are returned.
allowed_payload = {
    "metadata.source": "/home/quamer23nasim38/Role-Based-Access-Control-of-Qdrant-Vector-Database/data/general/security_policy.pdf",
    "metadata.page": 1,
}
general_grant = {"collection": "general", "access": "rw", "payload": allowed_payload}

# The token expires 1 hour from now
payload = {"exp": current_time + 3600, "access": [general_grant]}

jwt = generate_jwt(api, payload)

In [75]:
# Connect with the payload-restricted token
client = QdrantClient(url=url, api_key=jwt)

x = 'take the security of Qdrant code'
query_vector = embed_model.get_sentence_vector(x).tolist()

hits = client.search(
   collection_name="general",
   query_vector=query_vector,
   limit=20  # Return the 20 closest points
)
hits

[ScoredPoint(id=10, version=2, score=0.8121032, payload={'metadata': {'page': 1, 'source': '/home/quamer23nasim38/Role-Based-Access-Control-of-Qdrant-Vector-Database/data/general/security_policy.pdf'}, 'page_content': 'within the Qdrant Cloud platform, and only the necessary ports are opened on each server. All outbound connections pass through the stateless access control rules, whilst inbound connections from the internet must pass through a secure, highly-available load balancer layer, and the stateless access control firewall rules before then being routed to each server. Software security We take the security of the Qdrant code very seriously. The database is built using the Rust language - a static'}, vector=None, shard_key=None),
 ScoredPoint(id=8, version=2, score=0.76660895, payload={'metadata': {'page': 1, 'source': '/home/quamer23nasim38/Role-Based-Access-Control-of-Qdrant-Vector-Database/data/general/security_policy.pdf'}, 'page_content': 'All servers are tested for vulnera

In [78]:
import time

current_time = int(time.time())

payload = {
  "exp": current_time + 3600, # 1 hour
  # NOTE(review): `value_exists` appears to validate the token (a point with
  # this key/value must exist in the collection) rather than filter results;
  # the recorded search below still returns security_policy.pdf chunks.
  # Confirm against the Qdrant JWT documentation.
  "value_exists": {
    "collection": "general",
    "matches": [
      { "key": "metadata.source", "value": "/home/quamer23nasim38/Role-Based-Access-Control-of-Qdrant-Vector-Database/data/general/avengers-endgame-script-pdf.pdf" }
    ]
  },
  # Unrestricted read-write access to the 'general' collection
  "access": [
    {
      "collection": "general",
      "access": "rw",
    },
  ]
}

jwt = generate_jwt(api, payload)

In [79]:
# Connect with the token that carries the `value_exists` claim
client = QdrantClient(url=url, api_key=jwt)

x = 'take the security of Qdrant code'
query_vector = embed_model.get_sentence_vector(x).tolist()

hits = client.search(
   collection_name="general",
   query_vector=query_vector,
   limit=20  # Return the 20 closest points
)
hits

[ScoredPoint(id=10, version=2, score=0.8121032, payload={'metadata': {'page': 1, 'source': '/home/quamer23nasim38/Role-Based-Access-Control-of-Qdrant-Vector-Database/data/general/security_policy.pdf'}, 'page_content': 'within the Qdrant Cloud platform, and only the necessary ports are opened on each server. All outbound connections pass through the stateless access control rules, whilst inbound connections from the internet must pass through a secure, highly-available load balancer layer, and the stateless access control firewall rules before then being routed to each server. Software security We take the security of the Qdrant code very seriously. The database is built using the Rust language - a static'}, vector=None, shard_key=None),
 ScoredPoint(id=5, version=2, score=0.7998578, payload={'metadata': {'page': 0, 'source': '/home/quamer23nasim38/Role-Based-Access-Control-of-Qdrant-Vector-Database/data/general/security_policy.pdf'}, 'page_content': 'into account the impact of company 

In [None]:
# docker run -p 6333:6333 -v /home/quamer23nasim38/Role-Based-Access-Control-of-Qdrant-Vector-Database/:/qdrant/storage -v /home/quamer23nasim38/Role-Based-Access-Control-of-Qdrant-Vector-Database/config.yaml:/qdrant/config/config.yaml qdrant/qdrant:v1.9.2