In [None]:
#pip install "qdrant-client[fastembed]>=1.8.2"

In [None]:
# Import client library
from qdrant_client import QdrantClient

# URL of the Qdrant server this notebook connects to.
QDRANT_URL = "http://localhost:6333"

# One shared client object; the later cells all go through it.
client = QdrantClient(url=QDRANT_URL)

In [None]:
DENSE_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
SPARSE_MODEL_NAME = "prithivida/Splade_PP_en_v1"

# Pick the model used for dense vectors.
client.set_model(DENSE_MODEL_NAME)
# Comment out this line to use dense vectors only.
client.set_sparse_model(SPARSE_MODEL_NAME)

In [None]:
# Start from a clean slate: drop the "startups" collection if it already exists.
startups_already_exists = client.collection_exists("startups")
if startups_already_exists:
    client.delete_collection("startups")

In [None]:
# Create the "startups" collection only when it is missing, using the vector
# parameters the client derives from the configured fastembed models.
startups_missing = not client.collection_exists("startups")
if startups_missing:
    dense_params = client.get_fastembed_vector_params()
    # Comment out this line (and the matching argument below) to use dense vectors only.
    sparse_params = client.get_fastembed_sparse_vector_params()
    client.create_collection(
        collection_name="startups",
        vectors_config=dense_params,
        sparse_vectors_config=sparse_params,
    )

In [None]:
import json

payload_path = "startups_demo.json"
metadata = []   # one dict per record: every field except "description"
documents = []  # the "description" text of each record, in file order

# The file is read as JSON Lines: one JSON object per line.
# Decode explicitly as UTF-8 instead of relying on the platform's default
# encoding, which is not UTF-8 everywhere.
with open(payload_path, encoding="utf-8") as fd:
    for line in fd:
        # Tolerate blank lines (e.g. a trailing newline at the end of the file);
        # json.loads would raise on an empty string.
        if not line.strip():
            continue
        obj = json.loads(line)
        # Split each record: the description is the text to embed,
        # the remaining fields travel along as metadata.
        documents.append(obj.pop("description"))
        metadata.append(obj)

In [None]:
from tqdm import tqdm

# Ids are simply 0..len(documents)-1; wrapping the range in tqdm shows a
# progress bar as the ids are consumed.
point_ids = tqdm(range(len(documents)))

# Add every document together with its metadata to the "startups" collection.
client.add(
    collection_name="startups",
    documents=documents,
    metadata=metadata,
    ids=point_ids,
)

In [None]:
import json
from typing import Any, Dict, Iterable, Iterator, List

import numpy as np
from qdrant_client import models


def named_vectors(
    vectors: Iterable[Iterable[float]],
    sparse_vectors: Iterable[Dict[str, Any]],
) -> Iterator[Dict[str, Any]]:
    """Pair dense and sparse vectors into per-point named-vector dicts.

    Args:
        vectors: Dense vectors, one per point.
        sparse_vectors: One mapping per point, in the same order as ``vectors``.
            Each mapping is unpacked as keyword arguments into
            ``models.SparseVector``.

    Yields:
        One dict per point, keyed by the client's dense vector field name and,
        when a sparse model is configured, its sparse vector field name.

    Note:
        Pairing uses ``zip``, so iteration stops at the shorter of the two inputs.
    """
    # Make sure to use the same client object as previously,
    # or call `set_model` and `set_sparse_model` on the new one first,
    # so the field names match the ones the collection was created with.
    dense_vector_name = client.get_vector_field_name()
    # This is None when no sparse model has been configured on the client
    # (dense-only mode); a None key must not end up in the yielded dict.
    sparse_vector_name = client.get_sparse_vector_field_name()
    for vector, sparse_vector in zip(vectors, sparse_vectors):
        point_vectors = {dense_vector_name: vector}
        if sparse_vector_name is not None:
            point_vectors[sparse_vector_name] = models.SparseVector(**sparse_vector)
        yield point_vectors

# Precomputed dense vectors, stored in NumPy's binary format.
with open("dense_vectors.npy", "rb") as f:
    vectors = np.load(f)

# JSON inputs are decoded explicitly as UTF-8 rather than with the
# platform's default encoding.
with open("sparse_vectors.json", "r", encoding="utf-8") as f:
    sparse_vectors = json.load(f)

with open("payload.json", "r", encoding="utf-8") as f:
    payload = json.load(f)

# Upload the points in batches of 256; named_vectors lazily pairs each dense
# vector with its sparse counterpart under the client's vector field names.
client.upload_collection(
    "startups",
    vectors=named_vectors(vectors, sparse_vectors),
    payload=payload,
    batch_size=256,
)