In [1]:
from qdrant_client import QdrantClient, models
from qdrant_client.http.exceptions import UnexpectedResponse

In [2]:
# Connect to the Qdrant instance addressed by "localhost:6333"; the cells below
# issue all of their requests through this `client` object.
client = QdrantClient(url="localhost:6333")

In [3]:
# Create a collection.
# Only the collection name is passed here; no vector configuration is given.

client.create_collection(collection_name="my_collection")

True

In [4]:
# Demonstrate what happens when a collection name is already taken: the server
# rejects the request (HTTP 409 Conflict) and the client raises
# UnexpectedResponse. The error is caught and printed so the failure is still
# shown, but it no longer aborts a "Restart Kernel -> Run All".
try:
    client.create_collection(collection_name="my_collection")
except UnexpectedResponse as exc:
    # Same "<ExceptionType>: <message>" shape as an uncaught traceback summary.
    print(f"{type(exc).__name__}: {exc}")

UnexpectedResponse: Unexpected Response: 409 (Conflict)
Raw response content:
b'{"status":{"error":"Wrong input: Collection `my_collection` already exists!"},"time":0.000258305}'

In [5]:
# Ask the server whether a collection named "my_collection" exists.
client.collection_exists(collection_name="my_collection")

True

In [6]:
# Create another collection under a different name.
client.create_collection(collection_name="my_collection_1")

True

In [7]:
# Create one more collection; this one is deleted again further down.
client.create_collection(collection_name="my_collection_2")

True

In [9]:
# List the collections the server currently holds.
client.get_collections()

CollectionsResponse(collections=[CollectionDescription(name='my_collection_1'), CollectionDescription(name='my_collection'), CollectionDescription(name='my_collection_2')])

In [10]:
# Delete a collection.

client.delete_collection(collection_name="my_collection_2")

True

In [11]:
# List the collections again to check that "my_collection_2" is no longer there.
client.get_collections()

CollectionsResponse(collections=[CollectionDescription(name='my_collection_1'), CollectionDescription(name='my_collection')])

---

In [12]:
# Name of the collection used by the point-level examples that follow
# (upsert, retrieve, scroll, delete, count). The variable is rebound to a
# different collection later in the notebook.
collection_name = "basic_collection"

In [14]:
# Create the collection with an explicit vector configuration:
# 4-dimensional vectors compared with cosine distance.
basic_vector_params = models.VectorParams(size=4, distance=models.Distance.COSINE)
client.create_collection(
    collection_name=collection_name,
    vectors_config=basic_vector_params,
)

True

In [16]:
# Insert vectors (embeddings) into the collection.
#
# Each row describes one point as (id, vector, payload); the rows are then split
# into the three parallel lists that models.Batch takes.
basic_points = [
    (1, [0.9, 0.1, 0.1, 0.1], {"color": "red", "size": 5}),
    (2, [0.1, 0.9, 0.1, 0.1], {"color": "green", "size": 5}),
    (3, [0.1, 0.1, 0.9, 0.1], {"color": "blue", "size": 8}),
    (4, [0.1, 0.1, 0.1, 0.9], {"color": "red", "size": 8}),
    (5, [0.5, 0.5, 0.5, 0.5], {"color": "green", "size": 2}),
]
point_ids, point_vectors, point_payloads = (list(column) for column in zip(*basic_points))

client.upsert(
    collection_name=collection_name,
    points=models.Batch(
        ids=point_ids,
        vectors=point_vectors,
        payloads=point_payloads,
    ),
    wait=True,
)

UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

In [17]:
# Simple retrieve: fetch specific points by their IDs.
# NOTE(review): the vectors displayed by this cell are not the raw values that
# were upserted (0.9 is shown as 0.9819805) - presumably Qdrant stores
# unit-normalised vectors for cosine distance; confirm.

client.retrieve(
    collection_name=collection_name,
    ids=[1,4],  # only the points with IDs 1 and 4
    with_payload=True,  # include each point's payload in the result
    with_vectors=True  # include each point's stored vector in the result
)

[Record(id=1, payload={'color': 'red', 'size': 5}, vector=[0.9819805, 0.10910895, 0.10910895, 0.10910895], shard_key=None, order_value=None),
 Record(id=4, payload={'color': 'red', 'size': 8}, vector=[0.10910895, 0.10910895, 0.10910895, 0.9819805], shard_key=None, order_value=None)]

In [21]:
# Scroll: read points from the collection one page at a time.
# The call returns a pair, unpacked here into the page of records (`points`)
# and a second value bound to `next_offset` (presumably where the next page
# starts - confirm against the client documentation).

points, next_offset = client.scroll(
    collection_name=collection_name,
    limit=3,  # return at most three records in this page
    with_payload=True,
    with_vectors=True
)

In [22]:
# Show the records returned by the scroll call.
points

[Record(id=1, payload={'color': 'red', 'size': 5}, vector=[0.9819805, 0.10910895, 0.10910895, 0.10910895], shard_key=None, order_value=None),
 Record(id=2, payload={'color': 'green', 'size': 5}, vector=[0.10910895, 0.9819805, 0.10910895, 0.10910895], shard_key=None, order_value=None),
 Record(id=3, payload={'color': 'blue', 'size': 8}, vector=[0.10910895, 0.10910895, 0.9819805, 0.10910895], shard_key=None, order_value=None)]

In [23]:
# Show the second value returned by the scroll call.
# NOTE(review): presumably this is the point ID to pass as `offset` to fetch the
# following page - confirm.
next_offset

4

In [24]:
# Delete points by ID: remove the points with IDs 1 and 3 from the collection.

client.delete(
    collection_name=collection_name,
    points_selector=models.PointIdsList(points=[1,3])
)

UpdateResult(operation_id=1, status=<UpdateStatus.COMPLETED: 'completed'>)

In [25]:
# Scroll again (no explicit limit this time) to inspect which points remain
# after the deletion.
client.scroll(
    collection_name=collection_name,
    with_payload=True,
    with_vectors=True
)

([Record(id=2, payload={'color': 'green', 'size': 5}, vector=[0.10910895, 0.9819805, 0.10910895, 0.10910895], shard_key=None, order_value=None),
  Record(id=4, payload={'color': 'red', 'size': 8}, vector=[0.10910895, 0.10910895, 0.10910895, 0.9819805], shard_key=None, order_value=None),
  Record(id=5, payload={'color': 'green', 'size': 2}, vector=[0.5, 0.5, 0.5, 0.5], shard_key=None, order_value=None)],
 None)

In [26]:
# Count the points in the collection; exact=True asks for an exact count.
client.count(collection_name=collection_name, exact=True)

CountResult(count=3)

---

In [27]:
from sentence_transformers import SentenceTransformer

In [28]:
# Load the 'all-MiniLM-L6-v2' SentenceTransformer model; it is used below to
# turn text chunks into embedding vectors.
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

In [29]:
# Switch to a new collection for the document-chunk example. This rebinds
# `collection_name`, so cells below no longer target "basic_collection".
collection_name = "docs_collection"

In [30]:
# Ask the model for the dimension of the embeddings it produces; this value is
# used as the vector size when the collection is created.
vector_size = embedding_model.get_sentence_embedding_dimension()

In [31]:
# Show the embedding dimension reported by the model.
vector_size

384

In [32]:
# Create the document collection: the vector size follows the embedding model's
# output dimension, and vectors are compared with cosine distance.
docs_vector_params = models.VectorParams(
    size=vector_size,
    distance=models.Distance.COSINE,
)
client.create_collection(
    collection_name=collection_name,
    vectors_config=docs_vector_params,
)

True

In [35]:
# Document chunks to index: five consecutive passages of one document.
# Every chunk shares the same doc_id and title; chunk_id numbers the passages
# from 1 in the order they are listed.
source_doc_id = 1
source_title = "Energi Terbarukan di Indonesia"
passages = [
    "Indonesia memiliki potensi besar dalam pengembangan energi terbarukan, seperti tenaga surya, air, dan angin.",
    "Tenaga surya menjadi salah satu sumber energi yang paling menjanjikan karena wilayah Indonesia yang mendapat sinar matahari sepanjang tahun.",
    "Namun, tantangan utama terletak pada biaya instalasi awal dan kurangnya infrastruktur penyimpanan energi yang efisien.",
    "Pemerintah terus berupaya meningkatkan investasi dan memperluas program energi bersih untuk mencapai target emisi nol bersih pada 2060.",
    "Selain itu, kolaborasi antara sektor publik dan swasta menjadi kunci dalam mempercepat transisi menuju sistem energi yang berkelanjutan.",
]

chunks = [
    {"doc_id": source_doc_id, "chunk_id": position, "title": source_title, "text": passage}
    for position, passage in enumerate(passages, start=1)
]

In [36]:
# Keep only the text of each chunk, in order, as input for the embedding model.
text_to_embed = [entry["text"] for entry in chunks]

In [37]:
# Show the texts that are about to be embedded.
text_to_embed

['Indonesia memiliki potensi besar dalam pengembangan energi terbarukan, seperti tenaga surya, air, dan angin.',
 'Tenaga surya menjadi salah satu sumber energi yang paling menjanjikan karena wilayah Indonesia yang mendapat sinar matahari sepanjang tahun.',
 'Namun, tantangan utama terletak pada biaya instalasi awal dan kurangnya infrastruktur penyimpanan energi yang efisien.',
 'Pemerintah terus berupaya meningkatkan investasi dan memperluas program energi bersih untuk mencapai target emisi nol bersih pada 2060.',
 'Selain itu, kolaborasi antara sektor publik dan swasta menjadi kunci dalam mempercepat transisi menuju sistem energi yang berkelanjutan.']

In [38]:
# Encode all chunk texts in a single call; show_progress_bar=True displays a
# progress bar while the model works.
embeddings = embedding_model.encode(text_to_embed, show_progress_bar=True)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

In [40]:
# Number of embeddings produced; one per input text is expected.
len(embeddings)

5

In [41]:
# Inspect the embedding of the first chunk.
# NOTE(review): this prints the entire vector; a slice such as
# embeddings[0][:5] would keep the stored output short.
embeddings[0]

array([ 1.20882709e-02,  4.91678156e-02,  6.95437659e-03,  3.63822393e-02,
       -2.26044599e-02, -3.20659615e-02,  3.64991389e-02,  3.67059186e-03,
       -4.38350774e-02, -1.06898481e-02,  9.06508267e-02, -7.92270340e-03,
       -6.73373714e-02, -1.66618060e-02,  8.65147337e-02, -4.82995696e-02,
        8.47959053e-03, -1.38725638e-02, -7.83105716e-02, -1.03407770e-01,
        1.98251642e-02, -9.24575701e-03, -1.59598906e-02, -2.17539147e-02,
       -2.76801158e-02, -2.74916785e-03,  4.79672439e-02,  3.53917591e-02,
        1.03228390e-01, -3.59152979e-03,  3.49813476e-02,  6.84012100e-02,
        1.58997886e-02,  1.35628479e-02, -9.18238536e-02,  2.88276188e-02,
       -1.98991038e-02, -2.21658535e-02,  3.60191204e-02,  7.93193281e-02,
       -5.44600142e-03, -5.80755062e-02, -3.04742735e-02, -1.64034382e-01,
        5.66822849e-02, -4.52620797e-02, -6.17019050e-02,  9.86786745e-03,
        1.73589513e-02, -1.42219700e-02, -8.63927007e-02,  3.94458696e-02,
       -2.94139478e-02, -

In [42]:
# Store every chunk as a point: the chunk_id becomes the point ID, the matching
# embedding becomes the vector, and doc_id / title / text form the payload.
# NOTE(review): point IDs come from chunk_id alone, so chunks of a second
# document that reuse the same chunk_id values would presumably overwrite these
# points on upsert - confirm before indexing more than one document.
payload_keys = ("doc_id", "title", "text")
chunk_point_ids = [entry["chunk_id"] for entry in chunks]
chunk_payloads = [{key: entry[key] for key in payload_keys} for entry in chunks]

client.upsert(
    collection_name=collection_name,
    points=models.Batch(
        ids=chunk_point_ids,
        vectors=embeddings.tolist(),
        payloads=chunk_payloads,
    ),
)

UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

In [43]:
# Scroll through the document collection to check the stored points, including
# their payloads and vectors.
# NOTE(review): with_vectors=True prints every full embedding; consider
# with_vectors=False if only the payloads need checking.
client.scroll(
    collection_name=collection_name,
    with_payload=True,
    with_vectors=True
)

([Record(id=1, payload={'doc_id': 1, 'title': 'Energi Terbarukan di Indonesia', 'text': 'Indonesia memiliki potensi besar dalam pengembangan energi terbarukan, seperti tenaga surya, air, dan angin.'}, vector=[0.012088271, 0.049167816, 0.0069543766, 0.03638224, -0.02260446, -0.03206596, 0.03649914, 0.0036705919, -0.043835077, -0.010689848, 0.09065083, -0.007922703, -0.06733737, -0.016661806, 0.086514734, -0.04829957, 0.0084795905, -0.013872564, -0.07831057, -0.10340777, 0.019825164, -0.009245757, -0.01595989, -0.021753915, -0.027680116, -0.0027491678, 0.047967244, 0.03539176, 0.10322839, -0.0035915298, 0.034981348, 0.06840121, 0.015899789, 0.013562848, -0.09182385, 0.028827619, -0.019899104, -0.022165854, 0.03601912, 0.07931933, -0.0054460014, -0.058075506, -0.030474273, -0.16403438, 0.056682285, -0.04526208, -0.061701905, 0.009867867, 0.017358951, -0.01422197, -0.0863927, 0.03944587, -0.029413948, -0.017977817, 0.019400792, -0.13041411, 0.016751211, -0.023895023, -0.039675876, -0.05548