In [1]:
import uuid
import pandas as pd
from qdrant_client import QdrantClient, models
from sentence_transformers import SentenceTransformer

In [2]:
client = QdrantClient('localhost:6333')
model = SentenceTransformer('all-MiniLM-L6-v2')
collection_name = 'articles_collection'

In [3]:
articles = [
    {
        'doc_id': 'doc-001',
        'text': 'The James Webb Space Telescope has captured stunning new images of the Pillars of Creation, revealing details never seen before. The near-infrared camera provides a clearer view of star formation within the dense clouds of gas and dust.',
        'metadata': {
            'author': 'NASA Official',
            'category': 'Space Exploration',
            'published_date': '2023-10-26T10:00:00Z',
            'word_count': 58,
            'tags': ['space', 'astronomy', 'jwst', 'science'],
            'is_featured': True,
            'editor_rating': 4.8
        }
    },
    {
        'doc_id': 'doc-002',
        'text': 'A recent study published in Nature highlights the critical role of deep-sea currents in regulating global climate. The research suggests that changes in these currents could have significant long-term impacts.',
        'metadata': {
            'author': 'Dr. Anya Sharma',
            'category': 'Climate Science',
            'published_date': '2023-11-05T14:30:00Z',
            'word_count': 45,
            'tags': ['climate', 'oceanography', 'research'],
            'is_featured': True,
            'editor_rating': 4.5
        }
    },
    {
        'doc_id': 'doc-003',
        'text': 'The rise of generative AI is transforming the creative industries. From art to music, algorithms are now capable of producing novel works, sparking a debate about the nature of creativity and ownership.',
        'metadata': {
            'author': 'John Techson',
            'category': 'Artificial Intelligence',
            'published_date': '2024-01-15T09:00:00Z',
            'word_count': 52,
            'tags': ['ai', 'art', 'technology', 'ethics'],
            'is_featured': False,
            'editor_rating': 4.2
        }
    },
    {
        'doc_id': 'doc-004',
        'text': 'Archaeologists in Egypt have unearthed a new tomb near the Saqqara necropolis, believed to belong to a high-ranking official from the New Kingdom period. The tomb contains well-preserved inscriptions and artifacts.',
        'metadata': {
            'author': 'Zahi Hawass Jr.',
            'category': 'Archaeology',
            'published_date': '2022-05-20T12:00:00Z',
            'word_count': 51,
            'tags': ['egypt', 'history', 'discovery'],
            'is_featured': True,
            'editor_rating': 4.9
        }
    },
    {
        'doc_id': 'doc-005',
        'text': 'A breakthrough in battery technology promises to double the lifespan of electric vehicle batteries. The new lithium-sulfur design is cheaper and more environmentally friendly than current models.',
        'metadata': {
            'author': 'Future Innovations Inc.',
            'category': 'Technology',
            'published_date': '2024-02-01T18:00:00Z',
            'word_count': 49,
            'tags': ['ev', 'battery', 'sustainability', 'technology'],
            'is_featured': False,
            'editor_rating': 4.6
        }
    },
    {
        'doc_id': 'doc-006',
        'text': 'The global stock market experienced significant volatility this week amid concerns over inflation and interest rate hikes. Tech stocks were particularly affected, with major indices seeing a downturn.',
        'metadata': {
            'author': 'Financial Times',
            'category': 'Finance',
            'published_date': '2023-03-10T20:00:00Z',
            'word_count': 47,
            'tags': ['stocks', 'finance', 'economy'],
            'is_featured': False,
            'editor_rating': 4.0
        }
    }
]

In [4]:
# Tampilkan sebagai DataFrame untuk verifikasi
df = pd.DataFrame([{
    'doc_id': doc['doc_id'], 
    'text': doc['text'], 
    **doc['metadata']
} for doc in articles])
df

Unnamed: 0,doc_id,text,author,category,published_date,word_count,tags,is_featured,editor_rating
0,doc-001,The James Webb Space Telescope has captured st...,NASA Official,Space Exploration,2023-10-26T10:00:00Z,58,"[space, astronomy, jwst, science]",True,4.8
1,doc-002,A recent study published in Nature highlights ...,Dr. Anya Sharma,Climate Science,2023-11-05T14:30:00Z,45,"[climate, oceanography, research]",True,4.5
2,doc-003,The rise of generative AI is transforming the ...,John Techson,Artificial Intelligence,2024-01-15T09:00:00Z,52,"[ai, art, technology, ethics]",False,4.2
3,doc-004,Archaeologists in Egypt have unearthed a new t...,Zahi Hawass Jr.,Archaeology,2022-05-20T12:00:00Z,51,"[egypt, history, discovery]",True,4.9
4,doc-005,A breakthrough in battery technology promises ...,Future Innovations Inc.,Technology,2024-02-01T18:00:00Z,49,"[ev, battery, sustainability, technology]",False,4.6
5,doc-006,The global stock market experienced significan...,Financial Times,Finance,2023-03-10T20:00:00Z,47,"[stocks, finance, economy]",False,4.0


In [5]:
vector_size = model.get_sentence_embedding_dimension()

client.create_collection(
    collection_name=collection_name,
    vectors_config=models.VectorParams(
        size=vector_size,
        distance=models.Distance.COSINE
    )
)

True

In [6]:
# embedding untuk setiap teks

points = []

for doc in articles:
    embedding = model.encode(doc['text']).tolist()

    # mengkonstruksi point
    point = models.PointStruct(
            id=str(uuid.uuid4()),
            vector=embedding,
            payload={
                'doc_id':doc['doc_id'],
                'text':doc['text'],
                'metadata':doc['metadata']
            }
        )

    points.append(point)

In [7]:
points

[PointStruct(id='f6fd0126-b995-43eb-94db-c7343717bb5f', vector=[-0.043403349816799164, -0.03683122247457504, 0.028144337236881256, 0.016631295904517174, 0.018658576533198357, -0.07528326660394669, -0.03865521028637886, -0.06519640982151031, 0.09313516318798065, 0.03771258518099785, -0.02571381814777851, -0.013056525029242039, -0.031035983934998512, -0.04840204492211342, -0.02195066772401333, -0.014948836527764797, 0.014902135357260704, -0.09827712923288345, -0.031758032739162445, -0.011600219644606113, 0.012308626435697079, -0.014489448629319668, -0.05017843469977379, -0.0045255739241838455, 0.09724754840135574, 0.038792915642261505, -0.030422162264585495, -0.017384149134159088, 0.050874628126621246, -0.08068545907735825, 0.01364954560995102, -0.0011012989562004805, -0.02880227379500866, 0.06014464795589447, -0.015285433270037174, 0.014005789533257484, 0.06808636337518692, -0.029497193172574043, -0.025225646793842316, -0.0397610068321228, -0.034240271896123886, -0.0773262232542038, 0.0

In [8]:
client.upsert(collection_name=collection_name, points=points, wait=True)

UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

---

In [9]:
client.scroll(
    collection_name=collection_name
)

([Record(id='512de221-7d82-405f-920c-5605e37fd23c', payload={'doc_id': 'doc-003', 'text': 'The rise of generative AI is transforming the creative industries. From art to music, algorithms are now capable of producing novel works, sparking a debate about the nature of creativity and ownership.', 'metadata': {'author': 'John Techson', 'category': 'Artificial Intelligence', 'published_date': '2024-01-15T09:00:00Z', 'word_count': 52, 'tags': ['ai', 'art', 'technology', 'ethics'], 'is_featured': False, 'editor_rating': 4.2}}, vector=None, shard_key=None, order_value=None),
  Record(id='66e798be-4c47-46f7-ba13-1248fb5dbb4b', payload={'doc_id': 'doc-004', 'text': 'Archaeologists in Egypt have unearthed a new tomb near the Saqqara necropolis, believed to belong to a high-ranking official from the New Kingdom period. The tomb contains well-preserved inscriptions and artifacts.', 'metadata': {'author': 'Zahi Hawass Jr.', 'category': 'Archaeology', 'published_date': '2022-05-20T12:00:00Z', 'word_

In [11]:
articles[0]['metadata']

{'author': 'NASA Official',
 'category': 'Space Exploration',
 'published_date': '2023-10-26T10:00:00Z',
 'word_count': 58,
 'tags': ['space', 'astronomy', 'jwst', 'science'],
 'is_featured': True,
 'editor_rating': 4.8}

In [12]:
# MUST (AND)

client.scroll(
    collection_name=collection_name,
    scroll_filter=models.Filter(
        must=[
            models.FieldCondition(key='metadata.category', match=models.MatchValue(value='Space Exploration')),
            models.FieldCondition(key='metadata.is_featured', match=models.MatchValue(value=True))
        ]
    ),
    with_payload=True
)

([Record(id='f6fd0126-b995-43eb-94db-c7343717bb5f', payload={'doc_id': 'doc-001', 'text': 'The James Webb Space Telescope has captured stunning new images of the Pillars of Creation, revealing details never seen before. The near-infrared camera provides a clearer view of star formation within the dense clouds of gas and dust.', 'metadata': {'author': 'NASA Official', 'category': 'Space Exploration', 'published_date': '2023-10-26T10:00:00Z', 'word_count': 58, 'tags': ['space', 'astronomy', 'jwst', 'science'], 'is_featured': True, 'editor_rating': 4.8}}, vector=None, shard_key=None, order_value=None)],
 None)

In [None]:
# SHOULD (OR)

client.scroll(
    collection_name=collection_name,
    scroll_filter=models.Filter(
        should=[
            models.FieldCondition(key='metadata.category', match=models.MatchValue(value='Space Exploration')),
            models.FieldCondition(key='metadata.category', match=models.MatchValue(value='Archaeology'))
        ]
    ),
    with_payload=True
)

([Record(id='66e798be-4c47-46f7-ba13-1248fb5dbb4b', payload={'doc_id': 'doc-004', 'text': 'Archaeologists in Egypt have unearthed a new tomb near the Saqqara necropolis, believed to belong to a high-ranking official from the New Kingdom period. The tomb contains well-preserved inscriptions and artifacts.', 'metadata': {'author': 'Zahi Hawass Jr.', 'category': 'Archaeology', 'published_date': '2022-05-20T12:00:00Z', 'word_count': 51, 'tags': ['egypt', 'history', 'discovery'], 'is_featured': True, 'editor_rating': 4.9}}, vector=None, shard_key=None, order_value=None),
  Record(id='f6fd0126-b995-43eb-94db-c7343717bb5f', payload={'doc_id': 'doc-001', 'text': 'The James Webb Space Telescope has captured stunning new images of the Pillars of Creation, revealing details never seen before. The near-infrared camera provides a clearer view of star formation within the dense clouds of gas and dust.', 'metadata': {'author': 'NASA Official', 'category': 'Space Exploration', 'published_date': '2023-

In [17]:
#MUST NOT (NOT)
client.scroll(
    collection_name=collection_name,
    scroll_filter=models.Filter(
        must_not=[
            models.FieldCondition(key='metadata.category', match=models.MatchValue(value='Space Exploration'))
        ]
    ),
    with_payload=True
)

([Record(id='512de221-7d82-405f-920c-5605e37fd23c', payload={'doc_id': 'doc-003', 'text': 'The rise of generative AI is transforming the creative industries. From art to music, algorithms are now capable of producing novel works, sparking a debate about the nature of creativity and ownership.', 'metadata': {'author': 'John Techson', 'category': 'Artificial Intelligence', 'published_date': '2024-01-15T09:00:00Z', 'word_count': 52, 'tags': ['ai', 'art', 'technology', 'ethics'], 'is_featured': False, 'editor_rating': 4.2}}, vector=None, shard_key=None, order_value=None),
  Record(id='66e798be-4c47-46f7-ba13-1248fb5dbb4b', payload={'doc_id': 'doc-004', 'text': 'Archaeologists in Egypt have unearthed a new tomb near the Saqqara necropolis, believed to belong to a high-ranking official from the New Kingdom period. The tomb contains well-preserved inscriptions and artifacts.', 'metadata': {'author': 'Zahi Hawass Jr.', 'category': 'Archaeology', 'published_date': '2022-05-20T12:00:00Z', 'word_

In [19]:
# RANGE

# gt (>), gte (>=), lt (<), lte (<=)

client.scroll(
    collection_name=collection_name,
    scroll_filter=models.Filter(
        must=[
            models.FieldCondition(key='metadata.editor_rating', range=models.Range(gte=4.5))
        ]
    ),
    with_payload=True
)

([Record(id='66e798be-4c47-46f7-ba13-1248fb5dbb4b', payload={'doc_id': 'doc-004', 'text': 'Archaeologists in Egypt have unearthed a new tomb near the Saqqara necropolis, believed to belong to a high-ranking official from the New Kingdom period. The tomb contains well-preserved inscriptions and artifacts.', 'metadata': {'author': 'Zahi Hawass Jr.', 'category': 'Archaeology', 'published_date': '2022-05-20T12:00:00Z', 'word_count': 51, 'tags': ['egypt', 'history', 'discovery'], 'is_featured': True, 'editor_rating': 4.9}}, vector=None, shard_key=None, order_value=None),
  Record(id='ce364a59-908b-4278-aa83-a13a7dcda11e', payload={'doc_id': 'doc-005', 'text': 'A breakthrough in battery technology promises to double the lifespan of electric vehicle batteries. The new lithium-sulfur design is cheaper and more environmentally friendly than current models.', 'metadata': {'author': 'Future Innovations Inc.', 'category': 'Technology', 'published_date': '2024-02-01T18:00:00Z', 'word_count': 49, 't

In [20]:
client.scroll(
    collection_name=collection_name,
    scroll_filter=models.Filter(
        must=[
            models.FieldCondition(key='metadata.editor_rating', range=models.Range(gte=4.0, lte=4.5))
        ]
    ),
    with_payload=True
)

([Record(id='512de221-7d82-405f-920c-5605e37fd23c', payload={'doc_id': 'doc-003', 'text': 'The rise of generative AI is transforming the creative industries. From art to music, algorithms are now capable of producing novel works, sparking a debate about the nature of creativity and ownership.', 'metadata': {'author': 'John Techson', 'category': 'Artificial Intelligence', 'published_date': '2024-01-15T09:00:00Z', 'word_count': 52, 'tags': ['ai', 'art', 'technology', 'ethics'], 'is_featured': False, 'editor_rating': 4.2}}, vector=None, shard_key=None, order_value=None),
  Record(id='cafc8992-5186-4936-9369-923c2afd4e62', payload={'doc_id': 'doc-006', 'text': 'The global stock market experienced significant volatility this week amid concerns over inflation and interest rate hikes. Tech stocks were particularly affected, with major indices seeing a downturn.', 'metadata': {'author': 'Financial Times', 'category': 'Finance', 'published_date': '2023-03-10T20:00:00Z', 'word_count': 47, 'tags'

In [21]:
# DATETIME RANGE

client.scroll(
    collection_name=collection_name,
    scroll_filter=models.Filter(
        must=[
            models.FieldCondition(key='metadata.published_date', range=models.DatetimeRange(gte='2023-01-01T00:00:00Z', lt='2024-01-01'))
        ]
    ),
    with_payload=True
)

([Record(id='cafc8992-5186-4936-9369-923c2afd4e62', payload={'doc_id': 'doc-006', 'text': 'The global stock market experienced significant volatility this week amid concerns over inflation and interest rate hikes. Tech stocks were particularly affected, with major indices seeing a downturn.', 'metadata': {'author': 'Financial Times', 'category': 'Finance', 'published_date': '2023-03-10T20:00:00Z', 'word_count': 47, 'tags': ['stocks', 'finance', 'economy'], 'is_featured': False, 'editor_rating': 4.0}}, vector=None, shard_key=None, order_value=None),
  Record(id='eb622152-91f7-489e-b4af-d67685a738bf', payload={'doc_id': 'doc-002', 'text': 'A recent study published in Nature highlights the critical role of deep-sea currents in regulating global climate. The research suggests that changes in these currents could have significant long-term impacts.', 'metadata': {'author': 'Dr. Anya Sharma', 'category': 'Climate Science', 'published_date': '2023-11-05T14:30:00Z', 'word_count': 45, 'tags': [

In [None]:
# is_empty & is_null

client.scroll(
    collection_name=collection_name,
    scroll_filter=models.Filter(
        must=[
            models.IsEmptyCondition(is_empty=models.PayloadField(key='metadata.tags'))
        ]
    ),
    with_payload=True
)

# is_empty: apakah list kosong [], atau ''
# is_null: null

([], None)

In [28]:
# text_search

client.scroll(
    collection_name=collection_name,
    scroll_filter=models.Filter(
        must=[
            models.FieldCondition(
                key='text',
                match=models.MatchText(text='global stock')
            )
        ]
    ),
    with_payload=True
)

# keyword matching

([Record(id='cafc8992-5186-4936-9369-923c2afd4e62', payload={'doc_id': 'doc-006', 'text': 'The global stock market experienced significant volatility this week amid concerns over inflation and interest rate hikes. Tech stocks were particularly affected, with major indices seeing a downturn.', 'metadata': {'author': 'Financial Times', 'category': 'Finance', 'published_date': '2023-03-10T20:00:00Z', 'word_count': 47, 'tags': ['stocks', 'finance', 'economy'], 'is_featured': False, 'editor_rating': 4.0}}, vector=None, shard_key=None, order_value=None)],
 None)

---

In [29]:
query = 'innovation in artificial intelligence'
query_embedding = model.encode(query).tolist()

In [34]:
# (published_date > 2024) AND (category = 'AI' OR category = 'Tec')

client.search(
    collection_name=collection_name,
    query_vector=query_embedding,
    query_filter=models.Filter(
        must=[
            models.FieldCondition(
                key='metadata.published_date',
                range=models.DatetimeRange(gte='2024-01-01T00:00:00Z')
            ),
            models.Filter(
                should=[
                    models.FieldCondition(key='metadata.category', match=models.MatchValue(value='Artificial Intelligence')),
                    models.FieldCondition(key='metadata.category', match=models.MatchValue(value='Technology'))
                ]
            )
        ]
    )
)

  client.search(


[ScoredPoint(id='512de221-7d82-405f-920c-5605e37fd23c', version=0, score=0.51202303, payload={'doc_id': 'doc-003', 'text': 'The rise of generative AI is transforming the creative industries. From art to music, algorithms are now capable of producing novel works, sparking a debate about the nature of creativity and ownership.', 'metadata': {'author': 'John Techson', 'category': 'Artificial Intelligence', 'published_date': '2024-01-15T09:00:00Z', 'word_count': 52, 'tags': ['ai', 'art', 'technology', 'ethics'], 'is_featured': False, 'editor_rating': 4.2}}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id='ce364a59-908b-4278-aa83-a13a7dcda11e', version=0, score=0.09557949, payload={'doc_id': 'doc-005', 'text': 'A breakthrough in battery technology promises to double the lifespan of electric vehicle batteries. The new lithium-sulfur design is cheaper and more environmentally friendly than current models.', 'metadata': {'author': 'Future Innovations Inc.', 'category': 'Technol