In [19]:
from qdrant_client import QdrantClient
from qdrant_client.http import models
import numpy as np
from faker import Faker


In [20]:
# client = QdrantClient(":memory:") # Create in-memory Qdrant instance, for testing, CI/CD
# OR
client = QdrantClient(host="localhost",port=6333)
client

<qdrant_client.qdrant_client.QdrantClient at 0x128cca910>

In [22]:
my_collection="first_collection"
client.create_collection(
    collection_name=my_collection,
    vectors_config=models.VectorParams(size=100, distance=models.Distance.COSINE)
)

True

In [24]:
data = np.random.uniform(low=-1.0, high=1.0,size=(1_000, 100))
index = list(range(1_000))

In [25]:
data[:2,:10]

array([[-9.79315018e-01,  7.75378270e-01,  6.26436392e-01,
         5.94547273e-01,  8.78138741e-01,  9.01078628e-01,
        -3.51573503e-01,  3.25093346e-01,  7.11653964e-01,
         9.12815010e-05],
       [-3.37259929e-01, -9.25879977e-01, -5.95275466e-01,
         1.17507993e-01,  5.27351776e-02, -1.66159915e-02,
         1.84497939e-01,  1.38202396e-01,  9.42364086e-01,
        -9.94900079e-01]])

In [26]:
index[-10:]

[990, 991, 992, 993, 994, 995, 996, 997, 998, 999]

In [27]:
client.upsert(
    collection_name=my_collection,
    points=models.Batch(
        ids=index,
        vectors=data.tolist()
    )
)

UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

In [28]:
client.retrieve(
    collection_name = my_collection,
    ids=[10, 14,500],
)

[Record(id=10, payload={}, vector=None, shard_key=None, order_value=None),
 Record(id=14, payload={}, vector=None, shard_key=None, order_value=None),
 Record(id=500, payload={}, vector=None, shard_key=None, order_value=None)]

In [30]:
fake_something = Faker()
fake_something.name(), fake_something.address()

('Calvin Williams', '9316 Rachel Avenue\nWest Patrickside, MI 14643')

In [31]:
payload=[]

for i in range(1_000):
    payload.append(
        {
            "artist": fake_something.name(),
            "song": " ".join(fake_something.words()),
            "url_song": fake_something.url(),
            "year": fake_something.year(),
            "country": fake_something.country(),
        }
    )
payload[:3] 
            

[{'artist': 'Tiffany Woods',
  'song': 'enjoy employee quality',
  'url_song': 'http://www.willis.com/',
  'year': '1993',
  'country': 'Montserrat'},
 {'artist': 'Jacob Smith',
  'song': 'project wait probably',
  'url_song': 'http://decker.com/',
  'year': '1972',
  'country': 'Algeria'},
 {'artist': 'Jeremy Bond',
  'song': 'on little close',
  'url_song': 'http://www.sanders-gomez.com/',
  'year': '1983',
  'country': 'United Arab Emirates'}]

In [32]:
client.upsert(
    collection_name = my_collection,
    points=models.Batch(
        ids=index,
        vectors=data.tolist(),
        payloads=payload
    )
)

UpdateResult(operation_id=1, status=<UpdateStatus.COMPLETED: 'completed'>)

In [33]:
## Semantic Search

In [34]:
living_la_vida_loca = np.random.uniform(low=-1.0, high=1.0, size=(100)).tolist()
living_la_vida_loca[:5]

[0.6578673090658924,
 0.13773399332691016,
 0.4332337149372054,
 -0.3224406657619332,
 0.40259657647617253]

In [37]:
client.search(
    collection_name = my_collection,
    query_vector=living_la_vida_loca,
    limit = 10
)

  client.search(


[ScoredPoint(id=59, version=1, score=0.36187574, payload={'artist': 'Helen Malone', 'song': 'kind piece measure', 'url_song': 'http://watkins.biz/', 'year': '1977', 'country': 'Benin'}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id=644, version=1, score=0.3346107, payload={'artist': 'Bradley Dodson', 'song': 'former something instead', 'url_song': 'https://miles.org/', 'year': '1994', 'country': 'Guinea-Bissau'}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id=768, version=1, score=0.26811597, payload={'artist': 'Andrew King', 'song': 'with discussion remain', 'url_song': 'http://www.dillon-stewart.com/', 'year': '1984', 'country': 'Montserrat'}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id=960, version=1, score=0.25300366, payload={'artist': 'Karen Sanchez', 'song': 'card will American', 'url_song': 'https://www.thompson-daniel.org/', 'year': '2016', 'country': 'Netherlands Antilles'}, vector=None, shard_key=None, order_value=None),

In [38]:
aussie_songs = models.Filter(
    must=[
        models.FieldCondition(
            key="country", match=models.MatchValue(value="Australia")
        )
    ]
)
aussie_songs

Filter(should=None, min_should=None, must=[FieldCondition(key='country', match=MatchValue(value='Australia'), range=None, geo_bounding_box=None, geo_radius=None, geo_polygon=None, values_count=None, is_empty=None, is_null=None)], must_not=None)

In [39]:
client.search(
    collection_name = my_collection,
    query_vector=living_la_vida_loca,
    query_filter=aussie_songs,
    limit = 3
)

  client.search(


[ScoredPoint(id=417, version=1, score=0.07792984, payload={'artist': 'Scott Sanders', 'song': 'force measure school', 'url_song': 'https://www.reese-brown.org/', 'year': '2006', 'country': 'Australia'}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id=472, version=1, score=0.011704882, payload={'artist': 'Ronald Guzman', 'song': 'guy baby standard', 'url_song': 'http://www.fowler.biz/', 'year': '2010', 'country': 'Australia'}, vector=None, shard_key=None, order_value=None)]