In [9]:
from qdrant_client import QdrantClient
import numpy as np
from qdrant_client.http import models
from faker import Faker

In [11]:
# Create a Qdrant client
client = QdrantClient(host = 'localhost', port = 6333)
client

<qdrant_client.qdrant_client.QdrantClient at 0x1160d2100>

In [73]:
# Create a collection (table) to store points
# Each point has: ID, Vector, and Payload
# ID - a unique identifier for each vector (e.g., image, song, video, document, etc.)
# Vector - a high-dimensional representation of the data item
# Payload - metadata about each vector representation (e.g., artist who created a song, release year, URL to the song, country where the song was created, etc.)

# The number of dimensions is analogous to the number of columns.
# Distance - specifies how we query the data and retrieve results (e.g., COSINE similarity)

In [15]:
my_collection = 'first_collection'
client.create_collection(
    collection_name = my_collection,
    vectors_config=models.VectorParams(size=100, 
                                       distance=models.Distance.COSINE)
)

True

In [29]:
data = np.random.uniform(low=-1.0, high=1.0, size=(1_000, 100))
index = list(range(1_000))

In [30]:
data[:2][0]

array([-0.20120366, -0.5879291 ,  0.54028079, -0.01345534,  0.17859067,
       -0.41382853,  0.39961553, -0.19702934, -0.98607964,  0.19368104,
        0.1129613 , -0.58364989,  0.99267372,  0.80405406, -0.4536265 ,
        0.68555347,  0.26387621,  0.80665498, -0.54486821,  0.54729952,
       -0.4508836 ,  0.22386532, -0.63017481, -0.46890971, -0.71889203,
       -0.38912731, -0.78210185, -0.36294362,  0.78225628,  0.72763   ,
       -0.4655845 ,  0.61647857,  0.44712869, -0.33949648, -0.9435025 ,
       -0.95057288, -0.3482762 ,  0.68498247,  0.21014404,  0.92684916,
        0.70004637, -0.03754544,  0.51445553,  0.53181772,  0.20617193,
        0.92365549, -0.82662193, -0.64739838, -0.39046742, -0.49244077,
        0.63333782, -0.955647  , -0.32975348, -0.7395958 , -0.99167022,
       -0.12387514,  0.21657436, -0.1980394 ,  0.79113388,  0.26161092,
        0.2618423 ,  0.78159403, -0.4308446 , -0.77893568,  0.21844771,
       -0.61707173, -0.52782446, -0.42397291, -0.43538822,  0.43

In [32]:
client.upsert(
    collection_name = my_collection,
    points = models.Batch(ids = index,
                          vectors = data.tolist()
    )
)

UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

In [37]:
client.retrieve(
    collection_name = my_collection,
    ids = [10,14,5000],
    with_vectors=True)

[Record(id=10, payload={}, vector=[-0.08814841, 0.04631223, 0.0060495012, 0.049282305, -0.012081146, 0.14882924, -0.035293955, -0.15039177, 0.09349682, 0.097557954, 0.041811753, -0.15404606, -0.0037731994, -0.024330718, 0.0071016783, -0.12085816, 0.08140178, 0.09335002, 0.03623206, 0.06765507, 0.06077222, -0.07090134, 0.10480334, 0.13318297, 0.045144938, 0.01685478, -0.019746289, 0.113241166, 0.080382176, 0.17679052, 0.07063528, 0.14437664, -0.06644633, -0.12627079, 0.1221459, -0.09933028, -0.14617415, 0.1623912, 0.10720135, -0.17049706, 0.0637454, -0.09204277, -0.062394116, 0.13212869, 0.1412169, -0.13699423, -0.047225635, -0.07971856, 0.131656, -0.013697356, 0.0437472, -0.12362942, 0.13588275, 0.05747706, 0.02507232, -0.021392215, 0.025937442, -0.075174965, -0.17065023, 0.085493475, -0.15117696, -0.1440295, -0.04227544, -0.0091647105, 0.118854605, 0.04856591, -0.12880267, -0.01373315, -0.16360721, -0.13734166, -0.025777753, 0.03486105, -0.0613785, 0.06568777, 0.15440044, -0.098496646

In [39]:
fake_something = Faker()
fake_something.name(), fake_something.address()

('Kara Wells', '411 Katie Isle Suite 578\nSouth Kellyborough, CA 12041')

In [41]:
payload = []
for i in range(1000):
    payload.append({
                    'artist'  : fake_something.name(),
                    'song'    : ''.join(fake_something.words()),
                    'url'     :  fake_something.url(),
                    'year'    : fake_something.year(),
                    'country' : fake_something.country()
    })

payload[0]

{'artist': 'Francisco Miller',
 'song': 'stuffgoalpersonal',
 'url': 'http://www.holmes-robertson.org/',
 'year': '2014',
 'country': 'Samoa'}

In [43]:
client.upsert(
    collection_name=my_collection,
    points=models.Batch(
        ids = index,
        vectors=data,
        payloads=payload
    )
)

UpdateResult(operation_id=2, status=<UpdateStatus.COMPLETED: 'completed'>)

# Semantic Search

In [50]:
living_la_vida_loca = np.random.uniform(low=-0.1, high=1.0, size=(100)).tolist()
living_la_vida_loca[:5]

[0.5030307615107082,
 0.23788436315737677,
 -0.0016083995498246215,
 -0.005490085510043195,
 -0.07589019419711045]

In [52]:
# Retrive the result 
client.search(
    collection_name=my_collection,
    query_vector=living_la_vida_loca,
    limit = 5
)

[ScoredPoint(id=59, version=2, score=0.35710692, payload={'artist': 'Sierra Osborne', 'song': 'starunderstandfast', 'url': 'https://mullins.com/', 'year': '1995', 'country': 'Netherlands'}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id=241, version=2, score=0.32570764, payload={'artist': 'David Smith', 'song': 'downpatternexample', 'url': 'https://www.brown-taylor.com/', 'year': '2008', 'country': 'Bahamas'}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id=935, version=2, score=0.29163417, payload={'artist': 'Maurice Mcclain', 'song': 'generationeducationshould', 'url': 'https://manning.com/', 'year': '1979', 'country': 'Australia'}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id=842, version=2, score=0.2806603, payload={'artist': 'Mike Smith', 'song': 'eatmeetstop', 'url': 'http://delgado.com/', 'year': '2018', 'country': 'Kenya'}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id=848, version=2, score=0.27245492, payloa

In [54]:
aussie_songs = models.Filter(
    must = [
        models.FieldCondition(
            key='country', match = models.MatchValue(value = 'Australia')
        )
    ]
)    

In [59]:
# Retrive the result by filter 
client.search(
    collection_name = my_collection,
    query_vector = living_la_vida_loca,
    query_filter = aussie_songs,
    limit = 5
)

[ScoredPoint(id=935, version=2, score=0.29163417, payload={'artist': 'Maurice Mcclain', 'song': 'generationeducationshould', 'url': 'https://manning.com/', 'year': '1979', 'country': 'Australia'}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id=919, version=2, score=0.2646881, payload={'artist': 'Timothy Cobb', 'song': 'producttheirher', 'url': 'http://pearson.com/', 'year': '2005', 'country': 'Australia'}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id=400, version=2, score=-0.1791785, payload={'artist': 'Christopher Jones', 'song': 'orderdiscussionhimself', 'url': 'http://miller-williams.org/', 'year': '1975', 'country': 'Australia'}, vector=None, shard_key=None, order_value=None)]

# Recommendation API

In [72]:
client.recommend(
    collection_name = my_collection,
    positive=[17, 123],
    negative=[100, 444],
    query_filter=models.Filter(
                must = [
                    models.FieldCondition(key='country', match = models.MatchValue(value = 'Spain'))
                    ]
    ),    
    limit = 5
)

[ScoredPoint(id=231, version=2, score=0.036168177, payload={'artist': 'Arthur Powell', 'song': 'kindoperationless', 'url': 'https://www.fisher.com/', 'year': '1979', 'country': 'Spain'}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id=279, version=2, score=-0.005771706, payload={'artist': 'Bryan Griffin', 'song': 'agreesurewhose', 'url': 'http://www.thomas-green.biz/', 'year': '1993', 'country': 'Spain'}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id=956, version=2, score=-0.15126365, payload={'artist': 'Kelly Edwards', 'song': 'moveaudiencehand', 'url': 'http://fletcher.com/', 'year': '2015', 'country': 'Spain'}, vector=None, shard_key=None, order_value=None)]