# Búsqueda de similaridad de alta velocidad con RUST Qdrant

In [6]:
from qdrant_client import QdrantClient
from qdrant_client.http import models
import numpy as np
from faker import Faker

In [4]:
client = QdrantClient(host="localhost", port=6333)

In [8]:
my_collection = "first_collection"
client .create_collection(collection_name=my_collection,
                          vectors_config=models.VectorParams(size=100, distance=models.Distance.COSINE))

True

In [10]:
data = np.random.uniform(low=-1.0, high=1.0, size=(1_000, 100))
index = list(range(1_000))

In [11]:
data[:2, :10]

array([[ 0.2461217 ,  0.7663252 , -0.70202949, -0.20441976, -0.62032778,
        -0.14336946, -0.70156404,  0.4616521 , -0.29499437, -0.65985848],
       [ 0.56516396, -0.348281  ,  0.4445421 , -0.81619002,  0.87291845,
         0.62688332, -0.05266952, -0.69666567,  0.46402172,  0.34931551]])

In [12]:
index[-10:]

[990, 991, 992, 993, 994, 995, 996, 997, 998, 999]

In [13]:
client.upsert(collection_name=my_collection,
              points=models.Batch(
                  ids=index,
                  vectors=data.tolist()
              ))

UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

In [15]:
client.retrieve(collection_name=my_collection,
                ids=[10,14, 500],
                #with_vectors==True
                )

[Record(id=14, payload={}, vector=None),
 Record(id=500, payload={}, vector=None),
 Record(id=10, payload={}, vector=None)]

In [16]:
fake_something = Faker()
fake_something.name(), fake_something.address()

('Jasmine Trujillo', '46271 Thomas Roads Apt. 730\nTerriborough, ID 29546')

In [21]:
payload = []

for i in range(1_000):
    payload.append(
        {
            "artist": fake_something.name(),
            "song": " ". join(fake_something.words()),
            "url_song": fake_something.url(),
            "year": fake_something.year(),
            "country": fake_something.country(),

        }
    )

payload[:3]

[{'artist': 'Billy Watkins',
  'song': 'themselves above food',
  'url_song': 'http://jones.info/',
  'year': '2004',
  'country': 'United States Virgin Islands'},
 {'artist': 'Judy Hogan',
  'song': 'listen both listen',
  'url_song': 'https://www.castillo-king.com/',
  'year': '1975',
  'country': 'Gambia'},
 {'artist': 'Elizabeth Cabrera',
  'song': 'data sort factor',
  'url_song': 'https://www.arnold.com/',
  'year': '1974',
  'country': 'Nauru'}]

In [22]:
len(payload)

1000

In [23]:
client.upsert(collection_name=my_collection,
              points=models.Batch(
                  ids=index,
                  vectors=data.tolist(),
                  payloads=payload
              )
            )

UpdateResult(operation_id=1, status=<UpdateStatus.COMPLETED: 'completed'>)

# Semantic Search

In [24]:
living_la_vida_loca = np.random.uniform(low=-1.0, high=1.0, size=(100)).tolist()
living_la_vida_loca[:5]

[-0.8955803772291822,
 -0.7364089806915362,
 0.8467450930161904,
 0.6438872783835188,
 0.9156667403259193]

In [25]:
client.search(
    collection_name=my_collection,
    query_vector=living_la_vida_loca,
    limit=10
)

[ScoredPoint(id=966, version=1, score=0.29277423, payload={'artist': 'Alison Gordon', 'country': 'Vanuatu', 'song': 'person away test', 'url_song': 'http://www.curry-kelly.com/', 'year': '1971'}, vector=None),
 ScoredPoint(id=146, version=1, score=0.28518134, payload={'artist': 'Jordan Adkins', 'country': 'Sri Lanka', 'song': 'yes else crime', 'url_song': 'https://leonard.net/', 'year': '2007'}, vector=None),
 ScoredPoint(id=577, version=1, score=0.2614239, payload={'artist': 'Adrian Lyons', 'country': 'Turkmenistan', 'song': 'forward game determine', 'url_song': 'https://chang.net/', 'year': '1984'}, vector=None),
 ScoredPoint(id=90, version=1, score=0.25654435, payload={'artist': 'Katie Marshall', 'country': 'Puerto Rico', 'song': 'compare across only', 'url_song': 'https://www.tran.com/', 'year': '2013'}, vector=None),
 ScoredPoint(id=749, version=1, score=0.25245646, payload={'artist': 'David Mejia', 'country': 'Svalbard & Jan Mayen Islands', 'song': 'money their bag', 'url_song': 

In [26]:
aussie_songs = models.Filter(
    must = [
        models.FieldCondition(
            key="country",
            match=models.MatchValue(value="Australia")
        )
    ]
)

In [27]:
client.search(
    collection_name=my_collection,
    query_vector=living_la_vida_loca,
    query_filter=aussie_songs,
    limit=3
)

[ScoredPoint(id=722, version=1, score=0.109172136, payload={'artist': 'Elizabeth White', 'country': 'Australia', 'song': 'thank father social', 'url_song': 'https://www.mueller-davis.com/', 'year': '1973'}, vector=None),
 ScoredPoint(id=170, version=1, score=-0.0061663766, payload={'artist': 'Danielle Matthews', 'country': 'Australia', 'song': 'eat project matter', 'url_song': 'http://anderson.com/', 'year': '2019'}, vector=None),
 ScoredPoint(id=94, version=1, score=-0.013421139, payload={'artist': 'Rebecca Patterson', 'country': 'Australia', 'song': 'than I travel', 'url_song': 'https://www.howell.com/', 'year': '2004'}, vector=None)]