# Alguns comandos do qdrant

## Collections

### Criando uma collection

In [None]:
from qdrant_client import QdrantClient, models

# Client for the Qdrant HTTP endpoint at localhost:6333.
client = QdrantClient(url="http://localhost:6333")

# Create a collection with one unnamed vector per point: 100 dimensions,
# cosine distance.
# NOTE(review): "{collection_name}" looks like a template placeholder —
# substitute the real collection name before running.
client.create_collection(
    collection_name="{collection_name}",
    vectors_config=models.VectorParams(size=100, distance=models.Distance.COSINE),
)

### Criando uma collection com mais de um vetor

In [None]:
from qdrant_client import QdrantClient, models


# Client for the Qdrant HTTP endpoint at localhost:6333.
client = QdrantClient(url="http://localhost:6333")

# Create a collection whose points carry two named vectors:
#   "image": 4 dimensions, dot-product distance
#   "text":  8 dimensions, cosine distance
client.create_collection(
    collection_name="{collection_name}",
    vectors_config={
        "image": models.VectorParams(size=4, distance=models.Distance.DOT),
        "text": models.VectorParams(size=8, distance=models.Distance.COSINE),
    },
)

### Deletando uma collection

In [None]:
# Delete the collection with the given name (relies on `client` from an earlier cell).
client.delete_collection(collection_name="{collection_name}")

### Listando todas as collections

In [1]:
from qdrant_client import QdrantClient

# Client for the Qdrant HTTP endpoint at localhost:6333.
client = QdrantClient(url="http://localhost:6333")

# List every collection on the server. The server must be reachable: the
# recorded run of this cell failed with "Connection refused".
client.get_collections()

  from .autonotebook import tqdm as notebook_tqdm


ResponseHandlingException: [Errno 111] Connection refused

### Subindo dados numa coleção em batch

In [None]:
# First form: a column-oriented Batch — parallel lists of ids, payloads and
# vectors, where position i in each list describes the same point.

client.upsert(
    collection_name="{collection_name}",
    points=models.Batch(
        ids=[1, 2, 3],
        payloads=[
            {"color": "red"},
            {"color": "green"},
            {"color": "blue"},
        ],
        vectors=[
            [0.9, 0.1, 0.1],
            [0.1, 0.9, 0.1],
            [0.1, 0.1, 0.9],
        ],
    ),
)


In [None]:
# Second form: a record-oriented list with one PointStruct per point, built
# here from (id, color, vector) rows.
client.upsert(
    collection_name="{collection_name}",
    points=[
        models.PointStruct(id=point_id, vector=vector, payload={"color": color})
        for point_id, color, vector in (
            (1, "red", [0.9, 0.1, 0.1]),
            (2, "green", [0.1, 0.9, 0.1]),
            (3, "blue", [0.1, 0.1, 0.9]),
        )
    ],
)


### Subindo dados numa coleção em batch com paralelização

- parallel: parâmetro que define o número de processos em paralelo que serão executados
- max_retries: parâmetro que define o número de vezes que o envio deve ser repetido em caso de erro ao subir dados em uma collection

In [None]:
# Upload the points with 4 parallel workers, retrying up to 3 times on
# failure. Each point is built from an (id, color, vector) row.
client.upload_points(
    collection_name="{collection_name}",
    points=[
        models.PointStruct(id=point_id, vector=vector, payload={"color": color})
        for point_id, color, vector in (
            (1, "red", [0.9, 0.1, 0.1]),
            (2, "green", [0.1, 0.9, 0.1]),
        )
    ],
    parallel=4,
    max_retries=3,
)



## Vetores

### Atualizando um VETOR

In [None]:
# The points with id=1 and id=2 get their vectors changed to the named
# vectors listed below; each row is (point id, {vector name: values}).
client.update_vectors(
    collection_name="{collection_name}",
    points=[
        models.PointVectors(id=point_id, vector=named_vectors)
        for point_id, named_vectors in (
            (1, {"image": [0.1, 0.2, 0.3, 0.4]}),
            (2, {"text": [0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2]}),
        )
    ],
)


### Deletando um vetor

In [None]:
# Only the vectors are deleted; the points themselves are kept. To remove a
# point entirely, use the point-deletion call instead.
# Here the "text" and "image" vectors are removed from points 0, 3 and 100.

client.delete_vectors(
    collection_name="{collection_name}",
    points=[0, 3, 100],
    vectors=["text", "image"],
)


## Points

### Apagando um point via ID do point

In [None]:
# The points with IDs 0, 3 and 100 will be deleted.

client.delete(
    collection_name="{collection_name}",
    points_selector=models.PointIdsList(
        points=[0, 3, 100],
    ),
)

### Deletando um point baseado num valor de filtro

In [None]:
# Deletes the points whose payload has color == "red". The selector is a
# filter rather than an explicit list of ids.

client.delete(
    collection_name="{collection_name}",
    points_selector=models.FilterSelector(
        filter=models.Filter(
            must=[
                models.FieldCondition(
                    key="color",
                    match=models.MatchValue(value="red"),
                ),
            ],
        )
    ),
)


### Trazendo points via ID

In [None]:
# Fetch the points with ids 0, 3 and 100 directly by id (no vector search).
client.retrieve(
    collection_name="{collection_name}",
    ids=[0, 3, 100],
)

### Contando o número de points que possuem tal filtro

In [None]:
# Count the points whose payload has color == "red".
# exact=True asks for an exact count — presumably instead of an estimate;
# confirm against the Qdrant count API docs.
client.count(
    collection_name="{collection_name}",
    count_filter=models.Filter(
        must=[
            models.FieldCondition(key="color", match=models.MatchValue(value="red")),
        ]
    ),
    exact=True,
)

## Payload

### Update de payload via ID

In [None]:
# Sets the keys below on the payload of points 0, 3 and 10.
# Per the Qdrant payload docs, set_payload writes only the given keys and
# leaves the rest of each point's payload in place; replacing the whole
# payload is what overwrite_payload is for.

client.set_payload(
    collection_name="{collection_name}",
    payload={
        "property1": "string",
        "property2": "string",
    },
    points=[0, 3, 10],
)


### Update de payload via filter

In [None]:
# Same payload update, but the target points are selected by a filter
# (payload color == "red") instead of an explicit list of ids.
client.set_payload(
    collection_name="{collection_name}",
    payload={
        "property1": "string",
        "property2": "string",
    },
    points=models.Filter(
        must=[
            models.FieldCondition(
                key="color",
                match=models.MatchValue(value="red"),
            ),
        ],
    ),
)


## Buscas

### Busca por ID

In [None]:
# Query using an existing point as the query: `query` is given a point id
# (a UUID string here) rather than a vector.
client.query_points(
    collection_name="{collection_name}",
    query="43cf51e2-8777-4f52-bc74-c2cbde0c8b04", # <--- point id
)

### Busca com vetor e com filtro

In [None]:
# Vector search restricted to points whose payload has city == "London".
# At most 3 results are returned; the search parameters request hnsw_ef=128
# with exact search turned off.
client.query_points(
    collection_name="{collection_name}",
    query=[0.2, 0.1, 0.9, 0.7],
    query_filter=models.Filter(
        must=[
            models.FieldCondition(
                key="city",
                match=models.MatchValue(
                    value="London",
                ),
            )
        ]
    ),
    search_params=models.SearchParams(hnsw_ef=128, exact=False),
    limit=3,
)


### Busca por vetor caso os points tenham mais de um vetor

In [None]:
# When points have more than one vector, `using` names the vector to search
# against — here the "image" vector. At most 3 results are returned.
client.query_points(
    collection_name="{collection_name}",
    query=[0.2, 0.1, 0.9, 0.7],
    using="image",
    limit=3,
)

### Busca que traz o payload e o vetor

In [None]:
# Vector search that asks for both the stored vectors and the payload to be
# included with each result.
client.query_points(
    collection_name="{collection_name}",
    query=[0.2, 0.1, 0.9, 0.7],
    with_vectors=True,
    with_payload=True,
)

### Busca que traz apenas alguns campos do payload

In [None]:
# Vector search that returns only some payload fields: passing a list to
# with_payload selects the "city", "village" and "town" keys.
client.query_points(
    collection_name="{collection_name}",
    query=[0.2, 0.1, 0.9, 0.7],
    with_payload=["city", "village", "town"],
)

### Busca com paginação

In [None]:
# Paginated vector search: a page of 10 results (limit) starting at
# offset 100, with vectors and payload included.
client.query_points(
    collection_name="{collection_name}",
    query=[0.2, 0.1, 0.9, 0.7],
    with_vectors=True,
    with_payload=True,
    limit=10,
    offset=100,
)

### Busca com agrupamento dos resultados (group by)

In [None]:
# Vector search whose results are grouped by the payload field "document_id".
client.query_points_groups(
    collection_name="{collection_name}",
    query=[1.1],
    group_by="document_id",
    limit=4,  # maximum number of groups
    group_size=2,  # maximum number of elements (points) per group
)

## Explore search

### Recommend search

Essa busca usa exemplos positivos e negativos como input. Dependendo da estratégia adotada, o vetor resultante será a diferença entre os exemplos positivos e os negativos, o que pode ajudar bastante nas buscas.


Um exemplo de uso: uma pessoa gostou de alguns filmes e não gostou de outros. Você passa para a busca os vetores dos filmes de que ela gostou e os dos filmes de que não gostou, e a busca retorna resultados melhores.

In [None]:
# Recommendation query: two positive example vectors and one negative
# example vector, combined with the AVERAGE_VECTOR strategy. At most 5
# results are returned.
# NOTE(review): this cell targets "my_collection" while the other snippets
# use the "{collection_name}" placeholder — confirm which is intended.
client.query_points(
    collection_name="my_collection",
    query=models.RecommendQuery(
        recommend=models.RecommendInput(
            positive=[
                [0.1, 0.2, 0.3, 0.4],
                [0.5, 0.6, 0.7, 0.8],
            ],
            negative=[
                [0.9, 0.1, 0.1, 0.3],
            ],
            strategy=models.RecommendStrategy.AVERAGE_VECTOR,
        )
    ),
    limit=5,
)

### Discovery Search

- Aqui temos um método interessante, que divide o espaço vetorial entre pares positivos e negativos, realiza a busca na região dos positivos e retorna os melhores resultados.

In [None]:
# One discovery request: search around `target`, with the context given as
# (positive, negative) pairs of point ids. Up to 10 results are requested.
discover_queries = [
    models.QueryRequest(
        limit=10,
        query=models.DiscoverQuery(
            discover=models.DiscoverInput(
                target=[0.2, 0.1, 0.9, 0.7],
                context=[
                    models.ContextPair(positive=positive_id, negative=negative_id)
                    for positive_id, negative_id in ((100, 718), (200, 300))
                ],
            )
        ),
    ),
]

# Send the request list as a single batch call.
client.query_batch_points(
    collection_name="{collection_name}",
    requests=discover_queries,
)


### Context search

In [None]:
# Context-only search: no target vector, just (positive, negative) pairs of
# point ids. Up to 10 results are requested.
# NOTE(review): the list is named `discover_queries` although it holds
# ContextQuery requests, and it rebinds the name the Discovery Search cell
# uses — consider renaming it to `context_queries`.
discover_queries = [
    models.QueryRequest(
        query=models.ContextQuery(
            context=[
                models.ContextPair(
                    positive=100,
                    negative=718,
                ),
                models.ContextPair(
                    positive=200,
                    negative=300,
                ),
            ],
        ),
        limit=10,
    ),
]

# Send the request list as a single batch call.
client.query_batch_points(
    collection_name="{collection_name}", requests=discover_queries
)

# Dataset

In [1]:
# Sample dataset: science-fiction books. Each row below is
# (name, description, author, year) and is expanded into a dict with exactly
# those four keys, in that order.
documents = [
    {"name": name, "description": description, "author": author, "year": year}
    for name, description, author, year in (
        (
            "The Time Machine",
            "A man travels through time and witnesses the evolution of humanity.",
            "H.G. Wells",
            1895,
        ),
        (
            "Ender's Game",
            "A young boy is trained to become a military leader in a war against an alien race.",
            "Orson Scott Card",
            1985,
        ),
        (
            "Brave New World",
            "A dystopian society where people are genetically engineered and conditioned to conform to a strict social hierarchy.",
            "Aldous Huxley",
            1932,
        ),
        (
            "The Hitchhiker's Guide to the Galaxy",
            "A comedic science fiction series following the misadventures of an unwitting human and his alien friend.",
            "Douglas Adams",
            1979,
        ),
        (
            "Dune",
            "A desert planet is the site of political intrigue and power struggles.",
            "Frank Herbert",
            1965,
        ),
        (
            "Foundation",
            "A mathematician develops a science to predict the future of humanity and works to save civilization from collapse.",
            "Isaac Asimov",
            1951,
        ),
        (
            "Snow Crash",
            "A futuristic world where the internet has evolved into a virtual reality metaverse.",
            "Neal Stephenson",
            1992,
        ),
        (
            "Neuromancer",
            "A hacker is hired to pull off a near-impossible hack and gets pulled into a web of intrigue.",
            "William Gibson",
            1984,
        ),
        (
            "The War of the Worlds",
            "A Martian invasion of Earth throws humanity into chaos.",
            "H.G. Wells",
            1898,
        ),
        (
            "The Hunger Games",
            "A dystopian society where teenagers are forced to fight to the death in a televised spectacle.",
            "Suzanne Collins",
            2008,
        ),
        (
            "The Andromeda Strain",
            "A deadly virus from outer space threatens to wipe out humanity.",
            "Michael Crichton",
            1969,
        ),
        (
            "The Left Hand of Darkness",
            "A human ambassador is sent to a planet where the inhabitants are genderless and can change gender at will.",
            "Ursula K. Le Guin",
            1969,
        ),
        (
            "The Three-Body Problem",
            "Humans encounter an alien civilization that lives in a dying system.",
            "Liu Cixin",
            2008,
        ),
    )
]


# Baixando modelo para criar embeddings

In [3]:
from sentence_transformers import SentenceTransformer

# Load the "all-MiniLM-L6-v2" sentence-embedding model; `encoder` is used by
# the later cells to turn text into vectors.
encoder = SentenceTransformer(model_name_or_path="all-MiniLM-L6-v2")

In [5]:
# Size of the vectors the encoder produces (384 in the recorded output).
encoder.get_sentence_embedding_dimension()

384

# Etapa do qdrant

## Criando conexão com o docker do qdrant

In [7]:
from qdrant_client import QdrantClient

# Client for the Qdrant instance (run via Docker, per the section heading)
# listening on localhost:6333.
qclient = QdrantClient(url="http://localhost:6333")

## Criando collection no qdrant

In [6]:
from qdrant_client import models

# Create the "testezao" collection. The vector size is read from the encoder
# so it matches the embeddings uploaded later; distance is cosine.
# The recorded run returned True.
qclient.create_collection(
    collection_name="testezao",
    vectors_config=models.VectorParams(
        size=encoder.get_sentence_embedding_dimension(),
        distance=models.Distance.COSINE
    )
)

True

## Realizando upload dos documentos

In [8]:
# Embed every book description and upload it as a point: the point id is the
# book's position in `documents`, and the whole book dict becomes the payload.
qclient.upload_points(
    collection_name="testezao",
    points=[
        models.PointStruct(
            id=position,
            payload=book,
            vector=encoder.encode(book["description"]).tolist(),
        )
        for position, book in enumerate(documents)
    ],
)

  return forward_call(*args, **kwargs)


# Realizando uma busca

In [20]:
# Embed the query text with the same encoder used for the documents and ask
# for the 3 best matches; `.points` is the list of scored results.
hits = qclient.query_points(
    collection_name="testezao",
    query=encoder.encode("alien  film").tolist(),
    limit=3
).points

# Print each hit's similarity score followed by its full payload.
for hit in hits:
    print(f"SCORE: {hit.score} -> PAYLOAD {hit.payload}")

SCORE: 0.5802816 -> PAYLOAD {'name': "The Hitchhiker's Guide to the Galaxy", 'description': 'A comedic science fiction series following the misadventures of an unwitting human and his alien friend.', 'author': 'Douglas Adams', 'year': 1979}
SCORE: 0.4930028 -> PAYLOAD {'name': 'The Three-Body Problem', 'description': 'Humans encounter an alien civilization that lives in a dying system.', 'author': 'Liu Cixin', 'year': 2008}
SCORE: 0.38336292 -> PAYLOAD {'name': 'The War of the Worlds', 'description': 'A Martian invasion of Earth throws humanity into chaos.', 'author': 'H.G. Wells', 'year': 1898}


## Busca com filtro

In [24]:
# Semantic search restricted to books whose payload "year" is >= 2000.
# The query text is embedded with the same encoder used for the documents;
# at most 3 results are requested and `.points` is the list of scored hits.
hits = qclient.query_points(
    collection_name="testezao",
    query=encoder.encode("alien  film").tolist(),
    query_filter=models.Filter(
        must=[
            models.FieldCondition(key="year", range=models.Range(gte=2000))
        ]
    ),
    limit=3
).points

# Print score, title and year for each hit.
for hit in hits:
    # Single quotes inside the double-quoted f-string: reusing the outer
    # quote character in a replacement field is a SyntaxError before
    # Python 3.12 (PEP 701). The printed text is unchanged.
    print(f"SCORE: {hit.score} -> {hit.payload['name']} YEAR: {hit.payload['year']}")

SCORE: 0.4930028 -> The Three-Body Problem YEAR: 2008
SCORE: 0.17587417 -> The Hunger Games YEAR: 2008


  return forward_call(*args, **kwargs)
