### Embedding= Sentence Transformer

In [3]:
from qdrant_client import models, QdrantClient

In [5]:
from sentence_transformers import SentenceTransformer



In [6]:
encoder = SentenceTransformer("all-MiniLM-L6-v2")

### 2. Add the dataset  
all-MiniLM-L6-v2 will encode the data you provide. Here you will list all the science fiction books in your library. Each book has metadata, a name, author, publication year and a short description.

In [7]:
documents = [
    {
        "name": "The Time Machine",
        "description": "A man travels through time and witnesses the evolution of humanity.",
        "author": "H.G. Wells",
        "year": 1895,
    },
    {
        "name": "Ender's Game",
        "description": "A young boy is trained to become a military leader in a war against an alien race.",
        "author": "Orson Scott Card",
        "year": 1985,
    },
    {
        "name": "Brave New World",
        "description": "A dystopian society where people are genetically engineered and conditioned to conform to a strict social hierarchy.",
        "author": "Aldous Huxley",
        "year": 1932,
    },
    {
        "name": "The Hitchhiker's Guide to the Galaxy",
        "description": "A comedic science fiction series following the misadventures of an unwitting human and his alien friend.",
        "author": "Douglas Adams",
        "year": 1979,
    },
    {
        "name": "Dune",
        "description": "A desert planet is the site of political intrigue and power struggles.",
        "author": "Frank Herbert",
        "year": 1965,
    },
    {
        "name": "Foundation",
        "description": "A mathematician develops a science to predict the future of humanity and works to save civilization from collapse.",
        "author": "Isaac Asimov",
        "year": 1951,
    },
    {
        "name": "Snow Crash",
        "description": "A futuristic world where the internet has evolved into a virtual reality metaverse.",
        "author": "Neal Stephenson",
        "year": 1992,
    },
    {
        "name": "Neuromancer",
        "description": "A hacker is hired to pull off a near-impossible hack and gets pulled into a web of intrigue.",
        "author": "William Gibson",
        "year": 1984,
    },
    {
        "name": "The War of the Worlds",
        "description": "A Martian invasion of Earth throws humanity into chaos.",
        "author": "H.G. Wells",
        "year": 1898,
    },
    {
        "name": "The Hunger Games",
        "description": "A dystopian society where teenagers are forced to fight to the death in a televised spectacle.",
        "author": "Suzanne Collins",
        "year": 2008,
    },
    {
        "name": "The Andromeda Strain",
        "description": "A deadly virus from outer space threatens to wipe out humanity.",
        "author": "Michael Crichton",
        "year": 1969,
    },
    {
        "name": "The Left Hand of Darkness",
        "description": "A human ambassador is sent to a planet where the inhabitants are genderless and can change gender at will.",
        "author": "Ursula K. Le Guin",
        "year": 1969,
    },
    {
        "name": "The Three-Body Problem",
        "description": "Humans encounter an alien civilization that lives in a dying system.",
        "author": "Liu Cixin",
        "year": 2008,
    },
]

### 3. Define storage location  
You need to tell Qdrant where to store embeddings. This is a basic demo, so your local computer will use its memory as temporary storage.

In [8]:
client = QdrantClient(":memory:")

### 4. Create a collection  
All data in Qdrant is organized by collections. In this case, you are storing books, so we are calling it my_books.

In [9]:
client.recreate_collection(
    collection_name="my_books",
    vectors_config=models.VectorParams(
        size=encoder.get_sentence_embedding_dimension(),  # Vector size is defined by used model
        distance=models.Distance.COSINE,
    ),
)

  client.recreate_collection(


True

Use recreate_collection if you are experimenting and running the script several times. This function will first try to remove an existing collection with the same name.  

The vector_size parameter defines the size of the vectors for a specific collection. If their size is different, it is impossible to calculate the distance between them. 384 is the encoder output dimensionality. You can also use model.get_sentence_embedding_dimension() to get the dimensionality of the model you are using.  

The distance parameter lets you specify the function used to measure the distance between two points.

### 5. Upload data to collection  
Tell the database to upload documents to the my_books collection. This will give each record an id and a payload. The payload is just the metadata from the dataset.

In [15]:
points=[
    models.PointStruct(
        id=idx, 
        vector=encoder.encode(doc["description"]).tolist(), 
        payload=doc
    )
    for idx, doc in enumerate(documents)
]


In [17]:
points

[PointStruct(id=0, vector=[-0.03813491389155388, 0.04517792537808418, -0.022021325305104256, 0.05923553928732872, 0.006653925869613886, 0.018390055745840073, 0.06810275465250015, -0.04166232794523239, 0.04437771067023277, -0.02525182068347931, -0.009794242680072784, -0.022916516289114952, -0.11903539299964905, 0.031318988651037216, -0.0575328953564167, -0.008934118784964085, -0.10448415577411652, 0.007782080676406622, 0.05709608644247055, -0.05635252594947815, -0.005897988099604845, 0.02674950286746025, -0.04767240211367607, -1.942548578881542e-06, -0.03601926565170288, 0.0012949783122166991, 0.06238839402794838, 0.0017686084611341357, 0.05468188226222992, 0.021232403814792633, 0.07367483526468277, -0.012031633406877518, -0.03202754259109497, -0.03724587336182594, -0.0459381602704525, 0.007532370276749134, 0.0013578893849626184, 0.0012013149680569768, 0.04790322482585907, -0.001352161867544055, 0.06363943219184875, -0.03758495673537254, -0.011651752516627312, 0.05344289913773537, 0.030

In [18]:
client.upload_points(
    collection_name="my_books",
    points=[
        models.PointStruct(
            id=idx, 
            vector=encoder.encode(doc["description"]).tolist(), 
            payload=doc
        )
        for idx, doc in enumerate(documents)
    ],
)

In [14]:
#encoder.encode("my name is prakash").tolist()

### 6. Ask the engine a question  
Now that the data is stored in Qdrant, you can ask it questions and receive semantically relevant results.

In [19]:
hits = client.search(
    collection_name="my_books",
    query_vector=encoder.encode("alien invasion").tolist(),
    limit=3,
)
for hit in hits:
    print(hit.payload, "score:", hit.score)

{'name': 'The War of the Worlds', 'description': 'A Martian invasion of Earth throws humanity into chaos.', 'author': 'H.G. Wells', 'year': 1898} score: 0.5700934210167876
{'name': "The Hitchhiker's Guide to the Galaxy", 'description': 'A comedic science fiction series following the misadventures of an unwitting human and his alien friend.', 'author': 'Douglas Adams', 'year': 1979} score: 0.504046819544025
{'name': 'The Three-Body Problem', 'description': 'Humans encounter an alien civilization that lives in a dying system.', 'author': 'Liu Cixin', 'year': 2008} score: 0.4590294080750947


### Narrow down the query  
How about the most recent book from the early 2000s?

In [20]:
hits = client.search(
    collection_name="my_books",
    query_vector=encoder.encode("alien invasion").tolist(),
    query_filter=models.Filter(
        must=[models.FieldCondition(key="year", range=models.Range(gte=2000))]
    ),
    limit=1,
)
for hit in hits:
    print(hit.payload, "score:", hit.score)

{'name': 'The Three-Body Problem', 'description': 'Humans encounter an alien civilization that lives in a dying system.', 'author': 'Liu Cixin', 'year': 2008} score: 0.4590294080750947
