# Lesson 1 - Semantic Search

##### pip install qdrant-client

### Import the Needed Packages

In [None]:
import warnings

# Install a blanket "ignore" filter so that no warning of any category is
# shown in the cells that follow.
warnings.filterwarnings(action='ignore')

In [None]:
from qdrant_client import QdrantClient

In [None]:
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
from qdrant_client import models, QdrantClient
import pandas as pd

# import zipfile

# import os
# import time
import torch

In [None]:
from tqdm.auto import tqdm

### Check cuda and Setup the model

**Note**: "Checking cuda" refers to checking whether you have access to GPUs (faster compute). In this course we are using CPUs, so you might notice some code cells taking a little longer to run.

We are using the *all-MiniLM-L6-v2* sentence-transformers model, which maps sentences to a 384-dimensional dense vector space.

In [None]:
# Pick the compute device: use the GPU when PyTorch reports CUDA support,
# otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Using CUDA-GPU' if device == 'cuda' else 'Sorry no cuda.')

# Sentence-embedding model used to create embeddings, loaded onto the
# device selected above.
encoderModel = SentenceTransformer(
    'all-MiniLM-L6-v2',
    device=device,
)

In [None]:
# Encode a sample query to inspect the embedding the model produces.
query = 'Admin of Flumist Influenza Vaccine'
xq = encoderModel.encode(query)

# A bare `xq.shape` in the middle of a cell is evaluated and then discarded
# (by default only a cell's last expression is echoed), so print it explicitly.
print(xq.shape)

# Last expression of the cell: echoes the embedding itself.
xq

### Setup Qdrant

In [None]:
# Alternative ways to obtain a client (left disabled):
#   qdrant = QdrantClient(":memory:")          # in-memory instance, for testing and CI/CD
#   qdrant = QdrantClient(path="path/to/db")   # persists changes to disk, for fast prototyping

# Connect to an existing Qdrant instance over HTTP (the production-style setup).
qdrant = QdrantClient(url="http://localhost:6333")

In [None]:
# Look up the "HospitalCharges" collection on the server and echo what the
# client returns for it. This cell does not create the collection, so it is
# presumably expected to exist already -- confirm before running.
qdrant.get_collection(collection_name="HospitalCharges")

In [None]:
# Semantic search: embed the query text and ask Qdrant for the 30
# best-scoring points in the "HospitalCharges" collection.
hits = qdrant.search(
    collection_name="HospitalCharges",
    limit=30,
    query_vector=encoderModel.encode("Admin of Flumist Influenza Vaccine").tolist(),
)

# Print every returned payload together with its score.
for match in hits:
    print(match.payload, "score:", match.score)

In [None]:
# Inspect a single result: the entry at index 20 (the 21st hit).
# NOTE(review): this fails with an IndexError if the search returned fewer
# than 21 results.
hits[20]

In [None]:
# Check which Python type the payload of the first hit has.
type(hits[0].payload)

In [None]:
# Preview the hits as a table: one row per hit, built from its payload.
pd.DataFrame(data=[point.payload for point in hits])

In [None]:
# Keep the tabular view of the current hits (one row per payload) for later cells.
dfResults = pd.DataFrame(data=[point.payload for point in hits])

In [None]:
# Display the DataFrame of hit payloads.
dfResults

In [None]:
# Same query as before, now restricted by a payload filter: only points whose
# `payer` field satisfies the range condition (>= 2000) are eligible, and at
# most 3 results are returned.
# NOTE(review): a numeric Range only makes sense for numeric payload values.
# If `payer` holds text (e.g. an insurer name) this condition will presumably
# match nothing and a different condition or key is intended -- confirm
# against the collection's payload schema.

hits = qdrant.search(
    collection_name="HospitalCharges",
    query_vector=encoderModel.encode("Admin of Flumist Influenza Vaccine").tolist(),
    query_filter=models.Filter(
        must=[
            models.FieldCondition(
                key="payer",
                range=models.Range(
                    gte=2000
                )
            )
        ]
    ),
    limit=3
)
# Print every returned payload together with its score.
for hit in hits:
  print(hit.payload, "score:", hit.score)

In [None]:
# Echo the raw result of the most recent search.
hits

In [None]:
# Display the stored DataFrame again.
# NOTE(review): `dfResults` is only assigned before the filtered search and is
# not rebuilt afterwards, so this presumably still shows the earlier,
# unfiltered hits -- confirm that this is intended.
dfResults