In [65]:
import pandas as pd

# Read the rank requirements table and replace missing values with 0
# (non-inplace, so re-running the cell always starts from the file contents).
df = pd.read_csv('../all_ranks.csv').fillna(0)

# One dict per row; these records are reused later as Qdrant payloads.
data = df.to_dict(orient='records')
df

Unnamed: 0,RankId,Rank Name,RequirementId,Requirement Name,Requirement Short,Requirement Number,Required
0,1,Scout,2007,"Repeat from memory the Scout Oath, Scout Law, ...",Scout Oath & Law,1a,True
1,1,Scout,2009,Explain what Scout spirits is. Describe somewa...,Scout Spirit,1b,True
2,1,Scout,2010,"Demonstrate the Scout sign, salute, and handsh...",Salute & Sign,1c,True
3,1,Scout,2011,Describe the First Class Scout badge and tell ...,First Class Badge,1d,True
4,1,Scout,2012,Repeat from memory the Outdoor Code. List the ...,Outdoor Code,1e,True
...,...,...,...,...,...,...,...
138,7,Eagle Scout,1510,Earn a total of 21 merit badges (10 more than ...,Earn 21 merit badges,3,True
139,7,Eagle Scout,1511,"While a Life Scout, serve actively in your tro...",Position of responsibility,4,True
140,7,Eagle Scout,1512,"While a Life Scout, plan, develop, and give le...",Eagle project,5,True
141,7,Eagle Scout,1513,"While a Life Scout, participate in a Scoutmast...",Scoutmaster conference,6,True


In [66]:
from qdrant_client import QdrantClient, models
from sentence_transformers import SentenceTransformer

# Qdrant client opened with the ":memory:" location.
qdrant = QdrantClient(location=":memory:")

# Sentence-embedding model used below to vectorise both the requirement
# texts and the search query.
encoder = SentenceTransformer(model_name_or_path='all-mpnet-base-v2')

def create_collection(collection_name):
    """Create a Qdrant collection sized for the sentence encoder's embeddings.

    The vector size is taken from ``encoder.get_sentence_embedding_dimension()``
    and similarity is measured with cosine distance.

    Args:
        collection_name: Name of the collection to create.

    Returns:
        Whatever ``qdrant.create_collection`` returns.
    """
    embedding_config = models.VectorParams(
        size=encoder.get_sentence_embedding_dimension(),
        distance=models.Distance.COSINE,
    )
    # Honour the caller-supplied name; it used to be ignored in favour of a
    # hard-coded "all_ranks".
    return qdrant.create_collection(collection_name, vectors_config=embedding_config)

# Start from a clean slate: drop any existing "all_ranks" collection, then
# (re)create it so re-running this cell never fails on a leftover collection.
if qdrant.collection_exists("all_ranks"):
    qdrant.delete_collection("all_ranks")
collection = create_collection("all_ranks")


In [67]:
# Build one point per requirement record: the id is the record's position in
# `data`, the vector is the embedding of its "Requirement Name" text, and the
# full record is stored as the payload.
requirement_points = []
for point_id, record in enumerate(data):
    embedding = encoder.encode(record["Requirement Name"]).tolist()
    requirement_points.append(
        models.PointStruct(id=point_id, vector=embedding, payload=record)
    )

qdrant.upload_points(collection_name="all_ranks", points=requirement_points)

In [68]:
# Embed the query text with the same encoder used for the stored points,
# then fetch the five closest requirements.
query_embedding = encoder.encode("fire").tolist()
results = qdrant.search(
    collection_name="all_ranks",
    query_vector=query_embedding,
    limit=5,
)

results


[ScoredPoint(id=51, version=0, score=0.5173960705922973, payload={'RankId': 3, 'Rank Name': 'Second Class', 'RequirementId': 2142, 'Requirement Name': 'Explain when it is appropriate to use a fire for cooking or other purposes and when it would not be appropriate to do so.', 'Requirement Short': 'Explain cooking fires', 'Requirement Number': '2a', 'Required': True}, vector=None, shard_key=None),
 ScoredPoint(id=53, version=0, score=0.47870928176775585, payload={'RankId': 3, 'Rank Name': 'Second Class', 'RequirementId': 2144, 'Requirement Name': 'Using a minimum-impact method at an approved outdoor location\\n\\nand time, use the tinder, kindling, and fuel wood from Second Class\\n\\nrequirement 2b to demonstrate how to build a fire. Unless prohibited by\\n\\nlocal fire restrictions, light the fire. After allowing the flames to burn safely\\n\\nfor at least two minutes, safely extinguish the flames with minimal impact\\n\\nto the fire site. Properly dispose of the ashes and any charred 