# Init

## Downloads

In [3]:

# !pip install qdrant-client
# !pip install sentence-transformers
# !pip install wasabi

## Imports

In [4]:
import os
import pickle

from wasabi import Printer

from qdrant_client import QdrantClient, models
from sentence_transformers import SentenceTransformer

In [5]:
# Shared console printer; synchronize_db reports progress and failures through it.
logs = Printer()

## Variables

In [6]:
# Qdrant API key. Read from the QDRANT_API_KEY environment variable so a real
# token never has to be saved inside the notebook; the "xxxx" placeholder is
# kept as the fallback when the variable is not set.
qdrant_token = os.environ.get("QDRANT_API_KEY", "xxxx")

# Name of the Qdrant collection the pages are written to.
collection_name = "genshin_lite"
# Sentence-transformers checkpoint used to embed the page content.
model_name = "sentence-transformers/all-MiniLM-L6-v2"

In [7]:

# Client for the Qdrant instance that stores the collection. The endpoint is
# taken from the QDRANT_URL environment variable when it is set; the "xxxx"
# placeholder is kept as the fallback.
qdrant_client = QdrantClient(
    url=os.environ.get("QDRANT_URL", "xxxx"),
    api_key=qdrant_token,
)

In [8]:
# Load the embedding model named by `model_name`.
# NOTE(review): create_collection declares 384-dimensional vectors, so this
# model's output size presumably has to be 384 — confirm when changing models.
model = SentenceTransformer(model_name)

In [9]:
# Load the page database from disk; it is later sliced and iterated page by
# page by synchronize_db, so it is presumably a list of page dicts.
# NOTE(review): pickle.load can execute arbitrary code while deserializing —
# only load this file if it comes from a trusted source.
with open('../data/genshin_database.pickle', 'rb') as handle:
    genshin_db = pickle.load(handle)

## Helper functions

In [10]:
def create_collection(collection_name, vector_size=384):
    """(Re)create a Qdrant collection with one named vector, ``"page_content"``.

    The vector is compared with cosine distance.

    NOTE(review): this goes through ``recreate_collection``, which presumably
    drops an existing collection of the same name first, and which newer
    qdrant-client releases reportedly flag as deprecated — confirm against
    the installed version before running on live data.

    Args:
        collection_name: Name of the collection to (re)create.
        vector_size: Dimensionality of the stored vectors. It has to match
            the output size of the embedding model; the default of 384 keeps
            the previous hard-coded value.

    Returns:
        None.
    """
    qdrant_client.recreate_collection(
        collection_name=collection_name,
        vectors_config={
            "page_content": models.VectorParams(
                size=vector_size,
                distance=models.Distance.COSINE,
            ),
        },
    )

In [11]:
def insert_collection(collection_name, payload, vector):
    """Upsert a single point into ``collection_name``.

    Args:
        collection_name: Collection that receives the point.
        payload: Dict stored alongside the vector; its ``"id"`` entry is
            also used as the point id.
        vector: Iterable of numbers; every component is converted to a
            plain ``float`` and stored under the ``"page_content"`` name.

    Returns:
        None.
    """
    point_id = payload["id"]
    # Convert each component to a built-in float before building the point.
    components = list(map(float, vector))
    point = models.PointStruct(
        id=point_id,
        payload=payload,
        vector={"page_content": components},
    )
    qdrant_client.upsert(collection_name=collection_name, points=[point])

In [12]:
def keep_relevant_content(page):
    """Return the fields of ``page`` that get embedded, or ``None`` to skip it.

    Only the ``"data"``, ``"title"`` and ``"class"`` entries are kept, in
    that order. A page is not synchronized when any of them is missing,
    ``None`` or empty.

    Args:
        page: Dict describing one page.

    Returns:
        A new dict holding the three kept entries, or ``None`` when the page
        should be skipped.
    """
    keep = ("data", "title", "class")
    relevant_content = {}
    for key in keep:
        value = page.get(key)
        # A missing or None field is treated like an empty one, so the page
        # is skipped instead of raising KeyError/TypeError in the caller.
        if value is None or len(value) == 0:
            return None
        relevant_content[key] = value
    return relevant_content

In [13]:
def generate_embeddings(model, text):
    """Encode ``text`` with ``model`` and return whatever ``model.encode`` yields.

    Args:
        model: Object exposing an ``encode(text)`` method.
        text: Input handed to ``model.encode`` unchanged.
    """
    return model.encode(text)

In [24]:
def _synchronize_page(model, page, collection_name):
    """Embed one page and upsert it; pages without relevant content are skipped."""
    # Zero-padded to a fixed width of 32 characters.
    # NOTE(review): presumably so Qdrant can read the id as a 32-hex-digit
    # UUID — confirm against the id values in the database.
    point_id = str(page["id"]).zfill(32)
    content = keep_relevant_content(page)
    if not content:
        logs.fail("failed to upsert {} into collection".format(page["title"]))
        return

    logs.info("synchronizing {}".format(page["title"]))
    text = str(content)
    payload = {
        "id": point_id,
        "title": page["title"],
        "class": page["class"],
        "content": text,
    }
    embeddings = generate_embeddings(model, text)
    insert_collection(collection_name, payload, embeddings)
    logs.good("successfully upserted {} into collection".format(page["title"]))


def synchronize_db(model, database, collection_name, max_attempts=3):
    """Embed every page of ``database`` and upsert it into ``collection_name``.

    Each page is tried up to ``max_attempts`` times. A page that keeps
    failing is reported and skipped so the remaining pages are still
    processed. The previous implementation restarted recursively from the
    failing page, which never terminated cleanly when the error was
    deterministic.

    Args:
        model: Embedding model passed on to ``generate_embeddings``.
        database: Iterable of page dicts with ``"id"``, ``"title"``,
            ``"class"`` and ``"data"`` entries.
        collection_name: Target Qdrant collection.
        max_attempts: Upper bound on tries per page; values below 1 are
            treated as 1.

    Returns:
        None.
    """
    attempts_allowed = max(1, max_attempts)
    skipped = 0
    for position, page in enumerate(database):
        for attempt in range(1, attempts_allowed + 1):
            try:
                _synchronize_page(model, page, collection_name)
            except Exception as error:
                # Best-effort sync: report the error and retry or move on
                # rather than aborting the whole run.
                logs.fail(
                    "upserting page {} failed (attempt {}/{}): {!r}".format(
                        position, attempt, attempts_allowed, error
                    )
                )
            else:
                break
        else:
            # Every attempt raised: give up on this page only.
            skipped += 1
    if skipped:
        logs.fail("{} pages could not be upserted".format(skipped))

In [25]:
# Create the target collection before any points are upserted.
# NOTE(review): the helper calls qdrant_client.recreate_collection, which
# presumably drops an existing collection of the same name — confirm before
# re-running this cell against live data.
create_collection(collection_name)

In [None]:
# Embed and upsert every page. The `[:]` slice passes a shallow copy of the
# whole list; presumably kept so a start index can be edited in to resume a
# partial run — TODO confirm.
synchronize_db(model, genshin_db[:], collection_name)

In [None]:
# Display the last record of the database as this cell's output.
genshin_db[-1]