# Init

## Downloads

In [138]:

# !pip install qdrant-client
# !pip install sentence-transformers
# !pip install wasabi

## Imports

In [139]:
import os
import pickle

from wasabi import Printer

from qdrant_client import QdrantClient, models
from sentence_transformers import SentenceTransformer

In [140]:
# Shared wasabi Printer; synchronize_db reports its progress through the
# info / good / fail methods of this object.
logs = Printer()

## Variables

In [141]:
# Qdrant API key. Read it from the QDRANT_API_KEY environment variable so the
# real secret never has to be typed into (and saved with) the notebook; the
# "xxxx" placeholder is only used when the variable is not set.
qdrant_token = os.environ.get("QDRANT_API_KEY", "xxxx")

# NOTE(review): the cells that create and fill the collection pass the literal
# "genshindb_test", not this variable — confirm which name is the intended one.
collection_name = "genshin_test"
# Identifier handed to SentenceTransformer when the embedding model is loaded.
model_name = "intfloat/e5-large-v2"

In [142]:

# Client used by the helper functions below; authenticates with qdrant_token.
# The endpoint URL is a placeholder and must be replaced with the real one.
qdrant_client = QdrantClient(url="xxxx", api_key=qdrant_token)

In [143]:
# Load the sentence-transformers model named by model_name.
# NOTE(review): create_collection sizes its vectors at 1024 — presumably the
# output dimension of this model; confirm if model_name is ever changed.
model = SentenceTransformer(model_name)

In [144]:
# Load the page database from disk. Later cells slice and index it and each
# entry is read like a dict (title / id / class / data keys).
# SECURITY: pickle.load can execute arbitrary code embedded in the file —
# only load pickles that were produced locally or come from a trusted source.
with open('../data/genshin_database.pickle', 'rb') as handle:
    genshin_db = pickle.load(handle)

## Helper functions

In [145]:
def create_collection(collection_name, vector_size=1024):
    """(Re)create a Qdrant collection holding one named vector, ``page_content``.

    Uses the module-level ``qdrant_client`` and cosine distance.

    Args:
        collection_name: Name of the collection to (re)create.
        vector_size: Dimensionality of the ``page_content`` vectors. Defaults
            to 1024, the value that used to be hard-coded here. Presumably it
            has to equal the length of the embeddings upserted later —
            confirm against the embedding model in use.
    """
    # NOTE(review): recreate_collection presumably drops an existing collection
    # of the same name before creating it — check the qdrant-client docs before
    # running this against a collection that already holds data.
    page_content_params = models.VectorParams(
        size=vector_size,
        distance=models.Distance.COSINE,
    )
    qdrant_client.recreate_collection(
        collection_name=collection_name,
        vectors_config={"page_content": page_content_params},
    )

In [146]:
def insert_collection(collection_name, payload, vector):
    """Upsert a single point into ``collection_name``.

    Args:
        collection_name: Target collection.
        payload: Dict stored as the point payload; its ``"id"`` entry is also
            used as the point id.
        vector: Iterable of numbers; every element is converted with
            ``float`` and stored under the ``page_content`` vector name.
    """
    point_id = payload["id"]
    named_vector = {"page_content": list(map(float, vector))}
    point = models.PointStruct(id=point_id, payload=payload, vector=named_vector)
    qdrant_client.upsert(collection_name=collection_name, points=[point])

In [147]:
def keep_relevant_content(page):
    """Return the ``data``, ``title`` and ``class`` entries of ``page``.

    Args:
        page: Dict-like page record.

    Returns:
        A new dict with exactly the keys ``"data"``, ``"title"`` and
        ``"class"`` (in that order), or ``None`` when any of them is missing,
        ``None`` or empty — such a page should not be synchronized.
    """
    relevant_content = {}
    for key in ("data", "title", "class"):
        value = page.get(key)
        # A missing key or a None value used to raise (KeyError / TypeError);
        # treat both like an empty field so the caller simply skips the page.
        if value is None or len(value) == 0:
            return None
        relevant_content[key] = value
    return relevant_content

In [148]:
def generate_embeddings(model, text):
    """Encode ``text`` with ``model`` and return whatever ``model.encode`` yields."""
    return model.encode(text)

In [172]:
def _synchronize_page(model, page, collection_name):
    """Embed one page and upsert it; pages without relevant content are skipped."""
    content = keep_relevant_content(page)
    if not content:
        logs.fail("failed to upsert {} into collection".format(page["title"]))
        return

    logs.info("synchronizing {}".format(page["title"]))
    text = str(content)
    payload = {
        # Zero-padded to 32 characters — presumably so the id parses as a
        # UUID-style point id; confirm against the Qdrant id rules.
        "id": str(page["id"]).zfill(32),
        "title": page["title"],
        "class": page["class"],
        "content": text,
    }
    embeddings = generate_embeddings(model, text)
    insert_collection(collection_name, payload, embeddings)
    logs.good("successfully upserted {} into collection".format(page["title"]))


def synchronize_db(model, database, collection_name, max_retries=3):
    """Embed every page of ``database`` and upsert it into ``collection_name``.

    The previous implementation restarted itself recursively on any error,
    beginning again at the page that had just failed; a page that always
    fails therefore recursed until ``RecursionError`` and no later page was
    ever reached. This version retries each page a bounded number of times,
    logs the error, and then moves on to the next page.

    Args:
        model: Embedding model passed through to ``generate_embeddings``.
        database: Iterable of page dicts with ``id``, ``title``, ``class``
            and ``data`` entries.
        collection_name: Target collection.
        max_retries: Maximum attempts per page (values below 1 count as 1).

    Returns:
        None. Progress and failures are reported through ``logs``.
    """
    attempts = max(1, int(max_retries))
    failed_pages = 0

    for position, page in enumerate(database):
        for attempt in range(1, attempts + 1):
            try:
                _synchronize_page(model, page, collection_name)
                break
            except Exception as error:
                # Keep the error visible instead of silently swallowing it.
                logs.fail(
                    "upserting failed at page {} (attempt {}/{}): {!r}".format(
                        position, attempt, attempts, error
                    )
                )
        else:
            # Every attempt raised: give up on this page only, not the run.
            failed_pages += 1
            logs.fail("skipping page {} after {} failed attempts".format(position, attempts))

    if failed_pages:
        logs.fail("{} page(s) could not be upserted".format(failed_pages))

In [150]:
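# One-time setup: uncomment and run once to create the "genshindb_test"
# collection before the first sync. create_collection goes through
# recreate_collection, so re-running it on a populated collection will
# presumably wipe it — keep this line commented out afterwards.
# create_collection("genshindb_test")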

In [173]:
# Resume the sync at page index 11557; the slice skips every earlier page
# (presumably uploaded by a previous run — confirm before re-running).
# NOTE(review): the target is the literal "genshindb_test", not the
# `collection_name` variable ("genshin_test") from the Variables section.
synchronize_db(model, genshin_db[11557:], "genshindb_test")

[38;5;4mℹ synchronizing Ararycan's Flower[0m
[38;5;2m✔ successfully upserted Ararycan's Flower into collection[0m
[38;5;4mℹ synchronizing Arapandu's Flower[0m
[38;5;2m✔ successfully upserted Arapandu's Flower into collection[0m
[38;5;4mℹ synchronizing Arakunti's Flower[0m
[38;5;2m✔ successfully upserted Arakunti's Flower into collection[0m
[38;5;4mℹ synchronizing Aranakula's Flower[0m
[38;5;2m✔ successfully upserted Aranakula's Flower into collection[0m
[38;5;4mℹ synchronizing Arakara's Flower[0m
[38;5;2m✔ successfully upserted Arakara's Flower into collection[0m
[38;5;4mℹ synchronizing Nameless Flower[0m
[38;5;2m✔ successfully upserted Nameless Flower into collection[0m
[38;5;4mℹ synchronizing Voice of Akasha[0m
[38;5;2m✔ successfully upserted Voice of Akasha into collection[0m
[38;5;4mℹ synchronizing Jahangir[0m
[38;5;2m✔ successfully upserted Jahangir into collection[0m
[38;5;4mℹ synchronizing The Merchant and the Gate of Knowledge[0m
[38;5;2m✔ suc

In [174]:
# Display the last record to inspect the page schema (title / id / class / data).
genshin_db[-1]

{'title': 'Twitch Drops/2024-01-31',
 'id': '1502329',
 'class': 'Wish',
 'data': {'name': 'Twitch Drops 2024-01-31',
  'type': 'Web',
  'time_known': 'yes',
  'time_start': '2024-01-31 11:00:00',
  'time_start_offset': 'GMT+8',
  'time_end': '2024-02-20 23:59:59',
  'time_end_offset': 'GMT+8',
  'link': 'https://act.hoyoverse.com/ys/event/e20220511twitchdrops/index.html'}}