In [1]:
import os
from qdrant_client.http.models import Batch
from qdrant_client.http import models
from qdrant_client import QdrantClient
import json
from sentence_transformers import SentenceTransformer

from dotenv import load_dotenv
load_dotenv("../")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
encoder = SentenceTransformer("all-MiniLM-L6-v2")

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [5]:
annotations = json.load(open("../annotations.json"))


In [3]:

vector_size = encoder.get_sentence_embedding_dimension()

def get_embedding(text: str) -> list:
    # embedding = openai.embeddings.create(
    #     input=[text],
    #     model=embedding_model,
    # ).data[0].embedding
    embedding = encoder.encode(text).tolist()
    
    return embedding


In [6]:
for i, annotation in enumerate(annotations):
    print(annotations[annotation])
    break

{'uid': '15b49e6accea40a8a257d570ab27e449', 'name': 'yugioh Shooting Star Dragon', 'viewerUrl': 'https://sketchfab.com/3d-models/15b49e6accea40a8a257d570ab27e449', 'tags': []}


In [7]:
qc = QdrantClient("localhost", port=6333)

In [15]:
collections = qc.get_collections()
collection_name = "annotations"
if collection_name not in [collection.name for collection in collections.collections]:
    first_collection = qc.recreate_collection(
        collection_name=collection_name,
        vectors_config = models.VectorParams(size=vector_size, distance=models.Distance.COSINE)
    )

qc.get_collections().collections[0].name

'annotations'

In [11]:
def annotation_to_vector(annotation: dict) -> models.Vector:
    combined_text = annotation["name"] + " " + ", ".join([a["name"] for a in annotation['tags']])
    print(f"Combined text: {combined_text}")
    return get_embedding(combined_text)

for i, annotation in enumerate(annotations):
    vector = annotation_to_vector(annotations[annotation])
    print(vector)
    break

Combined text: yugioh Shooting Star Dragon 
[-0.041724272072315216, 0.08754775673151016, 0.01650324836373329, 0.014911428093910217, -0.030000636354088783, -0.032335810363292694, 0.09685679525136948, -0.001409989781677723, 0.023756997659802437, -0.04041255638003349, -0.0436832569539547, -0.03961033746600151, 0.031631067395210266, -0.02165931463241577, 0.06020296737551689, 0.015446254052221775, 0.04170999675989151, 0.06309179961681366, 0.009340706281363964, -0.056938931345939636, -0.06276848912239075, -0.011450639925897121, 0.07055887579917908, 0.04001697897911072, -0.004094758071005344, -0.10540817677974701, 0.0074045066721737385, 0.04093986749649048, -0.029426582157611847, -0.13674752414226532, 0.0521400086581707, 0.02827228605747223, -0.06281668692827225, 0.08331379294395447, -0.09986723214387894, 0.019970472902059555, 0.014846259728074074, -0.0808321014046669, -0.050707362592220306, 0.07134369760751724, 0.03498931974172592, 0.02770926244556904, 0.0146684180945158, -0.0088903699070215

In [12]:
len(annotations)

798759

In [16]:
for id, annotation in enumerate(annotations):
    # check
    # query_filter = models.Filter(must=[models.HasIdCondition(has_id=[annotation])])
    # res = qc.scroll(
        # collection_name=collection_name,
        # scroll_filter=query_filter,
    # )
    # print(f"Res: for {annotations[annotation]} {res}")
    # if len(res[0]) > 0:
        # continue

    if annotations[annotation].keys() != {"uid", "name", "tags", "viewerUrl"}:
        continue
    qc.upsert(
        collection_name=collection_name,
        points=Batch(
            ids=[id],
            vectors=[annotation_to_vector(annotations[annotation])],
            payloads=[
                {
                    "name": annotations[annotation]["name"],
                    "uid": annotations[annotation]["uid"],
                    "tags": annotations[annotation]["tags"],
                }
            ],
        ),
    )

Combined text: yugioh Shooting Star Dragon 
Combined text: ALL 
Combined text: blockout_homework 
Combined text: My scan recfusion, 3dscan
Combined text: UNIT-1- Ramazan Demir 
Combined text: Cairn Liath, wider area, Scotland inventa, elevationapi
Combined text: Sun, 23 Feb 2020 18:38:15 
Combined text: OPC-6C2213 porch
Combined text: 青面金剛（東京都江戸川区東小岩２丁目２４番２号 善養寺（小岩不動尊）） photocatch
Combined text: Bread bread
Combined text: Kluska M1021_06_14.06.2022 pamir
Combined text: New Mace Model 
Combined text: Viking Horn Stylized horn, runes, vikings, wood, stylized, curno
Combined text: Color this Goji white, goji
Combined text: Wooden Round Shield wooden, medieval, roundshield, shield
Combined text: Besancon scaniverse
Combined text: Modern Forest home 3d, design, home
Combined text: Leftover tree leaves tree, walnut, forest, orange, garden, oak, 3d-scan, medieval, dead, park, fallen, leaf, 3d-scanning, fall, felt, downloadable, meadow, freemodel, tre, medievalfantasyassets, autumun, photoscan

KeyboardInterrupt: 

In [None]:
resume = "person"

res = qc.search(
    collection_name=collection_name,
    query_vector=get_embedding(resume),
)

res

[ScoredPoint(id=1, version=28, score=0.8069757, payload={'name': 'ALL', 'tags': [], 'uid': 'afb4a55b847448c4b1d494c6f8188869'}, vector=None, shard_key=None),
 ScoredPoint(id=42, version=69, score=0.8042121, payload={'name': 'q1', 'tags': [], 'uid': 'abc04949e88a435da5527310df7440e6'}, vector=None, shard_key=None),
 ScoredPoint(id=48, version=75, score=0.8032514, payload={'name': 'kurtentobj', 'tags': [], 'uid': '9460fb9072834d9c90b32577b2cf7741'}, vector=None, shard_key=None),
 ScoredPoint(id=44, version=71, score=0.79933935, payload={'name': 'Plant4', 'tags': [], 'uid': 'dd6458bc29344280b07575bbff1e74ed'}, vector=None, shard_key=None),
 ScoredPoint(id=60, version=87, score=0.7987017, payload={'name': 'Priest', 'tags': [{'name': 'priest', 'slug': 'priest', 'uri': 'https://api.sketchfab.com/v3/tags/priest'}, {'name': 'head', 'slug': 'head', 'uri': 'https://api.sketchfab.com/v3/tags/head'}, {'name': 'character-model', 'slug': 'character-model', 'uri': 'https://api.sketchfab.com/v3/tags/c