In [None]:
import pandas as pd
import numpy as np
import os

## Load the short-article dataset, keep only rows with non-empty text, and
## take a 25% random sample of them.
## The fixed random_state makes the sample -- and therefore the set of articles
## that later cells embed and upsert -- identical on every Restart & Run All.
df = pd.read_parquet("short_articles.parquet")
df = df[df["text"].str.len() > 0].sample(frac=0.25, random_state=42)
## reset_index() renumbers the rows 0..n-1 and keeps the previous index as a column
df = df.reset_index()

In [None]:
## collect our ids for each article
ids = df["article_id"].tolist()
## collect the properties that we will attach to each vector:
## one dict per article, in the same row order as `ids`.
## to_dict("records") builds the dicts without a Python-level row-wise apply
## and returns an empty list when the frame has no rows.
property_columns = ["url", "title", "title_len", "text", "text_len"]
properties = df[property_columns].to_dict(orient="records")


In [None]:
from qwak_inference import RealTimeClient

## number of articles sent to the model in a single predict call
BATCH_SIZE = 250

## one request record per article, in the same row order as df
data = [{"input": text} for text in df["text"]]

client = RealTimeClient(model_id="sentence_transformer")

## vectorize the articles batch by batch; slicing past the end of a list is
## safe in Python, so the final (possibly shorter) batch needs no special case
vectors = []
for start in range(0, len(data), BATCH_SIZE):
    batch_response = client.predict(data[start:start + BATCH_SIZE])
    vectors.extend(record["output"] for record in batch_response)

## every article must have exactly one vector, otherwise the parallel
## ids / vectors / properties lists sent to the vector store would be misaligned
if len(vectors) != len(data):
    raise ValueError(
        f"expected {len(data)} vectors from the model, got {len(vectors)}"
    )


In [None]:
from qwak.vector_store import VectorStoreClient

## Open the vector store and look up the collection that receives the articles
## NOTE(review): the search cell further down queries "wikipedia-demo", not
## this collection -- confirm which name is intended
client = VectorStoreClient()
collection = client.get_collection_by_name("wikipedia-article-text-vectors")

## The three lists are parallel, one entry per article:
##   ids        - the article ids
##   vectors    - the vector values retrieved from the model prediction
##   properties - the dicts of article properties
upsert_payload = {"ids": ids, "vectors": vectors, "properties": properties}
collection.upsert(**upsert_payload)

In [None]:
from qwak.vector_store import VectorStoreClient
from qwak_inference import RealTimeClient

## Vectorize the query text with the "sentence_transformer" model; predict
## takes a list of records, so the single query is wrapped in a list
inference_client = RealTimeClient(model_id="sentence_transformer")
vector = inference_client.predict([{"input": "Ducks"}])

## Open the vector store and fetch the collection to query
## NOTE(review): the upsert cell writes to "wikipedia-article-text-vectors",
## while this cell reads "wikipedia-demo" -- confirm which name is intended
vector_client = VectorStoreClient()
collection = vector_client.get_collection_by_name("wikipedia-demo")

## Search with the 'output' of the first (only) prediction as the query vector,
## asking for the top 3 results with the listed properties and their distance
search_options = dict(
    top_results=3,
    output_properties=["title", "title_len", "url"],
    include_distance=True,
    include_vector=False,
)
search_results = collection.search(vector=vector[0]['output'], **search_options)

In [None]:
## Show each result's properties and its distance; a plain loop is used instead
## of a list comprehension so the cell does not build and echo a list of None
for result in search_results:
    print(result.properties, result.distance)