In [20]:
from typing import List
from decouple import config
from langchain_nomic.embeddings import NomicEmbeddings
from upstash_vector import Index, Vector

In [4]:
NOMIC_API_KEY = config('NOMIC_API_KEY')
EMBED_CONDIG = {
    "nomic_api_key": NOMIC_API_KEY, 
    "model": 'nomic-embed-text-v1.5',
    'dimensionality': 768
}

In [18]:
UPSTASH_VECTOR_REST_URL = config("UPSTASH_VECTOR_REST_URL")
UPSTASH_VECTOR_REST_TOKEN = config("UPSTASH_VECTOR_REST_TOKEN")
INDEX_CONFIG = {
    "url": UPSTASH_VECTOR_REST_URL,
    "token": UPSTASH_VECTOR_REST_TOKEN
}

In [6]:
embed_model = NomicEmbeddings(**EMBED_CONDIG)

In [9]:
def get_embeddings(text: str) -> List[float]:
    text = text.replace('\n', ' ')
    return embed_model.embed_query(text)

In [10]:
documents = [
    "Кот прыгнул через собаку",
    "Собака прыгнула через кота",
    "Кто ты, путник?"
]

In [11]:
embeddings = [get_embeddings(x) for x in documents]

In [14]:
dataset = {}
for i, embedding in enumerate(embeddings):
    dataset[i] = embedding

In [19]:
index = Index(**INDEX_CONFIG)

In [None]:
vectors = []
for id, emb in dataset.items():
    print(id)
    vectors.append(Vector(id=id, vector=emb))

0
1
2


In [23]:
index.upsert(vectors=vectors)

'Success'

In [24]:
query_str = "Сколько зайцев было у Мазая?"
query_embedding = get_embeddings(query_str)

In [25]:
index.query(vector=query_embedding,
            top_k=3)

[QueryResult(id='0', score=0.8783984, vector=None, metadata=None, data=None),
 QueryResult(id='1', score=0.8731688, vector=None, metadata=None, data=None),
 QueryResult(id='2', score=0.82562757, vector=None, metadata=None, data=None)]