In [1]:
from src.source import (
    start_connection,
    create_datablock,
    close_connection,
    DatabaseOperations,
    Utilities,
    VectorSearchAlgorithms,
)
from llama_cpp import Llama
import numpy as np

In [2]:
model_q4_k_m: Llama = Llama(
    "../models/nomic-embed-large-v1.5-Q4_K_M.gguf",
    embedding=True,
    verbose=False,
    n_threads_batch=700,
    use_mmap=True,
    use_mlock=True,
)

In [3]:
database_name = "example1.db"

### start the database connection

In [4]:
conn = start_connection(database_name=database_name)

### float vectors

In [5]:
datablock_name = "e1"

In [6]:
create_datablock(datablock_name=datablock_name,database_connection=conn)

In [7]:
db_ops1 = DatabaseOperations(
    database_connection=conn, datablock_reference=datablock_name
)

In [8]:
texts = [
    "A quantum computer is a computer that exploits quantum mechanical phenomena",
    "The basic unit of information in quantum computing, the qubit (or 'quantum bit'), serves the same function as the bit in classical computing.",
    "The Amazon rainforest, also called Amazon jungle or Amazonia,",
    "t was thought that the Amazon rainforest was never more than sparsely populated",
    "The first European to travel the length of the Amazon River was Francisco de Orellana ",
    "The modern Olympic Games or Olympics (French: Jeux olympiques)[a][1] are the leading international sporting",
    "Their creation was inspired by the ancient Olympic Games, held in Olympia, Greece from the 8th century BC to the 4th century AD",
    "The evolution of the Olympic Movement during the 20th and 21st centuries has resulted in numerous changes to the Olympic Games",
    "the Amazon River Basin is the world largest drainage system",
    "The Amazon river contains more than twice as many types of fish than any other river",
]

In [9]:
embeddings = np.array(model_q4_k_m.embed(texts))

In [10]:
embeddings.shape

(10, 768)

In [11]:
db_ops1.add_many(texts=texts, vectors=embeddings)

In [12]:
query = "amazon river and rainforest."
query_embedding = np.array(model_q4_k_m.embed([query]))

result = db_ops1.search(
    vector=query_embedding,
    vectorsearch_algo=VectorSearchAlgorithms.cosine_similarity,
    top_k=4,
)




In [13]:
result

[(('63c48c86-e63f-49c3-95ac-358ccca7ae03',
   'The Amazon rainforest, also called Amazon jungle or Amazonia,',
   array([[ 7.12593317e-01,  1.42519617e+00, -4.23015499e+00,
            6.54933453e-02,  8.75553727e-01,  8.35165977e-01,
           -8.21905434e-01, -6.24127328e-01,  9.38241780e-02,
           -1.57520786e-01,  4.84249294e-01, -9.30004299e-01,
            2.16455674e+00,  6.98759913e-01,  2.50059694e-01,
           -1.28240263e+00, -6.32109344e-01, -1.45177603e+00,
            5.34574747e-01,  5.91864169e-01, -1.33972514e+00,
           -5.97215772e-01,  3.73601377e-01,  3.10580552e-01,
            7.22013116e-01,  3.71140033e-01, -2.27068633e-01,
            8.94917667e-01,  7.83902764e-01, -3.74325424e-01,
            3.83838207e-01,  8.30334306e-01,  7.90934324e-01,
            3.44120532e-01,  3.77007760e-03, -5.03548503e-01,
            1.40089238e+00,  9.03489292e-01,  1.54557490e+00,
           -3.24648991e-03,  8.77489984e-01, -7.27499783e-01,
           -4.2953953

### binary vectors

In [14]:

datablock_name = "e2"

In [15]:
create_datablock(database_connection=conn,datablock_name=datablock_name)

In [16]:
db_ops2 = DatabaseOperations(conn, datablock_name)

In [17]:
binary_embedddings = Utilities.convert_float_to_binary(
    np.array(model_q4_k_m.embed(texts))
)

In [18]:
binary_embedddings.shape,binary_embedddings[0][:10]

((10, 768), array([1, 1, 0, 0, 1, 0, 1, 0, 1, 1], dtype=uint8))

In [19]:
db_ops2.add_many(texts=texts,vectors=binary_embedddings)

In [20]:
query = "amazon river and rainforest."
binary_query_embedding = Utilities.convert_float_to_binary(
    np.array(model_q4_k_m.embed([query]))
)

result = db_ops2.search(
    vector=binary_query_embedding,
    vectorsearch_algo=VectorSearchAlgorithms.normalized_hamming_distance,
    top_k=4,
)




In [21]:
result

[(('f8c99eb5-18fd-43cb-bbba-89c48b0ae901',
   'The Amazon rainforest, also called Amazon jungle or Amazonia,',
   array([[1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0,
           1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0,
           1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0,
           0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0,
           0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0,
           1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1,
           1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1,
           1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1,
           1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1,
           1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1,
           0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0,
           0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0

### close the database connection

In [22]:
close_connection(conn)