# LanceDB utilities

In [None]:
%reload_ext autoreload
%autoreload 2

import sys, os, ollama, torch, logging,datetime, httpx,re, hashlib
from ollama import Client
from mangorest.mango import webapi
from openai import OpenAI
    
logger = logging.getLogger("geoapp")

# Select the torch device: CUDA when available, otherwise MPS, otherwise CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"

# Defaults; each may be overridden by ~/.django/my_config.py below.
OLLAMA_HOST = 'http://127.0.0.1:11434/v1'
OPENAI_KEY = "NO KEY"

# Make the per-user config directory importable.
sys.path.append(os.path.expanduser("~/.django"))
if os.path.exists(os.path.expanduser("~/.django/my_config.py")):
    import my_config

    # Look up each setting independently so that a config file defining only
    # one of them still overrides that one; a missing setting keeps its
    # default instead of being hidden behind a bare ``except``.
    OLLAMA_HOST = getattr(my_config, "OLLAMA_HOST", OLLAMA_HOST)
    OPENAI_KEY = getattr(my_config, "OPENAI_KEY", OPENAI_KEY)


# REST

In [None]:
#%%writefile ../lancedb.py
#!/usr/bin/env python

# --------------------------------------------------------------------------------
import sys, os, ollama, torch, logging,datetime, httpx,re, hashlib
from ollama import Client
from mangorest.mango import webapi
from openai import OpenAI
    
logger = logging.getLogger("myapp")
device = "cpu"
DB_LOC = "~/data/LANCEDB/"  # note: stored unexpanded ("~" is not resolved here)

# Select the torch device: CUDA when available, otherwise MPS, otherwise CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"

# Defaults; each may be overridden by ~/.django/my_config.py below.
OLLAMA_HOST = 'http://127.0.0.1:11434/v1'
OPENAI_KEY = "NO KEY"

# Make the per-user config directory importable.
sys.path.append(os.path.expanduser("~/.django"))
if os.path.exists(os.path.expanduser("~/.django/my_config.py")):
    import my_config

    # Look up each setting independently so that a config file defining only
    # one of them still overrides that one; a missing setting keeps its
    # default instead of being hidden behind a bare ``except``.
    OLLAMA_HOST = getattr(my_config, "OLLAMA_HOST", OLLAMA_HOST)
    OPENAI_KEY = getattr(my_config, "OPENAI_KEY", OPENAI_KEY)
# --------------------------------------------------------------------------------


# LanceDB copy

In [None]:
import lancedb

from vision_rag.colpali_tools import (
    embed_document,
    json_embeddings_to_numpy,
)
from vision_rag.objects import Document

def connect_to_db(db_path: str = "./data/tables") -> lancedb.db.DBConnection:
    """Open a LanceDB connection for ``db_path`` and return it.

    Args:
        db_path: Location handed straight to ``lancedb.connect``.

    Returns:
        The connection object produced by ``lancedb.connect``.
    """
    connection = lancedb.connect(db_path)
    return connection

def create_empty_table(db: lancedb.db.DBConnection, table_name: str = "demo") -> lancedb.table.Table:
    """Create (or overwrite) an empty table with the document schema.

    The table is created with ``mode="overwrite"`` and a full-text-search
    index is then built on its ``text`` column.

    Args:
        db: Connection on which ``create_table`` is called.
        table_name: Name of the table to create.

    Returns:
        The newly created table.
    """
    # NOTE(review): ``pa`` is not imported in the visible part of this file --
    # presumably ``import pyarrow as pa``; confirm it is in scope.
    columns = [
        ("id", pa.string()),
        ("document_name", pa.string()),
        ("text", pa.string()),
        ("image", pa.string()),
        ("page_number", pa.int32()),
        ("vector", pa.list_(pa.float32())),
        ("embedding_shape", pa.list_(pa.float32())),
    ]
    table_schema = pa.schema([pa.field(col_name, col_type) for col_name, col_type in columns])

    table = db.create_table(table_name, schema=table_schema, mode="overwrite")
    # For keyword search
    table.create_fts_index("text", use_tantivy=False)
    return table

def create_table_from_docs(
    docs: list[Document],
    db: lancedb.db.DBConnection,
    table_name: str = "demo",
    embedding_uri: str = "http://necbox2.ast.lmco.com:8080/v1/models/Colpali-embedder:predict",
) -> lancedb.table.Table:
    """Embed ``docs`` and create (or overwrite) a table holding them.

    Every document is mutated in place: its ``embedding_shape`` is set to the
    shape of its embedding array and its ``vector`` to the flattened array.
    The rows written to the table are the documents' ``to_dict()`` results.
    A full-text-search index is then built on the ``text`` column.

    Args:
        docs: Documents to embed and store.
        db: Connection on which ``create_table`` is called.
        table_name: Name of the table to create with ``mode="overwrite"``.
        embedding_uri: Endpoint passed to ``embed_document``.

    Returns:
        The newly created table.
    """
    raw_embeddings = embed_document(docs, embedding_uri, batch_size=8)
    embedding_arrays = json_embeddings_to_numpy(raw_embeddings, "image_embeddings")

    def _to_row(position, doc):
        # Attach the embedding at the same position to the document, then serialise it.
        array = embedding_arrays[position]
        doc.embedding_shape = array.shape
        doc.vector = array.flatten()
        return doc.to_dict()

    rows = [_to_row(position, doc) for position, doc in enumerate(docs)]

    table = db.create_table(table_name, rows, mode="overwrite")
    # For keyword search
    table.create_fts_index("text", use_tantivy=False)
    return table


def add_to_db(
    docs: list[Document],
    db: lancedb.db.DBConnection,
    table_name: str = "demo",
    embedding_uri: str = "http://necbox2.ast.lmco.com:8080/v1/models/Colpali-embedder:predict",
) -> lancedb.table.Table:
    """Embed ``docs`` and append them to an existing table.

    The table is opened before any embedding work is done. Every document is
    mutated in place: its ``embedding_shape`` is set to the shape of its
    embedding array and its ``vector`` to the flattened array. The rows added
    to the table are the documents' ``to_dict()`` results.

    Args:
        docs: Documents to embed and append.
        db: Connection on which ``open_table`` is called.
        table_name: Name of the table to open.
        embedding_uri: Endpoint passed to ``embed_document``.

    Returns:
        The table the rows were added to.
    """
    table = db.open_table(table_name)

    raw_embeddings = embed_document(docs, embedding_uri, batch_size=8)
    embedding_arrays = json_embeddings_to_numpy(raw_embeddings, "image_embeddings")

    def _to_row(position, doc):
        # Attach the embedding at the same position to the document, then serialise it.
        array = embedding_arrays[position]
        doc.embedding_shape = array.shape
        doc.vector = array.flatten()
        return doc.to_dict()

    rows = [_to_row(position, doc) for position, doc in enumerate(docs)]

    table.add(rows)
    return table


def tbl_keyword_search(
    query: str,
    db: lancedb.db.DBConnection,
    table_name: str = "demo",
    num_results: int = 10,
) -> list[dict]:
    """Search a table with a query string and return the matches as a list.

    Args:
        query: String passed to the table's ``search`` method.
        db: Connection on which ``open_table`` is called.
        table_name: Name of the table to search.
        num_results: Value passed to ``limit`` on the search.

    Returns:
        The result of ``to_list()`` on the limited search.
    """
    table = db.open_table(table_name)
    # NOTE(review): presumably resolved as a full-text query against the
    # index on "text" -- confirm against the LanceDB version in use.
    limited_search = table.search(query).limit(num_results)
    return limited_search.to_list()
