# LANCEDB EXAMPLE

Prerequisites for running the examples:
Install and start OLLAMA, then pull the llama3.2 model (`ollama pull llama3.2`).

In [None]:
%reload_ext autoreload
%autoreload 2

import requests, lancedb, pandas as pd
from typing import List

# Directory (relative to the current working directory) that holds the LanceDB data.
LANCEDB_LOCATION = "./LANCEDB/"
# Ollama model tag used to compute embeddings. It has to be the tag of the
# model pulled as a precondition for this notebook, i.e. "llama3.2".
MODEL = "llama3.2"
# --------------------------------------------------------------------------------
# Function to get embedding using OLLAMA API
# Generate embeddings for a given text using the OLLAMA API.
#
def get_ollama_embedding(
    text: str, model: str = MODEL, timeout: float = 120.0
) -> List[float]:
    """Return the embedding vector for *text* from the local OLLAMA server.

    Sends a POST request to ``http://localhost:11434/api/embeddings`` and
    returns the ``"embedding"`` field of the JSON reply.

    Args:
        text: Text to embed; sent as the ``prompt`` field of the request.
        model: Name of the OLLAMA model that computes the embedding.
        timeout: Seconds to wait for the server before giving up, so that an
            unresponsive server cannot block the caller forever.

    Returns:
        The non-empty embedding taken from the server's reply.

    Raises:
        RuntimeError: If the server answers with a non-200 status, or the
            reply carries no (or an empty) ``"embedding"`` field.
        requests.exceptions.RequestException: If the request itself fails
            (for example the server is unreachable or the timeout expires).
    """
    url = "http://localhost:11434/api/embeddings"
    payload = {"model": model, "prompt": text}
    response = requests.post(url, json=payload, timeout=timeout)

    if response.status_code != 200:
        raise RuntimeError(f"Failed to get embedding: {response.text}")

    embedding = response.json().get("embedding")
    if not embedding:
        # Fail here rather than handing an empty vector to the vector store,
        # where it would only surface later as a harder-to-trace error.
        raise RuntimeError(
            f"Failed to get embedding: empty embedding returned for model {model!r}"
        )
    return embedding

# --------------------------------------------------------------------------------
# Load your data
#
def load_example_data():
    """Return the demo records as a fresh list of ``{"id", "text"}`` dicts.

    Ids are assigned sequentially from 1 in the order the sentences are
    listed. New dict objects are built on every call, so callers may add
    keys to the returned records without affecting later calls.
    """
    sentences = (
        "LanceDB is a fast and efficient vector database.",
        "OLLAMA makes it easy to run LLMs locally.",
        "Combining LLMs with LanceDB enables efficient search.",
    )
    return [
        {"id": number, "text": sentence}
        for number, sentence in enumerate(sentences, start=1)
    ]
# --------------------------------------------------------------------------------
# Function to search LanceDB table for similar embeddings
# Search the LanceDB table for the closest embeddings to the query text.
#
def lancedb_search(query_text: str, top_k: int = 3, table="test", model=MODEL):
    """Print and return the rows of a LanceDB table closest to *query_text*.

    Args:
        query_text: Text to embed and use as the search vector.
        top_k: Maximum number of rows to retrieve.
        table: Name of the table to open inside ``LANCEDB_LOCATION``.
        model: Model name forwarded to ``get_ollama_embedding``.

    Returns:
        The search result as a pandas DataFrame; each row is also printed
        with its rank, ``id``, ``text`` and ``_distance``.
    """
    # The query has to be turned into a vector before the table can be searched.
    query_vector = get_ollama_embedding(query_text, model=model)

    connection = lancedb.connect(LANCEDB_LOCATION)
    embeddings_table = connection.open_table(table)

    # Vector similarity search, capped at top_k rows.
    nearest = embeddings_table.search(query_vector)
    results = nearest.limit(top_k).to_pandas()

    # Report every hit; the rank is the DataFrame index shifted to start at 1.
    print("\nTop Results:")
    for position, hit in results.iterrows():
        print(f"Rank {position + 1}:")
        print(f"ID: {hit['id']}")
        print(f"Text: {hit['text']}")
        print(f"Distance: {hit['_distance']}\n")

    return results
# --------------------------------------------------------------------------------
# Main function to create embeddings and store in LanceDB
#
def lancedb_load(model=MODEL, table="test"):
    """Embed the example records and (re)create a LanceDB table from them.

    Args:
        model: Model name forwarded to ``get_ollama_embedding``.
        table: Name of the table to create inside ``LANCEDB_LOCATION``; it is
            created with ``mode="overwrite"``.

    Returns:
        A pandas DataFrame with the ``id``, ``text`` and ``vector`` columns
        that were written to the table.
    """
    # Attach an embedding to every example record.
    records = [
        {**item, "vector": get_ollama_embedding(item["text"], model=model)}
        for item in load_example_data()
    ]

    # The DataFrame is what the caller gets back; the table itself is fed
    # with the same rows as a list of dicts.
    df = pd.DataFrame(records)
    rows = df.to_dict(orient='records')

    connection = lancedb.connect(LANCEDB_LOCATION)
    stored = connection.create_table(table, data=rows, mode="overwrite")
    print("Data successfully stored in LanceDB.")

    # Read the table back to show what was actually stored.
    print("Sample Data in LanceDB:")
    print(stored.to_pandas())
    return df

# Build (or overwrite) the default "test" table from the example sentences,
# keeping the embedded records in `df`.
df = lancedb_load()
# Run a sample query against that table. As the last expression of the cell,
# the DataFrame returned by the search is also rendered as the cell result.
lancedb_search("what is an efficient db")

Data successfully stored in LanceDB.
Sample Data in LanceDB:
   id                                               text  \
0   1   LanceDB is a fast and efficient vector database.   
1   2          OLLAMA makes it easy to run LLMs locally.   
2   3  Combining LLMs with LanceDB enables efficient ...   

                                              vector  
0  [-1.8399125, -0.23122776, -2.9665627, 0.381641...  
1  [-2.5748787, 0.18139988, -2.1380749, -1.418047...  
2  [-2.179902, 0.93134046, -1.9709134, -0.7056500...  

Top Results:
Rank 1:
ID: 1
Text: LanceDB is a fast and efficient vector database.
Distance: 12211.1396484375

Rank 2:
ID: 3
Text: Combining LLMs with LanceDB enables efficient search.
Distance: 12429.2587890625

Rank 3:
ID: 2
Text: OLLAMA makes it easy to run LLMs locally.
Distance: 13213.962890625



Unnamed: 0,id,text,vector,_distance
0,1,LanceDB is a fast and efficient vector database.,"[-1.8399125, -0.23122776, -2.9665627, 0.381641...",12211.139648
1,3,Combining LLMs with LanceDB enables efficient ...,"[-2.179902, 0.93134046, -1.9709134, -0.7056500...",12429.258789
2,2,OLLAMA makes it easy to run LLMs locally.,"[-2.5748787, 0.18139988, -2.1380749, -1.418047...",13213.962891
