# In [None]:  (Jupyter notebook cell prompt left over from export)
import os
import pandas as pd
import numpy
import lancedb
import openai
from sentence_transformers import SentenceTransformer
from sentence_transformers.util import cos_sim
from typing import Optional

# Module-wide configuration constants.
DB_NAME = "lancedb"  # passed to lancedb.connect()
TABLE_NAME = "my_table"  # table created by main() and opened by query_db()
DEFAULT_ENGINE = "your-engine-here"  # default `engine` argument of call_llm()
MODEL_NAME = "thenlper/gte-large-zh"  # model identifier given to SentenceTransformer

def set_openai_llm():
    """Copy OpenAI client settings from the process environment.

    Each ``openai`` module attribute below is overwritten with the value of
    the matching environment variable, or ``None`` when that variable is
    not set.
    """
    # openai attribute -> environment variable that supplies it
    env_var_by_attr = {
        "api_type": "api_type",
        "api_base": "api_base",
        "api_version": "api_version",
        "api_key": "OPENAI_API_KEY",
    }
    for attr_name, env_var in env_var_by_attr.items():
        setattr(openai, attr_name, os.environ.get(env_var))

# Configure the openai module from the environment as soon as this file is
# executed/imported, before any call_llm() invocation.
set_openai_llm()

# Initialize model
# Loaded once at module level; embed_func() reuses this single instance.
model = SentenceTransformer(MODEL_NAME)
    
def embed_func(content):
    """Return the embedding of ``content`` produced by the module-level model.

    ``content`` is converted with ``str()`` first. The text is encoded as a
    one-element batch and the first (only) row of the result is returned.

    If encoding raises, the exception is NOT propagated: its message is
    returned as a plain string instead, so callers must check the type of
    the result to detect failure.
    """
    text = str(content)
    try:
        return model.encode([text])[0]
    except Exception as err:
        return str(err)

def create_db_table(dbname, tablename, data):
    """Connect to a LanceDB database and (re)create a table from ``data``.

    If a table called ``tablename`` already exists it is dropped first, so
    the returned table always contains only ``data``. Progress is reported
    with ``print``.

    Returns:
        A ``(connection, table)`` tuple.
    """
    connection = lancedb.connect(dbname)
    print(f"Connected to db: {dbname}")

    # Drop any previous table of the same name so creation starts clean.
    already_present = tablename in connection.table_names()
    if already_present:
        connection.drop_table(tablename)
        print(f"Table {tablename} dropped")

    new_table = connection.create_table(tablename, data=data)
    print(f"Table {tablename} created")

    return connection, new_table


def call_llm(
    messages,
    engine=DEFAULT_ENGINE,
    functions=None,
    temperature=0.3,
    max_tokens=800,
    top_p=0.95,
    frequency_penalty=0,
    presence_penalty=0,
    stop=None,
):
    """Call OpenAI's chat completion endpoint and return the raw response.

    Args:
        messages: Chat messages forwarded as the ``messages`` argument.
        engine: Value forwarded as the ``engine`` argument.
        functions: Optional function definitions. When ``None`` or empty the
            ``functions`` argument is left out of the request entirely.
        temperature, max_tokens, top_p, frequency_penalty, presence_penalty,
        stop: Forwarded unchanged to ``openai.ChatCompletion.create``.

    Returns:
        Whatever ``openai.ChatCompletion.create`` returns.

    Raises:
        Any exception raised by ``openai.ChatCompletion.create``; it is
        printed together with ``messages`` and then re-raised unchanged.
    """
    kwargs = {
        "engine": engine,
        "messages": messages,
        "temperature": temperature,
        "max_tokens": max_tokens,
        "top_p": top_p,
        "frequency_penalty": frequency_penalty,
        "presence_penalty": presence_penalty,
        "stop": stop,
    }

    # Only include "functions" when there is something to send; an empty or
    # missing list is omitted (presumably the API rejects an empty list --
    # TODO confirm).
    if functions:
        kwargs["functions"] = functions

    try:
        return openai.ChatCompletion.create(**kwargs)
    except Exception as e:
        print(f"openai.ChatCompletion.create exception: {e}")
        print(messages)
        # Bare raise keeps the original exception and traceback intact.
        raise

def query_db(query, k=3):
    """Embed ``query`` and return the ``k`` nearest rows as a DataFrame.

    Args:
        query: Text to search for; embedded with ``embed_func``.
        k: Maximum number of rows to return.

    Returns:
        The search result converted with ``to_pandas()``.

    Raises:
        RuntimeError: If ``embed_func`` did not produce a numpy array.
            ``embed_func`` reports failure by returning the error message as
            a string, which must not be handed to ``table.search`` as if it
            were a query vector.
    """
    query_vector = embed_func(query)
    # Same success test as load_and_process_data: only an ndarray is a real
    # embedding; anything else is embed_func's error message.
    if not isinstance(query_vector, numpy.ndarray):
        raise RuntimeError(f"Failed to embed query: {query_vector}")

    db = lancedb.connect(DB_NAME)
    table = db.open_table(TABLE_NAME)
    return table.search(query_vector).limit(k).to_pandas()

def ask(question, bookmarks_csv, engine="test-july-4"):
    """Ask the LLM to answer ``question`` using the supplied bookmarks.

    Builds a two-message chat (a fixed system prompt plus a user prompt that
    embeds ``question`` and ``bookmarks_csv``) and sends it via ``call_llm``.

    Args:
        question: The user's question, inserted into the prompt.
        bookmarks_csv: Bookmark data as CSV text, inserted into the prompt.
        engine: Engine name forwarded to ``call_llm``. Defaults to the value
            that was previously hard-coded, so existing callers are
            unaffected.

    Returns:
        The content of the first choice's message, or the string
        ``"Exception:" + str(e)`` if the call or response parsing fails
        (errors are reported in-band rather than raised).
    """
    system_prompt = "You are the knowledged person familiar with information collection, and be a senior coach in Google HR for years. You will give advice to growth of the individual. and give constructive advice. You are also good at data analytics, and can read spreadsheet CSV data and tell stories between the lines."
    # NOTE: the template's internal whitespace is part of the prompt sent to
    # the model and is kept exactly as originally written.
    user_prompt = """Please review user's questions and the provide related bookmarks title, URL and add date.
        Please give (1) you answer (2) Pick some of the bookmarks as reference (3) analyze the add dates to judge the user's interest level and interest change over time.

        - Question: 
        {}
        - Bookmarks:
        {}        
        """.format(question, bookmarks_csv)

    my_messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    try:
        r = call_llm(my_messages, engine=engine)
        return r["choices"][0]["message"]["content"]
    except Exception as e:
        return "Exception:" + str(e)
    
def ask_rag(question):
    """Answer ``question`` with retrieval-augmented context from the database.

    Fetches up to 20 matches via ``query_db``, keeps the ``element``,
    ``href`` and ``add_date_str`` columns, removes rows with a duplicate
    ``element``, orders them by ``add_date_str`` descending, and passes the
    result as CSV to ``ask``.

    Returns:
        A ``(answer, bookmarks)`` tuple: the value returned by ``ask`` and
        the DataFrame that was sent as context.
    """
    wanted_columns = ["element", "href", "add_date_str"]
    hits = query_db(question, 20)
    bookmarks = hits.loc[:, wanted_columns]
    bookmarks = bookmarks.drop_duplicates("element")
    bookmarks = bookmarks.sort_values("add_date_str", ascending=False)
    answer = ask(question, bookmarks.to_csv())
    return (answer, bookmarks)


def load_and_process_data(file_path):
    """Read a CSV file and attach an embedding to every row.

    The ``element`` column is embedded with ``embed_func`` and stored in a
    new ``vector`` column. Rows whose ``vector`` is not a ``numpy.ndarray``
    (``embed_func`` returns a string when encoding fails) are dropped.

    Returns:
        The filtered DataFrame.
    """
    frame = pd.read_csv(file_path)
    frame["vector"] = frame["element"].apply(embed_func)

    # Keep only rows that received a real embedding.
    has_embedding = frame["vector"].map(
        lambda value: isinstance(value, numpy.ndarray)
    )
    return frame[has_embedding]

def main():
    """Build the bookmark table, run one sample question and print the results."""
    bookmarks = load_and_process_data("./bookmarks/bookmarks.csv")
    # The returned (db, table) handles are not needed here; query_db()
    # reconnects on its own.
    create_db_table(DB_NAME, TABLE_NAME, bookmarks)

    answer, retrieved = ask_rag("What is the best way to learn AI?")
    for output in (answer, retrieved):
        print(output)

# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()