In [None]:
import numpy as np
from openai import OpenAI
import pandas as pd

# Input file: "/tmp/snippets/actions.csv"
# Input text column: lookup_values
# Output column: embedding

# Model name used as the default for the embedding requests made in this file.
EMBEDDING_MODEL = "text-embedding-ada-002"

# Shared API client, configured with max_retries=5. No credentials are passed
# explicitly here, so they must come from the client's own defaults.
client = OpenAI(max_retries=5)

def cosine_similarity(a, b):
    """Return the cosine similarity between vectors ``a`` and ``b``.

    Args:
        a: First vector (any array-like accepted by ``np.dot`` and
            ``np.linalg.norm``); intended for 1-D vectors.
        b: Second vector, the same length as ``a``.

    Returns:
        ``dot(a, b) / (||a|| * ||b||)``. When either vector has zero norm the
        similarity is undefined; ``0.0`` is returned in that case instead of
        dividing by zero, which would yield NaN plus a RuntimeWarning.
    """
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    # A zero-length vector has no direction, so treat it as "not similar"
    # rather than letting NaN leak into later sorting/ranking.
    if denominator == 0:
        return 0.0
    return np.dot(a, b) / denominator

def get_embedding(text, model=EMBEDDING_MODEL):
    """Request an embedding for ``text`` from the embeddings endpoint.

    Args:
        text: String to embed. Every newline is replaced by a single space
            before the request is sent.
        model: Name of the embedding model; defaults to ``EMBEDDING_MODEL``.

    Returns:
        The ``embedding`` attribute of the first item in the response's
        ``data`` (presumably a list of floats -- confirm against the API
        reference).
    """
    single_line = text.replace("\n", " ")
    # The input is sent as a one-element batch, so only data[0] is relevant.
    response = client.embeddings.create(input=[single_line], model=model)
    return response.data[0].embedding

def search_docs(df, user_query, top_n=4, to_print=True):
    """Rank the rows of ``df`` by similarity to ``user_query``.

    Args:
        df: DataFrame with an ``embedding`` column holding one vector per row.
            NOTE: a ``similarities`` column is added (or overwritten) on this
            frame in place, not on a copy.
        user_query: Query text; it is embedded with ``EMBEDDING_MODEL``.
        top_n: Number of best-matching rows to keep.
        to_print: When true, the result is rendered with ``display``.

    Returns:
        The ``top_n`` rows of ``df`` with the highest ``similarities`` value,
        ordered from most to least similar.
    """
    query_vector = get_embedding(user_query, model=EMBEDDING_MODEL)

    # Score every stored embedding against the query embedding.
    df["similarities"] = df.embedding.apply(
        lambda row_vector: cosine_similarity(row_vector, query_vector)
    )

    ranked = df.sort_values("similarities", ascending=False)
    res = ranked.head(top_n)

    if to_print:
        display(res)
    return res

# Load the source table (first CSV column becomes the index) and discard every
# row that has a missing value in any column.
df = pd.read_csv("/tmp/snippets/actions.csv", index_col=0).dropna()

# Embed each remaining lookup value; this issues one request per row.
df["embedding"] = df["lookup_values"].apply(
    lambda value: get_embedding(value, model=EMBEDDING_MODEL)
)

# Uncomment to save the embeddings.
# df.to_csv("/tmp/snippets/actions_with_embeddings.csv")