In [2]:
import openai
import termcolor
from dotenv import load_dotenv
import os
from supabase import create_client, Client
from openai import OpenAI
from tiktoken import encoding_for_model

load_dotenv()

True

In [3]:
SUPABASE_URL = os.getenv("SUPABASE_URL")
SUPABASE_SERVICE_ROLE_KEY = os.getenv("SUPABASE_SERVICE_ROLE_KEY")
SUPABASE_ANON_KEY = os.getenv("SUPABASE_ANON_KEY")

supabase: Client = create_client(SUPABASE_URL, SUPABASE_SERVICE_ROLE_KEY)

openai = OpenAI()


In [4]:
def get_embedding(text):
    response = openai.embeddings.create(
        input=text,
        model="text-embedding-3-small"
    )
    return response.data[0].embedding

def similarity_search(query, table_name, match_threshold=0.1, match_count=5):
    query_embedding = get_embedding(query)
    
    response = supabase.rpc(
        'match_documents',
        {
            'query_embedding': query_embedding,
            'match_threshold': match_threshold,
            'match_count': match_count,
            'table_name': table_name
        }
    ).execute()
    
    return response.data


# chunking kb

In [6]:
current_user = "user_2lKpUPRJD4g5IErIdhbO7rBMn3K"
items = supabase.table('knowledge_base').select('*').eq('user_id', current_user).execute()

In [33]:
import spacy

nlp = spacy.load("en_core_web_md")
data = items.data[1]['content']

doc = nlp(items.data[1]['content'])

# Clean the text
cleaned_text = ' '.join([token.text for token in doc if not token.is_space and not token.is_punct])

def count_tokens(text, model="gpt-4o"):
    encoder = encoding_for_model(model)
    tokens = encoder.encode(text)
    return len(tokens)

def sliding_window_chunking(text, max_window_size=600, overlap=200):
    encoder = encoding_for_model("gpt-4o")  # Use the same model as in count_tokens
    tokens = encoder.encode(text)
    chunks = []
    start = 0
    while start < len(tokens):
        end = start + max_window_size
        chunk_tokens = tokens[start:end]
        chunk = encoder.decode(chunk_tokens)
        chunks.append(chunk)
        start += max_window_size - overlap
    return chunks

kb_item_chunked = sliding_window_chunking(cleaned_text)


In [43]:
current_user = "user_2lKpUPRJD4g5IErIdhbO7rBMn3K"
items = supabase.table('knowledge_base').select('*').eq('user_id', current_user).execute()

items.data[1]

{'id': 19,
 'created_at': '2024-09-09T10:09:37.765586+00:00',
 'user_id': 'user_2lKpUPRJD4g5IErIdhbO7rBMn3K',
 'item_id': None,
 'title': '## Bundles of Advanced Features',
 'content': "## Bundles of Advanced Features\n\nResults driven AI technology brought to you.\n\n- App Booking\n\n\nFind a time that works for you both. Our AI agent will check your calendar and send a confirmation to both parties.\n\n- Call Routing\n\n\nAgent chats with the caller, then routes a call to anyone inside your organisation, whether its 2 people, or 2,000.\n\n- Web Agent\n\n\nShow your customers you care by having an AI agent reachable direct from your web page\n\n- Email, SMS, Whatsapp notifications\n\n\nKeep connected with your agents however you wish.\n\n- Outbound Calls\n\n\nHave the agents carry out task specific outbound calls. Like appointment reminders, information retrieval, prompts for action, etc.\n\n- Phone Call Recordings & Transcriptions\n\n\nAll calls are recorded and transcriptions provide

In [46]:
def insert_chunk(parent_id, content, chunk_index, embedding):
    print("func insert_chunk...")
    supabase.table('chunks').insert({
        'parent_id': parent_id,
        'content': content,
        'chunk_index': chunk_index,
        'embedding': embedding
    }).execute()

def process_item(item_id, content):
    print("func process_item...")
    chunks = sliding_window_chunking(content) 
    for index, chunk in enumerate(chunks):
        embedding = get_embedding(chunk)
        print("index", index)
        print("chunk", chunk)
        print("embedding", embedding)
        insert_chunk(item_id, chunk, index, embedding)

process_item(item_id=items.data[1]['id'], content=items.data[1]['content'])


index 0
chunk ## Bundles of Advanced Features

Results driven AI technology brought to you.

- App Booking


Find a time that works for you both. Our AI agent will check your calendar and send a confirmation to both parties.

- Call Routing


Agent chats with the caller, then routes a call to anyone inside your organisation, whether its 2 people, or 2,000.

- Web Agent


Show your customers you care by having an AI agent reachable direct from your web page

- Email, SMS, Whatsapp notifications


Keep connected with your agents however you wish.

- Outbound Calls


Have the agents carry out task specific outbound calls. Like appointment reminders, information retrieval, prompts for action, etc.

- Phone Call Recordings & Transcriptions


All calls are recorded and transcriptions provided. Providing full transparency and documentation on interactions.

- Question & Answering


Provide information to your customers through voice.


- [Get Started Today](/onboarding)

## Why Flowon

Our co

In [37]:
user_query = "what features do you offer?"
table_name = "knowledge_base"

In [38]:
results = similarity_search(user_query,table_name)
results

[{'id': 19,
  'content': "## Bundles of Advanced Features\n\nResults driven AI technology brought to you.\n\n- App Booking\n\n\nFind a time that works for you both. Our AI agent will check your calendar and send a confirmation to both parties.\n\n- Call Routing\n\n\nAgent chats with the caller, then routes a call to anyone inside your organisation, whether its 2 people, or 2,000.\n\n- Web Agent\n\n\nShow your customers you care by having an AI agent reachable direct from your web page\n\n- Email, SMS, Whatsapp notifications\n\n\nKeep connected with your agents however you wish.\n\n- Outbound Calls\n\n\nHave the agents carry out task specific outbound calls. Like appointment reminders, information retrieval, prompts for action, etc.\n\n- Phone Call Recordings & Transcriptions\n\n\nAll calls are recorded and transcriptions provided. Providing full transparency and documentation on interactions.\n\n- Question & Answering\n\n\nProvide information to your customers through voice.\n\n\n- [Ge

In [None]:
def agent_cx(system_prompt, user_prompt, conversation_history):
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "function", "name": "query_knowledge_base", "content": "results"}
    ]
    
    messages.extend(conversation_history["user_history"])
    messages.extend(conversation_history["assistant_history"])
    messages.extend(conversation_history["function_history"])
    messages.append({"role": 'user', "content": user_prompt})

    response = openai_client.chat.completions.create(
        model="gpt-4o",
        messages=messages,
        tools=[query_knowledge_base],
        tool_choice="auto",
        stream=True
    )

    full_response = ""
    tool_calls = []
    for chunk in response:
        delta = chunk.choices[0].delta
        if delta.content:
            yield delta.content
            full_response += delta.content
        if delta.tool_calls:
            tool_calls.extend(delta.tool_calls)

    conversation_history["user_history"].append({"role": 'user', "content": user_prompt})
    conversation_history["assistant_history"].append({"role": 'assistant', "content": full_response})

    # Check for tool calls after the stream is complete
    if tool_calls:
        for tool_call in tool_calls:
            if tool_call.function.name == "query_knowledge_base":
                print(colored(f"\nTool call detected: {tool_call.function.name}", "light_green"))
                try:
                    print('tool_call.function:', tool_call.function)
                    args = json.loads(tool_call.function.arguments) if tool_call.function.arguments else {}
                    print('args:', args)
                    query = args.get('query', user_prompt)
                    print('query:', query)
                    result = weviate_query(query)
                    #print(colored(f"Function result: {result}", "light_green"))
                    conversation_history["function_history"].append({
                        "role": "function",
                        "name": tool_call.function.name,
                        "content": str(result)
                    })
                    # Process the result here instead of calling agent_cx again
                    yield "\n\nBased on the retrieved information:\n"
                    for chunk in generate_response_after_function_call(conversation_history, system_prompt, result):
                        yield chunk
                        full_response += chunk
                except json.JSONDecodeError:
                    print(colored(f"Error decoding function arguments: {tool_call.function.arguments}", "light_green"))
                except Exception as e:
                    print(colored(f"Error processing function call: {str(e)}", "light_green"))

    conversation_history["assistant_history"].append({"role": 'assistant', "content": full_response})
    return full_response