In [1]:
import openai
from termcolor import colored
from dotenv import load_dotenv
import json
import os
import spacy
import requests
from supabase import create_client, Client
from openai import OpenAI
from tiktoken import encoding_for_model

load_dotenv()

True

In [2]:
SUPABASE_URL = os.getenv("SUPABASE_URL")
SUPABASE_SERVICE_ROLE_KEY = os.getenv("SUPABASE_SERVICE_ROLE_KEY")
SUPABASE_ANON_KEY = os.getenv("SUPABASE_ANON_KEY")

supabase: Client = create_client(SUPABASE_URL, SUPABASE_SERVICE_ROLE_KEY)

openai = OpenAI()


# chunking & embedding kb

in post.knowledge_base endpoint. when new item is added. we process that item through the below code.

In [3]:
current_user = "user_2lKpUPRJD4g5IErIdhbO7rBMn3K"
items = supabase.table('knowledge_base').select('*').eq('user_id', current_user).execute()

In [4]:

nlp = spacy.load("en_core_web_md")
data = items.data[1]['content']
doc = nlp(data)
cleaned_text = ' '.join([token.text for token in doc if not token.is_space and not token.is_punct])

def count_tokens(text, model="gpt-4o"):
    encoder = encoding_for_model(model)
    tokens = encoder.encode(text)
    return len(tokens)

def sliding_window_chunking(text, max_window_size=600, overlap=200):
    encoder = encoding_for_model("gpt-4o")  # Use the same model as in count_tokens
    tokens = encoder.encode(text)
    chunks = []
    start = 0
    while start < len(tokens):
        end = start + max_window_size
        chunk_tokens = tokens[start:end]
        chunk = encoder.decode(chunk_tokens)
        chunks.append(chunk)
        start += max_window_size - overlap
    return chunks

def insert_chunk(parent_id, content, chunk_index, embedding):
    print("func insert_chunk...")
    supabase.table('chunks').insert({
        'parent_id': parent_id,
        'content': content,
        'chunk_index': chunk_index,
        'embedding': embedding
    }).execute()

def get_embedding(text):
    response = openai.embeddings.create(
        input=text,
        model="text-embedding-3-small"
    )
    return response.data[0].embedding

def process_item(item_id, content):
    print("func process_item...")
    chunks = sliding_window_chunking(content) 
    for index, chunk in enumerate(chunks):
        embedding = get_embedding(chunk)
        print("index", index)
        print("chunk", chunk)
        print("embedding", embedding)
        insert_chunk(item_id, chunk, index, embedding)
#process_item(item_id=items.data[1]['id'], content=cleaned_text)



In [5]:
def similarity_search(query, table_name, match_threshold=0.2, match_count=10):
    query_embedding = get_embedding(query)
    
    response = supabase.rpc(
        'match_documents',
        {
            'query_embedding': query_embedding,
            'match_threshold': match_threshold,
            'match_count': match_count,
            'table_name': table_name
        }
    ).execute()
    return response.data


In [11]:
user_query = "what features do you offer?"
#user_query = "can flowon handle multiple calls simultaneously?"
table_name = "chunks"

results = similarity_search(user_query,table_name)
docs = [result['content'] for result in results]


### Rerank RAG

In [12]:

def rerank_documents(user_query, top_n, docs):
    url = 'https://api.jina.ai/v1/rerank'
    headers = {
        'Content-Type': 'application/json',
        'Authorization': 'Bearer jina_b716ce28cd1b49bc920e57a5bfb6de061z36vM3vogg6y-_2d5qcoXHe_rdo'
    }
    data = {
        "model": "jina-reranker-v2-base-multilingual",
        "query": user_query,
        "top_n": top_n,
        "documents": docs
    }
    response = requests.post(url, headers=headers, json=data)
    reranked_docs = response.json()['results']
    reranked_docs = [i['document']['text'] for i in reranked_docs]
    return reranked_docs

reranked_docs = rerank_documents(user_query, 3, docs)


In [14]:
from typing import TypedDict, Literal, Dict, Any

query_knowledge_base: Dict[str, Any] = {
                "type": "function",
                "function": {
                    "name": "query_knowledge_base",
                    "description": "Where the User's query is best answered by the knowledge base, call this function",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "enquiry": {"type": "string", "description": "The enquiry of the user"},
                            "topic": {"type": "string", "description": "any additional context the user has given"},
                        },
                        "required": ["enquiry"]
                    },
    },
}


In [29]:
system_prompt = """
You are a helpful assistant designed to search the company knowledge base, and find relevant information to answer questions from users.
Where a question from the user appears to be specific to, you will use the <context> to augment your response to the user.
"""

conversation_history = {
    "user_history": [],
    "assistant_history": [],
    "function_history": []
}

def llm_response(system_prompt, user_prompt, conversation_history):
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    response = openai.chat.completions.create(
        model="gpt-4o",
        messages=messages,
        stream=True
    )

    full_response = ""
    tool_calls = []

    for chunk in response:
        delta = chunk.choices[0].delta
        if delta.content:
            yield delta.content
            full_response += delta.content
        if delta.tool_calls:
            tool_calls.extend(delta.tool_calls)

    conversation_history["user_history"].append({"role": 'user', "content": user_prompt})
    conversation_history["assistant_history"].append({"role": 'assistant', "content": full_response})
    print("conversation history", conversation_history)
    return full_response

def rag_response(user_query):
    results = similarity_search(user_query,table_name)
    table_name = "chunks"
    results = similarity_search(user_query,table_name)
    docs = [result['content'] for result in results]
    reranked_docs = rerank_documents(user_query, 3, docs)
    return reranked_docs





In [30]:

user_query = "my name is michael, tell me about flowon"

response_received = False
for response_chunk in llm_response(system_prompt, user_prompt, conversation_history):
    response_received = True
    print(response_chunk, end='', flush=True)


Hello Michael! Flowon is a robust AI-driven platform designed to streamline and automate various business processes, particularly in the realm of communication. Here are some key highlights about Flowon:

### Features:
1. **Simultaneous Call Handling**: Flowon can manage multiple calls at once, ensuring no customer call goes unanswered.
2. **Integration with Advanced Technologies**: Utilizes Text to Speech (TTS), Speech to Text (STT), Language Models, and VoIP to deliver exceptional services.
3. **Recorded Calls and Transcriptions**: All calls are recorded and transcriptions are available for full transparency and documentation.
4. **Outbound Calls**: Agents can carry out tasks like appointment reminders, information retrieval, and prompts for action.
5. **Customizable AI Agents**: Tailor the AI agent to communicate according to your preferences and business needs.
6. **Easy Set Up**: Can be set up by instructing the agent in plain English, no technical expertise required.
7. **Always 

In [31]:

user_query = "what is my name?"

table_name = "chunks"
results = similarity_search(user_query,table_name)
docs = [result['content'] for result in results]
reranked_docs = rerank_documents(user_query, 3, docs)

user_prompt = f"""{user_query}
retrieved docs {reranked_docs} """

response_received = False
for response_chunk in llm_response(system_prompt, user_prompt, conversation_history):
    response_received = True
    print(response_chunk, end='', flush=True)

I'm sorry, but I don't have access to personal data and cannot know your name. If you're signed into a company system or account, you might be able to find your name in your profile settings or account information. Is there anything else I can help you with?conversation history {'user_history': [{'role': 'user', 'content': 'my name is michael, tell me about flowon\nretrieved docs [" of our dozen features ✓ https://cdn.unicornplatform.com/static/img/icons/checked--acid.svg Cut out inefficient business practices ✓ https://cdn.unicornplatform.com/static/img/icons/checked--acid.svg Zero commitments FAQ Can I access call transcripts and recordings All call related data is stored and retrievable for your records Can Flowon handle multiple calls simultaneously Yes Flowon is designed to handle multiple calls at the same time ensuring that no customer call goes unanswered What technologies does Flowon use We utilise state of the art technologies including specialised models for Text to Speech T

In [32]:

user_query = "i did mention my name was michael earlier, can you not recall?"

table_name = "chunks"
results = similarity_search(user_query,table_name)
docs = [result['content'] for result in results]
reranked_docs = rerank_documents(user_query, 3, docs)

user_prompt = f"""{user_query}
retrieved docs {reranked_docs} """

response_received = False
for response_chunk in generate_response(system_prompt, user_prompt, conversation_history):
    response_received = True
    print(response_chunk, end='', flush=True)

I'm sorry, but I don't have the capability to recall previous interactions or remember user names due to privacy and security policies. However, I'm here to help you with any questions or needs you have right now. How can I assist you today, Michael?conversation history {'user_history': [{'role': 'user', 'content': 'my name is michael, tell me about flowon\nretrieved docs [" of our dozen features ✓ https://cdn.unicornplatform.com/static/img/icons/checked--acid.svg Cut out inefficient business practices ✓ https://cdn.unicornplatform.com/static/img/icons/checked--acid.svg Zero commitments FAQ Can I access call transcripts and recordings All call related data is stored and retrievable for your records Can Flowon handle multiple calls simultaneously Yes Flowon is designed to handle multiple calls at the same time ensuring that no customer call goes unanswered What technologies does Flowon use We utilise state of the art technologies including specialised models for Text to Speech TTS Speec

In [None]:
from openai import OpenAI

# Set up logging
# logging.basicConfig(level=logging.DEBUG)
# logger = logging.getLogger(__name__)

openai = OpenAI()

conversation_history = {
    "user_history": [],
    "assistant_history": [],
    "function_history": []
}

def agent_cx(system_prompt, user_prompt, conversation_history):
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "function", "name": "query_knowledge_base", "content": "results"}
    ]
    
    messages.extend(conversation_history["user_history"])
    messages.extend(conversation_history["assistant_history"])
    messages.extend(conversation_history["function_history"])
    messages.append({"role": 'user', "content": user_prompt})


    try:
        response = openai.chat.completions.create(
            model="gpt-4",  # Changed from "gpt-4o" to "gpt-4"
            messages=messages,
            tools=[query_knowledge_base],
            tool_choice="auto",
            stream=True
        )

        full_response = ""
        tool_calls = []
        for chunk in response:
            delta = chunk.choices[0].delta
            if delta.content:
                yield delta.content
                full_response += delta.content
            if delta.tool_calls:
                tool_calls.extend(delta.tool_calls)

        conversation_history["user_history"].append({"role": 'user', "content": user_prompt})
        conversation_history["assistant_history"].append({"role": 'assistant', "content": full_response})

        return full_response
    except Exception as e:
        return "An error occurred while processing your request."


user_prompt = user_query


# To consume the generator and print the output
response_received = False
for response_chunk in agent_cx(system_prompt, user_prompt, conversation_history):
    response_received = True
    print(response_chunk, end='', flush=True)


if not response_received:
    print("No response was received from the LLM. This could be due to an API error or an issue with the model.")


In [None]:
    # # Check for tool calls after the stream is complete
    # if tool_calls:
    #     for tool_call in tool_calls:
    #         if tool_call.function.name == "query_knowledge_base":
    #             print(colored(f"\nTool call detected: {tool_call.function.name}", "light_green"))
    #             try:
    #                 print('tool_call.function:', tool_call.function)
    #                 args = json.loads(tool_call.function.arguments) if tool_call.function.arguments else {}
    #                 print('args:', args)
    #                 query = args.get('query', user_prompt)
    #                 print('query:', query)
    #                 result = similarity_search(query)
    #                 #print(colored(f"Function result: {result}", "light_green"))
    #                 conversation_history["function_history"].append({
    #                     "role": "function",
    #                     "name": tool_call.function.name,
    #                     "content": str(result)
    #                 })
    #                 # Process the result here instead of calling agent_cx again
    #                 yield "\n\nBased on the retrieved information:\n"
    #                 for chunk in generate_response_after_function_call(conversation_history, system_prompt, result):
    #                     yield chunk
    #                     full_response += chunk
    #             except json.JSONDecodeError:
    #                 print(colored(f"Error decoding function arguments: {tool_call.function.arguments}", "light_green"))
    #             except Exception as e:
    #                 print(colored(f"Error processing function call: {str(e)}", "light_green"))

In [None]:
# response = co.rerank(
#     model="rerank-english-v3.0",
#     query=user_query,
#     documents=docs,
#     top_n=5,
# )

# response.results