# __ INIT __

In [2]:
from services.db.supabase_services import supabase_client
supabase = supabase_client()

from app.core.config import settings
import requests
import json
from typing import Dict, Annotated, Optional, List, Union
import os 
from dotenv import load_dotenv
load_dotenv()

from services.cache import get_agent_metadata, get_all_agents

from services.chat.chat import similarity_search, get_embedding

## In-memory KB (FAISS)

In [35]:
import faiss
import numpy as np

# Initialize an in-memory index
# IndexFlatL2: a FAISS flat index compared by L2 distance (per its name).
dimension = 1024  # vector width; presumably the embedding size used elsewhere — TODO confirm
index = faiss.IndexFlatL2(dimension)

# Add vectors
# 1000 random float32 rows stand in for real embeddings in this demo.
vectors = np.random.random((1000, dimension)).astype('float32')
index.add(vectors)

# Perform similarity search
# A single random query row with the same width as the indexed vectors.
query = np.random.random((1, dimension)).astype('float32')
k = 5  # Number of nearest neighbors
# `search` hands back two results, unpacked here as distances and indices.
distances, indices = index.search(query, k)

In [40]:
async def get_user_kb(user_id: str):
    """Load every knowledge-base row stored for one user.

    Issues two Supabase queries filtered on ``user_id``: first against the
    ``chunks`` table, then against ``user_web_data``.

    Args:
        user_id: Value matched against the ``user_id`` column of both tables.

    Returns:
        A dict with keys ``'chunks'`` and ``'web_data'`` holding the ``.data``
        of the respective query responses, or ``None`` if anything raised
        (the error is printed rather than re-raised).
    """

    def _select_for_user(table_name):
        # Same builder chain for both tables: all columns, this user only.
        request = supabase.table(table_name).select('*')
        return request.eq('user_id', user_id).execute()

    try:
        chunks_result = _select_for_user('chunks')
        web_result = _select_for_user('user_web_data')
        return {
            'chunks': chunks_result.data,
            'web_data': web_result.data,
        }
    except Exception as e:
        # Best-effort fetch: report the problem and signal failure with None.
        print(f"Error fetching user data: {str(e)}")
        return None
    
# Top-level await (notebook cell). The user id is hard-coded for this scratch run;
# `user_kb` is None when get_user_kb hit an error.
user_kb: Union[Dict, None] = await get_user_kb("user_2mmXezcGmjZCf88gT2v2waCBsXv")

async def filter_agent_kb(data: Union[Dict, None],
                          data_source: Union[str, List[Dict], None]):
    """Keep only the knowledge-base rows that belong to an agent's data sources.

    Args:
        data: Dict with optional ``'web_data'`` and ``'chunks'`` lists (the
            shape returned by ``get_user_kb``), or ``None``.
        data_source: The agent's source selection. Either the sentinel string
            ``"all"``, a JSON string encoding a list of entries, or that list
            already decoded. Each entry needs a ``'data_type'`` key; entries
            with ``data_type == 'web'`` are read by ``'title'``, all others
            by ``'id'``. ``None`` or an empty value means no source selected.

    Returns:
        ``{'web_data': [...], 'chunks': [...]}``. Web rows are kept when their
        ``'root_url'`` equals a selected web title; chunk rows are kept when
        their ``'parent_id'`` equals a selected file id. Both lists are empty
        when ``data`` is empty or nothing is selected; with ``"all"`` every
        row is returned.

    Raises:
        json.JSONDecodeError: ``data_source`` is a string other than ``"all"``
            that is not valid JSON.
        KeyError: an entry lacks ``'data_type'`` or the key read for its type.
    """
    if not data:
        return {'web_data': [], 'chunks': []}

    # Tolerate keys that are present but None.
    web_rows = data.get('web_data') or []
    chunk_rows = data.get('chunks') or []

    # "all" is the sentinel the other search helpers in this file use for
    # "no restriction"; it is not JSON, so it must be handled before decoding.
    if data_source == "all":
        return {'web_data': list(web_rows), 'chunks': list(chunk_rows)}

    if not data_source:
        return {'web_data': [], 'chunks': []}

    if isinstance(data_source, (str, bytes, bytearray)):
        entries = json.loads(data_source)
    else:
        entries = data_source

    # Sets give O(1) membership checks while filtering the rows below.
    web_titles = {entry['title'] for entry in entries if entry['data_type'] == 'web'}
    file_ids = {entry['id'] for entry in entries if entry['data_type'] != 'web'}

    return {
        'web_data': [row for row in web_rows if row.get('root_url') in web_titles],
        'chunks': [row for row in chunk_rows if row.get('parent_id') in file_ids],
    }


async def similarity_search_db(data_source: str, query: str,
                               user_id: str = "user_2mmXezcGmjZCf88gT2v2waCBsXv"):
    """Run ``similarity_search`` restricted to an agent's data sources.

    Args:
        data_source: Either the sentinel ``"all"`` or a JSON string encoding a
            list of entries. Entries with ``data_type == 'web'`` contribute
            their ``'title'``; all other entries contribute their ``'id'``.
        query: Text to search for.
        user_id: User whose data is searched. Defaults to the id that was
            previously hard-coded in this helper.

    Returns:
        Whatever ``similarity_search`` returns. The results are returned for
        both kinds of ``data_source``; previously only the ``"all"`` branch
        returned them and the filtered branch fell through to ``None``.

    Raises:
        json.JSONDecodeError: ``data_source`` is neither ``"all"`` nor valid JSON.
    """
    if data_source == "all":
        sources = {"web": ["all"], "text_files": ["all"]}
    else:
        entries = json.loads(data_source)
        sources = {
            "web": [item['title'] for item in entries if item['data_type'] == 'web'],
            "text_files": [item['id'] for item in entries if item['data_type'] != 'web'],
        }

    results = await similarity_search(query, data_source=sources, user_id=user_id)

    print("supabase results:", results)
    return results
    

# Sample question for the search cells. Note: this rebinds `query`, which the
# FAISS demo cell earlier bound to a NumPy array.
query = "what is the cost of an oil change"


In [None]:
# Look up one agent's metadata (agent id hard-coded for this scratch run).
agent_metadata: Dict = await get_agent_metadata("aaf5fce2-c925-4a32-aefc-e4af35d4b8e1")
# 'dataSource' is what filter_agent_kb decodes; it is None when the key is absent.
data_source: str = agent_metadata.get('dataSource', None)

# Narrow the user's KB rows to the sources this agent is configured with.
agent_kb_data = await filter_agent_kb(data=user_kb, data_source=data_source)

# Bare expression: shows the filtered KB as the cell output.
agent_kb_data


## LLM TEXT-TEXT

In [None]:
from livekit.agents import llm
from livekit.plugins import openai  # or anthropic

async def chat_with_llm():
    """Send a fixed two-message conversation to the LLM and stream the reply.

    Builds a chat context with one system and one user message, requests a
    streamed completion, and prints each text fragment as it arrives.

    Returns:
        The full reply text, i.e. all streamed fragments concatenated. The
        text was previously accumulated but then discarded.
    """
    # Initialize the LLM
    llm_instance = openai.LLM()  # or anthropic.LLM()

    # Create the chat context: system message first, then the user message
    chat_ctx = llm.ChatContext()
    chat_ctx.append(
        role="system",
        text="You are a helpful assistant."
    )
    chat_ctx.append(
        role="user",
        text="Hello, how are you?"
    )

    # Get response stream
    response_stream = llm_instance.chat(chat_ctx=chat_ctx)

    # Collect fragments in a list and join once at the end
    fragments = []
    async for chunk in response_stream:
        # Defensive: skip a chunk that carries no choices rather than
        # failing on the [0] index.
        if not chunk.choices:
            continue
        # Each chunk.choices[0].delta.content contains a text fragment
        text_fragment = chunk.choices[0].delta.content
        if text_fragment:
            fragments.append(text_fragment)
            print(text_fragment, end="", flush=True)  # For real-time output

    return "".join(fragments)

# Top-level await: works in a notebook/IPython cell, not in a plain script.
await chat_with_llm()
# From a regular script, run with:
# asyncio.run(chat_with_llm())

In [10]:
from livekit.agents import llm
from livekit.plugins import openai
from livekit.agents.llm import USE_DOCSTRING
from services.cache import get_agent_metadata

from services.chat.chat import similarity_search

@llm.ai_callable(
    name="search_products_and_services",
    description="Search the documentation for technical related questions",
    auto_retry=True
)
async def search_products_and_services(
    query: Annotated[
        str,
        llm.TypeInfo(
            description="The search query containing keywords about products or services"
        )
    ],
    category: Annotated[
        str, 
        llm.TypeInfo(
            description="The category to search in: 'products', 'services', or 'both'"
        )
    ] = "both", 
) -> str:
    """
    Performs a semantic search in the database for products and services based on the user's query.
    Returns formatted information about matching products/services.

    The agent's ``dataSource`` metadata decides what is searched: the sentinel
    ``"all"`` searches everything, otherwise it is decoded as a JSON list whose
    ``'web'`` entries contribute their ``'title'`` and whose other entries
    contribute their ``'id'``. The search results are then wrapped in a prompt
    and passed to ``get_llm_response``.

    Args:
        query: Search text supplied by the model.
        category: Accepted from the model but not used to filter the search.

    Returns:
        The text produced by ``get_llm_response``; if that yields no text, the
        stringified search results; on any error, a fixed apology message.
    """
    print("\n\n\n\n FUNCTION CALL: search_products_and_services")

    try:
        # Agent and user ids are hard-coded for this scratch notebook.
        agent_metadata = await get_agent_metadata("aaf5fce2-c925-4a32-aefc-e4af35d4b8e1")
        raw_source = agent_metadata.get('dataSource', None)

        if raw_source == "all":
            data_source = {"web": ["all"], "text_files": ["all"]}
        else:
            entries = json.loads(raw_source)
            data_source = {
                "web": [item['title'] for item in entries if item['data_type'] == 'web'],
                "text_files": [item['id'] for item in entries if item['data_type'] != 'web']
            }

        # One search call serves both cases; only `data_source` differs.
        results = await similarity_search(query, data_source=data_source, user_id="user_2mmXezcGmjZCf88gT2v2waCBsXv")

        print("\n\n\n\n RESULTS: ", results)
        rag_prompt = f"""
        ## User Query: {query}
        ## Found matching products/services: {results}
        """
        chat_ctx = llm.ChatContext()
        chat_ctx.append(
                role="user",
                text=rag_prompt
            )

        answer = await get_llm_response(chat_ctx)
        # Honor the declared `str` return: if the helper yields no text,
        # hand back the raw search results instead of None.
        return answer if answer else str(results)

    except Exception as e:
        # Still a best-effort tool call, but no longer silent about the cause.
        print(f"search_products_and_services failed: {e!r}")
        return "Sorry, I encountered an error while searching for products and services."



async def get_llm_response(chat_ctx: llm.ChatContext, fnc_ctx: Optional[llm.FunctionContext] = None):
    """Stream one LLM completion, printing text and executing any tool calls.

    Args:
        chat_ctx: Conversation to send to the model.
        fnc_ctx: Optional function context. It is forwarded to the model only
            when truthy, exactly as before.

    Returns:
        The concatenated text fragments of the reply; an empty string when the
        model produced no text (for example when it only issued tool calls).
        Tool-call results are printed, not included in the return value.
    """
    print("\n\n\n\n GET LLM RESPONSE")

    llm_instance = openai.LLM()
    # Only pass fnc_ctx when one was supplied, so the no-tools call is unchanged.
    chat_kwargs = {"chat_ctx": chat_ctx}
    if fnc_ctx:
        chat_kwargs["fnc_ctx"] = fnc_ctx
    response_stream = llm_instance.chat(**chat_kwargs)

    text_fragments = []
    async for chunk in response_stream:
        # Defensive: skip a chunk that carries no choices rather than
        # failing on the [0] index.
        if not chunk.choices:
            continue

        delta = chunk.choices[0].delta
        if delta.content:
            print(delta.content, end="", flush=True)
            text_fragments.append(delta.content)
        elif delta.tool_calls:
            # Handle function calls
            for tool_call in delta.tool_calls:
                print(f"\nFunction called: {tool_call.function_info.name}")
                print(f"Arguments: {tool_call.arguments}")

                # Execute the function and wait for its task to finish
                called_function = tool_call.execute()
                result = await called_function.task
                print(f"Function result: {result}")

    return "".join(text_fragments)

async def test_llm_function_calls():
    """Drive one LLM round-trip with the search tool registered.

    Registers ``search_products_and_services`` on a fresh function context,
    builds a single-message chat context, and passes both to
    ``get_llm_response``. Returns nothing.
    """
    user_prompt = "Compare calendar api with the schedule api, how do they differ in their functionality?"

    # Make the search tool available to the model
    tools = llm.FunctionContext()
    tools._register_ai_function(search_products_and_services)

    # Single-turn conversation carrying the prompt
    conversation = llm.ChatContext()
    conversation.append(role="user", text=user_prompt)

    await get_llm_response(conversation, tools)

# Top-level await: works in a notebook/IPython cell, not in a plain script.
await test_llm_function_calls()
# From a regular script, run with:
# asyncio.run(test_llm_function_calls())





 GET LLM RESPONSE

Function called: search_products_and_services
Arguments: {'query': 'calendar api', 'category': 'services'}




 FUNCTION CALL: search_products_and_services


similarity_search...




 RESULTS:  [{'id': 386, 'url': 'https://developer.nylas.com/docs/v3/calendar/using-the-events-api', 'header': '## Title: Using the Events API ## Description: Use the v3 Nylas Events API to access and work with calendar and event data.', 'content': '4804,\\\n    "creator": {\\\n      "email": "anna.molly@example.com",\\\n      "name": ""\\\n    },\\\n    "description": null,\\\n    "grant_id": "1e3288f6-124e-405d-a13a-635a2ee54eb2",\\\n    "hide_participants": false,\\\n    "html_link": "https://www.google.com/calendar/event?eid=NmE0dXIwabQAAAA",\\\n    "ical_uid": "6aaaaaaame8kpgcid6hvd0q@google.com",\\\n    "id": "6aaaaaaame8kpgcid6hvd",\\\n    "object": "event",\\\n    "organizer": {\\\n      "email": "anna.molly@example.com",\\\n      "name": ""\\\n    },\\\n    "participants": [\

## Anthropic pdf 

In [None]:
import anthropic
import base64
import httpx
import os 
from dotenv import load_dotenv

load_dotenv()

# First fetch the file
pdf_url = "https://assets.anthropic.com/m/1cd9d098ac3e6467/original/Claude-3-Model-Card-October-Addendum.pdf"
pdf_response = httpx.get(pdf_url)
# Fail fast on an HTTP error so an error page is never encoded and sent as a PDF.
pdf_response.raise_for_status()
# The document block below carries the file as base64 text.
pdf_data = base64.standard_b64encode(pdf_response.content).decode("utf-8")


# Then send the API request: one user message holding the PDF document
# followed by the question about it.
client = anthropic.Anthropic()
message = client.beta.messages.create(
    model="claude-3-5-sonnet-20241022",
    betas=["pdfs-2024-09-25"],
    max_tokens=1024,
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "document",
                    "source": {
                        "type": "base64",
                        "media_type": "application/pdf",
                        "data": pdf_data
                    }
                },
                {
                    "type": "text",
                    "text": "Which model has the highest human preference win rates across each use-case?"
                }
            ]
        }
    ],
)

print(message)


## DataSource

In [None]:
from services.chat.chat import similarity_search

# NOTE(review): other calls in this file pass `data_source` as a keyword dict
# together with `user_id`; here "all" is passed positionally and no user id is
# given — confirm against similarity_search's current signature.
await similarity_search("pre packaged admin", "all")

In [12]:
from services.cache import get_agent_metadata
import json 
from services.db.supabase_services import supabase_client
supabase = supabase_client()

agents_metadata = await get_agent_metadata("13400af9-0655-46bc-a815-9664910c2abc")
data_source = agents_metadata.get('dataSource', None)
if data_source != "all":
    data_source = json.loads(data_source)
    data_source = {
                "web": [item['title'] for item in data_source if item['data_type'] == 'web'],
                "text_files": [item['id'] for item in data_source if item['data_type'] != 'web']
            }

elif data_source == "all":
    data_source



In [7]:
from services.chat.chat import get_embedding
# Embed a sample string (top-level await; notebook cell). The result is only
# bound to a name here and not used by the other cells in this section.
query_embedding = await get_embedding("jina embedding model")

## Nylas

In [None]:
""" NYLAS WEBHOOK SET UP """

from dotenv import load_dotenv
load_dotenv()

import os
import sys
from nylas import Client
from nylas.models.webhooks import WebhookTriggers

# Nylas client configured from the NYLAS_API_KEY / NYLAS_API_URI environment variables.
nylas = Client(
    os.getenv('NYLAS_API_KEY'),
    os.getenv('NYLAS_API_URI'),
)

grant_id = "5ef0555c-25ab-4b4e-b4a1-02fd8ba4d255"
webhook_url = "https://internally-wise-spaniel.eu.ngrok.io/api/v1/nylas/webhook"

# Notification address is read from the EMAIL environment variable.
email = os.getenv("EMAIL")

# Register a webhook for the event-created trigger, delivered to `webhook_url`.
webhook_request = {
    "trigger_types": [WebhookTriggers.EVENT_CREATED],
    "webhook_url": webhook_url,
    "description": "My first webhook",
    "notification_email_address": email,
}
webhook = nylas.webhooks.create(request_body=webhook_request)

print(webhook)

## NYLAS API

In [6]:
import os 
from dotenv import load_dotenv
from nylas import Client
from typing import List, Dict, Any
import requests

load_dotenv()

# Nylas credentials from the environment; either value is None when unset.
NYLAS_API_KEY = os.environ.get("NYLAS_API_KEY")
NYLAS_API_URI = os.environ.get("NYLAS_API_URI")

# Shared Nylas client for the cells that follow
nylas = Client(api_key=NYLAS_API_KEY, api_uri=NYLAS_API_URI)

In [None]:
""" WORKING SEND EMAIL """

grant_id = "5ef0555c-25ab-4b4e-b4a1-02fd8ba4d255"
email = "michael@flowon.ai"

#attachment = utils.file_utils.attach_file_request_builder("Nylas_Logo.png")

# The same address is used as both recipient and reply-to.
recipient = {"name": "Name", "email": email}
email_request = {
    "to": [dict(recipient)],
    "reply_to": [dict(recipient)],
    "subject": "Your Subject Here",
    "body": "Your email body here.",
}

# Send through the grant identified by `grant_id` and show the API response.
message = nylas.messages.send(grant_id, request_body=email_request)

print(message)

In [None]:
""" get calendar id"""
# Grant whose calendar is looked up (hard-coded for this scratch run).
grant_id = "5ef0555c-25ab-4b4e-b4a1-02fd8ba4d255"

# Look up the calendar addressed as "primary" for that grant.
calendar = nylas.calendars.find(
    grant_id,
    "primary"
)

# Print the response so the calendar's details can be read from the cell output.
print(calendar)   

In [None]:
""" WORKING GET CALENDAR EVENTS """
# Hard-coded calendar id passed to get_calendar_events in the test call at the end of this cell.
calendar_id = "AAkALgAAAAAAHYQDEapmEc2byACqAC-EWg0AT6mu_rvDikK57fYroNKSNAAEWMJ6ZAAA"

def get_calendar_events(grant_id: str, calendar_id: str = "primary", limit: int = 100,
                        timeout: float = 30.0) -> List[Dict[Any, Any]]:
    """
    Fetch calendar events for a given grant (email) from Nylas API.

    Sends a GET to ``{NYLAS_API_URI}/v3/grants/{grant_id}/events`` with a
    bearer token built from ``NYLAS_API_KEY``.

    Args:
        grant_id: Grant whose events are requested; interpolated into the URL.
        calendar_id: Sent as the ``calendar_id`` query parameter.
        limit: Sent as the ``limit`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled server.

    Returns:
        The decoded JSON response body, unchanged.
        NOTE(review): the annotation promises a list, but the body is returned
        as-is — confirm whether the API wraps the events in an envelope object.

    Raises:
        requests.HTTPError: the response status is 4xx/5xx.
        requests.Timeout: the server did not respond within ``timeout``.
    """
    # Use the v3 endpoint format
    url = f"{NYLAS_API_URI}/v3/grants/{grant_id}/events"
    
    headers = {
        "Accept": "application/json",
        "Authorization": f"Bearer {NYLAS_API_KEY}",
        "Content-Type": "application/json"
    }
    
    params = {
        "calendar_id": calendar_id,
        "limit": limit
    }
    
    response = requests.get(url, headers=headers, params=params, timeout=timeout)
    # Turn HTTP error statuses into exceptions instead of returning an error body.
    response.raise_for_status()
    
    return response.json()

# Test the function (hard-coded grant id; `calendar_id` is the value assigned
# earlier in this cell). As the cell's last expression, the result is displayed.
get_calendar_events("5ef0555c-25ab-4b4e-b4a1-02fd8ba4d255", calendar_id=calendar_id)


In [None]:
from services.knowledge_base.kb import get_kb_items

# Fetch the KB items for the hard-coded user (top-level await; notebook cell).
# `response` is what a later cell feeds to group_by_root_url.
response = await get_kb_items("user_2mmXezcGmjZCf88gT2v2waCBsXv")

In [None]:
from itertools import groupby
from operator import itemgetter

def group_by_root_url(items):
    """Collapse per-page records into one record per ``root_url``.

    Args:
        items: List of dicts. Each needs ``'root_url'`` and ``'id'``;
            ``'url'``, ``'token_count'``, ``'created_at'`` and ``'user_id'``
            are optional.

    Returns:
        A list of consolidated dicts ordered by ``root_url``. Each has
        ``'title'`` and ``'root_url'`` (both the root URL), ``'content'``
        (one ``{'url', 'id', 'token_count'}`` dict per page, in input order),
        ``'created_at'`` and ``'user_id'`` taken from the group's first page,
        and ``'data_type'`` fixed to ``'web'``.

    Raises:
        TypeError: ``items`` is not a list.
        KeyError: a record lacks ``'root_url'`` or ``'id'``.
    """
    if not isinstance(items, list):
        raise TypeError(f"Expected a list, got {type(items)}")

    by_root = itemgetter('root_url')
    consolidated_records = []

    # groupby only merges adjacent records, so sort by the same key first.
    for root, members in groupby(sorted(items, key=by_root), key=by_root):
        pages = list(members)
        first_page = pages[0]

        # Per-page summary: url and token_count fall back to defaults, id is required.
        page_entries = []
        for page in pages:
            page_entries.append({
                'url': page.get('url', ''),
                'id': page['id'],
                'token_count': page.get('token_count', 0)
            })

        consolidated_records.append({
            'title': root,  # the root URL doubles as the title
            'root_url': root,
            'content': page_entries,
            'created_at': first_page.get('created_at', ''),
            'data_type': 'web',
            'user_id': first_page.get('user_id')  # taken from the first page of the group
        })

    return consolidated_records

# Group the fetched KB items by root URL; the bare name on the last line
# shows the result as the cell output.
grouped = group_by_root_url(response)
grouped

In [None]:
# Inspect the per-URL entries of the first group (IndexError if `grouped` is empty).
grouped[0]['content']

In [None]:
from services.chat.chat import similarity_search

# NOTE(review): this call uses `table_names=`, while other calls in this file
# pass `data_source=` and `user_id=` — confirm which signature similarity_search
# currently has.
await similarity_search("what is the cost of an oil change", table_names=["user_text_files"])