In [None]:
from pinecone import Pinecone, ServerlessSpec
from dotenv import load_dotenv
import os
import time

load_dotenv()

def initialize_pinecone():
    """Build a Pinecone client using the key found in the environment.

    The key is read from the ``PINECONE_API_KEY`` environment variable; when
    the variable is unset, ``None`` is handed to the ``Pinecone`` constructor
    unchanged.

    Returns:
        The ``Pinecone`` client instance.
    """
    api_key = os.getenv("PINECONE_API_KEY")
    client = Pinecone(api_key=api_key)
    return client


def get_or_create_index(pc, index_name="quickstart", dimension=1024,
                        timeout=300, poll_interval=1):
    """Return a handle to ``index_name``, creating the index first if needed.

    A missing index is created as a serverless cosine index on AWS
    ``us-east-1``; the function then polls until the index reports ready.

    Args:
        pc: Pinecone client (anything exposing ``list_indexes``,
            ``create_index``, ``describe_index`` and ``Index``).
        index_name: Name of the index to look up or create.
        dimension: Vector dimension; only used when the index is created.
        timeout: Maximum number of seconds to wait for a newly created index
            to become ready. ``None`` waits indefinitely.
        poll_interval: Seconds to sleep between readiness checks.

    Returns:
        Whatever ``pc.Index(index_name)`` returns.

    Raises:
        TimeoutError: If a newly created index is still not ready once
            ``timeout`` seconds have elapsed.
    """
    # Check if index already exists
    existing_indexes = {index.name for index in pc.list_indexes()}

    if index_name in existing_indexes:
        print(f"Using existing index: {index_name}")
        return pc.Index(index_name)

    print(f"Creating new index: {index_name}")
    pc.create_index(
        name=index_name,
        dimension=dimension,
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1"
        )
    )

    # Wait for the index to be ready, but never spin forever: a stuck or
    # failed creation would otherwise hang the notebook kernel.
    deadline = None if timeout is None else time.monotonic() + timeout
    while not pc.describe_index(index_name).status['ready']:
        if deadline is not None and time.monotonic() >= deadline:
            raise TimeoutError(
                f"Index {index_name!r} was not ready after {timeout} seconds"
            )
        time.sleep(poll_interval)

    return pc.Index(index_name)


def store_embeddings(data, index, namespace="ns1", model="multilingual-e5-large",
                     batch_size=100, embed_batch_size=96):
    """Create embeddings and store them in Pinecone with batching.

    Each upsert batch of ``batch_size`` records is embedded in sub-requests of
    at most ``embed_batch_size`` texts, so the upsert size no longer dictates
    the size of the embedding request.

    Args:
        data: Sequence of dicts, each carrying an ``'id'`` and a ``'text'`` key.
        index: Index handle exposing ``upsert`` and ``describe_index_stats``.
        namespace: Namespace the vectors are written to.
        model: Name of the hosted embedding model.
        batch_size: Number of vectors sent per ``upsert`` call.
        embed_batch_size: Maximum number of texts sent per ``embed`` call.
            The default of 96 matches the documented per-request input limit
            of the hosted ``multilingual-e5-large`` model; adjust it when
            using a model with a different limit.

    Returns:
        The result of ``index.describe_index_stats()`` after all upserts.

    Raises:
        ValueError: If ``batch_size`` or ``embed_batch_size`` is below 1.
    """
    if batch_size < 1 or embed_batch_size < 1:
        # A negative step would make range() yield nothing and silently skip
        # every record, so reject it up front.
        raise ValueError("batch_size and embed_batch_size must be positive integers")

    pc = initialize_pinecone()

    # Process in batches
    for batch_number, start in enumerate(range(0, len(data), batch_size), start=1):
        batch = data[start:start + batch_size]

        vectors = []
        # Embed in sub-batches that stay within the embedding endpoint's limit
        for offset in range(0, len(batch), embed_batch_size):
            chunk = batch[offset:offset + embed_batch_size]
            embeddings = pc.inference.embed(
                model=model,
                inputs=[d['text'] for d in chunk],
                parameters={"input_type": "passage", "truncate": "END"}
            )
            vectors.extend(
                {
                    "id": d['id'],
                    "values": e['values'],
                    "metadata": {'text': d['text']}
                }
                for d, e in zip(chunk, embeddings)
            )

        # Upload batch to Pinecone
        index.upsert(vectors=vectors, namespace=namespace)

        print(f"Processed batch {batch_number}")

    return index.describe_index_stats()


def query_embeddings(query_text, index, namespace="ns1", top_k=10, model="multilingual-e5-large"):
    """Embed ``query_text`` and search ``index`` for the closest vectors.

    Args:
        query_text: Text to embed and search with.
        index: Index handle exposing ``query``.
        namespace: Namespace to search in.
        top_k: Number of matches requested.
        model: Name of the hosted embedding model.

    Returns:
        Whatever ``index.query`` returns; matches are requested with metadata
        and without vector values.
    """
    client = initialize_pinecone()

    # The query is embedded as a one-element batch with the "query" input type
    embed_params = {"input_type": "query"}
    query_embedding = client.inference.embed(
        model=model,
        inputs=[query_text],
        parameters=embed_params,
    )
    query_vector = query_embedding[0].values

    # Only metadata is needed back, not the stored vectors
    search_kwargs = {
        "namespace": namespace,
        "vector": query_vector,
        "top_k": top_k,
        "include_values": False,
        "include_metadata": True,
    }
    return index.query(**search_kwargs)


In [None]:

# Initialize Pinecone (do this once)
pc = initialize_pinecone()

# Get or create index (do this once per application startup)
# Pinecone index names may only contain lowercase letters, digits and hyphens,
# so an underscore ("web_data") is rejected by create_index; use a hyphen.
index = get_or_create_index(pc, "web-data", dimension=1024)

# You can now use this index multiple times for different operations

# Example: Adding new data
new_data = [
    {"id": "doc1", "text": "Some new document..."},
    {"id": "doc2", "text": "Another document..."}
]
store_embeddings(new_data, index)

# Example: Querying
results = query_embeddings("your query", index)

In [4]:
from services.db.supabase_services import supabase_client


In [7]:
# Fetch every user_web_data row recorded for the piqnic.com root URL
supabase = supabase_client()

results = (
    supabase.table("user_web_data")
    .select("*")
    .eq("root_url", "https://piqnic.com")
    .execute()
)
results.data

In [11]:
# Assign one user_id to every user_web_data row for the piqnic.com root URL
supabase = supabase_client()

# Define the user_id you want to add
new_user_id = "user_2mmXezcGmjZCf88gT2v2waCBsXv"  # Replace with the actual user_id you want to set

# Update all matching rows with a single filtered UPDATE.
# Selecting the rows first and updating them one by one costs one request per
# row, and a select only returns up to the API's per-request row cap, so rows
# beyond that cap would never be updated.
results = (
    supabase.table("user_web_data")
    .update({"user_id": new_user_id})
    .eq("root_url", "https://piqnic.com")
    .execute()
)

In [21]:
from services.chat.chat import similarity_search

results = await similarity_search(query = "What services do you offer?", data_source = {"web": ["https://piqnic.com/"]}, user_id =  "user_2mmXezcGmjZCf88gT2v2waCBsXv")
results



similarity_search...


 data_source: {'web': ['https://piqnic.com/']}


Error querying table user_text_files: 'text_files'


Length of results: 0



 all_results: []


[]