In [1]:
#| default_exp import_to_pinecone

%load_ext autoreload
%autoreload 2

In [2]:
#| export

import yaml
from typing import Dict
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone import ServerlessSpec
from tqdm.auto import tqdm
import binascii
from ibmcloudant.cloudant_v1 import CloudantV1
from ibm_cloud_sdk_core.authenticators import BasicAuthenticator
import nltk
import os
from tqdm.auto import tqdm
import sys
from transformers import AutoTokenizer, AutoModel
import torch.nn.functional as F
import torch
import numpy as np
import argparse
import time 
from dotenv import load_dotenv

load_dotenv("../.env")
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
sys.path.append(project_root)

from tools.optimal_embeddings_model.mailio_ai_libs.collect_emails import list_emails
from tools.optimal_embeddings_model.data_types.email import Email, MessageType
from api.services.embedding_service import EmbeddingService

  from .autonotebook import tqdm as notebook_tqdm
[nltk_data] Downloading package punkt to /Users/igor/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /Users/igor/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


In [3]:
#| export

def load_config(path:str) -> Dict:
    with open(path, 'r') as f:
        config = yaml.safe_load(f)
    return config

In [4]:
cfg = load_config('../config.yaml')

In [5]:
#| export

def connect_pinecone(cfg:Dict) -> Pinecone:
    pinecone_cfg = cfg.get("pinecone")
    api_key = os.getenv("PINECONE_API_KEY")
    pc = Pinecone(api_key=api_key)
    spec = ServerlessSpec(cloud=pinecone_cfg.get("cloud"), region=pinecone_cfg.get("region"))
    index = pc.Index(host=pinecone_cfg.get("index_name"))
    return index

In [6]:
#| export

def get_db_name(address:str) -> str:
    return "userdb-" + binascii.hexlify(address.encode()).decode() 

def connect_couchdb(cfg:Dict) -> CloudantV1:
    couch_cfg = cfg.get("couchdb")
    password = os.getenv("COUCHDB_PASSWORD")
    auth = BasicAuthenticator(couch_cfg.get("username"), password)
    client = CloudantV1(authenticator=auth)
    client.set_service_url(couch_cfg.get("host"))
    client.set_disable_ssl_verification(True)
    return client

In [7]:
#| export

# load transformers model
def load_embedding_service(cfg: Dict) -> EmbeddingService:
    embedding_service = EmbeddingService(cfg)
    return embedding_service

In [8]:
#| export

# default folders for import
DEFAULT_FOLDERS = ["inbox", "goodreads", "archive", "sent"]

def save_progress(address: str, folder: str, bookmark: str, processed_count: int):
    """Save progress to a checkpoint file"""
    progress_file = f"progress_{address}.json"
    import json
    
    try:
        # Load existing progress
        try:
            with open(progress_file, 'r') as f:
                progress = json.load(f)
        except FileNotFoundError:
            progress = {}
        
        # Update progress
        progress[folder] = {
            'bookmark': bookmark,
            'processed_count': processed_count,
            'timestamp': time.time()
        }
        
        # Save progress
        with open(progress_file, 'w') as f:
            json.dump(progress, f, indent=2)
            
    except Exception as e:
        print(f"Warning: Could not save progress: {e}")

def load_progress(address: str):
    """Load progress from checkpoint file"""
    progress_file = f"progress_{address}.json"
    import json
    
    try:
        with open(progress_file, 'r') as f:
            return json.load(f)
    except FileNotFoundError:
        return {}
    except Exception as e:
        print(f"Warning: Could not load progress: {e}")
        return {}


def import_to_pinecode(client, index, embedding_service:EmbeddingService, user_db: str, address:str, folders: str, batch_size:int = 500, resume: bool = True):
    """
    Import emails from couchdb to pinecone index
    Args:
        client: CloudantV1 client
        embedding_service: EmbeddingService object
        user_db: user db name
        folders: list of folders to import
        batch_size: batch size for import
        resume: whether to resume from checkpoint
    Results:
        None
    """
    processed = 0
    all_vectors = []
    
    # Load progress if resuming
    progress = {}
    if resume:
        progress = load_progress(address)
        if progress:
            print(f"Found existing progress for {address}: {progress}")
        else:
            print(f"No existing progress found for {address}, starting fresh")

    for folder in folders:
        # Check if we have progress for this folder
        folder_progress = progress.get(folder, {})
        bookmark = folder_progress.get('bookmark', "")
        folder_processed = folder_progress.get('processed_count', 0)
        
        if bookmark:
            print(f"Resuming {folder} from bookmark: {bookmark[:20]}... (already processed {folder_processed} emails)")
        else:
            print(f"Starting fresh import for {folder}")
        
        while True:
            for emails, new_bookmark in list_emails(client, user_db, folder, bookmark=bookmark, limit=batch_size):
                if len(emails) == 0:
                    bookmark = None
                    break

                # prepare data for import
                vectors = []

                for e in tqdm(emails, desc=f"Importing {folder}", unit="email"):
                    metadata = {
                        "created": e.created,
                        "from": e.sender_email or "",
                        "from_name": e.sender_name or "",
                        "folder": e.folder or "",
                    }
                    
                    embedding = embedding_service.create_embedding(e)

                    vector = {
                        "id": e.message_id,
                        "values": embedding.tolist(),
                        "metadata": metadata,
                    }
                    vectors.append(vector)
                    processed += 1
                
                # upsert to pinecone with retry logic
                max_retries = 3
                retry_delay = 1  # seconds
                
                for attempt in range(max_retries):
                    try:
                        index.upsert(vectors=vectors, namespace=address)
                        print(f"Upserted {len(vectors)} vectors to pinecone")
                        break
                    except Exception as e:
                        if attempt < max_retries - 1:
                            print(f"Upsert attempt {attempt + 1} failed: {e}. Retrying in {retry_delay} seconds...")
                            time.sleep(retry_delay)
                            retry_delay *= 2  # exponential backoff
                        else:
                            print(f"Upsert failed after {max_retries} attempts: {e}")
                            # Save progress before failing
                            save_progress(address, folder, bookmark, processed)
                            raise e

                # Save progress after successful batch
                save_progress(address, folder, new_bookmark, processed)
                bookmark = new_bookmark
                
            if not bookmark:
                break 

    # Clear progress file when complete
    print(f"Processed {processed} emails")
    return all_vectors


In [17]:
#| export

import pickle

def main(address:str, resume: bool = True):
    cfg = load_config('../config.yaml')
    client = connect_couchdb(cfg)
    index = connect_pinecone(cfg)
    embedding_service = load_embedding_service(cfg)
    user_db = get_db_name(address)
    import_to_pinecode(client, index, embedding_service, user_db, address, DEFAULT_FOLDERS, batch_size=500, resume=resume)
    


In [13]:
# testing just a few samples: 
# address = "0x139d1fe7306dd2b22c95c8e8343e5163fcc8aa09"
address = "0x71dd3b23ced7a30f95e4d1dd58f49df9c8ba007b"
cfg = load_config('../config.yaml')
client = connect_couchdb(cfg)
index = connect_pinecone(cfg)
embedding_service = load_embedding_service(cfg)
user_db = get_db_name(address)

folder="inbox"
bookmark=None
batch_size=500

embeddings = []
texts = []
for emails, new_bookmark in list_emails(client, user_db, folder, bookmark=bookmark, limit=batch_size):
    print(emails)
    if len(emails) == 0:
        continue
    batch_texts = []
    for e in tqdm(emails, desc=f"Importing {folder}", unit="email"):
        text = embedding_service.create_passage_text(e)
        texts.append(text)
        batch_texts.append(text)
    embeddings = embedding_service.embedder.batch_embed(batch_texts)



EmbeddingService using device: cpu




[Email(folder='inbox', message_type=<MessageType.HTML: 'html'>, sender_name='Igor Rendulic', sender_email='igor.amplio@gmail.com', message_id='<CADUk_sUzcye9TKWD7cnaNGkV-rRkMpiaE==31ozPU_jZAzFyOQ@mail.gmail.com>', created=1761855283437, subject='check if legacy addr', sentences=['Does it affect it?']), Email(folder='inbox', message_type=<MessageType.HTML: 'html'>, sender_name='Igor Rendulic', sender_email='igor.amplio@gmail.com', message_id='<CADUk_sWv6shXG86OOkWCTgKzgEUKToOV Gq1g22bZo8wkSDPtQ@mail.gmail.com>', created=1761256490214, subject='Re: i forgot to save it (previous email)', sentences=['a..here it is.', 'I see it nowOn Thu, Oct 23, 2025 at 3:54 PM Kaia <> wrote:How about now?', 'Sent with']), Email(folder='inbox', message_type=<MessageType.HTML: 'html'>, sender_name='Igor Rendulic', sender_email='igor.amplio@gmail.com', message_id='<CADUk_sXAcMjqve1fC_OA-TxaT0BF F_ObsQQvm5i 9SfBBhsbA@mail.gmail.com>', created=1761256324419, subject='My darling', sentences=['Hey love..how are 

Importing inbox: 100%|██████████| 3/3 [00:00<00:00, 28793.85email/s]


In [14]:
print(f"len(embeddings), len(texts): {len(embeddings)}, {len(texts)}")

query = "query: Digitalocean billing"
query_emb = embedding_service.embedder.embed(query)
print(query_emb.tolist())
output  = embeddings @ query_emb.T
sorted_indices = np.argsort(-output.squeeze())  # Negate to sort descending
sorted_scores = output.squeeze()[sorted_indices]

top_k = 10

indices = sorted_indices[:top_k].tolist()
output_texts = [texts[i] for i in indices]
for t in output_texts:
    print(t)

# print(output_texts)


len(embeddings), len(texts): 3, 3
[-0.10266619920730591, 0.043341971933841705, 0.02312132902443409, 0.011813398450613022, -0.029836324974894524, -0.029904214665293694, 0.03643130511045456, -0.0701022818684578, 0.017106901854276657, 0.07615597546100616, 0.028293205425143242, -0.02625429630279541, -0.02846384234726429, 0.038880348205566406, 0.03512177616357803, -0.03980278968811035, -0.030840208753943443, 0.05414986237883568, -0.008380327373743057, 0.06767440587282181, 0.08610551804304123, -0.042257554829120636, 0.01105162501335144, -0.058722902089357376, 0.032751500606536865, 0.02109619230031967, 0.07149751484394073, 0.03325236588716507, -0.08340124785900116, -0.1558544784784317, 0.0058023156598210335, 0.03601308912038803, 0.03427319973707199, -0.055207911878824234, 0.08053958415985107, -0.04950195923447609, -0.013273612596094608, 0.014152169227600098, 0.01087226253002882, 0.03176058083772659, -0.052371107041835785, -0.10578788816928864, -0.0342719741165638, -0.06178238242864609, -0.024

In [15]:
address

'0x71dd3b23ced7a30f95e4d1dd58f49df9c8ba007b'

In [18]:
main(address=address, resume=False)

EmbeddingService using device: cpu
Starting fresh import for inbox


Importing inbox: 100%|██████████| 3/3 [00:00<00:00, 26.16email/s]


Upserted 3 vectors to pinecone
Starting fresh import for goodreads
Starting fresh import for archive




Starting fresh import for sent


Importing sent: 100%|██████████| 5/5 [00:00<00:00, 83.15email/s]


Upserted 5 vectors to pinecone
Processed 8 emails


In [None]:
#| export

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Run the main function with an address argument.")
    parser.add_argument("address", type=str, help="The address to process")
    parser.add_argument("--no-resume", action="store_true", help="Don't resume from checkpoint, start fresh")
    args = parser.parse_args()
    address = args.address
    if not address:
        print("Please provide an address")
        sys.exit(1)
    resume = not args.no_resume
    main(address, resume=resume)

## Checkpoint/Resume Usage

The import process now supports checkpointing and resuming:

### How it works:
1. **Progress tracking**: Saves progress to `progress_{address}.json` after each successful batch
2. **Automatic resume**: By default, the process will resume from the last checkpoint
3. **Manual control**: Use `--no-resume` flag to start fresh

### Usage examples:

```python
# Resume from checkpoint (default behavior)
main("0x139d1fe7306dd2b22c95c8e8343e5163fcc8aa09")

# Start fresh (ignore existing checkpoints)
main("0x139d1fe7306dd2b22c95c8e8343e5163fcc8aa09", resume=False)

# Command line usage
# python import_to_pinecone.py 0x139d1fe7306dd2b22c95c8e8343e5163fcc8aa09
# python import_to_pinecone.py 0x139d1fe7306dd2b22c95c8e8343e5163fcc8aa09 --no-resume
```

### What gets saved:
- Current bookmark for each folder
- Total number of processed emails
- Timestamp of last update

### What happens on failure:
- Progress is saved before the process fails
- Next run will resume from the last successful batch
- No duplicate processing of emails


In [None]:
query_embedding = [-0.114980519, 0.0352215357, -0.00416095927, 0.0304526258, -0.0307287332, -0.0450988, 0.00264325342, -0.0236028023, 0.00121400377, 0.0592125617, 0.0408993028, -0.0460021868, -0.0613273494, 0.0195900369, 0.026029909, -0.0581343807, -0.050411, 0.0841189921, -0.070286952, 0.052067887, 0.0887702256, -0.0741174221, -0.00412515691, -0.0705656335, -0.0174612589, 0.0146158719, 0.0470724851, 0.05750487, -0.0772210956, -0.152493238, -0.00301737338, 0.0534291044, 0.0637061223, -0.043149136, 0.0933505297, -0.0366310365, -0.0338065214, 0.0147236232, 0.0333913118, 0.0404779725, -0.0319644436, -0.0434156545, -0.060448572, -0.0451199599, 0.0250000693, -0.0418123826, -0.0543156825, -0.040147908, 0.106355153, 0.0534398668, -0.0134380246, -0.0284177773, 0.0514313355, 0.0699627548, 0.026482923, 0.00121419714, 0.0552529879, 0.0230911486, 0.0453069136, 0.0481971391, 0.00644992, 0.0474982, -0.139956176, 0.120583072, 0.0527250953, 0.0920036957, -0.0674039051, 0.0236687679, -0.0586087592, -0.00951450597, -0.0415765308, -0.00664569251, -0.0275016055, -0.00743810367, -0.0403231941, 0.104847685, 0.0533728525, 0.0415390022, -0.0688180774, -0.0432218798, 0.034161415, 0.00487185875, -0.0246647391, -0.00559815113, -0.0066167172, -0.0430837907, 0.0420652889, 0.0145888077, -0.00589514198, -0.0524347685, -0.0158092882, -0.0237218067, 0.0194874145, -0.0881199539, -0.0335740522, 0.00749573158, 0.0426599495, 0.0156665221, -0.0499160513, 0.130260468, 0.0255488977, -0.00419348, -0.0567952208, -0.0217570793, -0.0328402333, -0.0578974672, 0.0583611242, 0.0186041314, 0.0964489654, -0.0214160383, -0.00758902589, -0.0356858149, 0.00500926701, -0.0420710966, 0.0464245602, 0.0445389785, -0.0325320698, -0.0944698304, -0.0169659983, -0.00137118041, 0.0183368977, 0.0488609076, -0.0401366, -0.0835121349, 0.0327090099, -0.0368107259, 0.0523422919, 0.159174457, 0.0343137272, 0.0117963515, 0.0531069674, -0.0323558077, -0.0164605342, 0.0173060484, -0.0665727109, -0.0191536099, -0.0669868886, 0.0283415224, 0.0466173626, 0.0315630808, -0.0463822633, 0.00849553291, -0.04635242, -0.0683984905, -0.00419130921, 0.100941479, -0.0152249131, 0.018213775, -0.0430982709, -0.0779110566, -0.0310253017, -0.0225429833, 0.0660697594, -0.00950285, 0.00453796843, 0.0681438595, 0.00435244432, 0.048565302, -0.0762950331, 0.00143286, -0.0314196572, -0.0338907354, 0.0307576098, 0.0981449, 0.045947846, -0.0992823243, 0.0387779512, 0.0210393146, 0.0304566231, -0.0307758581, -0.0290593375, 0.0412245914, 0.0335935503, -0.0375075713, 0.13695918, -0.0414935462, 0.00625524623, -0.0328760259, 0.0149280233, -0.032054726, 0.0263614375, -0.0436943658, -0.0323341228, 0.0655687, 0.101477839, -0.0176253747, -0.0491968878, -0.0677504241, -0.0126563078, -0.07069996, -0.0924021155, -0.0189639106, -0.0524488948, 0.0504996814, 0.0540605038, 0.0468781926, 0.010754982, -0.0411120392, -0.0313039236, -0.0171229187, 0.0764488205, 0.038904734, -0.0558108, 0.0451041609, -0.0169073772, 0.0484358445, 0.0572798438, 0.0153836319, -0.0371349826, -0.019461073, -0.0362087637, 0.00278575602, 0.038254045, -0.0386589542, -0.0336365104, -0.0276459344, -0.0212518815, 0.0365699492, 0.0174049158, -0.0181509, 0.0363066085, -0.0408866853, -0.0204802491, -0.139210477, 0.0195738114, -0.0273184106, -0.00471845642, 0.0690008, -0.058181759, -0.0283186249, 0.0795539245, -0.0209769, 0.0335785411, 0.0677514076, -0.0297623444, -0.0515822954, -0.0209714472, -0.00509690028, 0.0438739732, 0.0121853305, -0.0352165662, 0.0524843968, -0.0430522822, -0.0286291074, -0.0942530409, 0.0146802776, -0.0414246209, 0.0398090035, -0.0140750911, 0.143130258, 0.0828927681, 0.0225828961, 0.0305497535, 0.0396125205, 0.0035060579, 0.0497903675, -0.0899640769, 0.0794421, -0.0831965879, 0.00569625339, 0.0231964011, -0.0192761831, -0.0615913644, 0.0539445356, 0.0168562345, 0.0287944488, -0.0436991453, 0.0943232551, -0.0525063612, -0.0719280839, 0.0491161905, -0.00977445766, -0.0208875295, 0.0196316335, -0.0277101845, 0.0101582073, 0.020960819, -0.0482847765, -0.0440185331, -0.0501769558, 0.0846821368, -0.0242176782, 0.0278070942, -0.0641063228, -0.0692254379, 0.0491343141, -0.0162321683, 0.0311665405, 0.0250834059, -0.0260984953, 0.0217571966, -0.055072885, -0.0455337279, 0.00373478886, 0.0525268689, -0.0952654332, 0.0835201815, 0.0133449165, 0.0206183307, 0.0765283182, -0.0108406171, -0.0271634702, -0.0210041814, -0.0167688262, -0.079106994, 0.032726109, 0.0928797275, 0.0227644164, -0.0511889234, -0.0171555616, -0.0126958322, 0.0033845671, -0.0832068101, 0.00548145967, -0.0672202632, 0.0928172767, 0.0475181825, 0.0317454711, -0.0386335365, -0.141020745, -0.000572538935, 0.0149013158, 0.00571364211, 0.0254589152, 0.0376027562, 0.052957695, -0.0126001723, -0.0415098332, -0.0461263359, -0.00353924767, 0.0523932725, 0.0554820523, -0.0307412539, -0.0410940759, 0.0283505358, 0.0557425655, 0.00452426262, -0.0234852619, -0.0850478411, 0.0143084722, 0.0347823389, 0.134456575, -0.0243231859, 0.0144525385, 0.00223842962, 0.0371516161, 0.0221516248, 0.0557851978, 0.0248302538, -0.0453927554, -0.0350644775, 0.12124563, 0.00464856904, 0.0222177412, 0.0522906296, -0.0503000058, 0.00991672557, 0.00188151014, -0.0151643213, 0.0463203192, 0.0236962084, -0.0407559909, 0.00477368571, 0.0771854445, -0.0101109818, -0.0575705692, -0.0355248377, -0.0359492227, -0.0674467757, -0.0584249906, -0.0621696189, 0.0493093766, -0.00541766267, 0.0686385483, -0.0149182752, -0.073050268, 0.00521918153, -0.0245368015, -0.0760151446, -0.00473224046, -0.0295894127, 0.028926298, -0.0573166534, 0.0400361568]
qw = [-0.09499797970056534, 0.029128514230251312, 0.03146640583872795, 0.0009419162524864078, -0.028227737173438072, -0.017507895827293396, 0.02060784213244915, -0.08855848014354706, 0.007154428865760565, 0.08917304873466492, 0.053231656551361084, -0.01346743106842041, 0.007562792394310236, 0.05379572510719299, 0.026146337389945984, -0.03180790692567825, -0.030591817572712898, 0.062293365597724915, -0.03585229814052582, 0.0847092866897583, 0.08690711855888367, -0.02181522734463215, 0.03261467069387436, -0.04273358732461929, 0.03295436128973961, 0.01063675805926323, 0.05754604563117027, 0.011331071145832539, -0.07131034880876541, -0.16606220602989197, 0.004777181893587112, 0.03475815802812576, 0.05232963338494301, -0.04504270851612091, 0.08638252317905426, -0.08231708407402039, -0.010607056319713593, -0.005998334847390652, 0.001080535352230072, 0.03760376200079918, -0.04438306763768196, -0.09223751723766327, -0.027440598234534264, -0.05183999240398407, -0.013430285267531872, -0.03946418687701225, -0.03699682652950287, -0.034534960985183716, 0.0852154791355133, -0.00040820264257490635, -0.008096565492451191, -0.024780089035630226, -0.005601174663752317, 0.06535591930150986, 0.039856668561697006, 0.014539605006575584, 0.03980931267142296, 0.06636885553598404, 0.07103949785232544, 0.04562099650502205, 0.004201697185635567, 0.08688514679670334, -0.17309096455574036, 0.12321854382753372, 0.05835258960723877, 0.04637813940644264, -0.06229560449719429, 0.0035279111471027136, -0.04875412955880165, -0.029380515217781067, -0.0011269483948126435, 0.0148195531219244, -0.03344501554965973, 0.013681472279131413, -0.04445071518421173, 0.06652364879846573, 0.0738387405872345, 0.05801070109009743, -0.0756135955452919, -0.04114871844649315, 0.020434916019439697, -0.022201817482709885, -0.04653291031718254, 0.0011349950218573213, -0.04242926463484764, -0.048239488154649734, 0.08664866536855698, 0.01729692332446575, -0.004572239704430103, -0.03655165433883667, -0.06085466593503952, -0.03699799254536629, 0.08126068115234375, -0.07323172688484192, -0.046528249979019165, -0.013039954006671906, 0.06383799761533737, -0.002465524012222886, -0.0072426581755280495, 0.1199926882982254, 0.008451014757156372, -0.025251401588320732, -0.04716149717569351, -0.015196958556771278, -0.05864574760198593, -0.03433197736740112, 0.05597681179642677, 0.022406594827771187, 0.06672647595405579, -0.02996615506708622, -0.022438054904341698, -0.008235105313360691, 0.006674476433545351, -0.050706058740615845, 0.04815921187400818, 0.06450105458498001, -0.04953640699386597, -0.08450368791818619, 0.008418702520430088, -0.03155437484383583, 0.02303234487771988, 0.04389237239956856, -0.04129673168063164, -0.014157838188111782, 0.060671936720609665, -0.01756861060857773, 0.04192772135138512, 0.1261214315891266, 0.06727331131696701, -0.013103251345455647, 0.040874216705560684, -0.03732040151953697, -0.00996123906224966, 0.020859714597463608, -0.05855249986052513, -0.015905320644378662, -0.06147567182779312, 0.006882853340357542, 0.044178951531648636, 0.025215813890099525, -0.04003526642918587, -0.002668035915121436, -0.023172281682491302, -0.12055125087499619, -0.02064639888703823, 0.07715808600187302, -0.07111503183841705, 0.02501450479030609, -0.05434846132993698, -0.0369662307202816, -0.05180608481168747, 0.007879257202148438, 0.06852489709854126, -0.01172626856714487, 0.0019378237193450332, 0.04477853700518608, 0.017243146896362305, 0.07464884966611862, -0.06267546117305756, -0.0134277967736125, -0.06697455793619156, 0.0033934731036424637, 0.07481633871793747, 0.08041734248399734, 0.06326448917388916, -0.09864328056573868, 0.01684730127453804, -0.031284455209970474, 0.006858511827886105, -0.059615880250930786, -0.018575528636574745, 0.03209087625145912, 0.03869028761982918, -0.040682174265384674, 0.07746642082929611, 0.0055586835369467735, 0.006848955526947975, -0.03977039083838463, -0.0014277922455221415, -0.016683151945471764, 0.009759984910488129, -0.08847391605377197, -0.04809774085879326, 0.001686033676378429, 0.04553168639540672, -0.018796220421791077, -0.04408420994877815, -0.030967939645051956, 0.003356264904141426, -0.05182771012187004, -0.026206037029623985, 0.05590151250362396, -0.042725007981061935, 0.052092015743255615, 0.05213687941431999, -0.03328990936279297, -0.007780630607157946, -0.07588626444339752, -0.05213673785328865, -0.028010539710521698, 0.08353652060031891, 0.013610376045107841, -0.049112431704998016, 0.047404270619153976, -0.00435677170753479, 0.019847756251692772, 0.03866015747189522, -0.01741924323141575, -0.028744174167513847, -0.04900563508272171, -0.03350472450256348, -0.007288262713700533, 0.018233522772789, -0.052779629826545715, -0.004595632664859295, -0.0586281381547451, 0.013425507582724094, 0.010493974201381207, 0.03753525763750076, 0.006945278029888868, 0.03330420330166817, -0.06319594383239746, -0.0789480209350586, -0.13526080548763275, -0.0129415113478899, -0.018570320680737495, -0.03727230057120323, 0.06955395638942719, -0.029286107048392296, -0.045031461864709854, 0.04386517032980919, -0.06035478413105011, 0.0620398186147213, 0.07898572087287903, -0.03256421163678169, -0.029893657192587852, -0.036152519285678864, -0.03034583479166031, 0.08943672478199005, 0.030568772926926613, -0.004418622236698866, 0.014275400899350643, -0.0404367558658123, -0.016004249453544617, -0.0714741051197052, -0.010571314021945, -0.05153141915798187, 0.01541473250836134, -0.032838527113199234, 0.11407219618558884, 0.07444799691438675, 0.02404947020113468, 0.000786646269261837, 0.04375502094626427, 0.04184276610612869, 0.0440639853477478, -0.10780753195285797, 0.039906032383441925, -0.08962876349687576, 0.006707129999995232, 0.04991455376148224, -0.012553163804113865, -0.03151647746562958, 0.052663080394268036, 0.019186345860362053, 0.04055867716670036, -0.06284216046333313, 0.11628682166337967, -0.0318412110209465, -0.041494935750961304, 0.0572691336274147, 0.02346021868288517, -0.037087682634592056, -0.004025179892778397, 0.011503447778522968, 0.047996994107961655, 0.00855818297713995, -0.07425287365913391, -0.03379066288471222, -0.06515540182590485, 0.0665188729763031, -0.0363677442073822, 0.031114161014556885, -0.0776011049747467, -0.040173180401325226, 0.05742868781089783, -0.028401803225278854, 0.0013479733606800437, 0.012075678445398808, -0.03472607582807541, 0.020097516477108, -0.05102104693651199, -0.040769826620817184, -0.004003704059869051, 0.12064668536186218, -0.08805646002292633, 0.05164496973156929, 0.04637409374117851, 0.015891708433628082, 0.06642617285251617, -0.013217778876423836, 0.001964165596291423, -0.02806190401315689, 0.015507964417338371, -0.035548269748687744, 0.0416293665766716, 0.0955040454864502, 0.04032804071903229, -0.003166085807606578, 0.01042139995843172, 0.02438596822321415, -0.039585042744874954, -0.0707620233297348, 0.028348227962851524, -0.041449859738349915, 0.06837210804224014, 0.027821550145745277, 0.025965062901377678, -0.025891738012433052, -0.1740310788154602, -0.006702310871332884, 0.013180405832827091, -0.017740659415721893, -0.02896386757493019, 0.025585928931832314, 0.07560205459594727, -0.0017635038821026683, -0.03827787935733795, -0.044473856687545776, -0.0016014132415875793, 0.03341040387749672, 0.09546337276697159, -0.035818856209516525, -0.0036888220347464085, 0.05640873312950134, 0.06554025411605835, 0.004148377105593681, -0.05478261038661003, -0.025334684178233147, 0.00791715644299984, 0.03685177490115166, 0.11005210131406784, 0.0073929354548454285, -0.032504767179489136, 0.011261265724897385, 0.020593229681253433, 0.05075770244002342, 0.07178143411874771, 0.02449669875204563, -0.013685489073395729, -0.024542186409235, 0.12279921025037766, 0.015000931918621063, 0.023237304762005806, 0.08735626190900803, -0.04866902902722359, -0.04304694011807442, -0.02434627339243889, -0.007098366506397724, 0.02811538800597191, 0.050478044897317886, -0.027135908603668213, -0.04823799058794975, 0.09315331280231476, -0.018708985298871994, -0.08641254156827927, 0.00552434241399169, 0.006570098921656609, -0.03492191061377525, -0.049393001943826675, -0.08140885084867477, 0.05337773263454437, 0.021318260580301285, 0.08172635734081268, -0.008706770837306976, -0.07238408178091049, -0.03405585512518883, -0.035889383405447006, -0.10571212321519852, 0.031206782907247543, -0.043101951479911804, 0.022442230954766273, -0.022798821330070496, 0.002914518816396594]

In [61]:
# debugging

id = "<AqhSjohSRRe1FvQz790xag@geopod-ismtpd-30>"
query = "query: digitalocean bill'"
query_emb = embedding_service.embedder.embed(query)
print("query emb: ", query_emb.tolist())
filter = {'$and': [{'created': {'$gte': 1754072985832}}]}
top_k = 300
address = "0x139d1fe7306dd2b22c95c8e8343e5163fcc8aa09"
results = index.query(vector=qw, filter=filter, top_k=top_k, namespace=address, include_metadata=True)

query emb:  [-0.09028814733028412, 0.035210348665714264, 0.026079094037413597, 0.00234251469373703, -0.028606627136468887, -0.01871073804795742, 0.022441234439611435, -0.08360611647367477, 0.006645450834184885, 0.08821713924407959, 0.054387882351875305, -0.010417581535875797, 0.008786002174019814, 0.05210990458726883, 0.025046221911907196, -0.03042721562087536, -0.036040786653757095, 0.06237773224711418, -0.034703612327575684, 0.07371420413255692, 0.08370374888181686, -0.0233642365783453, 0.03209858760237694, -0.04198053106665611, 0.03196745738387108, 0.010264437645673752, 0.0567663349211216, 0.013895807787775993, -0.06714004278182983, -0.1560630351305008, -0.0026192867662757635, 0.037883006036281586, 0.05963945761322975, -0.04065365344285965, 0.08837802708148956, -0.07618649303913116, -0.009074429050087929, -0.011619019322097301, -0.0008945104782469571, 0.032948240637779236, -0.04429006204009056, -0.09418171644210815, -0.019112415611743927, -0.04728122055530548, -0.01182752288877964, 

In [62]:
matches = results.matches or []

In [63]:
matches

[{'id': '<-z6moOVDRbmhRmMM1yLNOw@geopod-ismtpd-3>',
  'metadata': {'created': 1759295656000.0,
               'folder': 'archive',
               'from': 'support@digitalocean.com',
               'from_name': 'DigitalOcean Support'},
  'score': 0.88855934,
  'sparse_values': {'indices': [], 'values': []},
  'values': []},
 {'id': '<S7q1jscNQqqKKiE5hpmLCQ@geopod-ismtpd-35>',
  'metadata': {'created': 1756712344000.0,
               'folder': 'archive',
               'from': 'support@digitalocean.com',
               'from_name': 'DigitalOcean Support'},
  'score': 0.8866225,
  'sparse_values': {'indices': [], 'values': []},
  'values': []},
 {'id': '<eudyxTeMSwi_tEeGsE6y8Q@geopod-ismtpd-37>',
  'metadata': {'created': 1756697740000.0,
               'folder': 'archive',
               'from': 'support@digitalocean.com',
               'from_name': 'DigitalOcean Support'},
  'score': 0.8746634,
  'sparse_values': {'indices': [], 'values': []},
  'values': []},
 {'id': '<aIYEZP3HSk67AHZ