In [9]:
import os
import time
import traceback
import snowflake.connector
import pandas as pd
from transformers import AutoTokenizer, AutoModel
import torch
from pinecone import Pinecone
from dotenv import load_dotenv


def check_index_stats():
    """Print the current index statistics and return the total vector count.

    Uses the module-level ``index`` object (defined outside this block;
    presumably a Pinecone index handle -- confirm where it is created).

    Returns:
        The value stored under ``'total_vector_count'`` in the statistics
        returned by ``index.describe_index_stats()``.
    """
    index_stats = index.describe_index_stats()
    print(f"Current index stats: {index_stats}")
    total_vectors = index_stats['total_vector_count']
    return total_vectors

def upsert_embeddings(vectors, batch_number):
    """Upsert one batch of vectors into the index and report the outcome.

    Args:
        vectors: Sequence of items whose first element is the vector ID
            (the caller in this file passes ``(id, values)`` tuples).
        batch_number: Batch label used only in the printed messages.

    Returns:
        ``(response, current_count)`` where ``response`` is whatever
        ``index.upsert`` returned and ``current_count`` is the total vector
        count read back afterwards; ``(None, None)`` if anything in the
        process raised.
    """
    outcome = (None, None)
    try:
        # A set collapses repeated IDs, so a smaller set means the batch
        # carries at least one ID more than once.
        id_list = [entry[0] for entry in vectors]
        has_repeats = len(set(id_list)) < len(id_list)
        if has_repeats:
            print(f"Batch {batch_number} contains duplicate IDs!")

        upsert_result = index.upsert(vectors=vectors)
        print(f"Batch {batch_number} - Upsert response: {upsert_result}")

        # Brief pause before reading the stats back.
        time.sleep(1)
        vector_total = check_index_stats()
        print(f"Batch {batch_number} - Current vector count: {vector_total}")
        outcome = (upsert_result, vector_total)
    except Exception:
        # Best effort: log the full traceback and signal failure to the
        # caller through the (None, None) result instead of raising.
        print(f"Error upserting embeddings for batch {batch_number}:")
        print(traceback.format_exc())
    return outcome

def main():
    """Read recipes in batches, embed them, and upsert the vectors.

    Relies on names defined elsewhere in the file: ``VECTOR_LIMIT``,
    ``BATCH_SIZE``, ``read_data_from_snowflake``, ``generate_embeddings``
    and (indirectly) ``index``.

    For each batch the function:
      1. reads ``BATCH_SIZE`` rows starting at ``offset``;
      2. reports IDs already seen in earlier batches;
      3. builds one text per row from the non-null NAME, DESCRIPTION,
         RECIPECATEGORY and KEYWORDS columns;
      4. generates embeddings and upserts ``(RECIPEID, embedding)`` pairs.

    A failure inside a batch is logged and the loop moves on to the next
    offset. Summary counts are printed at the end.
    """
    print("Initial index stats:")
    initial_count = check_index_stats()

    offset = 0
    total_records = min(VECTOR_LIMIT, 1000000)
    batch_number = 0
    total_processed = 0
    all_ids = set()
    text_columns = ['NAME', 'DESCRIPTION', 'RECIPECATEGORY', 'KEYWORDS']

    # NOTE(review): the final batch still requests a full BATCH_SIZE rows, so
    # slightly more than total_records may be processed when total_records is
    # not a multiple of BATCH_SIZE -- confirm whether that is acceptable.
    while offset < total_records:
        batch_number += 1
        print(f"\nProcessing batch {batch_number}")

        df = read_data_from_snowflake(offset, BATCH_SIZE)

        if df.empty:
            print("No more data to process.")
            break

        records_in_batch = len(df)
        print(f"Records in this batch: {records_in_batch}")

        # Check for duplicate IDs across batches
        batch_ids = set(df['RECIPEID'].astype(str))
        duplicate_ids = batch_ids.intersection(all_ids)
        if duplicate_ids:
            print(f"Batch {batch_number} contains {len(duplicate_ids)} IDs that were seen in previous batches.")
        all_ids.update(batch_ids)

        # Collect IDs and texts in a single pass so that entry i of both
        # lists describes the same row, independent of the frame's index.
        recipe_ids = []
        text_to_embed = []
        for _, row in df.iterrows():
            recipe_ids.append(str(row['RECIPEID']))
            text_to_embed.append(
                ' '.join(str(row[col]) for col in text_columns if not pd.isnull(row[col]))
            )

        try:
            embeddings = generate_embeddings(text_to_embed)
            # assumes generate_embeddings returns one row per input text,
            # in input order -- TODO confirm against its definition
            if len(embeddings) != len(recipe_ids):
                raise ValueError(
                    f"Expected {len(recipe_ids)} embeddings, got {len(embeddings)}"
                )
            # Pair by position, not by DataFrame index label: labels only
            # coincide with positions for a default 0..n-1 index.
            vectors = [
                (recipe_id, embeddings[position].tolist())
                for position, recipe_id in enumerate(recipe_ids)
            ]

            upsert_response, current_count = upsert_embeddings(vectors, batch_number)
            if upsert_response:
                total_processed += records_in_batch
                print(f"Total records processed so far: {total_processed}")
                if current_count and current_count < total_processed:
                    print(f"WARNING: Discrepancy detected. Processed: {total_processed}, Stored: {current_count}")

        except Exception as e:
            print(f"Error processing batch {batch_number} starting at index {offset}: {e}")
            print(traceback.format_exc())

        offset += BATCH_SIZE
        time.sleep(2)  # Add a delay between batches

    print("\nEmbeddings generation and storage process completed.")

    print("Final index stats:")
    final_count = check_index_stats()

    print(f"Initial vector count: {initial_count}")
    print(f"Final vector count: {final_count}")
    print(f"Vectors added according to Pinecone: {final_count - initial_count}")
    print(f"Total records processed according to our script: {total_processed}")

# Run the batch embedding/upsert job only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()

Initial index stats:
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}

Processing batch 1


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 1 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 1 - Current vector count: 1229
Total records processed so far: 500

Processing batch 2


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 2 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 2 - Current vector count: 1229
Total records processed so far: 1000

Processing batch 3


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 3 contains duplicate IDs!
Batch 3 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 3 - Current vector count: 1229
Total records processed so far: 1500

Processing batch 4


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 4 contains 1 IDs that were seen in previous batches.
Batch 4 contains duplicate IDs!
Batch 4 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 4 - Current vector count: 1229
Total records processed so far: 2000

Processing batch 5


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 5 contains 1 IDs that were seen in previous batches.
Batch 5 contains duplicate IDs!
Batch 5 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 5 - Current vector count: 1229
Total records processed so far: 2500

Processing batch 6


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 6 contains 1 IDs that were seen in previous batches.
Batch 6 contains duplicate IDs!
Batch 6 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 6 - Current vector count: 1229
Total records processed so far: 3000

Processing batch 7


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 7 contains 1 IDs that were seen in previous batches.
Batch 7 contains duplicate IDs!
Batch 7 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 7 - Current vector count: 1229
Total records processed so far: 3500

Processing batch 8


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 8 contains 1 IDs that were seen in previous batches.
Batch 8 contains duplicate IDs!
Batch 8 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 8 - Current vector count: 1229
Total records processed so far: 4000

Processing batch 9


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 9 contains 1 IDs that were seen in previous batches.
Batch 9 contains duplicate IDs!
Batch 9 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 9 - Current vector count: 1229
Total records processed so far: 4500

Processing batch 10


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 10 contains 1 IDs that were seen in previous batches.
Batch 10 contains duplicate IDs!
Batch 10 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 10 - Current vector count: 1229
Total records processed so far: 5000

Processing batch 11


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 11 contains 1 IDs that were seen in previous batches.
Batch 11 contains duplicate IDs!
Batch 11 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 11 - Current vector count: 1229
Total records processed so far: 5500

Processing batch 12


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 12 contains 1 IDs that were seen in previous batches.
Batch 12 contains duplicate IDs!
Batch 12 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 12 - Current vector count: 1229
Total records processed so far: 6000

Processing batch 13


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 13 contains 1 IDs that were seen in previous batches.
Batch 13 contains duplicate IDs!
Batch 13 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 13 - Current vector count: 1229
Total records processed so far: 6500

Processing batch 14


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 14 contains 1 IDs that were seen in previous batches.
Batch 14 contains duplicate IDs!
Batch 14 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 14 - Current vector count: 1229
Total records processed so far: 7000

Processing batch 15


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 15 contains 1 IDs that were seen in previous batches.
Batch 15 contains duplicate IDs!
Batch 15 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 15 - Current vector count: 1229
Total records processed so far: 7500

Processing batch 16


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 16 contains 1 IDs that were seen in previous batches.
Batch 16 contains duplicate IDs!
Batch 16 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 16 - Current vector count: 1229
Total records processed so far: 8000

Processing batch 17


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 17 contains 1 IDs that were seen in previous batches.
Batch 17 contains duplicate IDs!
Batch 17 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 17 - Current vector count: 1229
Total records processed so far: 8500

Processing batch 18


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 18 contains 1 IDs that were seen in previous batches.
Batch 18 contains duplicate IDs!
Batch 18 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 18 - Current vector count: 1229
Total records processed so far: 9000

Processing batch 19


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 19 contains 1 IDs that were seen in previous batches.
Batch 19 contains duplicate IDs!
Batch 19 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 19 - Current vector count: 1229
Total records processed so far: 9500

Processing batch 20


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 20 contains 1 IDs that were seen in previous batches.
Batch 20 contains duplicate IDs!
Batch 20 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 20 - Current vector count: 1229
Total records processed so far: 10000

Processing batch 21


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 21 contains 1 IDs that were seen in previous batches.
Batch 21 contains duplicate IDs!
Batch 21 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 21 - Current vector count: 1229
Total records processed so far: 10500

Processing batch 22


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 22 contains 1 IDs that were seen in previous batches.
Batch 22 contains duplicate IDs!
Batch 22 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 22 - Current vector count: 1229
Total records processed so far: 11000

Processing batch 23


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 23 contains 1 IDs that were seen in previous batches.
Batch 23 contains duplicate IDs!
Batch 23 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 23 - Current vector count: 1229
Total records processed so far: 11500

Processing batch 24


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 24 contains 1 IDs that were seen in previous batches.
Batch 24 contains duplicate IDs!
Batch 24 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 24 - Current vector count: 1229
Total records processed so far: 12000

Processing batch 25


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 25 contains 1 IDs that were seen in previous batches.
Batch 25 contains duplicate IDs!
Batch 25 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 25 - Current vector count: 1229
Total records processed so far: 12500

Processing batch 26


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 26 contains 1 IDs that were seen in previous batches.
Batch 26 contains duplicate IDs!
Batch 26 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 26 - Current vector count: 1229
Total records processed so far: 13000

Processing batch 27


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 27 contains 1 IDs that were seen in previous batches.
Batch 27 contains duplicate IDs!
Batch 27 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 27 - Current vector count: 1229
Total records processed so far: 13500

Processing batch 28


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 28 contains 1 IDs that were seen in previous batches.
Batch 28 contains duplicate IDs!
Batch 28 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 28 - Current vector count: 1229
Total records processed so far: 14000

Processing batch 29


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 29 contains 1 IDs that were seen in previous batches.
Batch 29 contains duplicate IDs!
Batch 29 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 29 - Current vector count: 1229
Total records processed so far: 14500

Processing batch 30


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 30 contains 1 IDs that were seen in previous batches.
Batch 30 contains duplicate IDs!
Batch 30 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 30 - Current vector count: 1229
Total records processed so far: 15000

Processing batch 31


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 31 contains 1 IDs that were seen in previous batches.
Batch 31 contains duplicate IDs!
Batch 31 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 31 - Current vector count: 1229
Total records processed so far: 15500

Processing batch 32


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 32 contains 1 IDs that were seen in previous batches.
Batch 32 contains duplicate IDs!
Batch 32 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 32 - Current vector count: 1229
Total records processed so far: 16000

Processing batch 33


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 33 contains 1 IDs that were seen in previous batches.
Batch 33 contains duplicate IDs!
Batch 33 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 33 - Current vector count: 1229
Total records processed so far: 16500

Processing batch 34


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 34 contains 1 IDs that were seen in previous batches.
Batch 34 contains duplicate IDs!
Batch 34 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 34 - Current vector count: 1229
Total records processed so far: 17000

Processing batch 35


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 35 contains 1 IDs that were seen in previous batches.
Batch 35 contains duplicate IDs!
Batch 35 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 35 - Current vector count: 1229
Total records processed so far: 17500

Processing batch 36


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 36 contains 1 IDs that were seen in previous batches.
Batch 36 contains duplicate IDs!
Batch 36 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 36 - Current vector count: 1229
Total records processed so far: 18000

Processing batch 37


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 37 contains 1 IDs that were seen in previous batches.
Batch 37 contains duplicate IDs!
Batch 37 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 37 - Current vector count: 1229
Total records processed so far: 18500

Processing batch 38


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 38 contains 1 IDs that were seen in previous batches.
Batch 38 contains duplicate IDs!
Batch 38 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 38 - Current vector count: 1229
Total records processed so far: 19000

Processing batch 39


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 39 contains 1 IDs that were seen in previous batches.
Batch 39 contains duplicate IDs!
Batch 39 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 39 - Current vector count: 1229
Total records processed so far: 19500

Processing batch 40


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 40 contains 1 IDs that were seen in previous batches.
Batch 40 contains duplicate IDs!
Batch 40 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 40 - Current vector count: 1229
Total records processed so far: 20000

Processing batch 41


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 41 contains 1 IDs that were seen in previous batches.
Batch 41 contains duplicate IDs!
Batch 41 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 41 - Current vector count: 1229
Total records processed so far: 20500

Processing batch 42


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 42 contains 1 IDs that were seen in previous batches.
Batch 42 contains duplicate IDs!
Batch 42 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 42 - Current vector count: 1229
Total records processed so far: 21000

Processing batch 43


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 43 contains 1 IDs that were seen in previous batches.
Batch 43 contains duplicate IDs!
Batch 43 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 43 - Current vector count: 1229
Total records processed so far: 21500

Processing batch 44


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 44 contains 1 IDs that were seen in previous batches.
Batch 44 contains duplicate IDs!
Batch 44 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 44 - Current vector count: 1229
Total records processed so far: 22000

Processing batch 45


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 45 contains 1 IDs that were seen in previous batches.
Batch 45 contains duplicate IDs!
Batch 45 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 45 - Current vector count: 1229
Total records processed so far: 22500

Processing batch 46


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 46 contains 1 IDs that were seen in previous batches.
Batch 46 contains duplicate IDs!
Batch 46 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 46 - Current vector count: 1229
Total records processed so far: 23000

Processing batch 47


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 47 contains 1 IDs that were seen in previous batches.
Batch 47 contains duplicate IDs!
Batch 47 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 47 - Current vector count: 1229
Total records processed so far: 23500

Processing batch 48


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 48 contains 1 IDs that were seen in previous batches.
Batch 48 contains duplicate IDs!
Batch 48 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 48 - Current vector count: 1229
Total records processed so far: 24000

Processing batch 49


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 49 contains 1 IDs that were seen in previous batches.
Batch 49 contains duplicate IDs!
Batch 49 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 49 - Current vector count: 1229
Total records processed so far: 24500

Processing batch 50


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 50 contains 1 IDs that were seen in previous batches.
Batch 50 contains duplicate IDs!
Batch 50 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 50 - Current vector count: 1229
Total records processed so far: 25000

Processing batch 51


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 51 contains 1 IDs that were seen in previous batches.
Batch 51 contains duplicate IDs!
Batch 51 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 51 - Current vector count: 1229
Total records processed so far: 25500

Processing batch 52


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 52 contains 1 IDs that were seen in previous batches.
Batch 52 contains duplicate IDs!
Batch 52 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 52 - Current vector count: 1229
Total records processed so far: 26000

Processing batch 53


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 53 contains 1 IDs that were seen in previous batches.
Batch 53 contains duplicate IDs!
Batch 53 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 53 - Current vector count: 1229
Total records processed so far: 26500

Processing batch 54


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 54 contains 1 IDs that were seen in previous batches.
Batch 54 contains duplicate IDs!
Batch 54 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 54 - Current vector count: 1229
Total records processed so far: 27000

Processing batch 55


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 55 contains 1 IDs that were seen in previous batches.
Batch 55 contains duplicate IDs!
Batch 55 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 55 - Current vector count: 1229
Total records processed so far: 27500

Processing batch 56


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 56 contains 1 IDs that were seen in previous batches.
Batch 56 contains duplicate IDs!
Batch 56 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 56 - Current vector count: 1229
Total records processed so far: 28000

Processing batch 57


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 57 contains 1 IDs that were seen in previous batches.
Batch 57 contains duplicate IDs!
Batch 57 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 57 - Current vector count: 1229
Total records processed so far: 28500

Processing batch 58


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 58 contains 1 IDs that were seen in previous batches.
Batch 58 contains duplicate IDs!
Batch 58 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 58 - Current vector count: 1229
Total records processed so far: 29000

Processing batch 59


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 59 contains 1 IDs that were seen in previous batches.
Batch 59 contains duplicate IDs!
Batch 59 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 59 - Current vector count: 1229
Total records processed so far: 29500

Processing batch 60


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 60 contains 1 IDs that were seen in previous batches.
Batch 60 contains duplicate IDs!
Batch 60 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 60 - Current vector count: 1229
Total records processed so far: 30000

Processing batch 61


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 61 contains 1 IDs that were seen in previous batches.
Batch 61 contains duplicate IDs!
Batch 61 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 61 - Current vector count: 1229
Total records processed so far: 30500

Processing batch 62


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 62 contains 1 IDs that were seen in previous batches.
Batch 62 contains duplicate IDs!
Batch 62 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 62 - Current vector count: 1229
Total records processed so far: 31000

Processing batch 63


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 63 contains 1 IDs that were seen in previous batches.
Batch 63 contains duplicate IDs!
Batch 63 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 63 - Current vector count: 1229
Total records processed so far: 31500

Processing batch 64


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 64 contains 1 IDs that were seen in previous batches.
Batch 64 contains duplicate IDs!
Error upserting embeddings for batch 64:
Traceback (most recent call last):
  File "/Users/sripoojitha/Desktop/recipe-curator/env/lib/python3.11/site-packages/urllib3/connectionpool.py", line 789, in urlopen
    response = self._make_request(
               ^^^^^^^^^^^^^^^^^^^
  File "/Users/sripoojitha/Desktop/recipe-curator/env/lib/python3.11/site-packages/urllib3/connectionpool.py", line 536, in _make_request
    response = conn.getresponse()
               ^^^^^^^^^^^^^^^^^^
  File "/Users/sripoojitha/Desktop/recipe-curator/env/lib/python3.11/site-packages/urllib3/connection.py", line 507, in getresponse
    httplib_response = super().getresponse()
                       ^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sripoojitha/anaconda3/lib/python3.11/http/client.py", line 1375, in getresponse
    response.begin()
  File "/Users

  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 65 contains 1 IDs that were seen in previous batches.
Batch 65 contains duplicate IDs!
Batch 65 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 65 - Current vector count: 1229
Total records processed so far: 32000

Processing batch 66


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 66 contains 1 IDs that were seen in previous batches.
Batch 66 contains duplicate IDs!
Batch 66 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 66 - Current vector count: 1229
Total records processed so far: 32500

Processing batch 67


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 67 contains 1 IDs that were seen in previous batches.
Batch 67 contains duplicate IDs!
Batch 67 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 67 - Current vector count: 1229
Total records processed so far: 33000

Processing batch 68


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 68 contains 1 IDs that were seen in previous batches.
Batch 68 contains duplicate IDs!
Error upserting embeddings for batch 68:
Traceback (most recent call last):
  File "/Users/sripoojitha/Desktop/recipe-curator/env/lib/python3.11/site-packages/urllib3/connectionpool.py", line 789, in urlopen
    response = self._make_request(
               ^^^^^^^^^^^^^^^^^^^
  File "/Users/sripoojitha/Desktop/recipe-curator/env/lib/python3.11/site-packages/urllib3/connectionpool.py", line 536, in _make_request
    response = conn.getresponse()
               ^^^^^^^^^^^^^^^^^^
  File "/Users/sripoojitha/Desktop/recipe-curator/env/lib/python3.11/site-packages/urllib3/connection.py", line 507, in getresponse
    httplib_response = super().getresponse()
                       ^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sripoojitha/anaconda3/lib/python3.11/http/client.py", line 1375, in getresponse
    response.begin()
  File "/Users

  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 69 contains 1 IDs that were seen in previous batches.
Batch 69 contains duplicate IDs!
Error upserting embeddings for batch 69:
Traceback (most recent call last):
  File "/Users/sripoojitha/Desktop/recipe-curator/env/lib/python3.11/site-packages/urllib3/connectionpool.py", line 789, in urlopen
    response = self._make_request(
               ^^^^^^^^^^^^^^^^^^^
  File "/Users/sripoojitha/Desktop/recipe-curator/env/lib/python3.11/site-packages/urllib3/connectionpool.py", line 536, in _make_request
    response = conn.getresponse()
               ^^^^^^^^^^^^^^^^^^
  File "/Users/sripoojitha/Desktop/recipe-curator/env/lib/python3.11/site-packages/urllib3/connection.py", line 507, in getresponse
    httplib_response = super().getresponse()
                       ^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sripoojitha/anaconda3/lib/python3.11/http/client.py", line 1375, in getresponse
    response.begin()
  File "/Users

  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 70 contains 1 IDs that were seen in previous batches.
Batch 70 contains duplicate IDs!
Batch 70 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 70 - Current vector count: 1229
Total records processed so far: 33500

Processing batch 71


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 71 contains 1 IDs that were seen in previous batches.
Batch 71 contains duplicate IDs!
Batch 71 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 71 - Current vector count: 1229
Total records processed so far: 34000

Processing batch 72


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 72 contains 1 IDs that were seen in previous batches.
Batch 72 contains duplicate IDs!
Batch 72 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 72 - Current vector count: 1229
Total records processed so far: 34500

Processing batch 73


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 73 contains 1 IDs that were seen in previous batches.
Batch 73 contains duplicate IDs!
Batch 73 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 73 - Current vector count: 1229
Total records processed so far: 35000

Processing batch 74


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 74 contains 1 IDs that were seen in previous batches.
Batch 74 contains duplicate IDs!
Batch 74 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 74 - Current vector count: 1229
Total records processed so far: 35500

Processing batch 75


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 75 contains 1 IDs that were seen in previous batches.
Batch 75 contains duplicate IDs!
Batch 75 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 75 - Current vector count: 1229
Total records processed so far: 36000

Processing batch 76


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 76 contains 1 IDs that were seen in previous batches.
Batch 76 contains duplicate IDs!
Batch 76 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 76 - Current vector count: 1229
Total records processed so far: 36500

Processing batch 77


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 77 contains 1 IDs that were seen in previous batches.
Batch 77 contains duplicate IDs!
Batch 77 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 77 - Current vector count: 1229
Total records processed so far: 37000

Processing batch 78


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 78 contains 1 IDs that were seen in previous batches.
Batch 78 contains duplicate IDs!
Batch 78 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 78 - Current vector count: 1229
Total records processed so far: 37500

Processing batch 79


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 79 contains 1 IDs that were seen in previous batches.
Batch 79 contains duplicate IDs!
Batch 79 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 79 - Current vector count: 1229
Total records processed so far: 38000

Processing batch 80


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 80 contains 1 IDs that were seen in previous batches.
Batch 80 contains duplicate IDs!
Batch 80 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 80 - Current vector count: 1229
Total records processed so far: 38500

Processing batch 81


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 81 contains 1 IDs that were seen in previous batches.
Batch 81 contains duplicate IDs!
Batch 81 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 81 - Current vector count: 1229
Total records processed so far: 39000

Processing batch 82


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 82 contains 1 IDs that were seen in previous batches.
Batch 82 contains duplicate IDs!
Batch 82 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 82 - Current vector count: 1229
Total records processed so far: 39500

Processing batch 83


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 83 contains 1 IDs that were seen in previous batches.
Batch 83 contains duplicate IDs!
Batch 83 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 83 - Current vector count: 1229
Total records processed so far: 40000

Processing batch 84


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 84 contains 1 IDs that were seen in previous batches.
Batch 84 contains duplicate IDs!
Batch 84 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 84 - Current vector count: 1229
Total records processed so far: 40500

Processing batch 85


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 85 contains 1 IDs that were seen in previous batches.
Batch 85 contains duplicate IDs!
Error upserting embeddings for batch 85:
Traceback (most recent call last):
  File "/Users/sripoojitha/Desktop/recipe-curator/env/lib/python3.11/site-packages/urllib3/connectionpool.py", line 789, in urlopen
    response = self._make_request(
               ^^^^^^^^^^^^^^^^^^^
  File "/Users/sripoojitha/Desktop/recipe-curator/env/lib/python3.11/site-packages/urllib3/connectionpool.py", line 536, in _make_request
    response = conn.getresponse()
               ^^^^^^^^^^^^^^^^^^
  File "/Users/sripoojitha/Desktop/recipe-curator/env/lib/python3.11/site-packages/urllib3/connection.py", line 507, in getresponse
    httplib_response = super().getresponse()
                       ^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sripoojitha/anaconda3/lib/python3.11/http/client.py", line 1375, in getresponse
    response.begin()
  File "/Users

  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 86 contains 1 IDs that were seen in previous batches.
Batch 86 contains duplicate IDs!
Batch 86 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 86 - Current vector count: 1229
Total records processed so far: 41000

Processing batch 87


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 87 contains 1 IDs that were seen in previous batches.
Batch 87 contains duplicate IDs!
Batch 87 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 87 - Current vector count: 1229
Total records processed so far: 41500

Processing batch 88


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 88 contains 1 IDs that were seen in previous batches.
Batch 88 contains duplicate IDs!
Batch 88 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 88 - Current vector count: 1229
Total records processed so far: 42000

Processing batch 89


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 89 contains 1 IDs that were seen in previous batches.
Batch 89 contains duplicate IDs!
Batch 89 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 89 - Current vector count: 1229
Total records processed so far: 42500

Processing batch 90


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 90 contains 1 IDs that were seen in previous batches.
Batch 90 contains duplicate IDs!
Batch 90 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 90 - Current vector count: 1229
Total records processed so far: 43000

Processing batch 91


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 91 contains 1 IDs that were seen in previous batches.
Batch 91 contains duplicate IDs!
Batch 91 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 91 - Current vector count: 1229
Total records processed so far: 43500

Processing batch 92


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 92 contains 1 IDs that were seen in previous batches.
Batch 92 contains duplicate IDs!
Batch 92 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 92 - Current vector count: 1229
Total records processed so far: 44000

Processing batch 93


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 93 contains 1 IDs that were seen in previous batches.
Batch 93 contains duplicate IDs!
Batch 93 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 93 - Current vector count: 1229
Total records processed so far: 44500

Processing batch 94


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 94 contains 1 IDs that were seen in previous batches.
Batch 94 contains duplicate IDs!
Batch 94 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 94 - Current vector count: 1229
Total records processed so far: 45000

Processing batch 95


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 95 contains 1 IDs that were seen in previous batches.
Batch 95 contains duplicate IDs!
Batch 95 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 95 - Current vector count: 1229
Total records processed so far: 45500

Processing batch 96


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 96 contains 1 IDs that were seen in previous batches.
Batch 96 contains duplicate IDs!
Batch 96 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 96 - Current vector count: 1229
Total records processed so far: 46000

Processing batch 97


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 97 contains 1 IDs that were seen in previous batches.
Batch 97 contains duplicate IDs!
Batch 97 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 97 - Current vector count: 1229
Total records processed so far: 46500

Processing batch 98


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 98 contains 1 IDs that were seen in previous batches.
Batch 98 contains duplicate IDs!
Batch 98 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 98 - Current vector count: 1229
Total records processed so far: 47000

Processing batch 99


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 99 contains 1 IDs that were seen in previous batches.
Batch 99 contains duplicate IDs!
Batch 99 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 99 - Current vector count: 1229
Total records processed so far: 47500

Processing batch 100


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 100 contains 1 IDs that were seen in previous batches.
Batch 100 contains duplicate IDs!
Batch 100 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 100 - Current vector count: 1229
Total records processed so far: 48000

Processing batch 101


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 101 contains 1 IDs that were seen in previous batches.
Batch 101 contains duplicate IDs!
Batch 101 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 101 - Current vector count: 1229
Total records processed so far: 48500

Processing batch 102


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 102 contains 1 IDs that were seen in previous batches.
Batch 102 contains duplicate IDs!
Batch 102 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 102 - Current vector count: 1229
Total records processed so far: 49000

Processing batch 103


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 103 contains 1 IDs that were seen in previous batches.
Batch 103 contains duplicate IDs!
Batch 103 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 103 - Current vector count: 1229
Total records processed so far: 49500

Processing batch 104


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 104 contains 1 IDs that were seen in previous batches.
Batch 104 contains duplicate IDs!
Batch 104 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 104 - Current vector count: 1229
Total records processed so far: 50000

Processing batch 105


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 105 contains 1 IDs that were seen in previous batches.
Batch 105 contains duplicate IDs!
Batch 105 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 105 - Current vector count: 1229
Total records processed so far: 50500

Processing batch 106


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 106 contains 1 IDs that were seen in previous batches.
Batch 106 contains duplicate IDs!
Batch 106 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 106 - Current vector count: 1229
Total records processed so far: 51000

Processing batch 107


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 107 contains 1 IDs that were seen in previous batches.
Batch 107 contains duplicate IDs!
Batch 107 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 107 - Current vector count: 1229
Total records processed so far: 51500

Processing batch 108


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 108 contains 1 IDs that were seen in previous batches.
Batch 108 contains duplicate IDs!
Batch 108 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 108 - Current vector count: 1229
Total records processed so far: 52000

Processing batch 109


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 109 contains 1 IDs that were seen in previous batches.
Batch 109 contains duplicate IDs!
Batch 109 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 109 - Current vector count: 1229
Total records processed so far: 52500

Processing batch 110


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 110 contains 1 IDs that were seen in previous batches.
Batch 110 contains duplicate IDs!
Batch 110 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 110 - Current vector count: 1229
Total records processed so far: 53000

Processing batch 111


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 111 contains 1 IDs that were seen in previous batches.
Batch 111 contains duplicate IDs!
Error upserting embeddings for batch 111:
Traceback (most recent call last):
  File "/Users/sripoojitha/Desktop/recipe-curator/env/lib/python3.11/site-packages/urllib3/connectionpool.py", line 789, in urlopen
    response = self._make_request(
               ^^^^^^^^^^^^^^^^^^^
  File "/Users/sripoojitha/Desktop/recipe-curator/env/lib/python3.11/site-packages/urllib3/connectionpool.py", line 536, in _make_request
    response = conn.getresponse()
               ^^^^^^^^^^^^^^^^^^
  File "/Users/sripoojitha/Desktop/recipe-curator/env/lib/python3.11/site-packages/urllib3/connection.py", line 507, in getresponse
    httplib_response = super().getresponse()
                       ^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sripoojitha/anaconda3/lib/python3.11/http/client.py", line 1375, in getresponse
    response.begin()
  File "/Us

  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 112 contains 1 IDs that were seen in previous batches.
Batch 112 contains duplicate IDs!
Batch 112 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 112 - Current vector count: 1229
Total records processed so far: 53500

Processing batch 113


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 113 contains 1 IDs that were seen in previous batches.
Batch 113 contains duplicate IDs!
Batch 113 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 113 - Current vector count: 1229
Total records processed so far: 54000

Processing batch 114


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 114 contains 1 IDs that were seen in previous batches.
Batch 114 contains duplicate IDs!
Batch 114 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 114 - Current vector count: 1229
Total records processed so far: 54500

Processing batch 115


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 115 contains 1 IDs that were seen in previous batches.
Batch 115 contains duplicate IDs!
Error upserting embeddings for batch 115:
Traceback (most recent call last):
  File "/Users/sripoojitha/Desktop/recipe-curator/env/lib/python3.11/site-packages/urllib3/connectionpool.py", line 789, in urlopen
    response = self._make_request(
               ^^^^^^^^^^^^^^^^^^^
  File "/Users/sripoojitha/Desktop/recipe-curator/env/lib/python3.11/site-packages/urllib3/connectionpool.py", line 536, in _make_request
    response = conn.getresponse()
               ^^^^^^^^^^^^^^^^^^
  File "/Users/sripoojitha/Desktop/recipe-curator/env/lib/python3.11/site-packages/urllib3/connection.py", line 507, in getresponse
    httplib_response = super().getresponse()
                       ^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sripoojitha/anaconda3/lib/python3.11/http/client.py", line 1375, in getresponse
    response.begin()
  File "/Us

  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 116 contains 1 IDs that were seen in previous batches.
Batch 116 contains duplicate IDs!
Batch 116 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 116 - Current vector count: 1229
Total records processed so far: 55000

Processing batch 117


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 117 contains 1 IDs that were seen in previous batches.
Batch 117 contains duplicate IDs!
Batch 117 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 117 - Current vector count: 1229
Total records processed so far: 55500

Processing batch 118


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 118 contains 1 IDs that were seen in previous batches.
Batch 118 contains duplicate IDs!
Batch 118 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 118 - Current vector count: 1229
Total records processed so far: 56000

Processing batch 119


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 119 contains 1 IDs that were seen in previous batches.
Batch 119 contains duplicate IDs!
Batch 119 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 119 - Current vector count: 1229
Total records processed so far: 56500

Processing batch 120


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 120 contains 1 IDs that were seen in previous batches.
Batch 120 contains duplicate IDs!
Batch 120 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 120 - Current vector count: 1229
Total records processed so far: 57000

Processing batch 121


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 121 contains 1 IDs that were seen in previous batches.
Batch 121 contains duplicate IDs!
Error upserting embeddings for batch 121:
Traceback (most recent call last):
  File "/Users/sripoojitha/Desktop/recipe-curator/env/lib/python3.11/site-packages/urllib3/connectionpool.py", line 789, in urlopen
    response = self._make_request(
               ^^^^^^^^^^^^^^^^^^^
  File "/Users/sripoojitha/Desktop/recipe-curator/env/lib/python3.11/site-packages/urllib3/connectionpool.py", line 536, in _make_request
    response = conn.getresponse()
               ^^^^^^^^^^^^^^^^^^
  File "/Users/sripoojitha/Desktop/recipe-curator/env/lib/python3.11/site-packages/urllib3/connection.py", line 507, in getresponse
    httplib_response = super().getresponse()
                       ^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sripoojitha/anaconda3/lib/python3.11/http/client.py", line 1375, in getresponse
    response.begin()
  File "/Us

  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 122 contains 1 IDs that were seen in previous batches.
Batch 122 contains duplicate IDs!
Batch 122 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 122 - Current vector count: 1229
Total records processed so far: 57500

Processing batch 123


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 123 contains 1 IDs that were seen in previous batches.
Batch 123 contains duplicate IDs!
Batch 123 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 123 - Current vector count: 1229
Total records processed so far: 58000

Processing batch 124


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 124 contains 1 IDs that were seen in previous batches.
Batch 124 contains duplicate IDs!
Batch 124 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 124 - Current vector count: 1229
Total records processed so far: 58500

Processing batch 125


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 125 contains 1 IDs that were seen in previous batches.
Batch 125 contains duplicate IDs!
Batch 125 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 125 - Current vector count: 1229
Total records processed so far: 59000

Processing batch 126


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 126 contains 1 IDs that were seen in previous batches.
Batch 126 contains duplicate IDs!
Batch 126 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 126 - Current vector count: 1229
Total records processed so far: 59500

Processing batch 127


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 127 contains 1 IDs that were seen in previous batches.
Batch 127 contains duplicate IDs!
Batch 127 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 127 - Current vector count: 1229
Total records processed so far: 60000

Processing batch 128


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 128 contains 1 IDs that were seen in previous batches.
Batch 128 contains duplicate IDs!
Batch 128 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 128 - Current vector count: 1229
Total records processed so far: 60500

Processing batch 129


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 129 contains 1 IDs that were seen in previous batches.
Batch 129 contains duplicate IDs!
Batch 129 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 129 - Current vector count: 1229
Total records processed so far: 61000

Processing batch 130


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 130 contains 1 IDs that were seen in previous batches.
Batch 130 contains duplicate IDs!
Batch 130 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 130 - Current vector count: 1229
Total records processed so far: 61500

Processing batch 131


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 131 contains 1 IDs that were seen in previous batches.
Batch 131 contains duplicate IDs!
Batch 131 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 131 - Current vector count: 1229
Total records processed so far: 62000

Processing batch 132


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 132 contains 1 IDs that were seen in previous batches.
Batch 132 contains duplicate IDs!
Batch 132 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 132 - Current vector count: 1229
Total records processed so far: 62500

Processing batch 133


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 133 contains 1 IDs that were seen in previous batches.
Batch 133 contains duplicate IDs!
Batch 133 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 133 - Current vector count: 1229
Total records processed so far: 63000

Processing batch 134


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 134 contains 1 IDs that were seen in previous batches.
Batch 134 contains duplicate IDs!
Batch 134 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 134 - Current vector count: 1229
Total records processed so far: 63500

Processing batch 135


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 135 contains 1 IDs that were seen in previous batches.
Batch 135 contains duplicate IDs!
Batch 135 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 135 - Current vector count: 1229
Total records processed so far: 64000

Processing batch 136


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 136 contains 1 IDs that were seen in previous batches.
Batch 136 contains duplicate IDs!
Batch 136 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 136 - Current vector count: 1229
Total records processed so far: 64500

Processing batch 137


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 137 contains 1 IDs that were seen in previous batches.
Batch 137 contains duplicate IDs!
Batch 137 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 137 - Current vector count: 1229
Total records processed so far: 65000

Processing batch 138


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 138 contains 1 IDs that were seen in previous batches.
Batch 138 contains duplicate IDs!
Batch 138 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 138 - Current vector count: 1229
Total records processed so far: 65500

Processing batch 139


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 139 contains 1 IDs that were seen in previous batches.
Batch 139 contains duplicate IDs!
Batch 139 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 139 - Current vector count: 1229
Total records processed so far: 66000

Processing batch 140


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 140 contains 1 IDs that were seen in previous batches.
Batch 140 contains duplicate IDs!
Batch 140 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 140 - Current vector count: 1229
Total records processed so far: 66500

Processing batch 141


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 141 contains 1 IDs that were seen in previous batches.
Batch 141 contains duplicate IDs!
Batch 141 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 141 - Current vector count: 1229
Total records processed so far: 67000

Processing batch 142


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 142 contains 1 IDs that were seen in previous batches.
Batch 142 contains duplicate IDs!
Error upserting embeddings for batch 142:
Traceback (most recent call last):
  File "/Users/sripoojitha/Desktop/recipe-curator/env/lib/python3.11/site-packages/urllib3/connectionpool.py", line 789, in urlopen
    response = self._make_request(
               ^^^^^^^^^^^^^^^^^^^
  File "/Users/sripoojitha/Desktop/recipe-curator/env/lib/python3.11/site-packages/urllib3/connectionpool.py", line 536, in _make_request
    response = conn.getresponse()
               ^^^^^^^^^^^^^^^^^^
  File "/Users/sripoojitha/Desktop/recipe-curator/env/lib/python3.11/site-packages/urllib3/connection.py", line 507, in getresponse
    httplib_response = super().getresponse()
                       ^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sripoojitha/anaconda3/lib/python3.11/http/client.py", line 1375, in getresponse
    response.begin()
  File "/Us

  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 143 contains 1 IDs that were seen in previous batches.
Batch 143 contains duplicate IDs!
Batch 143 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 143 - Current vector count: 1229
Total records processed so far: 67500

Processing batch 144


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 144 contains 1 IDs that were seen in previous batches.
Batch 144 contains duplicate IDs!
Batch 144 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 144 - Current vector count: 1229
Total records processed so far: 68000

Processing batch 145


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 145 contains 1 IDs that were seen in previous batches.
Batch 145 contains duplicate IDs!
Batch 145 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 145 - Current vector count: 1229
Total records processed so far: 68500

Processing batch 146


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 146 contains 1 IDs that were seen in previous batches.
Batch 146 contains duplicate IDs!
Batch 146 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 146 - Current vector count: 1229
Total records processed so far: 69000

Processing batch 147


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 147 contains 1 IDs that were seen in previous batches.
Batch 147 contains duplicate IDs!
Batch 147 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 147 - Current vector count: 1229
Total records processed so far: 69500

Processing batch 148


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 148 contains 1 IDs that were seen in previous batches.
Batch 148 contains duplicate IDs!
Batch 148 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 148 - Current vector count: 1229
Total records processed so far: 70000

Processing batch 149


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 149 contains 1 IDs that were seen in previous batches.
Batch 149 contains duplicate IDs!
Batch 149 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 149 - Current vector count: 1229
Total records processed so far: 70500

Processing batch 150


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 150 contains 1 IDs that were seen in previous batches.
Batch 150 contains duplicate IDs!
Batch 150 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 150 - Current vector count: 1229
Total records processed so far: 71000

Processing batch 151


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 151 contains 1 IDs that were seen in previous batches.
Batch 151 contains duplicate IDs!
Batch 151 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 151 - Current vector count: 1229
Total records processed so far: 71500

Processing batch 152


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 152 contains 1 IDs that were seen in previous batches.
Batch 152 contains duplicate IDs!
Batch 152 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 152 - Current vector count: 1229
Total records processed so far: 72000

Processing batch 153


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 153 contains 1 IDs that were seen in previous batches.
Batch 153 contains duplicate IDs!
Batch 153 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 153 - Current vector count: 1229
Total records processed so far: 72500

Processing batch 154


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 154 contains 1 IDs that were seen in previous batches.
Batch 154 contains duplicate IDs!
Batch 154 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 154 - Current vector count: 1229
Total records processed so far: 73000

Processing batch 155


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 155 contains 1 IDs that were seen in previous batches.
Batch 155 contains duplicate IDs!
Batch 155 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 155 - Current vector count: 1229
Total records processed so far: 73500

Processing batch 156


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 156 contains 1 IDs that were seen in previous batches.
Batch 156 contains duplicate IDs!
Batch 156 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 156 - Current vector count: 1229
Total records processed so far: 74000

Processing batch 157


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 157 contains 1 IDs that were seen in previous batches.
Batch 157 contains duplicate IDs!
Batch 157 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 157 - Current vector count: 1229
Total records processed so far: 74500

Processing batch 158


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 158 contains 1 IDs that were seen in previous batches.
Batch 158 contains duplicate IDs!
Batch 158 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 158 - Current vector count: 1229
Total records processed so far: 75000

Processing batch 159


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 159 contains 1 IDs that were seen in previous batches.
Batch 159 contains duplicate IDs!
Batch 159 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 159 - Current vector count: 1229
Total records processed so far: 75500

Processing batch 160


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 160 contains 1 IDs that were seen in previous batches.
Batch 160 contains duplicate IDs!
Batch 160 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 160 - Current vector count: 1229
Total records processed so far: 76000

Processing batch 161


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 161 contains 1 IDs that were seen in previous batches.
Batch 161 contains duplicate IDs!
Batch 161 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 161 - Current vector count: 1229
Total records processed so far: 76500

Processing batch 162


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 162 contains 1 IDs that were seen in previous batches.
Batch 162 contains duplicate IDs!
Batch 162 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 162 - Current vector count: 1229
Total records processed so far: 77000

Processing batch 163


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 163 contains 1 IDs that were seen in previous batches.
Batch 163 contains duplicate IDs!
Error upserting embeddings for batch 163:
Traceback (most recent call last):
  File "/Users/sripoojitha/Desktop/recipe-curator/env/lib/python3.11/site-packages/urllib3/connectionpool.py", line 789, in urlopen
    response = self._make_request(
               ^^^^^^^^^^^^^^^^^^^
  File "/Users/sripoojitha/Desktop/recipe-curator/env/lib/python3.11/site-packages/urllib3/connectionpool.py", line 536, in _make_request
    response = conn.getresponse()
               ^^^^^^^^^^^^^^^^^^
  File "/Users/sripoojitha/Desktop/recipe-curator/env/lib/python3.11/site-packages/urllib3/connection.py", line 507, in getresponse
    httplib_response = super().getresponse()
                       ^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sripoojitha/anaconda3/lib/python3.11/http/client.py", line 1375, in getresponse
    response.begin()
  File "/Us

  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 164 contains 1 IDs that were seen in previous batches.
Batch 164 contains duplicate IDs!
Batch 164 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 164 - Current vector count: 1229
Total records processed so far: 77500

Processing batch 165


  df = pd.read_sql(query, conn)


Batch size read from Snowflake: 500 records
Records in this batch: 500
Batch 165 contains 1 IDs that were seen in previous batches.
Batch 165 contains duplicate IDs!
Batch 165 - Upsert response: {'upserted_count': 500}
Current index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1229}},
 'total_vector_count': 1229}
Batch 165 - Current vector count: 1229
Total records processed so far: 78000

Processing batch 166


OperationalError: 250001: Could not connect to Snowflake backend after 1 attempt(s).Aborting