In [12]:
PINECONE_API_KEY = "pcsk_2232sR_5dbMX2ywkArv3PSTeu7hjqJQqwYm7miqSqMsvNmGvaydjgiuzKWv2nYHwmhQD2G"
PINECONE_INDEX_NAME = "vector-spread-demo"
# Demo settings
VECTOR_DIMENSION = 128
NUM_VECTORS = 100

#spec=ServerlessSpec(cloud="aws", region="us-east-1")
CLOUD = 'aws'
REGION = 'us-east-1'

In [2]:
import os
import time
import numpy as np
import matplotlib.pyplot as plt
from pinecone import Pinecone, ServerlessSpec
from sklearn.manifold import TSNE
from sklearn.metrics.pairwise import cosine_similarity
from mpl_toolkits.mplot3d import Axes3D


In [21]:
# UTILITIES & METHODS
def setup_demo_index():
    """
    Connects to Pinecone, creates a new index, and upserts
    NUM_VECTORS random vectors for this demo.
    Returns the index object and the list of vector IDs.
    """
    if PINECONE_API_KEY == "YOUR_PINECONE_API_KEY":
        print("="*80)
        print("!!! ERROR: Please set your PINECONE_API_KEY in the script. !!!")
        print("="*80)
        return None, None

    print(f"Connecting to Pinecone and setting up demo index '{PINECONE_INDEX_NAME}'...")
    pc = Pinecone(api_key=PINECONE_API_KEY)

    # Delete the index if it already exists (for a clean demo)
    if PINECONE_INDEX_NAME in [index['name'] for index in pc.list_indexes()]:
        print(f"Deleting existing index '{PINECONE_INDEX_NAME}'...")
        pc.delete_index(PINECONE_INDEX_NAME)
        time.sleep(5) # Give it a moment

    # Create a new serverless index
    pc.create_index(
        name=PINECONE_INDEX_NAME,
        dimension=VECTOR_DIMENSION,
        metric="cosine",
        spec=ServerlessSpec(cloud=CLOUD, region=REGION)
    )

    index = pc.Index(PINECONE_INDEX_NAME)
    print("Index created. Waiting for it to be ready...")

    # Wait for the index to be ready (can take a moment)
    while not index.describe_index_stats().get('total_vector_count', 0) == 0:
        time.sleep(1)

    print("Index is ready. Upserting demo vectors...")

    # Generate random vectors
    # We normalize them, which is best practice for cosine similarity
    vectors_to_upsert = np.random.rand(NUM_VECTORS, VECTOR_DIMENSION).astype('float32')
    vectors_to_upsert /= np.linalg.norm(vectors_to_upsert, axis=1, keepdims=True)

    vector_ids = [f"id-{i}" for i in range(NUM_VECTORS)]

    # Upsert in batches
    batch_size = 50
    for i in range(0, NUM_VECTORS, batch_size):
        batch_ids = vector_ids[i:i+batch_size]
        batch_vecs = vectors_to_upsert[i:i+batch_size]

        # Create list of (id, vector) tuples
        to_upsert_tuples = list(zip(batch_ids, batch_vecs.tolist()))

        index.upsert(vectors=to_upsert_tuples)

    print(f"Upserted {NUM_VECTORS} vectors. Waiting for indexing...")
    # Wait for vectors to be indexed
    time.sleep(10)

    print("Demo setup complete.")
    return index, vector_ids

def fetch_vectors_from_index(index, vector_ids):
    """
    Fetches the specified vector IDs from the Pinecone index.

    ---
    HOW TO ADAPT FOR YOUR *REAL* INDEX:
    1. You would first need to get all vector IDs.
    2. You can do this by paging through the `index.list()` operation.
       (e.g., `for ids_batch in index.list(namespace='your-ns', limit=1000): ...`)
    3. Then, you would fetch those IDs in batches.
    ---
    """
    print(f"Fetching {len(vector_ids)} vectors from Pinecone...")
    fetch_response = index.fetch(ids=vector_ids)
    print("Fetched from Pinecone: ", fetch_response)
    vectors_dict = fetch_response.get('vectors', {})

    # Ensure vectors are returned in the same order as the IDs
    vectors_list = []
    for vec_id in vector_ids:
        if vec_id in vectors_dict:
            vectors_list.append(vectors_dict[vec_id]['values'])

    if not vectors_list:
        print("Error: No vectors were fetched.")
        return None

    return np.array(vectors_list)

def visualize_pairwise_similarity(vectors_array):
    """
    Calculates the N x N pairwise cosine similarity and plots it as a heatmap.
    This is the most accurate, ground-truth visualization.
    """
    print("Calculating pairwise cosine similarity...")
    # Calculate the (N, N) pairwise similarity matrix
    pairwise_sim = cosine_similarity(vectors_array)

    print(f"Pairwise similarity matrix shape: {pairwise_sim.shape}")



    # Plot the heatmap
    plt.figure(figsize=(10, 8))
    im = plt.imshow(pairwise_sim, cmap='viridis', vmin=0, vmax=1)
    plt.colorbar(im, label="Cosine Similarity")
    plt.title(f"Pairwise Cosine Similarity Heatmap ({NUM_VECTORS} Vectors)")
    plt.xlabel("Vector ID")
    plt.ylabel("Vector ID")

    # Save the plot
    output_file = "pairwise_similarity_heatmap.png"
    plt.savefig(output_file)
    print(f"Saved pairwise similarity heatmap to '{output_file}'")
    plt.close()

def visualize_3d_spread(vectors_array):
    """
    Uses t-SNE (with cosine metric) to reduce vectors to 3D,
    then normalizes and plots them on a unit sphere.
    """
    print("Running t-SNE to reduce to 3 dimensions (metric=cosine)...")
    # t-SNE is computationally intensive.
    # For > 2000 vectors, consider PCA first or use a faster alternative like UMAP.
    perplexity = min(30, NUM_VECTORS - 1) # Perplexity must be < num_samples

    tsne = TSNE(
        n_components=3,
        metric='cosine',
        perplexity=perplexity,
        random_state=42,
        init='random'
    )

    vectors_3d = tsne.fit_transform(vectors_array)

    # Normalize the 3D coordinates to project them onto a unit sphere
    norms = np.linalg.norm(vectors_3d, axis=1, keepdims=True)
    vectors_3d_normalized = vectors_3d / norms

    x = vectors_3d_normalized[:, 0]
    y = vectors_3d_normalized[:, 1]
    z = vectors_3d_normalized[:, 2]

    # Create the 3D scatter plot
    fig = plt.figure(figsize=(12, 10))
    ax = fig.add_subplot(111, projection='3d')

    # Plot the points
    ax.scatter(x, y, z, alpha=0.7, c=z, cmap='viridis')

    # Draw a transparent sphere
    u = np.linspace(0, 2 * np.pi, 100)
    v = np.linspace(0, np.pi, 100)
    sphere_x = 1 * np.outer(np.cos(u), np.sin(v))
    sphere_y = 1 * np.outer(np.sin(u), np.sin(v))
    sphere_z = 1 * np.outer(np.ones(np.size(u)), np.cos(v))
    ax.plot_wireframe(sphere_x, sphere_y, sphere_z, color='gray', alpha=0.1)

    ax.set_xlabel('t-SNE Component 1')
    ax.set_ylabel('t-SNE Component 2')
    ax.set_zlabel('t-SNE Component 3')
    ax.set_title(f'3D t-SNE Projection of {NUM_VECTORS} Vectors (on Unit Sphere)')

    # Save the plot
    output_file = "3d_vector_spread.png"
    plt.savefig(output_file)
    print(f"Saved 3D vector spread plot to '{output_file}'")
    plt.close()

In [19]:
def main():
    print("Running main function...")

    # 1. Setup a demo index and get its handle
    print("Initializing demo-setup...")
    index, vector_ids = setup_demo_index()
    if not index:
        return

    try:
        # 2. Fetch the vectors back from the index
        vectors_array = fetch_vectors_from_index(index, vector_ids)
        if vectors_array is None:
            return

        print(f"Successfully fetched vectors. Array shape: {vectors_array.shape}")

        # 3. Calculate and plot pairwise similarity
        visualize_pairwise_similarity(vectors_array)

        # 4. Reduce to 3D and plot the spherical spread
        visualize_3d_spread(vectors_array)

        print("\nAll tasks complete. Check the generated .png files.")
    except Exception as e:
        print(e)
        raise e

    finally:
        # Clean up the demo index
        # print(f"\nCleaning up demo index '{PINECONE_INDEX_NAME}'...")
        # if PINECONE_INDEX_NAME in [index['name'] for index in pc.list_indexes()]:
        #     pc.delete_index(PINECONE_INDEX_NAME)
        # print("Cleanup complete.")
        pass


In [22]:
# INVOKE

if __name__ == "__main__":
    print('yes')
    main()
else:
    print('no')

yes
Running main function...
Initializing demo-setup...
Connecting to Pinecone and setting up demo index 'vector-spread-demo'...
Deleting existing index 'vector-spread-demo'...
Index created. Waiting for it to be ready...
Index is ready. Upserting demo vectors...
Upserted 100 vectors. Waiting for indexing...
Demo setup complete.
Fetching 100 vectors from Pinecone...
Fetched from Pinecone:  FetchResponse(namespace='', vectors={'id-35': Vector(id='id-35', values=[0.156155944, 0.14261952, 0.058803618, 0.0248025451, 0.0702352747, 0.134635165, 0.141562939, 0.128961056, 0.0635236278, 0.108790219, 0.143116593, 0.0524363294, 0.0193101782, 0.10706453, 0.110370971, 0.0874892846, 0.135063305, 0.121990167, 0.0443942063, 0.106857024, 0.125308827, 0.09337347, 0.0575294159, 0.0274046306, 0.155482978, 0.0776300132, 0.0391875543, 0.051305186, 0.0343113206, 0.0297789425, 0.00974651, 0.033200711, 0.120098412, 0.138471395, 0.043975126, 0.0582923517, 0.020343136, 0.140634134, 0.134501711, 0.152141467, 0.08

AttributeError: 'FetchResponse' object has no attribute 'get'

NameError: name 'FetchResponse' is not defined