In [12]:
# Ask the notebook front end to restart the kernel by rendering a small script.
# HTML is imported from the public `IPython.display` module rather than the
# internal `IPython.core.display` path.
# NOTE(review): the script relies on a `Jupyter` JavaScript global being
# available in the front end - confirm for the environment in use.
from IPython.display import HTML
HTML("<script>Jupyter.notebook.kernel.restart()</script>")

In [13]:
# Standard library imports
import os
import re
import sys
import json
import base64
from io import BytesIO

# Other library imports
import boto3
import numpy as np
from PIL import Image

# Report the interpreter and SDK versions this notebook is running with
sdk_versions = {
    "Python": sys.version.split()[0],
    "Boto3 SDK": boto3.__version__,
    "NumPy": np.__version__,
}
for sdk_name, sdk_version in sdk_versions.items():
    print(f"{sdk_name} version: {sdk_version}")

# SciPy is optional: define a NumPy-only fallback first, then prefer SciPy's
# cdist when it can be imported.
def cdist_numpy(XA, XB, metric='euclidean'):
    """Pairwise distances between the rows of XA and XB (NumPy-only fallback).

    Args:
        XA: 2-D array-like with one vector per row.
        XB: 2-D array-like with one vector per row and the same number of
            columns as XA.
        metric: 'euclidean' or 'cosine'. 'cosine' is needed because the
            notebook's `search` helper calls `cdist(..., metric="cosine")`.

    Returns:
        Array of shape (len(XA), len(XB)) holding the pairwise distances.

    Raises:
        ValueError: if `metric` is not one of the supported names.
    """
    XA = np.asarray(XA, dtype=float)
    XB = np.asarray(XB, dtype=float)
    if metric == 'euclidean':
        return np.sqrt(((XA[:, np.newaxis, :] - XB[np.newaxis, :, :]) ** 2).sum(axis=2))
    if metric == 'cosine':
        # cosine distance = 1 - (a . b) / (|a| * |b|)
        norms = np.linalg.norm(XA, axis=1)[:, np.newaxis] * np.linalg.norm(XB, axis=1)[np.newaxis, :]
        # A zero-length vector has no direction; let the division yield nan
        # quietly instead of emitting runtime warnings.
        with np.errstate(divide='ignore', invalid='ignore'):
            return 1.0 - (XA @ XB.T) / norms
    raise ValueError(f"Metric '{metric}' not implemented in fallback")


# Try to import scipy with error handling. The broad `Exception` is kept on
# purpose: an import can fail with errors other than ImportError.
try:
    from scipy.spatial.distance import cdist
    print("SciPy imported successfully")
except Exception as e:
    print(f"SciPy import failed: {e}")
    print("Using NumPy fallback for distance calculations")
    cdist = cdist_numpy

# Seaborn is optional: when it cannot be imported, `sns` is set to None so
# later code can check for it.
try:
    import seaborn as sns
except Exception as e:
    print(f"Seaborn import failed: {e}")
    print("Continuing without seaborn - matplotlib will be used instead")
    sns = None
else:
    print("Seaborn imported successfully")
    

Python version: 3.12.2
Boto3 SDK version: 1.36.1
NumPy version: 1.26.4
SciPy imported successfully
Seaborn import failed: module 'pyarrow' has no attribute '__version__'
Continuing without seaborn - matplotlib will be used instead


In [None]:
# Create a boto3 session and read the region it resolves to
boto3_session = boto3.session.Session()
region_name = boto3_session.region_name

# Bedrock Runtime client used by the model-invocation helpers in this notebook
bedrock_client = boto3.client(service_name="bedrock-runtime", region_name=region_name)

print(f"AWS Region: {region_name}")

In [None]:
# Sample text listing 7 products with 3 variant descriptions each. Every
# "- ..." bullet line is later extracted as one product description.
# The text is built from adjacent string literals (one per line of the text)
# so that no literal is broken across physical source lines.
response = (
    'Here is a list of 7 items with 3 variants each for an online e-commerce shop, with separate full sentence descriptions:\n\n'
    '1. T-shirt\n'
    '- A red cotton t-shirt with a crew neck and short sleeves. \n'
    '- A blue cotton t-shirt with a v-neck and short sleeves.\n'
    '- A black polyester t-shirt with a scoop neck and cap sleeves.\n\n'
    '2. Jeans\n'
    '- Classic blue relaxed fit denim jeans with a mid-rise waist. \n'
    '- Black skinny fit denim jeans with a high-rise waist and ripped details at the knees.  \n'
    '- Stonewash straight leg denim jeans with a standard waist and front pockets.\n\n'
    '3. Sneakers  \n'
    '- White leather low-top sneakers with an almond toe cap and thick rubber outsole.\n'
    '- Gray mesh high-top sneakers with neon green laces and a padded ankle collar. \n'
    '- Tan suede mid-top sneakers with a round toe and ivory rubber cupsole.  \n\n'
    '4. Backpack\n'
    '- A purple nylon backpack with padded shoulder straps, front zipper pocket and laptop sleeve.\n'
    '- A gray canvas backpack with brown leather trims, side water bottle pockets and drawstring top closure.  \n'
    '- A black leather backpack with multiple interior pockets, top carry handle and adjustable padded straps.\n\n'
    '5. Smartwatch\n'
    '- A silver stainless steel smartwatch with heart rate monitor, GPS tracker and sleep analysis.  \n'
    '- A space gray aluminum smartwatch with step counter, phone notifications and calendar syncing. \n'
    '- A rose gold smartwatch with activity tracking, music controls and customizable watch faces.  \n\n'
    '6. Coffee maker\n'
    '- A 12-cup programmable coffee maker in brushed steel with removable water tank and keep warm plate.  \n'
    '- A compact 5-cup single serve coffee maker in matt black with travel mug auto-dispensing feature.\n'
    '- A retro style stovetop percolator coffee pot in speckled enamel with stay-cool handle and glass knob lid.  \n\n'
    '7. Yoga mat \n'
    '- A teal 4mm thick yoga mat made of natural tree rubber with moisture-wicking microfiber top.\n'
    '- A purple 6mm thick yoga mat made of eco-friendly TPE material with integrated carrying strap. \n'
    '- A patterned 5mm thick yoga mat made of PVC-free material with towel cover included.'
)
print(response)

In [None]:
def extract_text(input_string):
    """Return the text following each "- " marker, up to the end of its line.

    The captured text excludes the terminating newline but keeps any other
    trailing whitespace that precedes it.
    """
    bullet_pattern = r"- (.*?)($|\n)"
    return [found.group(1) for found in re.finditer(bullet_pattern, input_string)]


In [None]:
# Pull the individual bullet descriptions out of the sample text and show them
product_descriptions = extract_text(input_string=response)
product_descriptions

In [None]:
def titan_generate_image(payload, num_image=2, cfg=10.0, seed=2024):
    """Generate images with the Titan Image Generator v2 model via Bedrock.

    Args:
        payload: Task-specific request fields (for example ``taskType`` and
            ``textToImageParams``); merged into the request body as-is.
        num_image: Number of images to request.
        cfg: Classifier-free guidance scale.
        seed: Seed used for reproducible generations.

    Returns:
        A list of PIL images decoded from the base64 strings in the response.

    Raises:
        RuntimeError: if the model response reports an error or contains no
            images.
    """
    body = json.dumps(
        {
            **payload,
            "imageGenerationConfig": {
                "numberOfImages": num_image,   # Number of images to be generated. Range: 1 to 5
                "quality": "premium",          # Quality of generated images. Can be standard or premium.
                "height": 1024,                # Height of output image(s)
                "width": 1024,                 # Width of output image(s)
                "cfgScale": cfg,               # Scale for classifier-free guidance. Range: 1.0 (exclusive) to 10.0
                "seed": seed                   # The seed to use for reproducibility. Range: 0 to 2147483646
            }
        }
    )

    model_response = bedrock_client.invoke_model(
        body=body,
        modelId="amazon.titan-image-generator-v2:0",
        accept="application/json",
        contentType="application/json"
    )

    response_body = json.loads(model_response.get("body").read())

    # Surface a model-side failure explicitly instead of failing later with a
    # confusing TypeError when iterating over a missing "images" entry.
    error = response_body.get("error")
    if error is not None:
        raise RuntimeError(f"Image generation error: {error}")

    encoded_images = response_body.get("images")
    if not encoded_images:
        raise RuntimeError("Image generation returned no images")

    return [
        Image.open(BytesIO(base64.b64decode(base64_image)))
        for base64_image in encoded_images
    ]

In [None]:
# Directory that receives one generated PNG per product description
embed_dir = "data/titan-embed"
os.makedirs(embed_dir, exist_ok=True)

titles = []
total = len(product_descriptions)
for i, prompt in enumerate(product_descriptions, start=1):
    # Text-to-image request: the description itself is the (required) prompt
    request = {
        "taskType": "TEXT_IMAGE",
        "textToImageParams": {"text": prompt},
    }
    images = titan_generate_image(request, num_image=1)

    # File name is built from the first four words of the description
    stem = "_".join(prompt.split()[:4]).lower()
    title = f"{embed_dir}/{stem}.png"
    titles.append(title)
    images[0].save(title, format="png")
    print(f"[{i}/{total}] Generated: '{title}'..")

In [None]:
def titan_multimodal_embedding(
    image_path=None,  # maximum 2048 x 2048 pixels
    description=None, # English only and max input tokens 128
    dimension=1024,   # 1024 (default), 384, 256
    model_id="amazon.titan-embed-image-v1"
):
    """Request an embedding for an image, a text description, or both.

    Args:
        image_path: Path of an image file to embed; its bytes are sent
            base64-encoded as ``inputImage``.
        description: Text to embed; sent as ``inputText``.
        dimension: Requested output embedding length (1024, 384 or 256).
        model_id: Bedrock model identifier to invoke.

    Returns:
        The parsed JSON response body as a dict; callers in this notebook
        read its ``"embedding"`` entry.

    Raises:
        ValueError: if neither an image nor a description is given, or if
            ``dimension`` is not a supported length.
        RuntimeError: if the response body carries an error ``message``.
    """
    supported_dimensions = (1024, 384, 256)
    if dimension not in supported_dimensions:
        raise ValueError(
            f"dimension must be one of {supported_dimensions}, got {dimension!r}"
        )

    payload_body = {}
    embedding_config = {
        "embeddingConfig": {
             "outputEmbeddingLength": dimension
         }
    }

    # You can specify either text or image or both
    if image_path:
        with open(image_path, "rb") as image_file:
            input_image = base64.b64encode(image_file.read()).decode('utf8')
        payload_body["inputImage"] = input_image
    if description:
        payload_body["inputText"] = description

    # Explicit check instead of `assert`, which is skipped under `python -O`
    if not payload_body:
        raise ValueError("please provide either an image and/or a text description")
    print("\n".join(payload_body.keys()))

    response = bedrock_client.invoke_model(
        body=json.dumps({**payload_body, **embedding_config}),
        modelId=model_id,
        accept="application/json",
        contentType="application/json"
    )

    response_body = json.loads(response.get("body").read())

    # Fail loudly when the service reports a problem rather than letting the
    # caller hit a KeyError on a missing "embedding" entry.
    error_message = response_body.get("message")
    if error_message is not None:
        raise RuntimeError(f"Embeddings generation error: {error_message}")

    return response_body

In [None]:
# Embed every generated image, keeping the vectors in the same order as `titles`
multimodal_embeddings = []
for title in titles:
    embedding_response = titan_multimodal_embedding(image_path=title, dimension=1024)
    multimodal_embeddings.append(embedding_response["embedding"])
    print(f"generated embedding for {title}")

In [None]:
# Summarise the collected embeddings, using the last one as the example
last_embedding = multimodal_embeddings[-1]
print("Number of generated embeddings for images:", len(multimodal_embeddings))
print("Dimension of each image embedding:", len(last_embedding))
print("Example of generated embedding:\n", np.array(last_embedding))

In [None]:
def plot_similarity_heatmap(embeddings_a, embeddings_b):
    """Draw a heatmap of the pairwise inner products of two embedding sets.

    Uses seaborn when it was imported successfully; when `sns` is None (the
    import failed) the heatmap is drawn with matplotlib instead of raising
    an AttributeError.

    Args:
        embeddings_a: Array-like of embedding vectors.
        embeddings_b: Array-like of embedding vectors.

    Returns:
        The object returned by the plotting call (seaborn heatmap or
        matplotlib image).
    """
    # Imported here because matplotlib is not imported before this cell.
    import matplotlib.pyplot as plt

    inner_product = np.inner(embeddings_a, embeddings_b)
    heatmap_options = dict(vmin=np.min(inner_product), vmax=1, cmap="OrRd")

    if sns is not None:
        sns.set(font_scale=1.1)
        graph = sns.heatmap(inner_product, **heatmap_options)
    else:
        graph = plt.imshow(inner_product, aspect="auto", **heatmap_options)
        plt.colorbar(graph)

    return graph

In [None]:
# Import plotting packages.
# The backend is deliberately NOT overridden: forcing 'TkAgg' replaces the
# notebook's own backend, needs a Tk display, and fails on headless kernels.
import matplotlib
import matplotlib.pyplot as plt
import numpy as np

# Seaborn is optional here: on any import failure `sns` becomes None and the
# plotting code falls back to matplotlib.
try:
    import seaborn as sns
except Exception as e:
    print(f"Seaborn import failed: {e}")
    print("Using matplotlib only for plotting")
    sns = None
else:
    print("Seaborn imported successfully")

# Reuse the embeddings produced by earlier cells when they exist. Overwriting
# them with random data would discard the real image embeddings that later
# cells search against. Random sample data is only created when this cell is
# run on its own.
if "multimodal_embeddings" in globals() and len(multimodal_embeddings) > 0:
    print(f"Using existing embeddings with shape: {np.shape(multimodal_embeddings)}")
else:
    np.random.seed(42)  # For reproducible results
    multimodal_embeddings = np.random.rand(5, 128)  # 5 samples, 128-dimensional embeddings
    # Scale each row to unit length so that inner products are cosine
    # similarities and fit the heatmap's fixed upper bound of 1.
    multimodal_embeddings /= np.linalg.norm(multimodal_embeddings, axis=1, keepdims=True)
    print(f"Created sample embeddings with shape: {multimodal_embeddings.shape}")

# Define your function with proper imports
def plot_similarity_heatmap(embeddings_a, embeddings_b):
    """
    Plot similarity heatmap between two sets of embeddings.

    The similarity is the pairwise inner product of the two inputs. Seaborn
    draws the heatmap when available (`sns` is not None); otherwise plain
    matplotlib is used.

    Args:
        embeddings_a: Array-like of embedding vectors.
        embeddings_b: Array-like of embedding vectors.

    Returns:
        The object returned by the plotting call (seaborn heatmap or
        matplotlib image).
    """
    # Calculate inner product
    inner_product = np.inner(embeddings_a, embeddings_b)

    # Explicit figure/axes so all drawing targets the same figure
    fig, ax = plt.subplots(figsize=(10, 8))
    heatmap_options = dict(vmin=np.min(inner_product), vmax=1, cmap="OrRd")

    if sns is not None:
        # Use seaborn if available
        sns.set(font_scale=1.1)
        graph = sns.heatmap(inner_product, ax=ax, **heatmap_options)
    else:
        # Use matplotlib only if seaborn is not available
        graph = ax.imshow(inner_product, aspect='auto', **heatmap_options)
        fig.colorbar(graph, ax=ax)

    ax.set_title("Similarity Heatmap")

    try:
        plt.show()
    except Exception:
        # Only ordinary errors are handled here; a bare `except:` would also
        # swallow KeyboardInterrupt and SystemExit.
        # If show() fails, display the figure object in the notebook instead.
        from IPython.display import display
        display(fig)

    return graph

# Compare the embeddings against themselves to see how similar the items are
plot_similarity_heatmap(
    embeddings_a=multimodal_embeddings,
    embeddings_b=multimodal_embeddings,
)

In [None]:
def search(query_emb: np.ndarray, indexes: np.ndarray, top_k: int = 1):
    """Find the `top_k` index vectors closest to a query by cosine distance.

    Args:
        query_emb: Query embedding(s): a single 1-D vector or a 2-D
            array-like with one query per row.
        indexes: 2-D array-like with one indexed embedding per row.
        top_k: Number of nearest neighbours to return.

    Returns:
        A tuple ``(ids, distances)``:
        - ``ids``: positions in `indexes` of the `top_k` closest vectors for
          the first query row, nearest first.
        - ``distances``: cosine distances sorted ascending, one row per
          query, truncated to `top_k` columns.
    """
    # cdist needs 2-D inputs; promote a single vector to a one-row matrix.
    queries = np.atleast_2d(np.asarray(query_emb, dtype=float))
    index_matrix = np.atleast_2d(np.asarray(indexes, dtype=float))

    dist = cdist(queries, index_matrix, metric="cosine")

    # Slice the columns (neighbours), not the rows (queries), so that top_k
    # is applied to the distances as well as to the ids.
    return dist.argsort(axis=-1)[0, :top_k], np.sort(dist, axis=-1)[:, :top_k]

In [None]:
# Embed a free-text query with the same model and dimension as the images
query_prompt = "suede sneaker"
query_response = titan_multimodal_embedding(description=query_prompt, dimension=1024)
query_emb = query_response["embedding"]
len(query_emb)