In [34]:
import base64
import json
import os
import uuid

import boto3
import pandas as pd
from botocore.exceptions import NoCredentialsError, ProfileNotFound
from sklearn.metrics.pairwise import cosine_similarity

# Initialize Bedrock runtime 

In [38]:
# Set the AWS profile (replace 'your-profile-name' with your actual profile name)
aws_profile = 'your-profile-name'

# Build the Bedrock runtime client used by get_embedding.
# Two distinct failures are reported with the same hint:
#   * ProfileNotFound   - boto3 cannot find `aws_profile` in the local AWS
#                         configuration (this is what the unedited placeholder
#                         name above triggers).
#   * NoCredentialsError - no usable credentials could be located.
# NOTE: on failure `bedrock_runtime` is not (re)assigned by this cell, so later
# cells that call the service will not work until this cell succeeds.
try:
    boto3.setup_default_session(profile_name=aws_profile)
    bedrock_runtime = boto3.client(
        service_name="bedrock-runtime",
        region_name="us-east-1"
    )
except (NoCredentialsError, ProfileNotFound):
    print("Credentials not found. Please configure your AWS profile.")

# Paths to images

In [39]:
# Catalogue inputs: the image directory, the file names inside it, and one
# human-readable label per file (the two lists are matched by position).
image_path = "data/bags"
image_paths = [
    "1.png", "2.png", "3.png", "4.png", "5.png", "6.png", "7.png",
]
descriptions = [
    "Red bag", "Brown bag", "Blue bag", "Pink bag",
    "Mint bag", "Salmon bag", "Black bag",
]

# NOTE: encode_image_to_base64 and get_embedding are defined in cells that
# appear further down in this notebook; they must already have been run.

# One embedding per image: read the file, base64-encode it, embed it.
embeddings = [
    get_embedding(
        image_base64=encode_image_to_base64(os.path.join(image_path, name))
    )
    for name in image_paths
]

# One freshly generated random UUID string per image.
ids = [str(uuid.uuid4()) for _ in image_paths]

# Assemble the catalogue table: ID, image file name, embedding vector, label.
df = pd.DataFrame(
    {
        'ID': ids,
        'image': image_paths,
        'vector': embeddings,
        'description': descriptions,
    }
)
df

Unnamed: 0,ID,image,vector,description
0,af72fd21-6da6-483b-b91f-5a2bb9cfcf5c,1.png,"[0.026412552, 0.018193262, -0.02678196, -0.041...",Red bag
1,749f855f-3a73-4482-bea9-8bf1872e10fd,2.png,"[0.011002224, 0.008811103, -0.026293451, -0.03...",Brown bag
2,1e85b159-2415-4d9a-898e-527c979ae09a,3.png,"[0.038916796, -0.011730901, -0.0054464894, -0....",Blue bag
3,41eac171-0a19-47a7-88db-0d6948e39d70,4.png,"[0.029844122, 0.015299835, 0.0043916195, -0.05...",Pink bag
4,a1ef6a75-5c81-48c4-bd3f-46834975f7ff,5.png,"[0.03972358, 0.0044689025, -0.005123439, -0.04...",Mint bag
5,64e2b838-f5a1-4ce3-baa3-f5161e92d942,6.png,"[0.0037585772, -0.0036411216, 0.002607513, -0....",Salmon bag
6,d307fb23-d31d-493f-8a25-955c9ba1d454,7.png,"[0.014139218, 0.0027335822, 0.010415891, -0.03...",Black bag


# Embedding for customer query

In [49]:
# Free-text request from the shopper. It goes through the same get_embedding
# helper as the catalogue images, this time supplying only the text input.
customer_query = "Hi! I'm looking for a red bag"
query_embedding = get_embedding(image_base64=None, text_description=customer_query)

# Show just the first two components as a quick sanity check.
query_embedding[0:2]

[-0.0027618408, 0.010559082]

# Calculate cosine similarity

In [41]:
# Pull the stored embeddings out of the DataFrame as a plain Python list
vectors = df['vector'].tolist()

# Compare the single query vector against every catalogue vector; the result
# has one row (for the one query), so keep that row as the per-image scores
cosine_scores = cosine_similarity([query_embedding], vectors)[0]

# "<description> (<file name>)" label for each row
# (computed here but not used by the scoring below)
combined_info = df['description'] + " (" + df['image'] + ")"

# Key each score by its image file name
df_scores = pd.Series(data=cosine_scores, index=df['image'])

# Best match first
sorted_scores = df_scores.sort_values(ascending=False)
sorted_scores

image
1.png    0.398314
2.png    0.391526
6.png    0.351008
4.png    0.347009
3.png    0.336229
5.png    0.327345
7.png    0.325239
dtype: float64

# Display scores and matching images

In [50]:
from html import escape

# IPython.display is the public import location; importing these names from
# IPython.core.display is deprecated.
from IPython.display import HTML, display

# Initialize an HTML string. The query is free text, so it is escaped before
# being embedded in markup.
html_str = f"<h3>Query: '{escape(customer_query)}'</h3><table><tr>"

# Loop through sorted scores and images (sorted_scores is ordered best-first)
for filename, score in sorted_scores.items():
    # Use a cell-local name so the notebook-level `image_path` directory
    # variable is not overwritten with a single file's path.
    img_src = os.path.join('data', 'bags', filename)

    # Adding each image and its details to the HTML string
    html_str += (
        f"<td style='text-align:center'><img src='{escape(img_src)}' width='100'>"
        f"<br>{escape(filename)}<br>Score: {score:.2f}</td>"
    )

html_str += "</tr></table>"

# Display the HTML
display(HTML(html_str))


0,1,2,3,4,5,6
1.png Score: 0.40,2.png Score: 0.39,6.png Score: 0.35,4.png Score: 0.35,3.png Score: 0.34,5.png Score: 0.33,7.png Score: 0.33


# Function to encode an image to base64

In [32]:
def encode_image_to_base64(image_path):
    """Read a file in binary mode and return its contents as a base64 string.

    Args:
        image_path: Path of the file to read.

    Returns:
        str: The base64 encoding of the file's bytes, decoded to text.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(image_path, mode="rb") as handle:
        raw_bytes = handle.read()

    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf8')

# Function to get embedding from AWS Bedrock

In [33]:
def get_embedding(image_base64=None, text_description=None, *,
                  model_id="amazon.titan-embed-image-v1", client=None):
    """Request an embedding for an image, a text, or both from a Bedrock model.

    Args:
        image_base64: Base64-encoded image, sent as ``inputImage``. Optional.
        text_description: Text, sent as ``inputText``. Optional.
        model_id: Identifier of the model to invoke. Defaults to the model
            this notebook was written against.
        client: Object exposing ``invoke_model(body=, modelId=, accept=,
            contentType=)``. When omitted, the notebook-level
            ``bedrock_runtime`` client is used.

    Returns:
        The value stored under ``"embedding"`` in the JSON response, or
        ``None`` if the response has no such key.

    Raises:
        ValueError: If neither ``image_base64`` nor ``text_description`` is given.
    """
    input_data = {}

    if image_base64 is not None:
        input_data["inputImage"] = image_base64
    if text_description is not None:
        input_data["inputText"] = text_description

    # Validate before touching the client so a bad call never reaches AWS.
    if not input_data:
        raise ValueError("At least one of image_base64 or text_description must be provided")

    # Resolve the default lazily so the global is looked up at call time,
    # exactly as before, and only when no explicit client was injected.
    if client is None:
        client = bedrock_runtime

    response = client.invoke_model(
        body=json.dumps(input_data),
        modelId=model_id,
        accept="application/json",
        contentType="application/json"
    )

    # The response body is a readable stream containing a JSON document.
    response_body = json.loads(response.get("body").read())
    return response_body.get("embedding")
