In [1]:
import boto3
import json
import numpy as np
from typing import List, Union
import csv

In [2]:
# Load the stored paragraph embeddings into memory as [paragraph, vector] pairs.
embeddings = []
# newline="" hands line-ending handling to the csv module, so quoted
# paragraphs that contain embedded newlines are parsed as a single field.
with open("embeddings.csv", "r", newline="") as f:
    reader = csv.reader(f)
    # Skip the first row (presumably a header -- confirm against the writer);
    # the default avoids StopIteration when the file is completely empty.
    next(reader, None)

    for row in reader:
        if len(row) < 2:  # Row lacks a paragraph or an embedding column
            continue
        paragraph, embedding_str = row[0], row[1]
        # The embedding is stored as one comma-separated string of floats
        embedding = [float(x) for x in embedding_str.split(",")]
        embeddings.append([paragraph, embedding])

In [3]:
def cosine_similarity(v1: Union[List[float], np.ndarray],
                     v2: Union[List[float], np.ndarray]) -> float:
    """
    Calculate the cosine similarity between two vectors.

    Both inputs are converted to float64 arrays before any arithmetic, so
    integer inputs cannot silently overflow inside the dot product.

    Args:
        v1: First vector (list or numpy array of numbers)
        v2: Second vector (list or numpy array of numbers)
    Returns:
        float: Cosine similarity as a built-in Python float, clamped to [-1, 1]
    Raises:
        ValueError: If the vectors differ in shape or either has zero magnitude
    """
    # Force float64: np.dot on integer arrays wraps around on overflow
    v1_array = np.array(v1, dtype=np.float64)
    v2_array = np.array(v2, dtype=np.float64)
    # Check if vectors are of equal length
    if v1_array.shape != v2_array.shape:
        raise ValueError("Vectors must be of equal length")
    # Magnitudes first, so zero vectors are rejected before dividing
    magnitude1 = np.linalg.norm(v1_array)
    magnitude2 = np.linalg.norm(v2_array)
    if magnitude1 == 0 or magnitude2 == 0:
        raise ValueError("Vectors must not be zero vectors")
    similarity = np.dot(v1_array, v2_array) / (magnitude1 * magnitude2)
    # Floating point rounding can push the ratio slightly outside [-1, 1];
    # clamp it and always hand back a plain Python float.
    return float(max(min(similarity, 1.0), -1.0))

In [4]:
# Bedrock runtime client shared by the embedding and text-generation calls below.
bedrock = boto3.client(
    service_name="bedrock-runtime",
    region_name="us-west-2",
)

In [12]:
# Embed the user's question so it can be compared against the stored vectors.
# NOTE(review): presumably embeddings.csv was produced with this same Titan
# model -- similarities across different embedding models are meaningless.
question = "What happened in October at Amazon?"
input_body = {"inputText": question}
response = bedrock.invoke_model(
    modelId="amazon.titan-embed-text-v2:0",
    body=json.dumps(input_body),
)
# The response body is a stream; read it once and decode the JSON payload
response_body = json.loads(response["body"].read())
query_embedding = response_body.get("embedding")
# Fail here, with a clear message, rather than later inside cosine_similarity
if not query_embedding:
    raise ValueError("Embedding response did not contain an 'embedding' vector")

In [13]:
# Score every stored paragraph against the query: one [paragraph, score] pair each.
similarites = [
    [paragraph, float(cosine_similarity(query_embedding, vector))]
    for paragraph, vector in embeddings
]

In [14]:

# Rank a copy of the scored pairs, highest similarity first, and show the top five.
top_similarities = list(similarites)
top_similarities.sort(key=lambda pair: pair[1], reverse=True)
print(top_similarities[:5])

[["amazon Dear Shareholders: Last year at this time, I shared my enthusiasm and optimism for Amazon's future. Today, I have even more. The reasons are many, but start with the progress we've made in our financial results and customer", 0.3304279986546396], ["traffic, sales, and service levels: Amazon.com's employee base grew from 158 to 614, and we significantly strengthened our management team. Distribution center capacity grew from 50,000 to 285,000 square feet, including a 70% expansion of our Seattle facilities and the launch of our second distribution center in Delaware in November.", 0.31344220407823314], ['Amazon grew SO quickly the first few years. This coupling was further highlighted by a heavyweight mechanism we used to operate called "NPI." Any new initiative requiring work from multiple internal teams had to be reviewed by this NPI cabal where each team would communicate how many people-weeks their work would take. This bottleneck constrained what we accomplished, frustrat

In [15]:
# Put only the passage text into the prompt. Interpolating the list itself
# would insert its Python repr: brackets, escaped quotes/newlines and the
# similarity scores, none of which the model needs.
context_text = "\n\n".join(passage for passage, _score in top_similarities[:5])

prompt_text = f"""
<context>
{context_text}
</context>

<instruction>
Based on the context above, answer the question asked. Only use the information in the context. If the information is not there, answer with: I don't know.
</instruction>

<text>
{question}
</text>
"""

In [16]:
# Bedrock model identifier for the text-generation call.
model_id = "anthropic.claude-3-5-sonnet-20241022-v2:0"

# Single user turn carrying the assembled prompt as one text content block.
user_message = {
    "role": "user",
    "content": [{"type": "text", "text": prompt_text}],
}

# Request payload; temperature 0 with a very small top_p is presumably meant
# to keep the answer as repeatable as possible.
request_body = dict(
    anthropic_version="bedrock-2023-05-31",
    max_tokens=1024,
    temperature=0,
    top_p=0.002,
    messages=[user_message],
)

In [17]:
# Serialize the request and send it to the selected model.
serialized_request = json.dumps(request_body)
response = bedrock.invoke_model(body=serialized_request, modelId=model_id)

In [18]:
# Read the streamed body once, decode it, and print the first content block's text.
raw_body = response["body"].read()
response_body = json.loads(raw_body)
first_block = response_body["content"][0]
generated_text = first_block["text"]
print(generated_text)

Based on the provided context, I don't know what specifically happened in October at Amazon. The context mentions that a second distribution center was launched in Delaware in November, but there is no mention of any events in October.
