# Bedrock Embeddings을 활용해서 임베딩 계산

In [1]:
from langchain.embeddings import BedrockEmbeddings
from numpy import dot
from numpy.linalg import norm

# Create the LangChain Bedrock Embeddings client.
# API reference:
# https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.bedrock.BedrockEmbeddings.html
# No arguments are passed, so the client runs with the library's defaults.
emb = BedrockEmbeddings()

class EmbedItem:
    """A piece of text paired with its embedding vector.

    Attributes are ``text`` (the original string) and ``embedding`` (the
    value returned by ``emb.embed_query`` for that string).
    """

    def __init__(self, text):
        # One embed_query call per instance, made eagerly at construction
        # so later comparisons can reuse the stored vector.
        vector = emb.embed_query(text)
        self.text = text
        self.embedding = vector

class ComparisonResult:
    """Similarity score recorded for one candidate text.

    Attributes are ``text`` (the candidate string) and ``similarity``
    (the score it received against the reference item).
    """

    def __init__(self, text, similarity):
        self.text, self.similarity = text, similarity

def calculate_similarity(a, b):
    """Return the cosine similarity of vectors ``a`` and ``b``.

    Background: https://en.wikipedia.org/wiki/Cosine_similarity

    Args:
        a: First vector (any sequence of numbers accepted by numpy).
        b: Second vector, same length as ``a``.

    Returns:
        ``dot(a, b) / (norm(a) * norm(b))``. When either vector has zero
        norm the ratio is undefined, and ``0.0`` is returned instead.
    """
    denominator = norm(a) * norm(b)
    if denominator == 0:
        # 0/0 would yield nan (plus a RuntimeWarning) and nan breaks the
        # ordering of any list later sorted by similarity.
        return 0.0
    return dot(a, b) / denominator


## 텍스트 embedding 

In [3]:
# Embed a single sample string, then show the vector's length and its values.
embeded_text = emb.embed_query("Hello")
print("vector size = ", len(embeded_text))
print(embeded_text)

vector size =  1536
[0.053955078, -0.34375, -0.15625, 0.45898438, 1.3046875, 0.4140625, 0.49023438, -0.0009613037, -0.51171875, -0.27148438, 0.71875, 0.96875, 0.22167969, -0.32421875, 0.34375, -0.23339844, -0.15917969, -0.09863281, -1.0546875, 0.56640625, -0.12451172, 0.34765625, 0.21386719, -0.13378906, -0.40234375, -0.20703125, 1.15625, -0.84375, 0.2109375, -0.6953125, 0.43945312, 1.0390625, 0.36132812, -0.6875, -0.26171875, 0.30664062, 0.63671875, -0.0154418945, 0.81640625, 0.1171875, 0.47460938, -0.63671875, 1.0546875, 1.21875, 0.89453125, -0.037109375, -0.049560547, -0.053466797, 0.47265625, 1.375, 0.47851562, 0.7578125, -0.7109375, -0.11767578, 0.08203125, -0.48242188, -0.14746094, -0.12695312, 0.29882812, -0.2890625, -0.12451172, -0.42578125, -0.106933594, 0.15722656, 0.35351562, -0.90234375, -1.0703125, 0.13769531, -0.40429688, 0.296875, -0.25390625, 0.040771484, 0.5546875, 1.171875, 0.40820312, -0.09277344, 0.80078125, -0.18261719, 0.38671875, -0.3203125, -0.23535156, -0.28710

### 텍스트 파일 로드

In [4]:
# Build the list of embedded items to compare.
# items.txt holds one text per line; the sample output includes Korean,
# Japanese, French and German lines, so decode explicitly as UTF-8 instead of
# relying on the platform's default encoding.
with open("items.txt", "r", encoding="utf-8") as f:
    # Skip blank lines so no empty string is passed to the embedding client.
    text_items = [line for line in f.read().splitlines() if line.strip()]

# Constructing each EmbedItem computes and stores its embedding.
items = [EmbedItem(text) for text in text_items]


## 텍스트 사이의 Cosine Similarity 계산

In [5]:
# For every item, rank all items (itself included) by cosine similarity.
for e1 in items:
    print(f"Closest matches for '{e1.text}'")
    print("----------------")

    # Score e1 against each item and order the results so the closest
    # matches come first.
    cosine_comparisons = sorted(
        (
            ComparisonResult(
                e2.text,
                calculate_similarity(e1.embedding, e2.embedding),
            )
            for e2 in items
        ),
        key=lambda x: x.similarity,
        reverse=True,
    )

    for c in cosine_comparisons:
        print("%.6f" % c.similarity, "\t", c.text)

    print()

Closest matches for 'Felines, canines, and rodents'
----------------
1.000000 	 Felines, canines, and rodents
0.872856 	 Cats, dogs, and mice
0.599730 	 Chats, chiens et souris
0.516598 	 Lions, tigers, and bears
0.456268 	 고양이, 개, 쥐
0.455923 	 猫、犬、ネズミ
0.068916 	 パン屋への道順を知りたい
0.061314 	 パン屋への行き方を教えてください
0.034925 	 빵집으로 가는 길을 알려주세요.
0.024160 	 경기장 가는 방법을 알려주시겠어요?
0.002239 	 Can you please tell me how to get to the stadium?
-0.003159 	 Kannst du mir bitte sagen, wie ich zur Bäckerei komme?
-0.007595 	 Can you please tell me how to get to the bakery?
-0.019469 	 Pouvez-vous s'il vous plaît me dire comment me rendre à la boulangerie?
-0.020840 	 I need directions to the bread shop

Closest matches for 'Can you please tell me how to get to the bakery?'
----------------
1.000000 	 Can you please tell me how to get to the bakery?
0.712236 	 I need directions to the bread shop
0.541959 	 Pouvez-vous s'il vous plaît me dire comment me rendre à la boulangerie?
0.492384 	 빵집으로 가는 길을 알려주세요.
0.4846

## 이미지와 텍스트의 벡터 거리 계산

In [6]:
import json
import base64
import boto3

In [7]:
# Bedrock runtime client; get_vector() uses it to invoke the embedding model.
bedrock_runtime = boto3.client("bedrock-runtime")
                          
def get_vector(input_content, input_type):
    """Return the embedding of an image file or a text string.

    The request is sent to the ``amazon.titan-embed-image-v1`` model through
    the module-level ``bedrock_runtime`` client.

    Args:
        input_content: Path of the image file when ``input_type`` is
            ``"image"``; the text itself when ``input_type`` is ``"text"``.
        input_type: Either ``"image"`` or ``"text"``.

    Returns:
        The value of the ``"embedding"`` key in the model's JSON response
        (``None`` if the key is absent).

    Raises:
        ValueError: If ``input_type`` is neither ``"image"`` nor ``"text"``.
    """
    if input_type == "image":
        # The image is sent as base64 text inside the JSON request body.
        with open(input_content, "rb") as image_file:
            input_image = base64.b64encode(image_file.read()).decode("utf8")
        payload = {"inputImage": input_image}
    elif input_type == "text":
        payload = {"inputText": input_content}
    else:
        # Fail early with a clear message instead of hitting an unbound
        # request body further down.
        raise ValueError(
            f"input_type must be 'image' or 'text', got {input_type!r}"
        )

    response = bedrock_runtime.invoke_model(
        body=json.dumps(payload),
        modelId="amazon.titan-embed-image-v1",
        accept="application/json",
        contentType="application/json",
    )
    response_body = json.loads(response.get("body").read())
    return response_body.get("embedding")

#### 테스트에 사용할 이미지를 눌러서 확인해보세요

#### [Image1](./images/blue_t.jpg) [Image2](./images/red_t.jpg)

In [8]:
# Embed the two sample images (blue T-shirt first, then red T-shirt).
img1_vec = get_vector("./images/blue_t.jpg", "image")
img2_vec = get_vector("./images/red_t.jpg", "image")

In [10]:
# Show the number of values in the image embedding.
len(img1_vec)

1024

#### `input_text`를 원하는 텍스트로 변경해보세요

In [11]:
# Query text to compare against both images; swap in one of the
# commented-out alternatives below (or your own text) to experiment.
input_text = "red shirt"
#input_text = "blue jean"
#input_text = "black socks"

# Embed the text with the same get_vector() helper used for the images.
text_vec = get_vector(input_text, "text")

#### 벡터 간 유클리디안 거리를 계산해 출력합니다

In [None]:
import numpy as np

def euclidean_distance(v1, v2):
    """Return the Euclidean (L2) distance between two vectors.

    Args:
        v1: First vector; any array-like of numbers (list, tuple, ndarray).
        v2: Second vector, with the same shape as ``v1``.

    Returns:
        The square root of the summed squared element-wise differences.

    Raises:
        ValueError: If the two inputs do not have the same shape.
    """
    # Coerce up front so plain Python lists work too; ``list - list`` would
    # otherwise raise TypeError.
    a = np.asarray(v1, dtype=float)
    b = np.asarray(v2, dtype=float)
    if a.shape != b.shape:
        # Broadcasting would silently produce a meaningless total here.
        raise ValueError(
            f"shape mismatch: {a.shape} vs {b.shape}"
        )
    return np.sqrt(np.sum((a - b) ** 2))
    
# Wrap the raw embeddings as numpy arrays: A = blue image, B = red image,
# C = query text.
A, B, C = np.array(img1_vec), np.array(img2_vec), np.array(text_vec)

# Distance from the query text to each image.
AC_distance, BC_distance = euclidean_distance(A, C), euclidean_distance(B, C)

print(f"keyword : '{input_text}'  <--distance--> image : Blue T-shirt:", AC_distance)
print(f"keyword : '{input_text}'  <--distance--> image : Red T-shirt:", BC_distance)