# Bedrock Embeddings을 활용해서 임베딩 계산

In [None]:
from langchain.embeddings import BedrockEmbeddings
from numpy import dot
from numpy.linalg import norm

# Create the LangChain client for Bedrock Embeddings.
# API reference: https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.bedrock.BedrockEmbeddings.html
# No arguments are passed here, so whatever defaults the library applies are in effect.
emb = BedrockEmbeddings()

class EmbedItem:
    """A piece of text paired with its embedding vector.

    The embedding is computed eagerly at construction time by calling
    ``emb.embed_query`` on the module-level embeddings client.
    """

    def __init__(self, text):
        vector = emb.embed_query(text)
        self.text = text
        self.embedding = vector

class ComparisonResult:
    """Holds a compared text together with its similarity score."""

    def __init__(self, text, similarity):
        self.text, self.similarity = text, similarity

def calculate_similarity(a, b):
    """Return the cosine similarity of vectors ``a`` and ``b``.

    Computed as the dot product of the two vectors divided by the product
    of their Euclidean norms.
    See https://en.wikipedia.org/wiki/Cosine_similarity
    """
    magnitude_product = norm(a) * norm(b)
    return dot(a, b) / magnitude_product


#### 텍스트 파일 로드

In [None]:
# Build the list of items whose embeddings will be compared.
# The file is read explicitly as UTF-8 so non-ASCII lines decode the same way
# on every platform instead of depending on the locale's default encoding.
# NOTE(review): assumes items.txt is stored as UTF-8 -- confirm.
with open("items.txt", "r", encoding="utf-8") as f:
    text_items = f.read().splitlines()

# One EmbedItem (text + embedding) per line. Blank or whitespace-only lines
# are skipped so an empty string is never passed to the embedding call.
items = [EmbedItem(text) for text in text_items if text.strip()]


## 텍스트 사이의 Cosine Similarity 계산

In [None]:
for query_item in items:
    print(f"Closest matches for '{query_item.text}'")
    print("----------------")

    # Score every item (the query item itself included) against the query
    # item, then order the results so the closest matches come first.
    cosine_comparisons = sorted(
        [
            ComparisonResult(
                candidate.text,
                calculate_similarity(query_item.embedding, candidate.embedding),
            )
            for candidate in items
        ],
        key=lambda result: result.similarity,
        reverse=True,
    )

    for result in cosine_comparisons:
        print("%.6f" % result.similarity, "\t", result.text)

    print()

## 이미지와 텍스트의 벡터 거리 계산

In [None]:
import json
import base64
import boto3

In [None]:
# boto3 client for the "bedrock-runtime" service; get_vector() below calls
# invoke_model() on it to obtain embeddings.
bedrock_runtime = boto3.client("bedrock-runtime")
                          
def get_vector(input_content, input_type):
    """Return the embedding of an image or a text string.

    The request is sent to the ``amazon.titan-embed-image-v1`` model through
    the module-level ``bedrock_runtime`` client.

    Args:
        input_content: Path of the image file when ``input_type`` is
            ``"image"``; the text itself when ``input_type`` is ``"text"``.
        input_type: Either ``"image"`` or ``"text"``.

    Returns:
        The value of the ``"embedding"`` field of the JSON response, or
        ``None`` when the response has no such field.

    Raises:
        ValueError: If ``input_type`` is neither ``"image"`` nor ``"text"``.
    """
    if input_type == "image":
        # Image bytes are sent as a base64-encoded string inside the JSON body.
        with open(input_content, "rb") as image_file:
            encoded_image = base64.b64encode(image_file.read()).decode("utf8")
        payload = {"inputImage": encoded_image}
    elif input_type == "text":
        payload = {"inputText": input_content}
    else:
        # Fail early with a clear message instead of hitting an unbound
        # request body further down.
        raise ValueError(
            f"Unsupported input_type {input_type!r}; expected 'image' or 'text'"
        )

    response = bedrock_runtime.invoke_model(
        body=json.dumps(payload),
        modelId="amazon.titan-embed-image-v1",
        accept="application/json",
        contentType="application/json",
    )
    response_body = json.loads(response.get("body").read())
    return response_body.get("embedding")

#### 테스트에 사용할 이미지를 눌러서 확인해보세요

#### [Image1](./images/blue_t.jpg) [Image2](./images/red_t.jpg)

In [None]:
# Embed the two sample images (a blue and a red T-shirt, going by the file
# names and the labels printed at the end of the notebook).
img1_vec = get_vector("./images/blue_t.jpg", "image")
img2_vec = get_vector("./images/red_t.jpg", "image")

#### 원하는 텍스트로 변경해보세요

In [None]:
# Query text to embed; swap in one of the commented alternatives below to try
# a different keyword.
input_text = "red shirt"
#input_text = "blue jean"
#input_text = "black socks"

# Embed the query text through the same get_vector() helper used for the images.
text_vec = get_vector(input_text, "text")

#### 벡터 간 유클리디안 거리를 계산해 출력합니다

In [None]:
import numpy as np

def euclidean_distance(v1, v2):
    """Return the Euclidean (L2) distance between two vectors.

    Args:
        v1: First vector; a NumPy array or any array-like such as a list.
        v2: Second vector, with a shape that broadcasts against ``v1``.

    Returns:
        The square root of the sum of squared element-wise differences.
    """
    # Coerce to arrays so plain Python lists/tuples can be subtracted
    # element-wise; arrays passed in are used as-is (no copy).
    difference = np.asarray(v1) - np.asarray(v2)
    return np.sqrt(np.sum(difference ** 2))
    
# Turn the raw embeddings into NumPy arrays:
# A = blue T-shirt image, B = red T-shirt image, C = query text.
A, B, C = (np.array(vec) for vec in (img1_vec, img2_vec, text_vec))

# Distance from the text embedding to each image embedding
# (a smaller value means the two vectors are closer).
AC_distance, BC_distance = euclidean_distance(A, C), euclidean_distance(B, C)

print(f"keyword : '{input_text}'  <--distance--> image : Blue T-shirt:", AC_distance)
print(f"keyword : '{input_text}'  <--distance--> image : Red T-shirt:", BC_distance)