## 구글 임베딩 처리

### 라이브러리 설치

In [1]:
%pip install --upgrade --quiet google-genai

### GCP 환경설정 및 로그인

In [18]:
import os

# Notebook-wide settings. The trailing `#@param` markers look like Colab form
# annotations and are kept on the same line as each assignment.

# Google Cloud project: used as the gcloud quota project and passed as
# `project` when the clients below are created.
PROJECT_ID = "ai-hangsik" #@param {type:"string"}
# Passed as `location` to genai.Client and vertexai.init.
REGION = "us-central1" #@param {type:"string"}
# Passed as `vertexai=` to genai.Client.
USE_VERTEX_AI = True #@param {type:"boolean"}


In [None]:
# Log in with Application Default Credentials through the gcloud CLI, then
# set the ADC quota project to PROJECT_ID ({PROJECT_ID} is filled in from the
# Python variable of that name).
!gcloud auth application-default login
!gcloud auth application-default set-quota-project {PROJECT_ID}

### Vertex AI Client 실행

In [19]:
import base64
from IPython.display import display, Markdown

from google import genai
from google.genai import types
from google.genai.types import HttpOptions

# Shared Gen AI SDK client, configured from the notebook-wide settings
# (USE_VERTEX_AI, PROJECT_ID, REGION); used by gemini_embedding_func.
client = genai.Client(
    vertexai=USE_VERTEX_AI,
    project=PROJECT_ID,
    location=REGION,
)

### 벡터 유사도 측정 함수

In [20]:
def cosine_similarity(embed_1, embed_2):
  """Print and return the cosine similarity of two embedding vectors.

  The value is printed with four decimal places (same message as before) and
  is now also returned so callers can compare, sort or store it. When the
  call is the last expression of a notebook cell, the returned value is
  echoed as the cell result in addition to the printed line.

  Args:
    embed_1: First embedding; any sequence accepted by ``numpy.array``.
    embed_2: Second embedding; expected to have the same length as
      ``embed_1``.

  Returns:
    The cosine similarity as a plain ``float``.
  """
  # Imported locally so the function works in any cell order.
  import numpy as np
  from scipy.spatial.distance import cosine

  vector_1 = np.array(embed_1)
  vector_2 = np.array(embed_2)

  # scipy's ``cosine`` is a distance (1 - similarity), so invert it. A
  # dedicated local name avoids shadowing this function's own name.
  similarity = float(1 - cosine(vector_1, vector_2))
  print(f"두 임베딩 배열의 코사인 유사도: {similarity:.4f}")

  return similarity


### Gemini Embedding

In [21]:
def gemini_embedding_func(content):
  """Embed ``content`` with the ``gemini-embedding-001`` model.

  Calls ``client.models.embed_content`` on the notebook-level ``client`` and
  returns the ``values`` of the first embedding in the response.

  Args:
    content: Passed unchanged as ``contents`` to ``embed_content``.

  Returns:
    ``values`` of the first entry in ``embeddings`` of the response.
  """
  response = client.models.embed_content(
      contents=content,
      model="gemini-embedding-001",
  )
  first_embedding = response.embeddings[0]
  return first_embedding.values

In [22]:
# Two Korean sample sentences that share the "cat riding a bicycle" phrase.
str_1 = "고양이가 자전거를 타고 간다"
# NOTE(review): "차를 차고" reads as "kicking a car"; possibly meant
# "차를 타고" (riding in a car) — confirm before changing, since the text
# affects the embedding and the similarity shown below.
str_2 = "호랑이가 차를 차고 가고 있고 고양이도 자전거를 타고 뒤따르고 있다"

# Embed both sentences with the Gemini embedding model.
embed_1 = gemini_embedding_func(content = str_1)
embed_2 = gemini_embedding_func(content = str_2)

# Print how similar the two sentence embeddings are.
cosine_similarity(embed_1, embed_2)

두 임베딩 배열의 코사인 유사도: 0.8660


### Text Embedding

In [23]:
#@title Vertex AI Init : Old style for Embedding module
import warnings

import vertexai

# Initialise the older Vertex AI SDK, which the embedding modules imported
# in later cells rely on.
vertexai.init(location=REGION, project=PROJECT_ID)

# Suppress warnings, e.g. notices that some existing modules are scheduled
# for deprecation.
warnings.filterwarnings("ignore")

In [24]:
from vertexai.language_models import TextEmbeddingInput, TextEmbeddingModel

def text_embedding_func(content: str):
  """Embed ``content`` with ``text-multilingual-embedding-002``.

  Wraps the text in a single ``TextEmbeddingInput``, loads the model with
  ``TextEmbeddingModel.from_pretrained`` on every call, and returns the
  ``values`` of the first embedding returned by ``get_embeddings``.

  Args:
    content: Text to embed.

  Returns:
    ``values`` of the first embedding in the model response.
  """
  request_batch = [TextEmbeddingInput(content)]
  embedder = TextEmbeddingModel.from_pretrained("text-multilingual-embedding-002")
  return embedder.get_embeddings(request_batch)[0].values

In [25]:
# Same two Korean sample sentences as in the Gemini embedding cell.
str_1 = "고양이가 자전거를 타고 간다"
# NOTE(review): "차를 차고" reads as "kicking a car"; possibly meant
# "차를 타고" (riding in a car) — confirm before changing, since the text
# affects the embedding and the similarity shown below.
str_2 = "호랑이가 차를 차고 가고 있고 고양이도 자전거를 타고 뒤따르고 있다"

# Embed both sentences with the text-multilingual embedding model.
embed_1 = text_embedding_func(content = str_1)
embed_2 = text_embedding_func(content = str_2)

# Print how similar the two sentence embeddings are.
cosine_similarity(embed_1, embed_2)

두 임베딩 배열의 코사인 유사도: 0.8576


### 멀티모달 임베딩

#### 테스트 이미지 다운로드

In [15]:
# Download four sample images into the working directory under fixed names
# (cha1.jpg, cha2.jpg, cha3.jpg, yoo.jpg); the multimodal embedding cells
# below load them by these paths.
!wget -O cha1.jpg https://img.hankyung.com/photo/201501/03.9477513.1.jpg
!wget -O cha2.jpg https://spnimage.edaily.co.kr/images/photo/files/NP/P/2008/01/PP08011000034.JPG
!wget -O cha3.jpg https://thumbnews.nateimg.co.kr/view610///news.nateimg.co.kr/orgImg/my/2024/10/22/2024102212062078097_l.jpg
!wget -O yoo.jpg https://file.sportsseoul.com/news/legacy/2019/08/11/news/2019081101000782100052321.jpg

--2025-07-25 05:08:32--  https://img.hankyung.com/photo/201501/03.9477513.1.jpg
Resolving img.hankyung.com (img.hankyung.com)... 183.111.246.130, 183.111.246.138, 183.111.246.137
Connecting to img.hankyung.com (img.hankyung.com)|183.111.246.130|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 256432 (250K) [image/jpeg]
Saving to: ‘cha1.jpg’


2025-07-25 05:08:35 (297 KB/s) - ‘cha1.jpg’ saved [256432/256432]

--2025-07-25 05:08:35--  https://spnimage.edaily.co.kr/images/photo/files/NP/P/2008/01/PP08011000034.JPG
Resolving spnimage.edaily.co.kr (spnimage.edaily.co.kr)... 183.111.246.130, 183.111.246.138, 183.111.246.137
Connecting to spnimage.edaily.co.kr (spnimage.edaily.co.kr)|183.111.246.130|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 251803 (246K) [image/jpeg]
Saving to: ‘cha2.jpg’


2025-07-25 05:08:37 (298 KB/s) - ‘cha2.jpg’ saved [251803/251803]

--2025-07-25 05:08:38--  https://thumbnews.nateimg.co.kr/view610///news.nateimg.c

In [16]:
import numpy as np
import torch
import torch.nn as nn
from typing import List, Optional
from vertexai.vision_models import Image, MultiModalEmbeddingModel

#------------------------------------------------------------
def multimodal_embeddings(image_path, contextual_text):
  """Embed an image file together with a contextual text.

  Loads ``multimodalembedding@001`` on every call, reads the image from
  ``image_path``, requests embeddings for the image and ``contextual_text``,
  prints the image embedding, and returns the whole response object.

  Args:
    image_path: Path handed to ``Image.load_from_file``.
    contextual_text: Text passed as ``contextual_text`` to ``get_embeddings``.

  Returns:
    The object returned by ``get_embeddings``; callers in this notebook read
    its ``image_embedding`` attribute.
  """
  embedder = MultiModalEmbeddingModel.from_pretrained("multimodalembedding@001")
  result = embedder.get_embeddings(
      image=Image.load_from_file(image_path),
      contextual_text=contextual_text,
  )
  print(f"Image Embedding: {result.image_embedding}")
  return result

In [17]:
# Embed the three cha*.jpg images with the same contextual text; only the
# image embedding of each response is kept.
embed_1 = multimodal_embeddings("./cha1.jpg", "차승원" ).image_embedding
embed_2 = multimodal_embeddings("./cha2.jpg", "차승원" ).image_embedding
embed_3 = multimodal_embeddings("./cha3.jpg", "차승원" ).image_embedding

NotFound: 404 GET https://aiplatform.googleapis.com/v1/publishers/google/models/multimodalembedding@001?%24alt=json%3Benum-encoding%3Dint: Publisher Model `publishers/google/models/multimodalembedding@001` is not found.

In [None]:
# Compare the first image embedding against the second and the third.
cosine_similarity(embed_1, embed_2)
cosine_similarity(embed_1, embed_3)

In [None]:
# Embed yoo.jpg and compare its image embedding with each cha*.jpg embedding.
# NOTE(review): "유혜진" may be a misspelling of the intended name
# (possibly "유해진") — confirm against the image source.
embed_4 = multimodal_embeddings("./yoo.jpg", "유혜진" ).image_embedding
cosine_similarity(embed_4, embed_1)
cosine_similarity(embed_4, embed_2)
cosine_similarity(embed_4, embed_3)

## End of Document