## 구글 임베딩 처리

### 라이브러리 설치

In [1]:
%pip install --upgrade --quiet google-genai

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/43.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.1/43.1 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/218.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m [32m215.0/218.5 kB[0m [31m9.2 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m218.5/218.5 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[?25h

### GCP 환경설정 및 로그인

In [2]:
import os

PROJECT_ID = "ai-hangsik" #@param {type:"string"}
REGION = "us-central1" #@param {type:"string"}
USE_VERTEX_AI = True #@param {type:"boolean"}


In [3]:
!gcloud auth application-default login
!gcloud auth application-default set-quota-project {PROJECT_ID}

Go to the following link in your browser, and complete the sign-in prompts:

    https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=764086051850-6qr4p6gpi6hn506pt8ejuq83di341hur.apps.googleusercontent.com&redirect_uri=https%3A%2F%2Fsdk.cloud.google.com%2Fapplicationdefaultauthcode.html&scope=openid+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fuserinfo.email+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fcloud-platform+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fsqlservice.login&state=TgJLusdyKyU4I2pJUoBRSt8vWTWTYM&prompt=consent&token_usage=remote&access_type=offline&code_challenge=4C7JkRy_aSZwSWVbzzy9WrrFJQGv3Jt916X-Bw27bqc&code_challenge_method=S256

Once finished, enter the verification code provided in your browser: 4/0AVMBsJjyCegj_BXoTW3jmYE4gQk37cx7RfeCUbUxSKVOAddC6l9GNqH_tRqbKODOIYG3kA

Credentials saved to file: [/content/.config/application_default_credentials.json]

These credentials will be used by any library that requests Application Default Credentials (ADC).
Ca

### Vertex AI Client 실행

In [4]:
import base64
from IPython.display import display, Markdown

from google import genai
from google.genai import types
from google.genai.types import HttpOptions

client = genai.Client(
    vertexai=USE_VERTEX_AI,
    project=PROJECT_ID,
    location=REGION,)

### 벡터 유사도 측정 함수

In [5]:
def cosine_similarity(embed_1, embed_2):
  import numpy as np
  from scipy.spatial.distance import cosine

  embedding_1 = np.array(embed_1)
  embedding_2 = np.array(embed_2)

  cosine_similarity = 1 - cosine(embedding_1, embedding_2)
  print(f"두 임베딩 배열의 코사인 유사도: {cosine_similarity:.4f}")


### Gemini Embedding

In [6]:
def gemini_embedding_func(content):
  result = client.models.embed_content(
          model="gemini-embedding-001",
          contents=content)

  return result.embeddings[0].values

In [7]:
str_1 = "고양이가 자전거를 타고 간다"
str_2 = "호랑이가 차를 차고 가고 있고 고양이도 자전거를 타고 뒤따르고 있다"

embed_1 = gemini_embedding_func(content = str_1)
embed_2 = gemini_embedding_func(content = str_2)

cosine_similarity(embed_1, embed_2)

두 임베딩 배열의 코사인 유사도: 0.8660


### Text Embedding

In [8]:
#@title Vertex AI Init : Old style for Embedding module
import vertexai
vertexai.init(project=PROJECT_ID, location=REGION)

# warnings supress : 기존 몇몇 모듈이 Deprecated 될 예정에 대한 가이
import warnings
warnings.filterwarnings('ignore')

In [9]:
from vertexai.language_models import TextEmbeddingInput, TextEmbeddingModel

def text_embedding_func(content:str):


  inputs = [TextEmbeddingInput(content)]
  model = TextEmbeddingModel.from_pretrained("text-multilingual-embedding-002")
  embeddings = model.get_embeddings(inputs)

  return embeddings[0].values

In [10]:
str_1 = "고양이가 자전거를 타고 간다"
str_2 = "호랑이가 차를 차고 가고 있고 고양이도 자전거를 타고 뒤따르고 있다"

embed_1 = text_embedding_func(content = str_1)
embed_2 = text_embedding_func(content = str_2)

cosine_similarity(embed_1, embed_2)

두 임베딩 배열의 코사인 유사도: 0.8576


### 멀티모달 임베딩

#### 테스트 이미지 다운로드

In [64]:
!wget -O cha1.jpg https://img.hankyung.com/photo/201501/03.9477513.1.jpg
!wget -O cha2.jpg https://spnimage.edaily.co.kr/images/photo/files/NP/P/2008/01/PP08011000034.JPG
!wget -O cha3.jpg https://thumbnews.nateimg.co.kr/view610///news.nateimg.co.kr/orgImg/my/2024/10/22/2024102212062078097_l.jpg
!wget -O yoo.jpg https://file.sportsseoul.com/news/legacy/2019/08/11/news/2019081101000782100052321.jpg

--2025-07-24 07:13:42--  https://img.hankyung.com/photo/201501/03.9477513.1.jpg
Resolving img.hankyung.com (img.hankyung.com)... 211.115.109.71, 211.115.109.69, 211.115.109.70
Connecting to img.hankyung.com (img.hankyung.com)|211.115.109.71|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 256432 (250K) [image/jpeg]
Saving to: ‘cha1.jpg’


2025-07-24 07:13:45 (285 KB/s) - ‘cha1.jpg’ saved [256432/256432]

--2025-07-24 07:13:45--  https://spnimage.edaily.co.kr/images/photo/files/NP/P/2008/01/PP08011000034.JPG
Resolving spnimage.edaily.co.kr (spnimage.edaily.co.kr)... 183.111.246.137, 183.111.246.130, 183.111.246.138
Connecting to spnimage.edaily.co.kr (spnimage.edaily.co.kr)|183.111.246.137|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 251803 (246K) [image/jpeg]
Saving to: ‘cha2.jpg’


2025-07-24 07:13:47 (299 KB/s) - ‘cha2.jpg’ saved [251803/251803]

--2025-07-24 07:13:48--  https://thumbnews.nateimg.co.kr/view610///news.nateimg.co.kr

In [65]:
import numpy as np
import torch
import torch.nn as nn
from typing import List, Optional
from vertexai.vision_models import Image, MultiModalEmbeddingModel

#------------------------------------------------------------
def multimodal_embeddings(image_path, contextual_text):

  model = MultiModalEmbeddingModel.from_pretrained("multimodalembedding@001")
  image = Image.load_from_file(image_path)

  embeddings = model.get_embeddings(
      image=image,
      contextual_text=contextual_text,
  )
  print(f"Image Embedding: {embeddings.image_embedding}")

  return embeddings

In [66]:
embed_1 = multimodal_embeddings("./cha1.jpg", "차승원" ).image_embedding
embed_2 = multimodal_embeddings("./cha2.jpg", "차승원" ).image_embedding
embed_3 = multimodal_embeddings("./cha3.jpg", "차승원" ).image_embedding

Image Embedding: [-0.033747945, 0.0520880446, -0.016555056, -0.0370086841, -0.00232363818, -0.0287632402, 0.00419967389, 0.00324012013, -5.23778e-05, 0.0067775161, -0.00970153417, 0.0311379489, -0.0315521136, 0.000996450777, 0.00998737663, -0.0433760211, 0.0388495289, 0.0148805212, -0.00569657702, -0.00195749593, -0.0551092289, 0.0129920235, 0.0214672275, -0.0447969511, 0.0106161954, -0.00424140459, 0.00253575901, -0.000185520737, 0.00256281649, 0.00742350658, -0.00048825008, 0.00962429866, -0.0111507187, -0.0480701812, -0.0180476494, -0.00959331635, 0.00537710637, -0.0255019926, 0.0401527323, -0.0204960108, 0.0288087185, -0.0355279818, 0.011160383, -0.0699055, 0.0105597237, -0.00675495388, -0.0236966908, -0.0135018229, 0.0102790706, 0.0081152264, -0.00690435525, -0.0175723322, 0.0139087401, 0.0187049322, -0.0432659686, -0.0241564, 0.0129161207, 0.00138293148, -0.00602468802, -0.0155867543, 0.0250388589, 0.0026072741, -0.01473145, 0.0180786606, -0.00159570645, 0.0177024249, -0.03116521

In [67]:
cosine_similarity(embed_1, embed_2)
cosine_similarity(embed_1, embed_3)

두 임베딩 배열의 코사인 유사도: 0.7707
두 임베딩 배열의 코사인 유사도: 0.4314


In [68]:
embed_4 = multimodal_embeddings("./yoo.jpg", "유혜진" ).image_embedding
cosine_similarity(embed_4, embed_1)
cosine_similarity(embed_4, embed_2)
cosine_similarity(embed_4, embed_3)

Image Embedding: [-0.0167236812, 0.0748841241, -0.0323987268, 0.0250371788, 0.0175045, 0.0122071328, -0.000364793552, -0.00441229716, -0.00360232126, -0.0190716758, 0.0104172137, 0.0377596095, 0.0139192501, 0.0285811666, -0.0275056511, -0.0143679744, 0.015180341, -0.0052400413, -0.022528626, -0.00241062907, -0.0832243711, 0.0451219268, 0.0289542191, 0.035618294, -0.00313301152, 0.00761308661, 0.0298128612, -0.00246769399, 0.0167496167, -0.0352715477, 0.012472556, -0.00977505371, -0.0523917526, 0.0170153622, -0.0408460237, -0.0404001921, 0.021501394, -0.00168886397, -0.0056421645, 0.0324835479, 0.0164035484, -0.0321259648, 0.0279442556, -0.0175971948, -0.0185303278, 0.00679245917, -0.0172736924, -0.0217978973, 0.00155055267, 0.00689588441, 0.00366544398, -0.0339683928, 0.00258772564, 0.0150907105, -0.0180503298, 0.00566523708, 0.0287829544, -0.0219612569, 0.0246091727, -0.0131020732, -0.013623436, -0.0166558251, -0.0258831512, -0.00163781934, -0.00369050982, 0.0141632017, -0.00507903285

## End of Document