# Disclaimer & Copyright

Copyright 2024 Forusone : shins777@gmail.com

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at

https://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.

# Gecko Multimodal Embeddings - Korean text embedding test
* This notebook shows how to generate embeddings for images paired with Korean contextual text and how to compare the resulting vectors.
* Refer to the link for more information about the embeddings.
 * https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings
 * https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/multimodal-embeddings

# Configuration
## Install python packages
* Vertex AI SDK for Python
  * https://cloud.google.com/python/docs/reference/aiplatform/latest

In [1]:
%pip install --upgrade --quiet google-cloud-aiplatform

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/6.3 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.4/6.3 MB[0m [31m12.1 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━[0m [32m3.3/6.3 MB[0m [31m47.5 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m6.3/6.3 MB[0m [31m69.4 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.3/6.3 MB[0m [31m49.8 MB/s[0m eta [36m0:00:00[0m
[?25h

## Authentication to access GCP & Google Drive

* Use OAuth to access the GCP environment.
 * Refer to the authentication methods in GCP : https://cloud.google.com/docs/authentication?hl=ko

In [2]:
import sys
from IPython.display import Markdown, display

# Run the Colab authentication flow only when the google.colab module is loaded.
if "google.colab" in sys.modules:
    from google.colab import auth
    auth.authenticate_user(project_id="ai-hangsik")

# Set the gcloud CLI's active project (shell command run through the notebook's `!` escape).
!gcloud config set project ai-hangsik

Updated property [core/project].


# Set the environment on GCP Project
* Configure project information
  * Model name : LLM model name : https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models
  * Project Id : project ID in GCP
  * Region : region name in GCP

In [3]:
# Name of the generative model passed to GenerativeModel in the initialization cell.
MODEL_NAME = "gemini-1.5-flash"

# GCP project and region passed to vertexai.init in the initialization cell.
PROJECT_ID = "ai-hangsik"
REGION = "asia-northeast3"

### Vertex AI initialization
Configure Vertex AI and access to the foundation model.
* Vertex AI initialization : aiplatform.init(..)
  * https://cloud.google.com/python/docs/reference/aiplatform/latest#initialization

In [4]:
from vertexai.generative_models import (
    GenerationConfig,
    GenerationResponse,
    GenerativeModel,
    HarmBlockThreshold,
    HarmCategory,
    Part,
    Tool
)

import vertexai
from vertexai.preview.generative_models import grounding

# Bind the Vertex AI SDK to the configured project and region.
vertexai.init(project=PROJECT_ID, location=REGION)
# Instantiate the generative model named by MODEL_NAME.
# NOTE(review): `model` is not referenced again in the visible cells — confirm it is still needed.
model = GenerativeModel(MODEL_NAME)

## Functions to evaluate similarity

## Multimodal embedding

* https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/multimodal-embeddings

In [61]:
import vertexai
from vertexai.vision_models import Image, MultiModalEmbeddingModel

def get_gecko_embeddings(image_path, contextual_text):
    """Embed an image together with its contextual text.

    Loads the pretrained ``multimodalembedding`` model, reads the image from
    ``image_path``, requests embeddings for the image/text pair, prints both
    vectors, and returns the response.

    Args:
        image_path: Path of the image file to load.
        contextual_text: Text sent alongside the image.

    Returns:
        The object returned by ``MultiModalEmbeddingModel.get_embeddings``;
        this function reads its ``image_embedding`` and ``text_embedding``
        attributes.
    """
    # Import under private aliases at call time instead of relying on the
    # module-level names: the upload cell runs
    # `from IPython.display import Image`, which rebinds the global `Image`
    # to a class that does not provide `load_from_file`.
    from vertexai.vision_models import Image as _VertexImage
    from vertexai.vision_models import (
        MultiModalEmbeddingModel as _MultiModalEmbeddingModel,
    )

    model = _MultiModalEmbeddingModel.from_pretrained("multimodalembedding")
    image = _VertexImage.load_from_file(image_path)

    embeddings = model.get_embeddings(
        image=image,
        contextual_text=contextual_text,
    )
    print(f"Image Embedding: {embeddings.image_embedding}")
    print(f"Text Embedding: {embeddings.text_embedding}")

    return embeddings


In [59]:
from google.colab import files
# NOTE(review): this rebinds the module-level name `Image`, which the embedding
# cell imports from vertexai.vision_models; the IPython `Image` is not used in
# this cell.
from IPython.display import Image
# Call Colab's upload helper; `uploaded` holds whatever files.upload() returns.
uploaded = files.upload()

Saving cha3.jpeg to cha3.jpeg


In [29]:
# First image/text pair: the local file ./cha.jpg and its Korean contextual text.
image_path = "./cha.jpg"
contextual_text = "차승원"

# Keep the response as embeddings1; the similarity cell reads its image_embedding.
embeddings1 = get_gecko_embeddings(image_path, contextual_text )

Image Embedding: [-0.0110583, 0.0260190386, -0.0109926891, -0.010400584, 0.00143542932, -0.0106043266, 0.00768703455, -0.00869302079, -0.0264202468, 0.00124640216, -0.032614775, 0.0205467436, -0.0501487553, 0.0190443434, 0.00590475695, -0.0321571454, 0.0566952452, -0.011616814, -0.00460632099, -0.017692117, -0.0489736, -0.00130457059, 0.0162397418, -0.0411570668, 0.0322006494, -0.00525356317, 0.0134333754, -0.00314690196, 0.0245450791, -0.00548022939, 0.00842030346, -0.0164810866, -0.0128317829, -0.0222177058, -0.0112460665, 0.00541418232, 0.00370099279, -0.0368631892, 0.0246526897, -0.0142671373, 0.0390058234, 0.00120316446, -0.00382687873, -0.0627598912, 0.0122671574, -0.0188629571, -0.0420732535, -1.13300748e-05, -0.0083430158, 0.0155079933, 0.0109181162, 0.00202469551, 0.00574951293, 0.00337651069, -0.0283353589, -0.0150975417, 0.0112142712, -0.00640941365, -0.0245240983, -0.0241362918, 0.0263236985, -0.00647141552, -0.0209921282, -0.00690630032, 0.00418380788, 0.00822836, -0.00016

In [30]:
# Second image/text pair: ./cha2.png with the same contextual text as the first pair.
image_path = "./cha2.png"
contextual_text = "차승원"

# Keep the response as embeddings2; the similarity cell reads its image_embedding.
embeddings2 = get_gecko_embeddings(image_path, contextual_text )

Image Embedding: [-0.0357608, 0.0620567501, -0.0352320224, 0.010782294, 0.0140453689, 0.00573695498, 0.000760450901, -0.026676489, -0.0279798, 0.0400953107, 0.0207833219, 0.0366143063, -0.0223037191, -0.0432168804, -0.00556320744, -0.0470141, 0.0367084742, 0.0164120886, 0.0147938142, -0.000236946275, -0.0676350072, 0.0220765509, 0.00983047299, -0.0129327551, -0.0148069924, -0.0170390494, 0.00850488525, -0.00605214806, -0.00334944506, 0.0063216812, 0.0139411911, 0.0269594025, -0.0113042695, 0.0125942519, 0.00426169671, 0.0138557199, 0.0125776622, -0.046911668, 0.0237267073, -0.0281051919, 0.00724841189, -0.00837476924, -0.0030613956, -0.0310167987, 0.0119006978, 0.000947111694, -0.025796501, -0.0234870259, 0.0230491664, 0.00506005716, 0.0226945803, -0.0239044521, 0.00195974, -0.00252949609, -0.0183905717, -0.00495094294, -0.0162835028, -0.00991679169, -0.00475419499, -0.0274578314, 0.00957208499, -0.00644598715, 0.00350414542, -0.0105296774, 0.00766746327, 0.0180227403, 0.0358074978, 0.

In [56]:
# Third image/text pair: ./gd.jpg with a different contextual text from the other pairs.
image_path = "./gd.jpg"
contextual_text = "권지용"

# Keep the response as embeddings3; the similarity cell reads its image_embedding.
embeddings3 = get_gecko_embeddings(image_path, contextual_text )

Image Embedding: [0.0289522, 0.0307700243, 0.00293883774, -0.00598485628, -0.01058874, -0.0384576134, 0.00260921568, 0.00280767633, -0.0108984048, 0.023325704, -0.0106139006, 0.0294637941, -0.0126742916, -0.0379564688, -0.0298879314, -0.0290131755, 0.00592393242, 0.0232289415, 0.0111784078, 0.0171745066, -0.0464103557, 0.0345826596, -0.0348713882, -0.0285716, 0.00452556182, -0.0236448627, -0.000657603261, 0.00298778852, -0.0109141422, 0.0213508885, 0.0225828029, 0.0176140033, -0.0225178786, 0.0271262024, 0.00761678629, -0.00158481789, -0.00734545896, 0.00561617641, 0.0122945374, -0.0504351035, 0.0336094387, -0.0135374721, 0.0130743347, -0.0024550024, -6.97957075e-05, -0.0272967853, -0.0216262508, -0.0252094064, -0.0332851522, 0.00997487921, -0.01681423, -0.0417077206, -0.00718177203, -0.00482757669, 0.0112738209, -0.0200119056, 0.0063747433, -0.0118828882, -0.00551579427, -0.0321098901, -0.00444124313, -0.00820281357, 0.0163185932, 0.0066119181, -0.0117043406, 0.0109439045, -0.00015075

In [62]:
# Fourth image/text pair: ./cha3.jpeg with the same contextual text as the first pair.
image_path = "./cha3.jpeg"
contextual_text = "차승원"

# Keep the response as embeddings4; the similarity cell reads its image_embedding.
embeddings4 = get_gecko_embeddings(image_path, contextual_text )

Image Embedding: [0.0242623296, 0.0507314876, -0.00474032294, -0.0333081409, 0.017795708, -0.0197478589, -0.00269838376, -0.00167833036, 0.00237768143, 0.00813251082, -0.0141243, 0.00596956676, -0.0247591399, 0.11032638, 0.0197641626, -0.0483233035, 0.0575691052, 0.00704401219, 0.00550321816, -0.0107939783, -0.0281235762, 0.00182896992, -0.0152599337, 0.0124402968, -0.0287194829, -0.0187914185, -0.000445483718, -0.0195077658, 0.00701737264, -0.00441741152, -0.00973840524, 0.01818363, 0.00393992104, -0.00556177692, -0.0103643276, 0.0209067874, -0.0318217129, 0.0136300717, 0.0304532386, -0.00310236891, 0.0258220527, 0.0026801778, 0.023931019, -0.0425943546, -0.025125226, -0.0366544612, -0.0420950763, -0.0032448417, 0.00410705432, 0.00894600153, 0.00251550786, -0.00209248392, 0.0382484123, 0.00975345727, 0.00456762407, -0.0245184954, 0.0314035155, -0.00226430385, 0.00530383736, -0.0246123523, -0.0243046619, -0.0110614654, -0.00376962032, -0.0131111378, 0.00434930669, -0.0153157515, -0.009

In [57]:
import numpy as np
import torch
import torch.nn as nn

""" torch matrix multiplication """
def cal_mm(a, b):
    """Return the pairwise cosine similarity of the rows of a and b, times 100.

    A 1-D tensor is treated as a single row. Each row is divided by its L2
    norm, then the normalised rows of ``a`` are matrix-multiplied with the
    transposed normalised rows of ``b``.

    Args:
        a: 1-D or 2-D tensor.
        b: 1-D or 2-D tensor with the same row length as ``a``.

    Returns:
        A 2-D tensor with one row per row of ``a`` and one column per row of
        ``b``.
    """

    def _unit_rows(t):
        # Promote a lone vector to a one-row matrix before normalising.
        rows = t.unsqueeze(0) if len(t.shape) == 1 else t
        return rows / rows.norm(dim=1, keepdim=True)

    unit_a = _unit_rows(a)
    unit_b = _unit_rows(b)
    return torch.mm(unit_a, unit_b.t()) * 100

""" torch cosine similarity """
def cal_cosine(a, b):
    """Return the row-wise cosine similarity between a and b.

    A 1-D tensor is treated as a single row, matching how ``cal_mm`` accepts
    its inputs. Without this promotion, ``dim=1`` raises ``IndexError`` for
    1-D tensors, because they have no dimension 1.

    Args:
        a: 1-D or 2-D tensor.
        b: 1-D or 2-D tensor broadcastable against ``a``.

    Returns:
        A 1-D tensor with one similarity value per row.
    """
    if a.dim() == 1:
        a = a.unsqueeze(0)
    if b.dim() == 1:
        b = b.unsqueeze(0)

    cos = nn.CosineSimilarity(dim=1, eps=1e-6)
    return cos(a, b)

""" torch dot product similarity """
def cal_dot_product(a, b):
    """Return the dot product of two vectors as a 0-D tensor.

    For a 2-D input only its first row is used (the original behaviour). A
    1-D input is used as-is; previously indexing it with ``[0]`` produced a
    0-D scalar, which ``torch.dot`` rejects.

    Args:
        a: 1-D tensor, or 2-D tensor whose first row is the vector.
        b: 1-D tensor, or 2-D tensor whose first row is the vector.

    Returns:
        A 0-D tensor holding the dot product.
    """
    vec_a = a if a.dim() == 1 else a[0]
    vec_b = b if b.dim() == 1 else b[0]
    return torch.dot(vec_a, vec_b)

In [63]:
# Wrap the image embedding of each response in a torch tensor.
tensor_embed1, tensor_embed2, tensor_embed3, tensor_embed4 = map(
    torch.Tensor,
    (
        embeddings1.image_embedding,
        embeddings2.image_embedding,
        embeddings3.image_embedding,
        embeddings4.image_embedding,
    ),
)

# Score the first image against itself and against each of the other images.
print(f"mm_score1 : {cal_mm(tensor_embed1, tensor_embed1)}")
print(f"mm_score2 : {cal_mm(tensor_embed1, tensor_embed2)}")
print(f"mm_score3 : {cal_mm(tensor_embed1, tensor_embed3)}")
print(f"mm_score4 : {cal_mm(tensor_embed1, tensor_embed4)}")


mm_score1 : tensor([[100.]])
mm_score2 : tensor([[54.9205]])
mm_score3 : tensor([[39.9327]])
mm_score4 : tensor([[46.3895]])
