In [None]:
# Copyright 2024 Forusone(shins777@gmail.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Text Embedding Gecko (Multilingual)
* Refer to the links below for more information about the embedding models.
* https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings
* https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/multimodal-embeddings

## Set configuration

### Package Install
* [google-cloud-aiplatform](https://cloud.google.com/python/docs/reference/aiplatform/latest)

In [None]:
# Install or upgrade the Vertex AI SDK for the current kernel (user-level install, pip output suppressed).
# NOTE(review): no version is pinned, so the installed SDK depends on what is latest at run time.
%pip install --upgrade --user --quiet google-cloud-aiplatform

### Authenticate to access GCP
* Required only when running in Colab from Google Drive.
* Not needed when running in Colab Enterprise on Vertex AI.
* Refer to the [authentication methods](https://cloud.google.com/docs/authentication?hl=ko) available in GCP.

In [None]:
# Rich-display helpers for rendering Markdown output from the LLM.
from IPython.display import display, Markdown

# Use OAuth to access the GCP environment.
# The sys.modules check makes this a no-op outside Colab: the google.colab
# auth helper is imported and invoked only when google.colab is already loaded.
import sys
if "google.colab" in sys.modules:
    from google.colab import auth
    auth.authenticate_user()

## Lab Execution

### Define constants

In [None]:
# Notebook-wide settings: PROJECT_ID and LOCATION are passed to vertexai.init(),
# MODEL_NAME is passed to GenerativeModel().
# The trailing "# @param" annotations are kept verbatim (they look like Colab
# form-field markers — confirm before editing them).
PROJECT_ID = "ai-hangsik"  # @param {type:"string"}
LOCATION = "us-central1"  # @param {type:"string"}
MODEL_NAME = "gemini-1.5-flash-002" # @param {type:"string"}

### Import libraries

In [None]:
import vertexai

from vertexai.generative_models import (
    GenerationConfig,
    GenerativeModel,
    HarmBlockThreshold,
    HarmCategory,
    GenerationResponse,
    Tool,
    Part,
    ChatSession,
)

from vertexai.preview.generative_models import grounding
from vertexai.preview.language_models import TextEmbeddingModel

### Initialize Vertex AI

In [None]:
# https://cloud.google.com/python/docs/reference/aiplatform/latest#initialization
# Bind the SDK to the project and region chosen in the constants cell.
vertexai.init(project=PROJECT_ID, location=LOCATION)

# https://cloud.google.com/vertex-ai/generative-ai/docs/reference/python/latest/vertexai.generative_models.GenerativeModel
# Instantiate the generative model named by MODEL_NAME.
# NOTE(review): no later cell visible here reads this global `model`; the
# embedding helper loads its own model object and does not use it.
model = GenerativeModel(MODEL_NAME)

### Helper function

In [None]:
import numpy as np
import torch
import torch.nn as nn
from typing import List, Optional
from vertexai.vision_models import Image, MultiModalEmbeddingModel

#------------------------------------------------------------
def get_gecko_embeddings(image_path, contextual_text):
  """Embed one image plus contextual text with the "multimodalembedding" model.

  Args:
    image_path: Path of the image file; passed to Image.load_from_file().
    contextual_text: Text sent with the image as `contextual_text`.

  Returns:
    The object returned by MultiModalEmbeddingModel.get_embeddings(). This
    notebook reads its `image_embedding` and `text_embedding` attributes.

  Side effects:
    Prints both embeddings.
  """
  # Bind the Vertex AI image class locally under an alias. The "Image Upload"
  # cell runs `from IPython.display import Image`, which rebinds the
  # notebook-global `Image` before this function is first called; looking up
  # the global here would then reach the IPython class, which has no
  # `load_from_file`.
  from vertexai.vision_models import Image as VertexImage

  # Named `embedding_model` so it does not shadow the notebook-global `model`.
  embedding_model = MultiModalEmbeddingModel.from_pretrained("multimodalembedding")
  image = VertexImage.load_from_file(image_path)

  embeddings = embedding_model.get_embeddings(
      image=image,
      contextual_text=contextual_text,
  )
  print(f"Image Embedding: {embeddings.image_embedding}")
  print(f"Text Embedding: {embeddings.text_embedding}")

  return embeddings

#------------------------------------------------------------
def dot_product(a, b):
  """Return the dot product of tensors `a` and `b`, computed by torch.dot."""

  product = torch.dot(a, b)
  return product

#------------------------------------------------------------
def cosine_similarity(a, b):
  """Return the cosine similarity of `a` and `b` along dim 0 (eps=1e-6)."""

  # Functional form of nn.CosineSimilarity with the same dim and eps.
  return nn.functional.cosine_similarity(a, b, dim=0, eps=1e-6)

#------------------------------------------------------------
def matrix_multiplication(a, b):
  """Return 100 times the matrix product of row-normalized `a` and `b`.

  A 1-D input is first reshaped to a single row. Each row is divided by its
  norm, then `a` is multiplied by the transpose of `b` and scaled by 100.
  """
  if a.ndim == 1:
    a = a.unsqueeze(0)
  if b.ndim == 1:
    b = b.unsqueeze(0)

  # keepdim=True keeps the norms as a column so they broadcast across each row.
  a_unit = a / a.norm(dim=1, keepdim=True)
  b_unit = b / b.norm(dim=1, keepdim=True)

  scaled = torch.mm(a_unit, b_unit.t()) * 100
  return scaled

#------------------------------------------------------------
def print_similarity(input_embed, target_embed):
  """Print the dot-product score of two embeddings.

  Both arguments are converted with torch.Tensor() and passed to
  dot_product(); the score is printed and nothing is returned.
  """
  import torch

  lhs = torch.Tensor(input_embed)
  rhs = torch.Tensor(target_embed)
  score0 = dot_product(lhs, rhs)

  print(f"score0[{score0}]")



### Image Upload

In [None]:
# Colab-only helper module for moving files into the runtime.
from google.colab import files
# NOTE(review): this rebinds the notebook-global name `Image`, which the helper
# cell imported from vertexai.vision_models. Any code that looks up the global
# `Image` after this cell runs gets the IPython class instead. `Image` is not
# used in this cell.
from IPython.display import Image
# Ask for files to upload; the result is kept in `uploaded`
# (presumably a mapping of file name to content — TODO confirm).
uploaded = files.upload()

### Embedding

In [None]:
# Embed the first image. Paths are relative to the working directory
# (presumably the files uploaded in the previous cell — TODO confirm).
# NOTE(review): `input` shadows Python's built-in input() for the rest of the
# session, and no later cell visible here reads it — the comparison cells use
# cha_1 as the reference.
input = get_gecko_embeddings("./cha.jpg", "차승원" )

# Embed seven more images with the same contextual text; their image
# embeddings are compared against cha_1 in the next cell.
cha_1 = get_gecko_embeddings("./cha1.jpg", "차승원" )
cha_2 = get_gecko_embeddings("./cha2.png", "차승원" )
cha_3 = get_gecko_embeddings("./cha3.jpeg", "차승원" )
cha_4 = get_gecko_embeddings("./cha4.jpg", "차승원" )
cha_5 = get_gecko_embeddings("./cha5.jpg", "차승원" )
cha_6 = get_gecko_embeddings("./cha6.jpg", "차승원" )
cha_7 = get_gecko_embeddings("./cha7.jpg", "차승원" )

In [None]:

# Print the dot-product score of cha_1's image embedding against each cha_*
# image embedding. The first call compares cha_1 with itself.
print_similarity(cha_1.image_embedding, cha_1.image_embedding )
print_similarity(cha_1.image_embedding, cha_2.image_embedding )
print_similarity(cha_1.image_embedding, cha_3.image_embedding )
print_similarity(cha_1.image_embedding, cha_4.image_embedding )
print_similarity(cha_1.image_embedding, cha_5.image_embedding )
print_similarity(cha_1.image_embedding, cha_6.image_embedding )
print_similarity(cha_1.image_embedding, cha_7.image_embedding )


In [None]:
# Embed three further images, each with its own contextual text; their image
# embeddings are compared against cha_1 in the next cell.
others_1 = get_gecko_embeddings("./jung.jpeg", "정우성" )
others_2 = get_gecko_embeddings("./lee.png", "이경규" )
others_3 = get_gecko_embeddings("./um.jpg", "엄태구" )

In [None]:
# Print the dot-product score of cha_1's image embedding against each
# others_* image embedding.
print_similarity(cha_1.image_embedding, others_1.image_embedding )
print_similarity(cha_1.image_embedding, others_2.image_embedding )
print_similarity(cha_1.image_embedding, others_3.image_embedding )