### Import required Libraries  
> Sentence Transformers provide multiple Embedding models  
> Google Gemini provides Embedding models as well

In [8]:
# Sentence transformers to use the embedding models locally
from sentence_transformers import SentenceTransformer, util
import pandas as pd

# Google AI library
from google import genai
from google.genai import types

# Load Environment variables from file
from dotenv import load_dotenv

# Initialise an client object with API key
load_dotenv ()
client = genai.Client()

Both GOOGLE_API_KEY and GEMINI_API_KEY are set. Using GOOGLE_API_KEY.


#### Google Gemini Embedder
Google Gemini provides embedding models that can be used through API  
They are heavy models which are resource intensive

In [9]:
Text = "Embeddings are a mechanism to vectorise the text data"

# invoke API to get embedding
result = client.models.embed_content(
        model="gemini-embedding-001",
        contents=Text,
        )

# Check Dimension and vector content
print("Vector Dimension : ", len(result.embeddings[0].values))
print(result.embeddings)

Vector Dimension :  3072
[ContentEmbedding(
  values=[
    0.003665868,
    0.02415171,
    0.008096882,
    -0.055476468,
    -0.0024439981,
    <... 3067 more items ...>,
  ]
)]


**Adapt the Embeddings**  
Gemini API provides possibility to get the embedding vector at pre-defined dimensions  
Also vector can be generated for the purpose of usage  

In [10]:
# invoke API to get embedding for a dimension
result = client.models.embed_content(
        model="gemini-embedding-001",
        contents=Text,
        config=types.EmbedContentConfig(output_dimensionality=768)
        )

# Check Dimension and vector content
print("Vector Dimension : ", len(result.embeddings[0].values))
print(result.embeddings)

Vector Dimension :  768
[ContentEmbedding(
  values=[
    0.003665868,
    0.02415171,
    0.008096882,
    -0.055476468,
    -0.0024439981,
    <... 763 more items ...>,
  ]
)]


In [12]:
# invoke API to get embedding for specific task
result = client.models.embed_content(
        model="gemini-embedding-001",
        contents=Text,
        config=types.EmbedContentConfig(output_dimensionality=768, 
                                        # task_type="SEMANTIC_SIMILARITY",
                                        task_type="CLASSIFICATION",
                                        )
        )

# Check Dimension and vector content
print("Vector Dimension : ", len(result.embeddings[0].values))
print(result.embeddings)

Vector Dimension :  768
[ContentEmbedding(
  values=[
    0.0012605385,
    0.025236906,
    0.0028632479,
    -0.054441065,
    0.00060710014,
    <... 763 more items ...>,
  ]
)]


#### Embedding Models from Sentence Transformers
Sentence Transformers provide various embedding models that are different in terms of dimension, trainning data set  
These models can be used locally by python library caching for local inference  
Details can be found in Hugging face portal

In [13]:
# Identify few embedders from HF
Embedders  = ["nomic-ai/nomic-embed-text-v1.5",
              "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
              "BAAI/bge-m3"]

Vectors = {}

# Convert the text into vector using each embedder
for Emb in Embedders :

    # initialise the model
    model = SentenceTransformer (Emb, trust_remote_code=True)

    # encode (vectorise) the same sentence
    vec = model.encode (Text)

    Vectors[Emb] = {'Dimension' : vec.size,'vector' : vec}

Dim = {m : d['Dimension'] for m, d  in Vectors.items()}
Vec = {m : v['vector'] for m, v  in Vectors.items()}

A new version of the following files was downloaded from https://huggingface.co/nomic-ai/nomic-bert-2048:
- configuration_hf_nomic_bert.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
Encountered exception while importing einops: No module named 'einops'


ImportError: This modeling file requires the following packages that were not found in your environment: einops. Run `pip install einops`

In [None]:
# Check Dimension and vectors from different models for same text
print (Dim)
Vec