### [Embeddings](https://python.langchain.com/docs/integrations/text_embedding/) :
Converting text into vectors

- OpenAI : https://python.langchain.com/docs/integrations/text_embedding/openai/
- Ollama : https://python.langchain.com/docs/integrations/text_embedding/ollama/
- Huggingface : https://python.langchain.com/docs/integrations/text_embedding/huggingfacehub/
- Google Generative AI Embeddings : https://python.langchain.com/docs/integrations/text_embedding/google_generative_ai/
- Google Vertex AI Embeddings : https://python.langchain.com/v0.2/docs/integrations/text_embedding/google_vertex_ai_palm/

#### Google Generative AI Embeddings

In [2]:
from dotenv import load_dotenv
import os

# Load environment variables from .env file
load_dotenv()

# Get the API key
GENERATIVE_AI_API_KEY = os.getenv('GENERATIVE_AI_API_KEY')

from langchain_google_genai import GoogleGenerativeAIEmbeddings
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=GENERATIVE_AI_API_KEY)
vector = embeddings.embed_query("hello, world!")
vector

[0.05168594419956207,
 -0.030764883384108543,
 -0.03062233328819275,
 -0.02802734449505806,
 0.01813092641532421,
 -0.0018945294432342052,
 0.028477225452661514,
 -0.007562295068055391,
 0.011064722202718258,
 -0.005353892687708139,
 0.03884561359882355,
 0.05086333677172661,
 -0.008304028771817684,
 -0.06084785982966423,
 -0.0065867058001458645,
 -0.022183923050761223,
 0.013262644410133362,
 -0.008496992290019989,
 -0.0006867790361866355,
 -0.0032968695741146803,
 -0.0034330044873058796,
 0.009621595032513142,
 -0.02791913039982319,
 -0.030381333082914352,
 0.021061236038804054,
 0.011688413098454475,
 -0.000953729497268796,
 -0.07332165539264679,
 0.012589260004460812,
 0.05902665853500366,
 -0.035337094217538834,
 0.01709485612809658,
 -0.054261308163404465,
 0.006611740216612816,
 0.03894328698515892,
 -0.05709062144160271,
 0.03994591161608696,
 0.00969378650188446,
 -0.0018339328235015273,
 -4.905527748633176e-05,
 0.024423865601420403,
 -0.0919656902551651,
 -0.0446292273700237

In [3]:
# Using Service Account key

from google.oauth2 import service_account
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Load the service account JSON key
service_account_credentials = service_account.Credentials.from_service_account_file('cred/key.json')

embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", credentials=service_account_credentials)
vector = embeddings.embed_query("Connect to Google's generative AI embeddings service using the GoogleGenerativeAIEmbeddings class, found in the langchain-google-genai package.")
vector

[0.020311782136559486,
 -0.03246069699525833,
 0.016909142956137657,
 0.023249264806509018,
 0.02866043895483017,
 0.0005120245041325688,
 0.07369165867567062,
 -0.018536455929279327,
 0.05439412593841553,
 0.010940732434391975,
 -0.009900876320898533,
 -0.0445476695895195,
 -0.028169365599751472,
 -0.04524508863687515,
 0.004226373974233866,
 -0.0709991306066513,
 0.03207865357398987,
 -0.030102146789431572,
 0.017204077914357185,
 -0.01215374656021595,
 0.03888555243611336,
 0.011622089892625809,
 0.03903420269489288,
 -0.03630085289478302,
 -0.005504735745489597,
 -0.03828619047999382,
 0.015282387845218182,
 -0.01435568742454052,
 -0.008106876164674759,
 0.02765306644141674,
 -0.037148743867874146,
 0.024654686450958252,
 -0.016573915258049965,
 0.004295009188354015,
 -0.0031221983954310417,
 -0.017577221617102623,
 0.007454912178218365,
 -0.026190873235464096,
 0.014728325419127941,
 -0.019390925765037537,
 0.03636304661631584,
 -0.07101158797740936,
 -0.010387644171714783,
 -0.02

In [4]:
vectors = embeddings.embed_documents(
    [
        "Today is Monday",
        "Today is Tuesday",
        "Today is April Fools day",
    ]
)
len(vectors), len(vectors[0])

(3, 768)

In [26]:
# Load data :
from langchain_community.document_loaders import TextLoader
docLoad = TextLoader("data/speech.txt").load()

# split the data :
from langchain_text_splitters import RecursiveCharacterTextSplitter
textSPlitterOperation = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=20)
finalDocments = textSPlitterOperation.split_documents(docLoad)

# Vector Embedding and Vector StoreDB (here'll use ChromaDB)
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", credentials=service_account_credentials)

from langchain_community.vectorstores import Chroma
db = Chroma.from_documents(finalDocments, embeddings)
db

<langchain_community.vectorstores.chroma.Chroma at 0x7f697ccde0b0>

In [27]:
query = "models capable of creating new content"
retrive_results = db.similarity_search(query)
print(retrive_results)

[Document(metadata={'source': 'data/speech.txt'}, page_content='new content, such as text, images, music, and more, by learning patterns from existing data. These'), Document(metadata={'source': 'data/speech.txt'}, page_content='new content, such as text, images, music, and more, by learning patterns from existing data. These'), Document(metadata={'source': 'data/speech.txt'}, page_content='code. By learning from vast datasets, these models can generate original works that are'), Document(metadata={'source': 'data/speech.txt'}, page_content='data. These models, like GPT-4, can generate human-like text, create realistic images, and even')]
