## Setup and Import Libraries

In [None]:
import os
from langchain_openai import OpenAIEmbeddings
from dotenv import load_dotenv

import warnings
# Install a global "ignore" filter so every Python warning is suppressed for
# the rest of the session (presumably to keep cell outputs free of library
# warning noise; confirm this blanket suppression is intended).
warnings.filterwarnings('ignore')

In [2]:
# Load variables from a .env file into the process environment. The call is
# left as the cell's last expression so its return value is displayed
# (True in the recorded run).
load_dotenv()

True

In [3]:
# Make sure the OpenAI key is available before any client is created.
# os.getenv returns None when the variable is missing, and assigning None into
# os.environ raises an opaque "TypeError: str expected, not NoneType", so the
# value is validated first and a readable error is raised instead.
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to your .env file or export it "
        "in your shell before running this notebook."
    )
os.environ["OPENAI_API_KEY"] = openai_api_key

In [4]:
# Embedding client used by the cells below; the model name is kept in one
# place so it is easy to spot and change.
EMBEDDING_MODEL = "text-embedding-3-small"
embeddings = OpenAIEmbeddings(model=EMBEDDING_MODEL)

## OpenAI Embeddings

In [5]:
# Embed a single query string, then show the input, the vector length and the
# raw vector values.
text = "Hello, I am learning about Embeddings!"
text_embedding = embeddings.embed_query(text=text)

print(
    f"Text: {text}",
    f"Embedding Length: {len(text_embedding)}",
    sep="\n",
)
print(text_embedding)

Text: Hello, I am learning about Embeddings!
Embedding Length: 1536
[-0.0029620041605085135, -0.04828907549381256, -0.0015035830438137054, 0.005989415105432272, -0.03774920105934143, -0.003242319915443659, 0.034310657531023026, 0.027558162808418274, -0.025913642719388008, 0.023608822375535965, 0.012869611382484436, -0.02848009020090103, -0.015573102049529552, -0.06009971722960472, 0.006092197727411985, 0.019497524946928024, -0.010683148168027401, 0.034734249114990234, -0.006107770837843418, 0.058455195277929306, 0.024032412096858025, -0.03615451604127884, -0.0041735912673175335, 0.06164456903934479, 0.015535727143287659, -0.023795699700713158, 0.039991725236177444, 0.06498344242572784, 0.019111311063170433, -0.006279075052589178, 0.014003333635628223, -0.02897842973470688, -0.009779908694326878, -0.01595931500196457, -0.012807318940758705, 0.010820191353559494, -0.050755854696035385, 0.05262462794780731, -0.04298176243901253, 0.039916977286338806, 0.023509155958890915, -0.0165946986526

In [6]:
# Repeat the single-query embedding with a different sentence; `text` and
# `text_embedding` from the previous cell are overwritten here.
text = "Langchain and RAG are amazing framework and projects to work on"
text_embedding = embeddings.embed_query(text=text)

print(
    f"Text: {text}",
    f"Embedding Length: {len(text_embedding)}",
    sep="\n",
)
print(text_embedding)

Text: Langchain and RAG are amazing framework and projects to work on
Embedding Length: 1536
[-0.04561923071742058, -0.023481694981455803, 0.009566870518028736, -0.0023111365735530853, 0.014182286337018013, -0.029708733782172203, -0.00020981108536943793, -0.00370330479927361, -0.011260789819061756, -0.03105289675295353, 0.015677323564887047, 0.009669740684330463, -0.039913397282361984, 0.04424763470888138, 0.005585818085819483, -0.014388024806976318, -4.714855822385289e-05, -0.05384879559278488, 0.02852916345000267, 0.07516337186098099, -0.0214243046939373, -0.002863203175365925, -0.00856560654938221, 0.040818650275468826, -0.017967887222766876, -0.013455340638756752, -0.003398125059902668, 0.06989645212888718, 0.012639242224395275, -0.026938114315271378, 0.0016287682810798287, -0.03568888455629349, 0.00595271959900856, 0.023852026090025902, 0.006631658878177404, 0.026910681277513504, -0.0022717032115906477, -0.011288221925497055, -0.012646100483834743, 0.020080141723155975, 0.02080708

In [7]:
# Sample sentences for batch embedding: two about a cat/feline resting, one
# about a dog, and two about programming in Python.
sentences = [
    "The cat sat on a mat",
    "A feline rested on the rug",
    "The dog played in the yard",
    "I love programming in Python",
    "Python is my favourite programming language",
]

# embed_documents takes the whole list in one call; the result is indexed per
# input sentence below.
sentence_embedding = embeddings.embed_documents(texts=sentences)

# Show the vectors for the first two sentences, one per line.
print(sentence_embedding[0], sentence_embedding[1], sep="\n")

[-0.02105872519314289, -0.047883372753858566, 0.003616806585341692, 0.009264790453016758, 0.03640394285321236, -0.008622676134109497, -0.00667012482881546, 0.03168636932969093, 0.018254389986395836, -0.008720959536731243, 0.036430153995752335, -0.004278577398508787, 0.04743782430887222, 0.055405281484127045, 0.02799093723297119, 0.03367823362350464, 0.003390755970031023, 0.0018116793362423778, -0.06777580827474594, 0.04890551418066025, 0.03014005348086357, -0.043060965836048126, -0.0014119959669187665, 0.011807038448750973, 0.00273389951325953, 0.020613176748156548, -0.01546315848827362, -0.01550247147679329, 0.006424418184906244, 0.011341833509504795, 0.01600043661892414, -0.035853561013936996, -0.026824647560715675, -0.05129050835967064, -0.037819214165210724, -0.0003534085117280483, -0.016354255378246307, -0.012580196373164654, -0.04277266934514046, -0.024426547810435295, -0.03491004556417465, 0.0010049415286630392, 0.024990035220980644, 0.014912774786353111, 0.025553524494171143, -

## Different OpenAI Embedding Models

In [8]:
# Reference table of the OpenAI embedding models compared in this notebook.
# Each entry records the vector size, a short description, the price in USD
# per one million tokens, and a typical use case.
models_comparison = {
    "text-embedding-3-small": {
        "dimensions": 1536,
        "description": "Good balance of performance and cost",
        "cost_per_1m_tokens": 0.02,
        "use_case": "General purpose, cost-effective"
    },
    "text-embedding-3-large": {
        "dimensions": 3072,
        "description": "Highest quality embeddings",
        "cost_per_1m_tokens": 0.13,
        "use_case": "When accuracy is critical"
    },
    "text-embedding-ada-002": {
        "dimensions": 1536,
        "description": "Previous generation model",
        "cost_per_1m_tokens": 0.10,
        "use_case": "Legacy applications"
    }
}


def format_model_details(model_name, details):
    """Return the multi-line summary shown for one embedding model.

    Args:
        model_name: Model identifier used as the heading line.
        details: Mapping with the keys "dimensions", "cost_per_1m_tokens",
            "description" and "use_case".

    Returns:
        A string of five newline-terminated lines. The cost is rendered with
        two decimal places so 0.10 appears as "$0.10" rather than "$0.1",
        keeping the price column consistent across models.

    Raises:
        KeyError: If ``details`` is missing one of the expected keys.
    """
    return (
        f"Model: {model_name}\n"
        f"  📏 Dimensions: {details['dimensions']}\n"
        f"  💰 Cost: ${details['cost_per_1m_tokens']:.2f}/1M tokens\n"
        f"  📝 Description: {details['description']}\n"
        f"  🎯 Use case: {details['use_case']}\n"
    )


# Display comparison
print("📊 OpenAI Embedding Models Comparison:\n")
for model_name, details in models_comparison.items():
    # The summary already ends with a newline; print() adds the blank
    # separator line between models.
    print(format_model_details(model_name, details))

📊 OpenAI Embedding Models Comparison:

Model: text-embedding-3-small
  📏 Dimensions: 1536
  💰 Cost: $0.02/1M tokens
  📝 Description: Good balance of performance and cost
  🎯 Use case: General purpose, cost-effective

Model: text-embedding-3-large
  📏 Dimensions: 3072
  💰 Cost: $0.13/1M tokens
  📝 Description: Highest quality embeddings
  🎯 Use case: When accuracy is critical

Model: text-embedding-ada-002
  📏 Dimensions: 1536
  💰 Cost: $0.1/1M tokens
  📝 Description: Previous generation model
  🎯 Use case: Legacy applications

