## Setup and Import Libraries

In [None]:
from langchain_huggingface import HuggingFaceEmbeddings

import warnings
# Install a blanket "ignore" filter: every Python warning raised from this
# point on (deprecation notices included) is hidden from the notebook output.
warnings.filterwarnings('ignore')

## Hugging Face Embeddings

In [2]:
# Name of the embedding model handed to HuggingFaceEmbeddings.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

# Build the embedding object used by the cells that follow.
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

# Bare last expression so the notebook shows the object's repr as cell output.
embeddings

HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, query_encode_kwargs={}, multi_process=False, show_progress=False)

In [3]:
# Sample sentence to embed as a single query.
text = "Hello, I am learning about Embeddings!"

# Embed the one string; the length of the result is reported below.
text_embedding = embeddings.embed_query(text=text)

# Summary lines first (one per line), then the raw embedding itself.
print(
    f"Text: {text}",
    f"Embedding Length: {len(text_embedding)}",
    sep="\n",
)
print(text_embedding)

Text: Hello, I am learning about Embeddings!
Embedding Length: 384
[-0.018163328990340233, -0.09955169260501862, 0.013816121965646744, -0.008125878870487213, 0.01415223628282547, 0.06406482309103012, -0.0062533351592719555, -0.003017911920323968, 0.02528717927634716, -0.020198646932840347, 0.02432970143854618, 0.07435058802366257, 0.051177170127630234, 0.022038454189896584, -0.058306146413087845, 0.015268314629793167, 0.023584412410855293, 0.09455394744873047, -0.06508847326040268, 0.013296643272042274, -0.02049759030342102, -0.05690853297710419, 0.03030337020754814, -0.08365615457296371, 0.026596330106258392, -0.015231478959321976, -0.043615393340587616, 0.05398396775126457, 0.0902571752667427, -0.08893876522779465, 0.03964461013674736, -0.008834999985992908, -0.030343735590577126, 0.07425572723150253, -0.054099176079034805, 0.11108004301786423, 0.03689989075064659, -0.008959817700088024, -0.06140238791704178, -0.00314342207275331, 0.02195824310183525, 0.04220819100737572, -0.02893788

In [4]:
# Small corpus: two sentences about a cat, one about a dog, two about Python.
sentences = [
    "The cat sat on a mat",
    "A feline rested on the rug",
    "The dog played in the yard",
    "I love programming in Python",
    "Python is my favourite programming language",
]

# Embed the whole batch in one call; the result is indexed per sentence below.
sentence_embedding = embeddings.embed_documents(texts=sentences)

# Show the embeddings of the first two sentences, one per line.
print(sentence_embedding[0], sentence_embedding[1], sep="\n")

[0.12403588742017746, -0.019651127979159355, -0.02453743666410446, 0.05351785570383072, -0.06559471040964127, 0.020825592800974846, 0.026947803795337677, 0.027260996401309967, -0.027275962755084038, 0.05574776977300644, -0.03161691501736641, 0.05673734471201897, 0.04232770577073097, 0.04341622442007065, -0.06020624563097954, -0.05678749829530716, -0.06002932786941528, -0.029868002980947495, 0.06268008053302765, 0.04571900889277458, -0.031081920489668846, -0.0015217073960229754, 0.03421218693256378, -0.05939732491970062, -0.056173283606767654, 0.07893767952919006, -0.025612488389015198, -0.048605721443891525, 0.03694688156247139, -0.0041404408402740955, -0.07109127193689346, -0.002202989999204874, -0.02952180802822113, 0.056354209780693054, 0.018776869401335716, -0.09552905708551407, 0.00223659910261631, -0.06496160477399826, 0.035332899540662766, 0.025230571627616882, 0.07101701945066452, -0.010285301133990288, -0.00324084865860641, -0.06567946076393127, 0.03739187493920326, 0.01491645

## Popular Model Comparison

In [5]:
# Reference table of sentence-transformers models, keyed by model name.
# Each entry records the embedding size, a short description and a use case.
models = {
    "all-MiniLM-L6-v2": dict(
        size=384,
        description="Fast and efficient, good quality",
        use_case="General purpose, real-time applications",
    ),
    "all-mpnet-base-v2": dict(
        size=768,
        description="Best quality, slower than MiniLM",
        use_case="When quality matters more than speed",
    ),
    "all-MiniLM-L12-v2": dict(
        size=384,
        description="Slightly better than L6, bit slower",
        use_case="Good balance of speed and quality",
    ),
    "multi-qa-MiniLM-L6-cos-v1": dict(
        size=384,
        description="Optimized for question-answering",
        use_case="Q&A systems, semantic search",
    ),
    "paraphrase-multilingual-MiniLM-L12-v2": dict(
        size=384,
        description="Supports 50+ languages",
        use_case="Multilingual applications",
    ),
}

# Heading, followed by one four-line summary per model in insertion order.
print("📊 Popular Open Source Embedding Models:\n")
for model_name, info in models.items():
    # A single print per model; sep="\n" puts each field on its own line and
    # the trailing "\n" in the last field leaves a blank line between models.
    print(
        f"Model: sentence-transformers/{model_name}",
        f"  📏 Embedding size: {info['size']} dimensions",
        f"  📝 Description: {info['description']}",
        f"  🎯 Use case: {info['use_case']}\n",
        sep="\n",
    )


📊 Popular Open Source Embedding Models:

Model: sentence-transformers/all-MiniLM-L6-v2
  📏 Embedding size: 384 dimensions
  📝 Description: Fast and efficient, good quality
  🎯 Use case: General purpose, real-time applications

Model: sentence-transformers/all-mpnet-base-v2
  📏 Embedding size: 768 dimensions
  📝 Description: Best quality, slower than MiniLM
  🎯 Use case: When quality matters more than speed

Model: sentence-transformers/all-MiniLM-L12-v2
  📏 Embedding size: 384 dimensions
  📝 Description: Slightly better than L6, bit slower
  🎯 Use case: Good balance of speed and quality

Model: sentence-transformers/multi-qa-MiniLM-L6-cos-v1
  📏 Embedding size: 384 dimensions
  📝 Description: Optimized for question-answering
  🎯 Use case: Q&A systems, semantic search

Model: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
  📏 Embedding size: 384 dimensions
  📝 Description: Supports 50+ languages
  🎯 Use case: Multilingual applications

