# Langchain Embeddings

In [1]:
# install langchain
!pip install langchain_core langchain_openai numpy



In [2]:
from dotenv import load_dotenv

load_dotenv()

True

In [3]:
# Create embedding model
import os
from langchain_openai import OpenAIEmbeddings
embeddings = OpenAIEmbeddings(
    model="text-embedding-3-small",
    base_url=os.getenv("AZURE_OPENAI_ENDPOINT"),
    api_key=os.getenv("AZURE_OPENAI_KEY")
)

# Indexing and Retrieval

In [4]:
from langchain_core.vectorstores import InMemoryVectorStore

text = "LangChain is the framework for building context-aware reasoning applications"

vectorstore = InMemoryVectorStore.from_texts(
    [text],
    embedding=embeddings
)

# Use the vectorstore as a retriever
retriever = vectorstore.as_retriever()

In [5]:
retrieved_documents = retriever.invoke("What is Langchain?")

In [9]:
retrieved_documents[0].__dict__

{'id': 'c0f6760e-ae10-41ff-a28b-6a1a1af611d0',
 'metadata': {},
 'page_content': 'LangChain is the framework for building context-aware reasoning applications',
 'type': 'Document'}

# Embedding texts

In [10]:
# Single text

text = "Hi my name is Rithick, I am 25 years old."

single_vector = embeddings.embed_query(text)

In [14]:
print("Length:", len(single_vector))

single_vector

Length: 1536


[0.02393210120499134,
 -0.009793475270271301,
 -0.02285144291818142,
 -0.019451867789030075,
 -0.013181792572140694,
 -0.02245745249092579,
 -0.04243839159607887,
 0.011504518799483776,
 -0.07758233696222305,
 -0.055023569613695145,
 0.009512053802609444,
 -0.05169153958559036,
 -0.027804464101791382,
 -0.04077237471938133,
 0.042258281260728836,
 -0.0173806045204401,
 -0.03577432781457901,
 -0.003233535448089242,
 -0.04340648278594017,
 -0.003444601548835635,
 0.06920722126960754,
 -0.01942935399711132,
 0.02602587826550007,
 0.025530576705932617,
 0.03102392889559269,
 -0.020082252100110054,
 0.030663708224892616,
 -0.023616909980773926,
 0.015748359262943268,
 -0.049395136535167694,
 -0.003990559838712215,
 -0.020982801914215088,
 0.0022879585158079863,
 -0.034130822867155075,
 0.019541922956705093,
 0.008386367000639439,
 -0.002553901867941022,
 -0.0401194766163826,
 -0.008853526785969734,
 -0.0376654788851738,
 0.017493173480033875,
 -0.02505778893828392,
 0.0381832979619503,
 0.0

In [15]:
text1 = "My name is rihtick"
text2 = "I am 25 years old"

vectors = embeddings.embed_documents([text1, text2])
for v in vectors:
    print(v[:100])

[0.015011735260486603, -0.05155312269926071, -0.035874199122190475, 0.0242497269064188, -0.030434047803282738, -0.005991863552480936, -0.045163512229919434, 0.03820935636758804, -0.05080895125865936, -0.029279299080371857, 0.0032060318626463413, -0.010585198178887367, 0.0222481619566679, -0.017885776236653328, 0.02525050938129425, -0.026815835386514664, -0.04611297324299812, 0.02100359834730625, -0.014460021629929543, 0.011515412479639053, 0.012477703392505646, -0.017077453434467316, 0.06127867475152016, 0.007319824770092964, 0.03184540942311287, 0.00011387107224436477, 0.03823501989245415, -0.0008620521402917802, 0.026084493845701218, -0.030511030927300453, 0.026918480172753334, -0.021067751571536064, 0.0429823212325573, -0.033821310847997665, -0.009135346859693527, 0.03238429129123688, 0.01460115797817707, -0.04778094217181206, -0.0035283993929624557, 0.03628477454185486, -0.00922516081482172, -0.051219526678323746, 0.07056798785924911, 0.038363322615623474, -0.03723423555493355, -0.

# Vector stores

In [26]:
# Using Vector stores

from langchain_core.vectorstores import InMemoryVectorStore
from langchain_core.documents import Document

vectorstore = InMemoryVectorStore(
    embedding=embeddings
)

In [30]:
vectorstore.add_texts(["My name is rithick"])
vectorstore.add_texts(["I am 25 years old"])
doc1 = Document("The cookies are tastier")
doc2 = Document("Rithick studied in VIT")
vectorstore.add_documents([doc1, doc2], ids=["id1", "id2"])

['id1', 'id2']

In [39]:
documents = vectorstore.similarity_search("Give me users personal details?", k=5)

In [40]:
documents

[Document(id='2d7824b5-1dc7-48f8-a3f5-e6b4e2486e9c', metadata={}, page_content='My name is rithick'),
 Document(id='da58a31a-9974-4ec8-a652-66f3804d6d97', metadata={}, page_content='I am 25 years old'),
 Document(id='0a3ae68b-104a-4d44-8ecd-172ee72bca0c', metadata={}, page_content='I'),
 Document(id='885b8487-847a-4a00-aff1-01c5aaefdc37', metadata={}, page_content='y'),
 Document(id='12d69cbc-067d-45d5-bf97-3b4706eb0662', metadata={}, page_content='y')]