# Embedding Techniques

Converting texts into vectors.

In [1]:
import os 
from dotenv import load_dotenv
# Read a local .env file (if present) into the process environment so the
# API key does not have to be hard-coded in the notebook.
load_dotenv()

True

In [2]:
# Fail early with a clear message when the key is missing. The previous
# self-assignment (os.environ[...] = os.getenv(...)) was a no-op when the key
# was set and raised an opaque "TypeError: str expected, not NoneType" when it
# was not, because os.environ only accepts string values.
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )

In [3]:
from langchain_openai import OpenAIEmbeddings

# Embedding client bound to OpenAI's "text-embedding-3-large" model; every
# other setting is left at the library default.
embeddings = OpenAIEmbeddings(
    model="text-embedding-3-large",
)

In [4]:
# Display the client's repr to confirm the selected model and its settings.
embeddings

OpenAIEmbeddings(client=<openai.resources.embeddings.Embeddings object at 0x00000236B7842660>, async_client=<openai.resources.embeddings.AsyncEmbeddings object at 0x00000236B7842F90>, model='text-embedding-3-large', dimensions=None, deployment='text-embedding-ada-002', openai_api_version=None, openai_api_base=None, openai_api_type=None, openai_proxy=None, embedding_ctx_length=8191, openai_api_key=SecretStr('**********'), openai_organization=None, allowed_special=None, disallowed_special=None, chunk_size=1000, max_retries=2, request_timeout=None, headers=None, tiktoken_enabled=True, tiktoken_model_name=None, show_progress_bar=False, model_kwargs={}, skip_empty=False, default_headers=None, default_query=None, retry_min_seconds=4, retry_max_seconds=20, http_client=None, http_async_client=None, check_embedding_ctx_length=True)

In [5]:
# Sample sentence to embed.
text = "My Name is Karan, I used to be a professional Esports athlete, now I am an AI enthusiast."
# embed_query embeds the whole string as one unit and returns the vector as a
# list of floats.
query_result = embeddings.embed_query(text)

In [6]:
# Show the raw embedding (a list of floats).
# NOTE(review): this prints every component of the vector; consider a slice
# such as `query_result[:10]` to keep the saved notebook small.
query_result

[0.005871852859854698,
 0.00012101067113690078,
 -0.02845880016684532,
 -0.005122036207467318,
 0.014380048029124737,
 0.015653708949685097,
 -0.029006611555814743,
 0.005608218722045422,
 0.02152898721396923,
 0.008963563479483128,
 -0.010579606518149376,
 -0.05839668959379196,
 0.0006770606269128621,
 -0.014530695974826813,
 0.013106387108564377,
 0.043112754821777344,
 0.005758867133408785,
 0.016858894377946854,
 -0.008888239040970802,
 -0.029609203338623047,
 0.03379996120929718,
 -0.019036443904042244,
 -0.04609832540154457,
 -0.015311327762901783,
 -0.005875276867300272,
 -0.007909026928246021,
 0.02266569435596466,
 0.0025969683192670345,
 -0.014366352930665016,
 -0.0029958433005958796,
 0.018077773973345757,
 0.01457178220152855,
 0.019406218081712723,
 0.009333335794508457,
 0.02091269940137863,
 -0.007333824411034584,
 -0.017036933451890945,
 -0.026048429310321808,
 -0.02333676442503929,
 0.02224114164710045,
 0.018379071727395058,
 0.013667893595993519,
 -0.0234737172722816

In [8]:
# First component of the embedding vector (a single float).
query_result[0]

0.005871852859854698

### `embed_query` takes the entire sentence and converts it into a single vector of 3072 dimensions, the default output size of the model we chose (`model="text-embedding-3-large"`).

### We can also request a smaller embedding size by passing the `dimensions` parameter when creating the client:
`embeddings_1024 = OpenAIEmbeddings(model="text-embedding-3-large", dimensions = 1024)`

In [9]:
# Number of dimensions in the embedding vector.
len(query_result)

3072

In [10]:
# Load a plain-text file so it can be split and embedded further down.

from langchain_community.document_loaders import TextLoader

# The path is relative to the notebook's working directory.
# NOTE(review): no `encoding` is passed, so the file is presumably decoded with
# the platform default — consider setting it explicitly once the file's
# encoding is confirmed.
loader = TextLoader('comp_systems.txt')
# load() returns a list of Document objects (page_content plus metadata).
docs = loader.load()
docs


[Document(metadata={'source': 'comp_systems.txt'}, page_content='Charles Babbage, an English mechanical engineer and polymath, originated the concept of a programmable computer. Considered the "father of the computer",[22] he conceptualized and invented the first mechanical computer in the early 19th century.\n\nAfter working on his difference engine he announced his invention in 1822, in a paper to the Royal Astronomical Society, titled "Note on the application of machinery to the computation of astronomical and mathematical tables".[23] He also designed to aid in navigational calculations, in 1833 he realized that a much more general design, an analytical engine, was possible. The input of programs and data was to be provided to the machine via punched cards, a method being used at the time to direct mechanical looms such as the Jacquard loom. For output, the machine would have a printer, a curve plotter and a bell. The machine would also be able to punch numbers onto cards to be rea

In [11]:
# Number of Document objects the loader produced for the file.
len(docs)

1

In [12]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Break the loaded document into chunks of at most 500 characters, with up to
# 50 characters shared between neighbouring chunks so context is not lost at
# the boundaries.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
)
final_documents = text_splitter.split_documents(docs)

# Display the resulting chunks.
final_documents

[Document(metadata={'source': 'comp_systems.txt'}, page_content='Charles Babbage, an English mechanical engineer and polymath, originated the concept of a programmable computer. Considered the "father of the computer",[22] he conceptualized and invented the first mechanical computer in the early 19th century.'),
 Document(metadata={'source': 'comp_systems.txt'}, page_content='After working on his difference engine he announced his invention in 1822, in a paper to the Royal Astronomical Society, titled "Note on the application of machinery to the computation of astronomical and mathematical tables".[23] He also designed to aid in navigational calculations, in 1833 he realized that a much more general design, an analytical engine, was possible. The input of programs and data was to be provided to the machine via punched cards, a method being used at the time to direct'),
 Document(metadata={'source': 'comp_systems.txt'}, page_content='cards, a method being used at the time to direct mech

## Vector Embeddings And Vector Store (ChromaDB)

In [14]:
from langchain_community.vectorstores import Chroma

# Embed every chunk with the OpenAI embedding client and index the vectors in
# a Chroma vector store.
# NOTE(review): newer LangChain releases ship this class in the separate
# `langchain_chroma` package — confirm against the installed version.
db = Chroma.from_documents(
    documents=final_documents,
    embedding=embeddings,
)

In [15]:
# Display the vector store object to confirm it was created.
db

<langchain_community.vectorstores.chroma.Chroma at 0x236db55b4d0>

In [19]:
# Query the vector store: the query text is embedded and the stored chunks
# closest to it are returned.

query = (
    "an English mechanical engineer and polymath, originated the concept of "
    "a programmable computer. Considered the father of the computer,[22] he "
    "conceptualized and invented the first mechanical computer in the early "
    "19th century."
)
retrieved = db.similarity_search(query)
print(retrieved)

[Document(metadata={'source': 'comp_systems.txt'}, page_content='Charles Babbage, an English mechanical engineer and polymath, originated the concept of a programmable computer. Considered the "father of the computer",[22] he conceptualized and invented the first mechanical computer in the early 19th century.'), Document(metadata={'source': 'comp_systems.txt'}, page_content="computer and to move ahead faster than anyone else could follow. Nevertheless, his son, Henry Babbage, completed a simplified version of the analytical engine's computing unit (the mill) in 1888. He gave a successful demonstration of its use in computing tables in 1906."), Document(metadata={'source': 'comp_systems.txt'}, page_content='After working on his difference engine he announced his invention in 1822, in a paper to the Royal Astronomical Society, titled "Note on the application of machinery to the computation of astronomical and mathematical tables".[23] He also designed to aid in navigational calculations,