### Embedding Techniques

In [5]:
import os
from dotenv import load_dotenv
load_dotenv()

True

In [7]:
api_key = os.getenv("OPENAI_API_KEY")
if api_key is None:
    raise ValueError("OPENAI_API_KEY not set in environment")
os.environ["OPENAI_API_KEY"] = api_key

In [8]:
from langchain_openai import OpenAIEmbeddings
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
embeddings

OpenAIEmbeddings(client=<openai.resources.embeddings.Embeddings object at 0x000001C7DB04F370>, async_client=<openai.resources.embeddings.AsyncEmbeddings object at 0x000001C7DB5EF340>, model='text-embedding-3-large', dimensions=None, deployment='text-embedding-ada-002', openai_api_version=None, openai_api_base=None, openai_api_type=None, openai_proxy=None, embedding_ctx_length=8191, openai_api_key=SecretStr('**********'), openai_organization=None, allowed_special=None, disallowed_special=None, chunk_size=1000, max_retries=2, request_timeout=None, headers=None, tiktoken_enabled=True, tiktoken_model_name=None, show_progress_bar=False, model_kwargs={}, skip_empty=False, default_headers=None, default_query=None, retry_min_seconds=4, retry_max_seconds=20, http_client=None, http_async_client=None, check_embedding_ctx_length=True)

In [9]:
text = "I am learning Generative AI"
embeded_text = embeddings.embed_query(text)
embeded_text

[-0.02045566588640213,
 0.019907614216208458,
 -0.02630648948252201,
 -0.014441907405853271,
 0.02755071595311165,
 0.0040067033842206,
 -0.01731547713279724,
 0.01977430284023285,
 0.010316706262528896,
 0.03643804416060448,
 0.0022921899799257517,
 -0.004817672073841095,
 0.02572881244122982,
 -0.01599719002842903,
 0.017863528802990913,
 0.014701120555400848,
 -0.004114091861993074,
 0.00586563628166914,
 -0.012501507066190243,
 -0.054657064378261566,
 0.017433974891901016,
 -0.03113527037203312,
 -0.05009490251541138,
 0.013619828969240189,
 -0.01690073497593403,
 0.007272796239703894,
 0.011012880131602287,
 0.02377360127866268,
 -0.028276514261960983,
 0.020129796117544174,
 0.0071505955420434475,
 0.012716284021735191,
 0.009961212985217571,
 -0.0025199276860803366,
 0.015730569139122963,
 0.027565527707338333,
 0.01010933518409729,
 0.028172828257083893,
 -0.028557945042848587,
 -0.0015071425586938858,
 0.01879669725894928,
 0.01808571070432663,
 -0.002832835540175438,
 0.02681

In [10]:
len(embeded_text)

3072

In [12]:
embeddings_1024 = OpenAIEmbeddings(model="text-embedding-3-large",dimensions=1024)
embeded_text_2 = embeddings_1024.embed_query(text)
len(embeded_text_2)


1024

### Small AI Process
##### Data Ingestion -> Data Splitting -> Data Embedding -> Vector Store

In [13]:
# Data Ingestion
from langchain_community.document_loaders import TextLoader
loader = TextLoader("../00.Data/text.txt")
text = loader.load()

In [14]:
text

[Document(metadata={'source': '../00.Data/text.txt'}, page_content='What is AI?\n\nArtificial Intelligence (AI) is the simulation of human intelligence in machines that are designed to think, learn, and make decisions. AI systems use algorithms and large datasets to recognize patterns, solve problems, and adapt to new situations.\n\nKey Subfields of AI\n\nMachine Learning (ML) – Teaching machines to learn from data and improve performance over time without being explicitly programmed.\n\nDeep Learning – A subset of ML that uses neural networks with multiple layers, enabling breakthroughs in vision, speech, and natural language.\n\nNatural Language Processing (NLP) – Enables machines to understand, interpret, and generate human language (used in chatbots, translation, summarization).\n\nComputer Vision – AI that allows machines to interpret and process visual information from the world (image recognition, object detection).\n\nRobotics – Applying AI to physical machines that can interac

In [None]:
# Data Splitting
from langchain_text_splitters import RecursiveCharacterTextSplitter
splitter = RecursiveCharacterTextSplitter(chunk_size=500,chunk_overlap=50)
splitted_docs = splitter.split_documents(text) 

In [16]:
splitted_docs[0]

Document(metadata={'source': '../00.Data/text.txt'}, page_content='What is AI?\n\nArtificial Intelligence (AI) is the simulation of human intelligence in machines that are designed to think, learn, and make decisions. AI systems use algorithms and large datasets to recognize patterns, solve problems, and adapt to new situations.\n\nKey Subfields of AI\n\nMachine Learning (ML) – Teaching machines to learn from data and improve performance over time without being explicitly programmed.')

In [19]:
from langchain_community.vectorstores import Chroma
db = Chroma.from_documents(splitted_docs,embeddings_1024)
db

<langchain_community.vectorstores.chroma.Chroma at 0x1c7dc3f6170>

In [22]:
query = "What is Computer Vision?"
result=db.similarity_search(query)
print(result)

[Document(metadata={'source': '../00.Data/text.txt'}, page_content='Deep Learning – A subset of ML that uses neural networks with multiple layers, enabling breakthroughs in vision, speech, and natural language.\n\nNatural Language Processing (NLP) – Enables machines to understand, interpret, and generate human language (used in chatbots, translation, summarization).\n\nComputer Vision – AI that allows machines to interpret and process visual information from the world (image recognition, object detection).'), Document(metadata={'source': '../00.Data/text.txt'}, page_content='What is AI?\n\nArtificial Intelligence (AI) is the simulation of human intelligence in machines that are designed to think, learn, and make decisions. AI systems use algorithms and large datasets to recognize patterns, solve problems, and adapt to new situations.\n\nKey Subfields of AI\n\nMachine Learning (ML) – Teaching machines to learn from data and improve performance over time without being explicitly programm