In [None]:
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_classic.schema import Document
from langchain_chroma import Chroma
from dotenv import load_dotenv

In [None]:
# Load variables from a local .env file into the process environment.
# NOTE(review): no visible cell reads an environment variable directly;
# presumably this is for credentials used by the HuggingFace stack - confirm.
load_dotenv()

In [None]:
# Sample corpus: five short documents, each tagged with a 'source' topic
# in its metadata so it can be filtered on later.
doc1 = Document(
    page_content=(
        "This is a test document about machine learning. "
        "Machine Learning is fast growing field in today's world. "
        "Those without sound knowledge of maths now can work in this domain"
    ),
    metadata={"source": "Machine Learning"},
)
doc2 = Document(
    page_content=(
        "This is a test document about data analysis. "
        "Data Analysis is considered as a sub-branch of machine learning. "
        "It involves inspecting, cleansing, transforming, and modeling data to discover useful information."
    ),
    metadata={"source": "Data Analysis"},
)
doc3 = Document(
    page_content=(
        "This is a test document about robotics. "
        "Robotics is an interdisciplinary branch of engineering and science that includes mechanical engineering, electronic engineering, information engineering, computer science, and others. "
        "Robotics deals with the design, construction, operation, and use of robots."
    ),
    metadata={"source": "Robotics"},
)
doc4 = Document(
    page_content=(
        "This is a test document about ethical hacking. "
        "Ethical Hacking is the practice of legally breaking into computers and devices to test an organization's defenses. "
        "These hackers are often referred to as 'white hats.'"
    ),
    metadata={"source": "Ethical Hacking"},
)
doc5 = Document(
    page_content=(
        "This is a test document about devops. "
        "Devops is a set of practices that combines software development (Dev) and IT operations (Ops). "
        "It aims to shorten the systems development life cycle and provide continuous delivery with high software quality."
    ),
    metadata={"source": "Devops"},
)

In [None]:
# Collect the sample documents into one list (order: doc1 .. doc5).
doc = [
    doc1,
    doc2,
    doc3,
    doc4,
    doc5,
]

In [None]:
# Embedding model used to vectorise both the documents and the queries.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

In [None]:
# Chroma collection persisted on disk and embedded with `embeddings`.
vector_store = Chroma(
    collection_name="software_jobs",
    persist_directory="../VectorStore/chroma_db",
    embedding_function=embeddings,
)

In [None]:
# add documents
# The collection is persisted on disk, so adding without explicit ids would
# store a fresh copy of every document (under a new auto-generated id) each
# time this cell is re-run. Passing a stable id per document - here its unique
# 'source' metadata value - lets a re-run overwrite the existing entries
# instead of duplicating them.
vector_store.add_documents(
    documents=doc,
    ids=[d.metadata['source'] for d in doc],
)

In [None]:
# Inspect everything stored in the collection: texts, metadata and vectors.
vector_store.get(
    include=["embeddings", "metadatas", "documents"],
)

In [None]:
# Search documents: return the two entries closest to the query.
vector_store.similarity_search(
    query="Which domain of job requires maths knowledge",
    k=2,
)

In [None]:
# Same kind of search, but each hit is returned together with its score.
vector_store.similarity_search_with_score(
    query="People working in which domain are called as white hats?", k=2
)

In [None]:
# Metadata filtering: restrict results to documents whose 'source' matches.
# The query text is left empty so that the filter alone selects the documents.
vector_store.similarity_search_with_score(
    query="",
    filter={"source": "Data Analysis"},
)

In [None]:
# update document
# Replacement text for the 'Machine Learning' entry; the metadata is unchanged,
# so the entry can still be found by the same filter after the update.
updated_doc = Document(
    page_content="Machine learning is a branch of Artificial Intelligence that focuses on developing models and algorithms that let computers learn from data without being explicitly programmed for every task. In simple words, ML teaches the systems to think and understand like humans by learning from the data.",
    metadata={'source':'Machine Learning'}
)

# Resolve the target id(s) at run time instead of pasting an id copied from a
# previous run: a hard-coded id only exists in the store it was copied from,
# so the update would not hit anything on a freshly built collection.
ml_doc_ids = vector_store.get(where={'source': 'Machine Learning'})['ids']
if not ml_doc_ids:
    raise LookupError(
        "No document with source 'Machine Learning' found in the collection; "
        "run the add-documents cell first."
    )

# Every match is updated, so duplicates left by earlier runs stay consistent.
for ml_doc_id in ml_doc_ids:
    vector_store.update_document(document_id=ml_doc_id, document=updated_doc)

In [None]:
# Re-inspect the collection to confirm the result of the update above.
vector_store.get(
    include=["embeddings", "metadatas", "documents"],
)

In [None]:
# delete document
# Removes the entry with the given id from the collection.
# NOTE(review): this id is hard-coded and looks like an auto-generated UUID
# copied from an earlier run; on a different or rebuilt store it presumably
# will not exist, so the call would delete nothing. It is not visible here
# which document it refers to - confirm, and consider looking the id up via
# vector_store.get(where={...}) instead.
vector_store.delete(ids=['0da63be7-1629-41b8-8aa1-f4bd620f02de'])

In [None]:
# Final look at the collection contents after the delete step.
vector_store.get(
    include=["embeddings", "metadatas", "documents"],
)