## Chroma DB

### Adding Text

In [None]:
from langchain.vectorstores import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from dotenv import load_dotenv


# Load settings from a local .env file before the embedding client is built
# (presumably this is where the Google API key comes from -- confirm).
load_dotenv()

embeddings = GoogleGenerativeAIEmbeddings(model="gemini-embedding-001")

text_docs = ["Hello World", "World Economy"]
# Stable ids make this cell safe to re-run: the collection lives on disk in
# "Chroma_db", so without ids every "Restart & Run All" would append the same
# two texts again under fresh random ids. With fixed ids the write is an upsert.
text_ids = ["text-1", "text-2"]

ch_text_db = Chroma.from_texts(
    texts=text_docs,
    embedding=embeddings,
    ids=text_ids,
    collection_name="My_text",
    persist_directory="Chroma_db",
)

# No explicit persist() call: since Chroma 0.4.x a store created with
# persist_directory is written to disk automatically, and persist() only emits
# a deprecation warning.

  ch_text_db.persist()


In [4]:
# Fetch everything held in the text collection and show only the document strings.
stored_text_items = ch_text_db.get()
print(stored_text_items['documents'])

['Hello World', 'World Economy']


In [5]:
text_docs2 = ["Data Science", "Machine Learning"]

# Explicit ids keep this cell idempotent: re-running it overwrites the same two
# entries instead of appending duplicates to the persisted collection.
ch_text_db.add_texts(texts=text_docs2, ids=["text-3", "text-4"])

# Show every document string now held in the collection.
print(ch_text_db.get()['documents'])

['Hello World', 'World Economy', 'Data Science', 'Machine Learning']


### Adding Documents

In [1]:
from importlib import metadata
from langchain.vectorstores import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.schema import Document
from dotenv import load_dotenv


# Load settings from a local .env file before the embedding client is built
# (presumably this is where the Google API key comes from -- confirm).
load_dotenv()

embeddings = GoogleGenerativeAIEmbeddings(model="gemini-embedding-001")

docs = [
  Document(page_content="Hello World", metadata={"source": "doc1"}),
  # NOTE(review): "do2" looks like a typo for "doc2"; left unchanged because the
  # recorded similarity-search output in this notebook prints this exact value.
  Document(page_content="World Economy", metadata={"source": "do2"})
]

# Stable ids make this cell safe to re-run: the collection is stored on disk in
# "Chroma_db", so without ids each fresh run would append the same documents
# again under new random ids. With fixed ids the write is an upsert.
ch_doc_db = Chroma.from_documents(
  documents=docs,
  embedding=embeddings,
  ids=["doc-1", "doc-2"],
  collection_name="My_docs",
  persist_directory="Chroma_db",
)

# No explicit persist() call: since Chroma 0.4.x a store created with
# persist_directory is written to disk automatically, and persist() only emits
# a deprecation warning.
print(ch_doc_db.get()["documents"])

['Hello World', 'World Economy']


  ch_doc_db.persist()


In [2]:
docs2 = [
  Document(page_content="Data Science", metadata={"source": "doc3"}),
  Document(page_content="Machine Learning", metadata={"source": "doc4"})
]

# Explicit ids keep this cell idempotent: re-running it overwrites the same two
# entries instead of appending duplicates to the persisted collection.
ch_doc_db.add_documents(documents=docs2, ids=["doc-3", "doc-4"])

# Show every document string now held in the collection.
print(ch_doc_db.get()["documents"])

['Hello World', 'World Economy', 'Data Science', 'Machine Learning']


### Similarity Search

In [6]:
# Query the document collection with "World", asking for up to four matches,
# then print each match's text next to its metadata.
result = ch_doc_db.similarity_search(query="World", k=4)
for match in result:
  print(match.page_content, match.metadata)

World Economy {'source': 'do2'}
Hello World {'source': 'doc1'}
Machine Learning {'source': 'doc4'}
Data Science {'source': 'doc3'}


## FAISS

### Adding Text

In [None]:
from langchain.vectorstores import FAISS
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from dotenv import load_dotenv


# Read settings from a local .env file before creating the embedding client.
load_dotenv()

embeddings = GoogleGenerativeAIEmbeddings(model="gemini-embedding-001")

text_docs = [
  "Hello World 2",
  "World Economy 2",
]

# Embed the two strings and build a FAISS store from them.
faiss_text_db = FAISS.from_texts(text_docs, embeddings)

In [10]:
# List the stored documents through the store's public attributes
# (index_to_docstore_id + docstore.search) instead of the private
# docstore._dict, which is an implementation detail of the in-memory docstore.
result1 = [
  faiss_text_db.docstore.search(doc_id)
  for doc_id in faiss_text_db.index_to_docstore_id.values()
]
for doc in result1:
  print(doc.page_content)

Hello World 2
World Economy 2


In [12]:
# NOTE(review): the first string ends with a trailing space; it may be
# unintentional, but it is part of the embedded text and is kept as written.
text_docs2 = [
  "Data Science 2 ",
  "Machine Learning 2",
]

# Build a second, separate FAISS store from these strings.
faiss_text_db2 = FAISS.from_texts(text_docs2, embeddings)

In [14]:
# List the stored documents through the store's public attributes
# (index_to_docstore_id + docstore.search) instead of the private
# docstore._dict, which is an implementation detail of the in-memory docstore.
result2 = [
  faiss_text_db2.docstore.search(doc_id)
  for doc_id in faiss_text_db2.index_to_docstore_id.values()
]
for doc in result2:
  print(doc.page_content)

Data Science 2 
Machine Learning 2


In [15]:
# Merge the contents of faiss_text_db into faiss_text_db2; the call's result is
# not used, and the next cell inspects faiss_text_db2 to see the combined entries.
faiss_text_db2.merge_from(faiss_text_db)

In [16]:
# List the merged store's documents through its public attributes
# (index_to_docstore_id + docstore.search) instead of the private
# docstore._dict, which is an implementation detail of the in-memory docstore.
result3 = [
  faiss_text_db2.docstore.search(doc_id)
  for doc_id in faiss_text_db2.index_to_docstore_id.values()
]
for doc in result3:
  print(doc.page_content)

Data Science 2 
Machine Learning 2
Hello World 2
World Economy 2


### Adding Documents

In [17]:
from langchain.vectorstores import FAISS
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.schema import Document
from dotenv import load_dotenv

# Read settings from a local .env file before creating the embedding client.
load_dotenv()

embeddings = GoogleGenerativeAIEmbeddings(model="gemini-embedding-001")

# Two documents, each tagged with a "source" label in its metadata.
docs = [
  Document(page_content="Hello World 2", metadata={"source": "doc1"}),
  Document(page_content="World Economy 2", metadata={"source": "doc2"}),
]

# Embed the documents and build a FAISS store from them.
faiss_doc_db = FAISS.from_documents(docs, embeddings)


In [18]:
# List the stored documents through the store's public attributes
# (index_to_docstore_id + docstore.search) instead of the private
# docstore._dict, which is an implementation detail of the in-memory docstore.
result4 = [
  faiss_doc_db.docstore.search(doc_id)
  for doc_id in faiss_doc_db.index_to_docstore_id.values()
]
for doc in result4:
  print(doc.page_content)

Hello World 2
World Economy 2


In [19]:
docs2 = [
  Document(page_content="Data Science 2", metadata={"source": "doc3"}),
  Document(page_content="Machine Learning 2", metadata={"source": "doc4"})
]

# Build a second store from the new documents, then merge the first store into it.
faiss_doc_db2 = FAISS.from_documents(documents=docs2, embedding=embeddings)

faiss_doc_db2.merge_from(faiss_doc_db)

# List the merged store's documents through its public attributes
# (index_to_docstore_id + docstore.search) instead of the private
# docstore._dict, which is an implementation detail of the in-memory docstore.
result5 = [
  faiss_doc_db2.docstore.search(doc_id)
  for doc_id in faiss_doc_db2.index_to_docstore_id.values()
]
for doc in result5:
  print(doc.page_content)

Data Science 2
Machine Learning 2
Hello World 2
World Economy 2


In [22]:
# Query the merged document store with "Science", asking for up to two matches,
# then print each match's text next to its metadata.
result6 = faiss_doc_db2.similarity_search(query="Science", k=2)
for match in result6:
  print(match.page_content, match.metadata)

Data Science 2 {'source': 'doc3'}
Machine Learning 2 {'source': 'doc4'}
