# ChromaDB
- Chroma is an AI-native open-source vector database focused on developer productivity and happiness.
- Chroma is licensed under Apache 2.0.

In [1]:
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings.sentence_transformer import (SentenceTransformerEmbeddings)

In [2]:
from langchain_community.vectorstores import Chroma

In [3]:
# Build a loader for the speech transcript; the file is expected in the
# notebook's working directory and is decoded as UTF-8.
loader = TextLoader('state_of_the_union.txt', encoding='utf-8')
# Bare expression: the notebook displays the loader object's repr.
loader

<langchain_community.document_loaders.text.TextLoader at 0x1c3ccb31160>

In [4]:
# Read the file through the loader. The displayed result is a list of
# Document objects, each carrying the text in `page_content`.
documents = loader.load()
# Display the loaded documents.
documents

[Document(page_content='So tonight I’m offering a Unity Agenda for the Nation. Four big things we can do together.\n\nFirst, beat the opioid epidemic.\nThere is so much we can do. Increase funding for prevention, treatment, harm reduction, and recovery.\nGet rid of outdated rules that stop doctors from prescribing treatments. And stop the flow of illicit drugs by working with state and local law enforcement to go after traffickers.\nIf you’re suffering from addiction, know you are not alone. I believe in recovery, and I celebrate the 23 million Americans in recovery.\n\nSecond, let’s take on mental health. Especially among our children, whose lives and education have been turned upside down.\nThe American Rescue Plan gave schools money to hire teachers and help students make up for lost learning.\nI urge every parent to make sure your school does just that. And we can all play a part—sign up to be a tutor or a mentor.\nChildren were also struggling before the pandemic. Bullying, violen

In [5]:
# Show only the raw text of the first loaded document.
documents[0].page_content

'So tonight I’m offering a Unity Agenda for the Nation. Four big things we can do together.\n\nFirst, beat the opioid epidemic.\nThere is so much we can do. Increase funding for prevention, treatment, harm reduction, and recovery.\nGet rid of outdated rules that stop doctors from prescribing treatments. And stop the flow of illicit drugs by working with state and local law enforcement to go after traffickers.\nIf you’re suffering from addiction, know you are not alone. I believe in recovery, and I celebrate the 23 million Americans in recovery.\n\nSecond, let’s take on mental health. Especially among our children, whose lives and education have been turned upside down.\nThe American Rescue Plan gave schools money to hire teachers and help students make up for lost learning.\nI urge every parent to make sure your school does just that. And we can all play a part—sign up to be a tutor or a mentor.\nChildren were also struggling before the pandemic. Bullying, violence, trauma, and the har

In [6]:
# Configure a character-based splitter: target chunk size of 1000 with no
# overlap between consecutive chunks.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
# Display the splitter object.
text_splitter

<langchain.text_splitter.CharacterTextSplitter at 0x1c3ccbaab80>

In [7]:
# Split the loaded documents into chunks; each chunk is itself a Document.
# NOTE: chunk_size is not a hard cap here -- this run logs
# "Created a chunk of size 2046, which is longer than the specified 1000".
# NOTE(review): presumably the splitter only cuts at its separator, so a long
# separator-free span stays in one chunk -- confirm against the splitter docs.
docs = text_splitter.split_documents(documents)
# Display the resulting chunks.
docs

Created a chunk of size 2046, which is longer than the specified 1000


[Document(page_content='So tonight I’m offering a Unity Agenda for the Nation. Four big things we can do together.\n\nFirst, beat the opioid epidemic.\nThere is so much we can do. Increase funding for prevention, treatment, harm reduction, and recovery.\nGet rid of outdated rules that stop doctors from prescribing treatments. And stop the flow of illicit drugs by working with state and local law enforcement to go after traffickers.\nIf you’re suffering from addiction, know you are not alone. I believe in recovery, and I celebrate the 23 million Americans in recovery.', metadata={'source': 'state_of_the_union.txt'}),
 Document(page_content='Second, let’s take on mental health. Especially among our children, whose lives and education have been turned upside down.\nThe American Rescue Plan gave schools money to hire teachers and help students make up for lost learning.\nI urge every parent to make sure your school does just that. And we can all play a part—sign up to be a tutor or a mento

In [8]:
# Number of chunks produced by the splitter (5 in this run).
len(docs)

5

In [9]:
# Create the embedding model wrapper using the "all-MiniLM-L6-v2"
# sentence-transformers model.
# NOTE(review): presumably the model weights are downloaded on first use if
# they are not cached locally -- confirm for offline environments.
embedding_function = SentenceTransformerEmbeddings(model_name= "all-MiniLM-L6-v2")

In [10]:
# Display the embedding wrapper. In this run the repr is a
# HuggingFaceEmbeddings object reporting max_seq_length 256 and a
# 384-dimensional word embedding with mean-token pooling and normalization.
embedding_function

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False})
  (2): Normalize()
), model_name='all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False)

In [12]:
# Embed every chunk with `embedding_function` and index them in a new Chroma
# vector store.
# NOTE(review): no persist_directory is given, so this store is presumably
# in-memory only and lost when the kernel stops -- confirm.
db = Chroma.from_documents(docs, embedding_function)

In [13]:
# Display the vector store object.
db

<langchain_community.vectorstores.chroma.Chroma at 0x1c3d880f430>

In [14]:
# Natural-language question reused for every similarity search below.
query = 'what actions does the speaker suggest to improve mental health, especially among children?'
# Display the query string.
query

'what actions does the speaker suggest to improve mental health, especially among children?'

In [15]:
# Retrieve the chunks most similar to the query. No `k` is passed, so the
# library's default number of results is returned.
result = db.similarity_search(query)
# Display the matching Document objects.
result

[Document(page_content='Second, let’s take on mental health. Especially among our children, whose lives and education have been turned upside down.\nThe American Rescue Plan gave schools money to hire teachers and help students make up for lost learning.\nI urge every parent to make sure your school does just that. And we can all play a part—sign up to be a tutor or a mentor.\nChildren were also struggling before the pandemic. Bullying, violence, trauma, and the harms of social media.\nAs Frances Haugen, who is here with us tonight, has shown, we must hold social media platforms accountable for the national experiment they’re conducting on our children for profit.\nIt’s time to strengthen privacy protections, ban targeted advertising to children, demand tech companies stop collecting personal data on our children.\nAnd let’s get all Americans the mental health services they need. More people they can turn to for help, and full parity between physical and mental health care.', metadata=

In [16]:
# Text of the first hit; in this run it is the mental-health passage.
result[0].page_content

'Second, let’s take on mental health. Especially among our children, whose lives and education have been turned upside down.\nThe American Rescue Plan gave schools money to hire teachers and help students make up for lost learning.\nI urge every parent to make sure your school does just that. And we can all play a part—sign up to be a tutor or a mentor.\nChildren were also struggling before the pandemic. Bullying, violence, trauma, and the harms of social media.\nAs Frances Haugen, who is here with us tonight, has shown, we must hold social media platforms accountable for the national experiment they’re conducting on our children for profit.\nIt’s time to strengthen privacy protections, ban targeted advertising to children, demand tech companies stop collecting personal data on our children.\nAnd let’s get all Americans the mental health services they need. More people they can turn to for help, and full parity between physical and mental health care.'

In [17]:
# Text of the second hit; in this run it is the veterans passage, which does
# not address the query's topic.
result[1].page_content

'Third, support our veterans.\nVeterans are the best of us.\nI’ve always believed that we have a sacred obligation to equip all those we send to war and care for them and their families when they come home.\nMy administration is providing assistance with job training and housing, and now helping lower-income veterans get VA care debt-free.\nOur troops in Iraq and Afghanistan faced many dangers.\nOne was stationed at bases and breathing in toxic smoke from “burn pits” that incinerated wastes of war—medical and hazard material, jet fuel, and more.\nWhen they came home, many of the world’s fittest and best trained warriors were never the same.\nHeadaches. Numbness. Dizziness.\nA cancer that would put them in a flag-draped coffin.\nI know.\nOne of those soldiers was my son Major Beau Biden.\nWe don’t know for sure if a burn pit was the cause of his brain cancer, or the diseases of so many of our troops.\nBut I’m committed to finding out everything we can.\nCommitted to military families li

In [21]:
# Build a second store from the same chunks, this time backed by the on-disk
# directory ./chroma_db_new.
# NOTE(review): re-running this cell presumably inserts the same chunks again
# into the existing directory, producing duplicate hits -- confirm, and clear
# the directory between runs if so.
# NOTE(review): some older Chroma versions may require an explicit persist
# call before the data is written to disk -- confirm for the installed version.
db2 = Chroma.from_documents(docs, embedding_function, persist_directory="./chroma_db_new")

In [22]:
# Open a store on the same ./chroma_db_new directory without re-adding any
# documents. The same embedding function is supplied so that queries are
# embedded with the model used to build the index.
db3 = Chroma(persist_directory="./chroma_db_new", embedding_function=embedding_function)

In [23]:
# Display the store object opened on the persisted directory.
db3

<langchain_community.vectorstores.chroma.Chroma at 0x1c3dd951040>

In [25]:
# Query the store opened from disk, limiting the search to the single best
# match (k=1).
result1 = db3.similarity_search(query, k=1)
# Display the hit; in this run it is the same mental-health passage that the
# first store returned as its top result.
result1

[Document(page_content='Second, let’s take on mental health. Especially among our children, whose lives and education have been turned upside down.\nThe American Rescue Plan gave schools money to hire teachers and help students make up for lost learning.\nI urge every parent to make sure your school does just that. And we can all play a part—sign up to be a tutor or a mentor.\nChildren were also struggling before the pandemic. Bullying, violence, trauma, and the harms of social media.\nAs Frances Haugen, who is here with us tonight, has shown, we must hold social media platforms accountable for the national experiment they’re conducting on our children for profit.\nIt’s time to strengthen privacy protections, ban targeted advertising to children, demand tech companies stop collecting personal data on our children.\nAnd let’s get all Americans the mental health services they need. More people they can turn to for help, and full parity between physical and mental health care.', metadata=

In [26]:
# Same single-result search, but each hit comes back as a
# (Document, score) tuple.
# NOTE(review): the score is presumably a distance, so lower means more
# similar -- confirm against the vector store documentation.
result2 = db3.similarity_search_with_score(query, k=1)
# Display the (Document, score) pairs.
result2

[(Document(page_content='Second, let’s take on mental health. Especially among our children, whose lives and education have been turned upside down.\nThe American Rescue Plan gave schools money to hire teachers and help students make up for lost learning.\nI urge every parent to make sure your school does just that. And we can all play a part—sign up to be a tutor or a mentor.\nChildren were also struggling before the pandemic. Bullying, violence, trauma, and the harms of social media.\nAs Frances Haugen, who is here with us tonight, has shown, we must hold social media platforms accountable for the national experiment they’re conducting on our children for profit.\nIt’s time to strengthen privacy protections, ban targeted advertising to children, demand tech companies stop collecting personal data on our children.\nAnd let’s get all Americans the mental health services they need. More people they can turn to for help, and full parity between physical and mental health care.', metadata