In [2]:
import os
from dotenv import load_dotenv
import openai
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
import chromadb
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.document_loaders import TextLoader

In [3]:
# Setup model
# Pull variables from a local .env file (if any) into the environment, then read the key.
load_dotenv()
api_key = os.getenv('OPENAI_API_KEY')
if not api_key:
    # Fail fast with an actionable message rather than handing None to the clients below
    # and getting a less obvious error from deep inside the SDK.
    raise RuntimeError(
        "OPENAI_API_KEY is not set; add it to your .env file or export it "
        "before running this notebook."
    )
# Configure both the raw openai module and the LangChain chat wrapper with the same key.
openai.api_key = api_key
model = ChatOpenAI(openai_api_key=api_key)

In [4]:
# Pipeline outline for this notebook:
#   1. LOAD DOCUMENT → SPLIT CHUNKS
#   2. EMBEDDING → EMBED CHUNKS → VECTORS
#   3. VECTOR CHUNKS → SAVE CHROMADB
#   4. QUERY → SIMILARITY SEARCH CHROMADB
# Placeholder metadata that a later cell attaches to the first chunk.
data = dict(test='AAAAAAAAAAAAAAAA')

In [5]:
# LOAD DOCUMENT → SPLIT CHUNKS
# Read the speech explicitly as UTF-8. Without an explicit encoding the file is decoded with
# the platform default, which is what turned the em dashes into "â€”" in the recorded output.
loader = TextLoader('some_data/FDR_State_of_Union_1944.txt', encoding='utf-8')
documents = loader.load()
# Split with a tiktoken-based length function, requesting chunks of about 500 tokens.
text_splitter = CharacterTextSplitter.from_tiktoken_encoder(chunk_size=500)
docs = text_splitter.split_documents(documents)
# Inspect the result: the whole list, the first chunk, the container type,
# then the first chunk's text and metadata separately.
print("---------------------------------------")
print(docs)
print("---------------------------------------")
print(docs[0])
print("---------------------------------------")
print(type(docs))
print("---------------------------------------")
print(docs[0].page_content)
print("---------------------------------------")
print(docs[0].metadata)
print("---------------------------------------")
# Attach the demo metadata by merging into a fresh dict: this keeps what the loader recorded
# (e.g. 'source') and avoids sharing the module-level `data` dict with the document.
docs[0].metadata = {**docs[0].metadata, **data}
docs[0]

---------------------------------------
---------------------------------------
page_content='This Nation in the past two years has become an active partner in the world\'s greatest war against human slavery.\n\nWe have joined with like-minded people in order to defend ourselves in a world that has been gravely threatened with gangster rule.\n\nBut I do not think that any of us Americans can be content with mere survival. Sacrifices that we and our allies are making impose upon us all a sacred obligation to see to it that out of this war we and our children will gain something better than mere survival.\n\nWe are united in determination that this war shall not be followed by another interim which leads to new disaster- that we shall not repeat the tragic errors of ostrich isolationismâ€”that we shall not repeat the excesses of the wild twenties when this Nation went for a joy ride on a roller coaster which ended in a tragic crash.\n\nWhen Mr. Hull went to Moscow in October, and when I 

Document(page_content='This Nation in the past two years has become an active partner in the world\'s greatest war against human slavery.\n\nWe have joined with like-minded people in order to defend ourselves in a world that has been gravely threatened with gangster rule.\n\nBut I do not think that any of us Americans can be content with mere survival. Sacrifices that we and our allies are making impose upon us all a sacred obligation to see to it that out of this war we and our children will gain something better than mere survival.\n\nWe are united in determination that this war shall not be followed by another interim which leads to new disaster- that we shall not repeat the tragic errors of ostrich isolationismâ€”that we shall not repeat the excesses of the wild twenties when this Nation went for a joy ride on a roller coaster which ended in a tragic crash.\n\nWhen Mr. Hull went to Moscow in October, and when I went to Cairo and Teheran in November, we knew that we were in agreemen

In [6]:
# Embed every chunk with OpenAI embeddings and store the vectors in a Chroma
# collection backed by the ./speech_new_db directory, then flush it to disk.
embedding_function = OpenAIEmbeddings()
db = Chroma.from_documents(
    documents=docs,
    embedding=embedding_function,
    persist_directory="./speech_new_db",
)
db.persist()

In [7]:
# What the store holds: chunk text (str) next to its vector, e.g. [1,2,3]
# A new query string → its own vector, e.g. [2,3,4] → compared by similarity
# Open a handle on the persisted store; this is the connection used for retrieval below
db_new_connection = Chroma(
    embedding_function=embedding_function,
    persist_directory="./speech_new_db",
)

In [8]:
# Natural-language question used as the similarity-search query in the next cell.
new_doc: str = "What did FDR say about the cost of food law?"

In [9]:
# Query the reopened store for the chunks most similar to the question.
similar_docs = db_new_connection.similarity_search(query=new_doc)

In [10]:
# How many chunks came back (4 in the recorded run; presumably the store's default k, confirm).
len(similar_docs)

4

In [11]:
# Check the container type of the search result (a plain list in the recorded run).
type(similar_docs)

list

In [12]:
# Show the text of the first returned chunk.
print(similar_docs[0].page_content)

(2) A continuation of the law for the renegotiation of war contractsâ€”which will prevent exorbitant profits and assure fair prices to the Government. For two long years I have pleaded with the Congress to take undue profits out of war.

(3) A cost of food lawâ€”which will enable the Government (a) to place a reasonable floor under the prices the farmer may expect for his production; and (b) to place a ceiling on the prices a consumer will have to pay for the food he buys. This should apply to necessities only; and will require public funds to carry out. It will cost in appropriations about one percent of the present annual cost of the war.

(4) Early reenactment of. the stabilization statute of October, 1942. This expires June 30, 1944, and if it is not extended well in advance, the country might just as well expect price chaos by summer.

(5) A national service law- which, for the duration of the war, will prevent strikes, and, with certain appropriate exceptions, will make available

In [13]:
# Load a second speech and chunk it with the splitter configured earlier.
# The encoding is pinned to UTF-8 so the text is decoded the same way on every platform
# instead of depending on the OS default.
loader = TextLoader('some_data/Lincoln_State_of_Union_1862.txt', encoding='utf-8')
documents = loader.load()
docs = text_splitter.split_documents(documents)

Created a chunk of size 611, which is longer than the specified 500
Created a chunk of size 539, which is longer than the specified 500
Created a chunk of size 686, which is longer than the specified 500


In [14]:
# Add the Lincoln chunks to the same on-disk vector DB that already holds the FDR speech.
db2 = Chroma.from_documents(docs, embedding_function, persist_directory="./speech_new_db")
# Flush explicitly, mirroring the persist() call made when the store was first created;
# otherwise the newly added chunks may not survive a kernel restart on Chroma versions
# that do not persist automatically.
db2.persist()
# Sanity check: a query about the Lincoln speech should now be answerable from this store.
similar_docs = db2.similarity_search("slavery")
print(similar_docs[0].page_content)

As to the second article, I think it would be impracticable to return to bondage the class of persons therein contemplated. Some of them, doubtless, in the property sense belong to loyal owners, and hence provision is made in this article for compensating such. The third article relates to the future of the freed people. It does not oblige, but merely authorizes Congress to aid in colonizing such as may consent. This ought not to be regarded as objectionable on the one hand or on the other, insomuch as it comes to nothing unless by the mutual consent of the people to be deported and the American voters, through their representatives in Congress.

I can not make it better known than it already is that I strongly favor colonization; and yet I wish to say there is an objection urged against free colored persons remaining in the country which is largely imaginary, if not sometimes malicious.

It is insisted that their presence would injure and displace white labor and white laborers. If th

In [15]:
# Get the source of the document: the metadata dict carries the originating file path under 'source'.
print(similar_docs[0].metadata)

{'source': 'some_data/Lincoln_State_of_Union_1862.txt'}


In [16]:
# Query the combined store for the FDR topic, then show the first hit's text
# followed by its metadata, each on its own line.
similar_docs2 = db2.similarity_search(query="cost of food law")
print(similar_docs2[0].page_content, similar_docs2[0].metadata, sep="\n")

(2) A continuation of the law for the renegotiation of war contractsâ€”which will prevent exorbitant profits and assure fair prices to the Government. For two long years I have pleaded with the Congress to take undue profits out of war.

(3) A cost of food lawâ€”which will enable the Government (a) to place a reasonable floor under the prices the farmer may expect for his production; and (b) to place a ceiling on the prices a consumer will have to pay for the food he buys. This should apply to necessities only; and will require public funds to carry out. It will cost in appropriations about one percent of the present annual cost of the war.

(4) Early reenactment of. the stabilization statute of October, 1942. This expires June 30, 1944, and if it is not extended well in advance, the country might just as well expect price chaos by summer.

(5) A national service law- which, for the duration of the war, will prevent strikes, and, with certain appropriate exceptions, will make available

## Retriever

In [17]:
# Inspect the concrete vector-store class behind db2 before wrapping it as a retriever.
type(db2)

langchain_community.vectorstores.chroma.Chroma

In [18]:
# Wrap the vector store in LangChain's retriever interface.
retriever = db2.as_retriever()
# `invoke` is the current retriever entry point; `get_relevant_documents` is deprecated
# in recent LangChain releases and emits a deprecation warning.
result = retriever.invoke('cost food of law')
# Number of documents returned by the retriever.
len(result)

4