# Lanchain OpenAI Setup

In [1]:
import os
import langchain
from langchain_openai import OpenAI
from langchain_openai import ChatOpenAI
from langchain.schema import AIMessage, HumanMessage, SystemMessage

# Import chat templates
from langchain.prompts import (
    ChatPromptTemplate,
    PromptTemplate,
    SystemMessagePromptTemplate,
    AIMessagePromptTemplate,
    HumanMessagePromptTemplate,
)

# Caching
from langchain.cache import InMemoryCache
from langchain.cache import SQLiteCache
from langchain.globals import set_llm_cache

In [2]:
cacheType = 'in_memory'

if cacheType == 'in_memory':
    set_llm_cache(InMemoryCache())
elif cacheType == 'sqlite':
    set_llm_cache(SQLiteCache(database_path=".langchain.db"))

# Set OpenAI API key and create LLM and Chat LLM. Note that key can be stored in a separate file or as an environment variable. Refer to docs.
api_key = open('./openai_key.txt').read()
os.environ['OPENAI_API_KEY'] = api_key
llm = OpenAI(openai_api_key=api_key)
chat = ChatOpenAI(openai_api_key=api_key)

In [3]:
import chromadb
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.document_loaders import TextLoader

In [4]:
db_name = './speech_new_db1'

In [5]:
embedding_function = OpenAIEmbeddings()

In [6]:
# LOAD Document --> Split into chunks

# Embedding -> Embed chunks -> vectors

# vector chunks -> save chroma db

#Query -> similarity search

In [12]:
loader = TextLoader('langchain/extras/01-Data-Connections/some_data/FDR_State_of_Union_1944.txt')
documents = loader.load()

text_splitter = CharacterTextSplitter.from_tiktoken_encoder(chunk_size=500)
docs = text_splitter.split_documents(documents)

In [13]:
db = Chroma.from_documents(docs, embedding_function, persist_directory=db_name)

In [14]:
# save to disk
db.persist()

In [15]:
# Open an existing chroma db instance
db_new = Chroma(persist_directory=db_name, embedding_function=embedding_function)

In [16]:
new_doc = "What did FDR say about the cost of food law?"

In [17]:
similar_docs = db_new.similarity_search(new_doc)

In [18]:
print(similar_docs[0].page_content)

(2) A continuation of the law for the renegotiation of war contractsâ€”which will prevent exorbitant profits and assure fair prices to the Government. For two long years I have pleaded with the Congress to take undue profits out of war.

(3) A cost of food lawâ€”which will enable the Government (a) to place a reasonable floor under the prices the farmer may expect for his production; and (b) to place a ceiling on the prices a consumer will have to pay for the food he buys. This should apply to necessities only; and will require public funds to carry out. It will cost in appropriations about one percent of the present annual cost of the war.

(4) Early reenactment of. the stabilization statute of October, 1942. This expires June 30, 1944, and if it is not extended well in advance, the country might just as well expect price chaos by summer.

(5) A national service law- which, for the duration of the war, will prevent strikes, and, with certain appropriate exceptions, will make available

## Add new document to same DB

In [23]:
loader1 = TextLoader('langchain/extras/01-Data-Connections/some_data/Lincoln_State_of_Union_1862.txt')
documents1 = loader1.load()

# Use same text splitter
docs1 = text_splitter.split_documents(documents1)

Created a chunk of size 611, which is longer than the specified 500
Created a chunk of size 539, which is longer than the specified 500
Created a chunk of size 686, which is longer than the specified 500


In [24]:
# docs1[0].page_content

In [25]:
# Put into DIR
db_new = Chroma.from_documents(docs1, embedding_function, persist_directory=db_name)

In [26]:
similar_docs1 = db_new.similarity_search('slavery')
similar_docs1[0].metadata

{'source': 'langchain/extras/01-Data-Connections/some_data/Lincoln_State_of_Union_1862.txt'}

In [27]:
similar_docs2 = db_new.similarity_search('cost of food')
similar_docs2[0].metadata

{'source': 'langchain/extras/01-Data-Connections/some_data/FDR_State_of_Union_1944.txt'}

## Retrievers

In [28]:
retriever = db_new.as_retriever()
results = retriever.get_relevant_documents("cost of food law?")

In [29]:
results

[Document(page_content='(2) A continuation of the law for the renegotiation of war contractsâ€”which will prevent exorbitant profits and assure fair prices to the Government. For two long years I have pleaded with the Congress to take undue profits out of war.\n\n(3) A cost of food lawâ€”which will enable the Government (a) to place a reasonable floor under the prices the farmer may expect for his production; and (b) to place a ceiling on the prices a consumer will have to pay for the food he buys. This should apply to necessities only; and will require public funds to carry out. It will cost in appropriations about one percent of the present annual cost of the war.\n\n(4) Early reenactment of. the stabilization statute of October, 1942. This expires June 30, 1944, and if it is not extended well in advance, the country might just as well expect price chaos by summer.\n\n(5) A national service law- which, for the duration of the war, will prevent strikes, and, with certain appropriate e