# LangChain OpenAI Setup

In [34]:
# import langchain
import os
import logging
from langchain_openai import OpenAI
from langchain_openai import ChatOpenAI
from langchain.schema import AIMessage, HumanMessage, SystemMessage

# Import chat templates
from langchain.prompts import (
    ChatPromptTemplate,
    PromptTemplate,
    SystemMessagePromptTemplate,
    AIMessagePromptTemplate,
    HumanMessagePromptTemplate,
)

# Caching
from langchain.cache import InMemoryCache
from langchain.cache import SQLiteCache
from langchain.globals import set_llm_cache

In [40]:
# LLM response cache backend: 'in_memory' or 'sqlite'.
# Any other value leaves the LLM cache unconfigured.
cacheType = 'in_memory'

if cacheType == 'in_memory':
    set_llm_cache(InMemoryCache())
elif cacheType == 'sqlite':
    set_llm_cache(SQLiteCache(database_path=".langchain.db"))

# Read the OpenAI API key from a local file and expose it through the
# OPENAI_API_KEY environment variable. The key could instead be provided
# directly as an environment variable; refer to the docs.
with open('./openai_key.txt', encoding='utf-8') as key_file:
    # strip(): a trailing newline in the file would otherwise end up in the key
    api_key = key_file.read().strip()
os.environ['OPENAI_API_KEY'] = api_key

# Send log records (DEBUG and above) to example.log
logging.basicConfig(filename='example.log', encoding='utf-8', level=logging.DEBUG)

In [6]:
from langchain.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter

In [51]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor

In [43]:

from langchain.document_loaders import TextLoader

def load_file_into_db(file, db_name, embedding_function, chunk_size=500, encoding=None):
    """Load a text file, split it into chunks and store them in a Chroma DB.

    Args:
        file: Path of the text file handed to ``TextLoader``.
        db_name: Directory passed to Chroma as ``persist_directory``.
        embedding_function: Embedding object passed to ``Chroma.from_documents``.
        chunk_size: ``chunk_size`` forwarded to
            ``CharacterTextSplitter.from_tiktoken_encoder``.
        encoding: Optional text encoding forwarded to ``TextLoader``. ``None``
            (the default) keeps the loader's own default.

    Returns:
        The vector store object returned by ``Chroma.from_documents``.
    """
    logging.info(f'Loading file into DB. file={file} db={db_name}, chunk_size={chunk_size}')
    documents = TextLoader(file, encoding=encoding).load()
    splitter = CharacterTextSplitter.from_tiktoken_encoder(chunk_size=chunk_size)
    chunks = splitter.split_documents(documents)

    db = Chroma.from_documents(chunks, embedding_function, persist_directory=db_name)

    # save to disk
    db.persist()
    logging.info(f'Persisted file into DB. file={file} db={db_name}, chunk_size={chunk_size}')
    return db


In [12]:
# Configuration comes first: `db_path` must exist before the Chroma
# connection below is created, otherwise a fresh-kernel run fails with NameError.
db_path = './speech_new_db_1'
question = "What was Lincon's stance on slavery?"

# Embedding object shared by the loader calls and the connection below
embedding_function = OpenAIEmbeddings()
# Handle on the Chroma store persisted under `db_path`
db_connection = Chroma(persist_directory=db_path, embedding_function=embedding_function)

In [45]:
# Index the FDR 1944 State of the Union text into the store at `db_path`
db = load_file_into_db(
    file='extras/01-Data-Connections/some_data/FDR_State_of_Union_1944.txt',
    db_name=db_path,
    embedding_function=embedding_function,
    chunk_size=500,
)

<langchain_community.vectorstores.chroma.Chroma at 0x241bce2b9d0>

In [47]:
# Index the Lincoln 1862 State of the Union text into the same store at `db_path`
db = load_file_into_db(
    file='extras/01-Data-Connections/some_data/Lincoln_State_of_Union_1862.txt',
    db_name=db_path,
    embedding_function=embedding_function,
    chunk_size=500,
)



In [61]:
# Chat model (temperature=0) that backs the compression step
chat = ChatOpenAI(temperature=0)

# Build the LLMChainExtractor document compressor on top of the chat model
compressor = LLMChainExtractor.from_llm(llm=chat)

In [63]:
# Contextual compression retriever: pairs the vector-store retriever from `db`
# with the LLM-based `compressor`
compression_retriever = ContextualCompressionRetriever(
    base_retriever=db.as_retriever(),
    base_compressor=compressor,
)

In [65]:
# Baseline: plain similarity search on the vector store, with no LLM compression
# (the store itself is still built with OpenAI embeddings)
docs = db_connection.similarity_search(query=question)
docs[0].page_content[:500]

'Among the friends of the Union there is great diversity of sentiment and of policy in regard to slavery and the African race amongst us. Some would perpetuate slavery; some would abolish it suddenly and without compensation; some would abolish it gradually and with compensation: some would remove the freed people from us, and some would retain them with us; and there are yet other minor diversities. Because of these diversities we waste much strength in struggles among ourselves. By mutual conce'

In [67]:
# New way: retrieve through the contextual compression retriever.
# `invoke` is the Runnable entry point that supersedes the deprecated
# `get_relevant_documents`.
compressed_docs = compression_retriever.invoke(question)



In [69]:
# Display the first document returned by the compression retriever
compressed_docs[0]

Document(page_content='Some would perpetuate slavery; some would abolish it suddenly and without compensation; some would abolish it gradually and with compensation: some would remove the freed people from us, and some would retain them with us; and there are yet other minor diversities. Because of these diversities we waste much strength in struggles among ourselves. By mutual concession we should harmonize and act together. This would be compromise, but it would be compromise among the friends and not with the enemies of the Union. These articles are intended to embody a plan of such mutual concessions. if the plan shall be adopted, it is assumed that emancipation will follow, at least in several of the States.', metadata={'source': 'extras/01-Data-Connections/some_data/Lincoln_State_of_Union_1862.txt'})