# LangChain OpenAI Setup

In [2]:
import langchain
from langchain_openai import OpenAI
from langchain_openai import ChatOpenAI
from langchain.schema import AIMessage, HumanMessage, SystemMessage

# Import chat templates
from langchain.prompts import (
    ChatPromptTemplate,
    PromptTemplate,
    SystemMessagePromptTemplate,
    AIMessagePromptTemplate,
    HumanMessagePromptTemplate,
)

# Caching
from langchain.cache import InMemoryCache
from langchain.cache import SQLiteCache
from langchain.globals import set_llm_cache

In [3]:
# Select the LLM response cache backend: 'in_memory' installs an InMemoryCache,
# 'sqlite' installs a SQLiteCache backed by the .langchain.db file. Any other
# value means set_llm_cache is never called here.
cacheType = 'in_memory'

if cacheType == 'in_memory':
    set_llm_cache(InMemoryCache())
elif cacheType == 'sqlite':
    set_llm_cache(SQLiteCache(database_path=".langchain.db"))

# Set the OpenAI API key and create the LLM and Chat LLM clients. The key can
# also be stored as an environment variable instead of a file; refer to the docs.
# The context manager closes the key file promptly instead of leaking the handle.
with open('./openai_key.txt') as key_file:
    # Strip surrounding whitespace: key files normally end with a newline,
    # which would otherwise become part of the key sent to the API.
    api_key = key_file.read().strip()
llm = OpenAI(openai_api_key=api_key)
chat = ChatOpenAI(openai_api_key=api_key)

In [6]:
from langchain.text_splitter import CharacterTextSplitter

In [9]:
# Read the entire FDR 1944 State of the Union text file into one string.
with open('extras/01-Data-Connections/some_data/FDR_State_of_Union_1944.txt') as speech_file:
    speech_text = speech_file.read()

In [11]:
# Length of the loaded speech, in characters.
len(speech_text)

21995

In [15]:
# The whole speech may fit into a single OpenAI request (the limit noted when
# this was written was 8000 tokens); however, one large chunk is not as
# user-friendly to work with when developing.
# This counts whitespace-separated words, a rough size check rather than a token count.
len(speech_text.split())

3750

## Splitting on a separator character

In [34]:
# Build a splitter that breaks text at blank lines ("\n\n") and is configured
# with chunk_size=1000.
text_splitter = CharacterTextSplitter(
    chunk_size=1000,
    separator="\n\n",
)

In [36]:
# Pass the speech as a one-element list and split it into documents with the
# current text_splitter.
texts = text_splitter.create_documents([speech_text])

In [40]:
# Number of documents produced by the split.
len(texts)

28

## Splitting by token count


```sh
pip install tiktoken
```

In [44]:
# Rebuild the splitter through the tiktoken-based constructor with
# chunk_size=500, then split the speech again. This rebinds both
# `text_splitter` and `texts`, replacing the values from the earlier split.
# NOTE(review): chunk_size is presumably measured in tokens here rather than
# characters, and no encoding/model name is passed, so the library default
# applies — confirm against the LangChain docs.
text_splitter = CharacterTextSplitter.from_tiktoken_encoder(chunk_size=500)
texts = text_splitter.create_documents([speech_text])

In [46]:
# Number of documents produced by the tiktoken-based split.
len(texts)

15