## Setting LLM Model

In [12]:
import nest_asyncio

nest_asyncio.apply()

In [13]:
from dotenv import load_dotenv
import os

load_dotenv()
#print("Open AI - ",os.getenv("LITELLM_URL"),os.getenv("OPENAI_API_MODEL"), os.getenv("OPENAI_API_EMBEDDING"))
#print("OLLAMA  - ",os.getenv("OLLAMA_URL"),os.getenv("OLLAMA_MODEL"))
#print("Local OLLAMA - ",os.getenv("OLLAMA_LOCAL_URL"),os.getenv("OLLAMA_LOCAL_MODEL"))

True

In [14]:
from llama_index.core import Settings

### RUN LLM AS OLLAMA 

In [None]:
#install OPEN AI LLM, skip if already installed
!pipenv install llama-index-llms-ollama

In [40]:
from llama_index.llms.ollama import Ollama
api_base=os.getenv("OLLAMA_LOCAL_URL")
model=os.getenv("OLLAMA_LOCAL_MODEL")
Settings.llm = Ollama(model=model, base_url=api_base,request_timeout=360.0)

# use remote ollam
"""
api_base=os.getenv("OLLAMA_URL")
model=os.getenv("OLLAMA_MODEL")
llm = Ollama(model=model, base_url=api_base,request_timeout=360.0)
"""
#test run
response = Settings.llm.complete("What is the capital of France?")
print(response)

INFO:httpx:HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
The capital of France is **Paris**. 🇫🇷  



### RUN LLM AS OPEN AI 

In [None]:
#install OPEN AI LLM, skip if already installed
!pipenv install llama-index-llms-openai

In [15]:
from llama_index.llms.openai import OpenAI
api_base=os.getenv("LITELLM_URL")
model=os.getenv("OPENAI_API_MODEL")

Settings.llm = OpenAI(
    model=model,
    api_base = api_base,
    temperature=0.3
)

# resp = Settings.llm.complete("What is the capital of France?")
# print(resp)

## Setting Embedding Model

In [16]:
# use open AI embedding
# can skip if use local embedding
from llama_index.embeddings.openai import OpenAIEmbedding
api_base=os.getenv("LITELLM_URL")
embedding_model=os.getenv("OPENAI_API_EMBEDDING")

Settings.embed_model = OpenAIEmbedding(
    model_name=embedding_model,
    api_base = api_base,
)
# embed_text = Settings.embed_model.get_text_embedding("hello")
# print(f"{len(embed_text)}, {embed_text}")

In [17]:
import logging
import sys

#logging.basicConfig(stream=sys.stdout, level=logging.INFO)
#logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

### Load and Parse document

In [18]:
from llama_index.core import VectorStoreIndex, SimpleKeywordTableIndex
from llama_index.core import SummaryIndex

In [19]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader,Document
documents = SimpleDirectoryReader("./data/paul").load_data()

In [20]:
from llama_index.core.node_parser import SentenceSplitter

nodes = SentenceSplitter().get_nodes_from_documents(documents)

## Chat Store using simple chat store in memory

In [21]:
from llama_index.core.storage.chat_store import SimpleChatStore
from llama_index.core.memory import ChatMemoryBuffer

In [22]:
chat_store = SimpleChatStore()

chat_memory = ChatMemoryBuffer.from_defaults(
    token_limit=3000,
    chat_store=chat_store,
    chat_store_key="user1",
)

In [23]:
index = VectorStoreIndex.from_documents(documents)

In [24]:
chat_engine = index.as_chat_engine(memory=chat_memory)

In [25]:
response = chat_engine.query("What did the author do growing up?")
print(response)

Growing up, the author focused on writing and programming outside of school. They initially wrote short stories, which they later described as lacking plot but featuring characters with strong feelings. In 9th grade, they began programming on an IBM 1401, using punch cards to input their programs. Later, they acquired a TRS-80 microcomputer, which allowed them to write simple games, a program for predicting model rocket flights, and a word processor for their father. Although they enjoyed programming, the author initially planned to study philosophy in college but eventually switched to studying artificial intelligence.


In [23]:
chat_store.persist(persist_path="chat_store.json")

In [26]:
chat_memory.to_dict

<bound method ChatMemoryBuffer.to_dict of ChatMemoryBuffer(chat_store=SimpleChatStore(store={'user1': [ChatMessage(role=<MessageRole.USER: 'user'>, additional_kwargs={}, blocks=[TextBlock(block_type='text', text='What did the author do growing up?')]), ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, additional_kwargs={'tool_calls': [ChatCompletionMessageToolCall(id='call_IqblU3B3aMuSCrByBTny7wmf', function=Function(arguments='{"input":"What did the author do growing up?"}', name='query_engine_tool'), type='function')]}, blocks=[]), ChatMessage(role=<MessageRole.TOOL: 'tool'>, additional_kwargs={'name': 'query_engine_tool', 'tool_call_id': 'call_IqblU3B3aMuSCrByBTny7wmf'}, blocks=[TextBlock(block_type='text', text='Growing up, the author focused on writing and programming outside of school. Initially, they wrote short stories, which they later described as lacking plot but featuring characters with strong feelings. They began programming on an IBM 1401 in 9th grade, using punch c

In [27]:
response = chat_engine.query("What did the author do after his time at YC?")
print(response)

After his time at Y Combinator, the author decided to step back and hand over the leadership to someone else, specifically seeking to recruit Sam Altman as the new president. He initially intended to balance writing essays, working on Y Combinator, and developing the programming language Arc. However, as Y Combinator grew, it consumed more of his attention. Ultimately, he focused on writing essays and pursuing other projects.


In [27]:
chat_memory.to_dict

<bound method ChatMemoryBuffer.to_dict of ChatMemoryBuffer(chat_store=SimpleChatStore(store={'user1': [ChatMessage(role=<MessageRole.USER: 'user'>, additional_kwargs={}, blocks=[TextBlock(block_type='text', text='What did the author do after his time at YC?')]), ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, additional_kwargs={'tool_calls': [ChatCompletionMessageToolCall(id='call_vra67bjSt6pjeMg8P3wWv5SY', function=Function(arguments='{"input":"What did the author do after his time at Y Combinator (YC)?"}', name='query_engine_tool'), type='function')]}, blocks=[]), ChatMessage(role=<MessageRole.TOOL: 'tool'>, additional_kwargs={'name': 'query_engine_tool', 'tool_call_id': 'call_vra67bjSt6pjeMg8P3wWv5SY'}, blocks=[TextBlock(block_type='text', text='The author did not initially intend for Y Combinator (YC) to become a full-time job, as he planned to focus on hacking, writing essays, and working on YC. However, as YC grew and he became more excited about it, it started to occupy a