## Setting LLM Model

In [1]:
import nest_asyncio

nest_asyncio.apply()

In [1]:
from dotenv import load_dotenv
import os

load_dotenv()
#print("Open AI - ",os.getenv("LITELLM_URL"),os.getenv("OPENAI_API_MODEL"), os.getenv("OPENAI_API_EMBEDDING"))
#print("OLLAMA  - ",os.getenv("OLLAMA_URL"),os.getenv("OLLAMA_MODEL"))
#print("Local OLLAMA - ",os.getenv("OLLAMA_LOCAL_URL"),os.getenv("OLLAMA_LOCAL_MODEL"))

True

In [2]:
from llama_index.core import Settings

### RUN LLM AS OLLAMA 

In [None]:
#install OPEN AI LLM, skip if already installed
!pipenv install llama-index-llms-ollama

In [40]:
from llama_index.llms.ollama import Ollama
api_base=os.getenv("OLLAMA_LOCAL_URL")
model=os.getenv("OLLAMA_LOCAL_MODEL")
llm = Ollama(model=model, base_url=api_base,request_timeout=120.0)

# use remote ollam
"""
api_base=os.getenv("OLLAMA_URL")
model=os.getenv("OLLAMA_MODEL")
llm = Ollama(model=model, base_url=api_base,request_timeout=180.0)
"""
#test run
response = llm.complete("What is the capital of France?")
print(response)

The capital of France is **Paris**. 🇫🇷  



### RUN LLM AS OPEN AI 

In [None]:
#install OPEN AI LLM, skip if already installed
!pipenv install llama-index-llms-openai

In [None]:
from llama_index.llms.openai import OpenAI
api_base=os.getenv("LITELLM_URL")
model=os.getenv("OPENAI_API_MODEL")

Settings.llm = OpenAI(
    model=model,
    api_base = api_base,
    temperature=0.3
)

resp = Settings.llm.complete("What is the capital of France?")
print(resp)

## Tokenization
Tokenization is the process of breaking text into smaller units (tokens) that LLMs can process, converting them into numerical IDs for model input. It ensures efficient and consistent text representation, enabling the model to handle fixed-length sequences and unseen words using techniques like subword or byte pair encoding (BPE). Tokenization occurs before text embedding, generation, or any LLM processing to prepare data for computation.

- Llama-index default using tiktoken as use by Open AI
- Non Open AI model can use trasformer

In [7]:
import tiktoken

#for Open AI
tokenizer1 = tiktoken.encoding_for_model("gpt-3.5-turbo").encode
print(tokenizer1("Hello World!"))



[9906, 4435, 0]


In [None]:
#install OPEN AI LLM, skip if already installed
!pipenv install transformers

In [5]:
# huggingface
from transformers import AutoTokenizer
tokenizer2 = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
print(tokenizer2("Hello World!"))


  from .autonotebook import tqdm as notebook_tqdm


{'input_ids': [1, 22557, 3304, 28808], 'attention_mask': [1, 1, 1, 1]}


In [8]:
Settings.tokenizer = tokenizer1

## Setting Embedding Model

In [11]:
# use open AI embedding
from llama_index.embeddings.openai import OpenAIEmbedding
api_base=os.getenv("LITELLM_URL")
embedding_model=os.getenv("OPENAI_API_EMBEDDING")

Settings.embed_model = OpenAIEmbedding(
    model_name=embedding_model,
    api_base = api_base,
)
# embed_text = Settings.embed_model.get_text_embedding("hello")
# print(f"{len(embed_text)}, {embed_text}")

In [None]:
#download embedding model from Hugging face into local  (unable to run in notebook)
!pipenv install llama-index-embeddings-huggingface
!pipenv install sentence-transformers

In [12]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5"
)
embed_text = Settings.embed_model.get_text_embedding("hello")
print(f"{len(embed_text)}, {embed_text}")

384, [-0.0643514096736908, -0.06281544268131256, 0.05967331677675247, -0.062457676976919174, -0.013114610686898232, 0.018242744728922844, 0.06856439262628555, 0.03575027361512184, 0.04076302796602249, 0.005130363628268242, 0.008038632571697235, -0.07947416603565216, 0.025934062898159027, 0.049173787236213684, 0.0275560449808836, -0.0004877340397797525, 0.04075757414102554, -0.051806751638650894, -0.11306574940681458, 0.004447642248123884, 0.009401279501616955, 0.02884845621883869, -0.05381949245929718, -0.04361865669488907, 0.0051504215225577354, -0.0023293327540159225, 0.02580035664141178, -0.010388363152742386, -0.008328532800078392, -0.023409048095345497, -0.03143850341439247, 0.004619847983121872, 0.04228010028600693, 0.024808714166283607, 0.018703529611229897, -0.026772426441311836, 0.038852423429489136, -0.013954431749880314, -0.07236617058515549, -0.0033572171814739704, 0.05542605742812157, -0.051612697541713715, 0.0002378230419708416, -0.022276420146226883, 0.03365354984998703,

### HuggingFace Optimum ONNX Embeddings (unable to run in notebook)
ONNX embeddings are vector representations generated by models saved in the ONNX format, which allows for cross-framework compatibility and efficient deployment across various hardware platforms. These embeddings are used in NLP tasks like semantic search or document classification. The ONNX format optimizes performance, allowing faster inference and reduced resource usage compared to other formats like PyTorch or TensorFlow.

In [None]:
!pipenv install transformers optimum[exporters]
!pipenv install llama-index-embeddings-huggingface-optimum

In [13]:
from llama_index.embeddings.huggingface_optimum import OptimumEmbedding

OptimumEmbedding.create_and_save_optimum_model(
    "BAAI/bge-small-en-v1.5", "./bge_onnx"
)

Saved optimum model to ./bge_onnx. Use it with `embed_model = OptimumEmbedding(folder_name='./bge_onnx')`.


In [None]:
Settings.embed_model = OptimumEmbedding(folder_name="./bge_onnx")
embed_text = Settings.embed_model.get_text_embedding("hello")
print(f"{len(embed_text)}, {embed_text}")

# Prompt

In [15]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
documents = SimpleDirectoryReader("./data/paul").load_data()

In [16]:
index = VectorStoreIndex.from_documents(documents)


In [17]:
query = index.as_query_engine(llm=Settings.llm)
print(query.query("Who is Paul Graha?"))

Paul Graham is a writer and programmer known for his essays on technology, startups, and entrepreneurship. He co-founded Y Combinator, a startup accelerator, and has been influential in the tech startup community. His experiences and reflections on running Y Combinator and writing essays are discussed in his work.


In [18]:
from llama_index.core import PromptTemplate

text_qa_template_str = (
    "Context information is"
    " below.\n---------------------\n{context_str}\n---------------------\nUsing"
    " both the context information and also using your own knowledge, answer"
    " the question: {query_str}\nIf the context isn't helpful, you can also"
    " answer the question on your own.\n"
)
text_qa_template = PromptTemplate(text_qa_template_str)

refine_template_str = (
    "The original question is as follows: {query_str}\nWe have provided an"
    " existing answer: {existing_answer}\nWe have the opportunity to refine"
    " the existing answer (only if needed) with some more context"
    " below.\n------------\n{context_msg}\n------------\nUsing both the new"
    " context and your own knowledge, update or repeat the existing answer.\n"
)
refine_template = PromptTemplate(refine_template_str)

In [19]:
print(text_qa_template)

metadata={'prompt_type': <PromptType.CUSTOM: 'custom'>} template_vars=['context_str', 'query_str'] kwargs={} output_parser=None template_var_mappings=None function_mappings=None template="Context information is below.\n---------------------\n{context_str}\n---------------------\nUsing both the context information and also using your own knowledge, answer the question: {query_str}\nIf the context isn't helpful, you can also answer the question on your own.\n"


In [21]:
print(
    index.as_query_engine(
        text_qa_template=text_qa_template,
        refine_template=refine_template,
        llm=Settings.llm,
    ).query("Who is Joe Biden?")
)

Joe Biden is an American politician and attorney who has served as the 46th President of the United States since January 20, 2021. He was born on November 20, 1942, in Scranton, Pennsylvania. Before his presidency, Biden had a long career in politics, including serving as a U.S. Senator from Delaware from 1973 to 2009. He was also the Vice President of the United States under President Barack Obama from 2009 to 2017.

Biden is a member of the Democratic Party and has focused on issues such as healthcare, foreign policy, and climate change during his political career. His presidency has been marked by efforts to address the COVID-19 pandemic, economic recovery, and social justice issues.
