# Dependencies

In [40]:
!pip install llama-index-readers-file pymupdf
!pip install llama-index-vector-stores-postgres
!pip install llama-index-embeddings-huggingface
!pip install llama-index-llms-llama-cpp
!pip install llama-index-llms-huggingface-api

In [40]:
!pip install llama-cpp-python
!pip install llama-index-vector-stores-chroma
!pip install llama-index-embeddings-huggingface

# Creds

In [9]:
# Cell-local imports: this cell sits above the notebook's import cell, so on a
# top-to-bottom run `os` and `load_dotenv` would otherwise be undefined here.
import os

from dotenv import load_dotenv

# Load variables from a .env file (if present) into the process environment.
load_dotenv()

# os.getenv returns None when the variable is not set; the keys are only
# needed for the hosted OpenAI / Hugging Face backends.
openai_api_key = os.getenv('OPENAI_API_KEY')
hf_token = os.getenv("HUGGING_FACE_TOKEN")

# Imports

In [2]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import StorageContext
from llama_index.core import PromptTemplate
from llama_index.core import Settings

from llama_index.vector_stores.chroma import ChromaVectorStore
import chromadb

from IPython.display import Markdown, display
import time
import os
from typing import List, Optional
from dotenv import load_dotenv

# from llama_index.llms.ollama import Ollama
from llama_index.llms.llama_cpp import LlamaCPP
from llama_index.llms.openai import OpenAI
from llama_index.core.llms import ChatMessage
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI

# LLMs

In [3]:
# uncomment the LLM you want to use

# local inference
# Local llama.cpp backend. The GGUF weights must already exist at model_path
# (download the GGUF model from Hugging Face first).
llamacpp_llm = LlamaCPP(
    model_path="gguf/Mistral-7B-Instruct-v0.3-Q8_0.gguf",
    context_window=4096,
    max_new_tokens=256,
    temperature=0.1,
    generate_kwargs=dict(),
    verbose=False,
)

# ollama_llm = Ollama(model="llama3.1:latest", request_timeout=30.0)

# openai inference
# openai_llm = OpenAI(model="gpt-4o-mini", api_key=openai_api_key)
# messages = [
#     ChatMessage(
#         role="system", content="You are a pirate with a colorful personality"
#     ),
#     ChatMessage(role="user", content="What is your name"),]

# huggingface inference
# hf_inf_llm=HuggingFaceInferenceAPI(model_name="mistralai/Mixtral-8x7B-Instruct-v0.1", token=hf_token)
# Settings.llm=HuggingFaceInferenceAPI(model_name="meta-llama/Meta-Llama-3-8B-Instruct", token=hf_token)

#### Huggingface inference API testing

In [69]:
# Smoke test: send a single completion request through the Hugging Face
# Inference API client and print whatever comes back.
hf_inf_llm = HuggingFaceInferenceAPI(
    token=hf_token,
    model_name="meta-llama/Meta-Llama-3-8B-Instruct",
)

completion_response = hf_inf_llm.complete(
    "what is python. answer in one line"
)
print(completion_response)


Python is a high-level, interpreted, interactive and object-oriented scripting language that is widely used for web development, scientific computing, data analysis, artificial intelligence, and more.

Note: This answer is a brief summary of Python, if you want to know more about Python, you can check out the official Python documentation or other resources.


# Embedding generation model

In [4]:
# Embedding model used for both indexing and querying; `cache_folder` points
# at the local 'embed' directory.
embed_model = HuggingFaceEmbedding(
    cache_folder="embed",
    model_name="BAAI/bge-base-en-v1.5",
)

# Load and process your docs

In [9]:
# Read every file found in the local 'data' folder. Create that folder and
# drop your documents (pdf, docx, etc.) into it before running this cell.
documents = SimpleDirectoryReader(input_dir="data").load_data()

# Global chunking settings: chunk size and the overlap between chunks.
Settings.chunk_size = 100
Settings.chunk_overlap = 20

# Generate embeddings and save to vector database

In [10]:
# Open (or create) the on-disk Chroma database and its collection, wrap the
# collection as a LlamaIndex vector store, then build the index from the
# loaded documents using the embedding model defined earlier.
db = chromadb.PersistentClient(path="./chroma_db")
chroma_collection = db.get_or_create_collection(name="finance_agents")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
    documents,
    embed_model=embed_model,
    storage_context=storage_context,
)

#### Fetch the stored embedding collection

In [5]:
# Re-open the persisted Chroma collection and build an index directly on top
# of the stored vectors, without reading the source documents again.
db2 = chromadb.PersistentClient(path="./chroma_db")
chroma_collection2 = db2.get_or_create_collection(name="finance_agents")
vector_store2 = ChromaVectorStore(chroma_collection=chroma_collection2)
index2 = VectorStoreIndex.from_vector_store(vector_store2, embed_model=embed_model)

# Prompt Engineering

In [6]:
# Prompt for open-source models. `{context_str}` and `{query_str}` are the
# placeholders left in the template text for the retrieved context and the
# user's question.
# NOTE(review): "find the question" below probably means "answer the question";
# left unchanged so the prompt text sent to the model stays the same — confirm.
open_source_llm_template_str = ("""
You are a Q&A assistant. 
First read the context. 
Then read the question
Your goal is to find the question as accurately as possible based on the instructions and context provided.
then generate your response in bullet point way to the user


Context:
{context_str}

 
Question:
{query_str}
""")

open_source_llm_template = PromptTemplate(open_source_llm_template_str)


# Prompt for OpenAI GPT models; same placeholders as above.
openai_template_str = ("""
You are a Q&A assistant. Your goal is to answer questions as
accurately as possible based on the instructions and context provided.
 
Context:
 
{context_str}
 
Question:
 
{query_str}
""")

# Build the template from the string defined just above. The previous code
# passed `refine_template_str`, a name that is never defined in this notebook,
# which raised NameError.
openai_template = PromptTemplate(openai_template_str)

# Retrieval

In [None]:
# Prompt interactively for the question that will be sent to the query engine.
ques = input("Your question goes here: ")

In [11]:
# Build a streaming query engine over the persisted index, run the question
# through it, and report how long the whole cell took.
# perf_counter() is a monotonic clock intended for measuring intervals, so the
# result is not affected by system clock adjustments (unlike time.time()).
started_at = time.perf_counter()

query_engine = index2.as_query_engine(
    text_qa_template=open_source_llm_template,  # pass your prompt here
    llm=llamacpp_llm,  # pass your LLM here
    streaming=True,
)

response = query_engine.query(ques)
# print(response)  # uncomment when using an LLM from the Hugging Face Inference API, as it doesn't support streaming
response.print_response_stream()  # comment this out when using an LLM from the Hugging Face Inference API

elapsed_seconds = time.perf_counter() - started_at
formatted_time = round(elapsed_seconds, 2)  # seconds, two decimal places
print('\n\n\n\nTime taken to generate: ', formatted_time)


Response:
AutoGen is a generic framework designed to build diverse applications of various complexities and Language Model (LLM) capacities. It is particularly useful in domains such as mathematics, coding, question-answering, operations research, online decision-making, and entertainment. The framework streamlines and consolidates multi-agent workflows using multi-agent conversations to reduce the effort required for developers to create complex LLM applications.

Time taken to generate:  40.642993450164795


# Generated examples

In [117]:
# LlamaCPP - Tell me about chapter 2


Response:
In Chapter 2, Financial Performance, the report discusses the financial achievements of the company for the year 2023. The key points highlighted include a 12% growth in revenue compared to the previous year, reaching $3.2 billion. This growth is attributed to increased sales from higher-margin products and cost optimization initiatives. Additionally, the gross profit margin improved to 45%. This chapter provides an in-depth analysis of the financial performance of the company for the year.

Time taken to generate:  59.90000009536743


In [116]:
# LlamaCPP - what is the financial report for year 2024?


Response:
The report for the year 2024, as per the Financial Report, presents a cautiously optimistic outlook with an expected revenue growth of 8%. This is a moderation from the extraordinary factors that contributed to the 12% revenue increase seen in the year 2023.

Time taken to generate:  43.412781953811646


In [115]:
# LlamaCPP - how was the financial year 2023?


Response:
The financial year 2023 was a year of robust growth for our company, with a revenue increase of 12% compared to the previous year.

Time taken to generate:  33.57563900947571


In [109]:
# Mixtral 8x7B MoE Huggingface - What is the net income?

 

Response:
According to the financial report, the net income was $500 million, representing a 15% year-over-year increase.


Time taken to generate:  1.076716423034668


In [108]:
# Mixtral 8x7B MoE Huggingface - What is Autogen?

 

Response:
AutoGen is a generic framework for building diverse applications of various complexities and Large Language Model (LLM) capacities. It aims to streamline and consolidate multi-agent workflows using multi-agent conversations, thereby reducing the effort required for developers to create complex LLM applications across various domains. 

Please let me know if you need any further assistance.  I'll be happy to help. 


Time taken to generate:  2.2069997787475586


In [96]:
# LlamaCPP - What is the net income


Answer:
$500 million

Time taken to generate:  19.408074378967285


In [92]:
# Llama 3 huggingface - What is the net income

Answer:
$500 million. 





Please let me know if you want me to do anything else.


Time taken to generate:  1.2758889198303223


In [66]:
# OpenAI - what is the net income

The net income mentioned in the financial report is $500 million, representing a 15% increase year-over-year.

Time taken to generate:  1.0434603691101074


In [63]:
# OpenAI - what is autogen

AutoGen is a generic framework designed to facilitate the development of diverse applications with various complexities and capacities of Large Language Models (LLMs). It aims to streamline and consolidate multi-agent workflows using multi-agent conversations to reduce the effort required for developers to create complex LLM applications across different domains. Empirical studies have shown the effectiveness of the AutoGen framework in various example applications, including mathematics, coding, question answering, operations research, online decision-making, entertainment, and more.

Time taken to generate:  2.124453544616699
