# Install Packages and Setup Variables


In [1]:
!pip install -q openai==1.58.0 chromadb==0.5.5 google-generativeai==0.5.4 langchain==0.1.17 langchain-chroma==0.1.2 langchain_openai==0.1.5 langchain_google_genai==1.0.4 llama-index-llms-gemini==0.1.11 llama-index==0.10.57 llama-index-vector-stores-chroma==0.1.10

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/67.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m454.3/454.3 kB[0m [31m12.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m584.3/584.3 kB[0m [31m28.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m150.7/150.7 kB[0m [31m16.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m867.6/867.6 kB[0m [31m58.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m96.0 MB/s[0m eta [36m

In [2]:
import os
from google.colab import userdata

# Expose the API keys stored as Colab secrets through environment variables.
# Each pair is (environment variable to set, name of the Colab secret).
for env_var, secret_name in (
    ("OPENAI_API_KEY", "openai_api_key"),
    ("GOOGLE_API_KEY", "GOOGLE_API_KEY"),
):
    os.environ[env_var] = userdata.get(secret_name)

# Load the Dataset (CSV)


## Download


The dataset contains several articles from the Towards AI blog that explain the LLaMA 2 model in depth. We download the CSV file and then read its contents into one long string.


In [5]:
# Download the articles CSV from GitHub and save it as ./mini-dataset.csv.
!curl -o ./mini-dataset.csv https://raw.githubusercontent.com/AlaFalaki/tutorial_notebooks/main/data/mini-llama-articles.csv

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0100  169k  100  169k    0     0  1697k      0 --:--:-- --:--:-- --:--:-- 1712k


## Read File


In [6]:
import csv

# Read the CSV file and concatenate column 1 of every data row (presumably the
# article body -- confirm against the file's header) into one long string.
# newline="" is what the csv module documentation requires when opening a file
# for csv.reader: it lets the reader handle line endings itself, so newlines
# embedded in quoted fields are preserved as written.
with open("./mini-dataset.csv", mode="r", encoding="utf-8", newline="") as file:
    csv_reader = csv.reader(file)
    next(csv_reader, None)  # skip the header row (no-op if the file is empty)

    # Join once instead of growing a string with += inside the loop.
    # csv.reader yields [] for blank lines; skip rows without a second column
    # rather than failing with IndexError.
    text = "".join(row[1] for row in csv_reader if len(row) > 1)

# The number of characters in the dataset.
print(len(text))

171044


# Chunking


In [7]:
chunk_size = 512

# Cut the text into consecutive, non-overlapping windows of `chunk_size`
# characters; the last window may be shorter than the others.
chunks = [
    text[start : start + chunk_size]
    for start in range(0, len(text), chunk_size)
]

print(len(chunks))

335


# Interface of Chroma with LlamaIndex


In [8]:
from llama_index.core import Document

# Wrap every text chunk in a LlamaIndex Document so the framework can index it.
documents = [Document(text=chunk) for chunk in chunks]

## Save to Chroma


In [9]:
import chromadb

# Create an on-disk Chroma client and a fresh, empty collection.
# chromadb.PersistentClient keeps the database under `path`, so it outlives the
# kernel (chromadb.EphemeralClient is the in-memory alternative).
COLLECTION_NAME = "mini-chunked-dataset"

chroma_client = chromadb.PersistentClient(path="./mini-chunked-dataset")

# create_collection() raises when the name is already taken, and because the
# database is persisted that made this cell fail on every re-run. Drop a
# leftover collection first so each run starts from an empty one.
# getattr() tolerates list_collections() returning either Collection objects
# or plain name strings, which differs between chromadb releases.
existing_names = {
    getattr(collection, "name", collection)
    for collection in chroma_client.list_collections()
}
if COLLECTION_NAME in existing_names:
    chroma_client.delete_collection(COLLECTION_NAME)

chroma_collection = chroma_client.create_collection(COLLECTION_NAME)

In [10]:
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import StorageContext

# Wrap the Chroma collection in a LlamaIndex vector store, then build the
# storage context around that vector store.
vector_store = ChromaVectorStore(
    chroma_collection=chroma_collection,
)
storage_context = StorageContext.from_defaults(
    vector_store=vector_store,
)

In [11]:
from llama_index.core import VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.openai import OpenAIEmbedding

# Build the vector index from the documents. Embeddings come from OpenAI's
# "text-embedding-3-small" model, and the storage context defined earlier is
# passed in so the index uses its vector store.
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
    embed_model=OpenAIEmbedding(model="text-embedding-3-small"),
    show_progress=True,
)

Parsing nodes:   0%|          | 0/335 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/335 [00:00<?, ?it/s]

## Query the Dataset


In [12]:
from llama_index.llms.gemini import Gemini

# LLM that writes the final answer from the retrieved text.
llm = Gemini(
    model="models/gemini-1.5-flash",
    temperature=1,
    max_tokens=512,
)

# The query engine retrieves the related pieces of text (similarity_top_k=5)
# and passes them to the LLM to formulate the answer.
query_engine = index.as_query_engine(similarity_top_k=5, llm=llm)

In [13]:
# Send the question through the query engine and print the generated answer.
response = query_engine.query("How many parameters LLaMA2 model has?")
print(response)

The Llama 2 model comes in four sizes: 7 billion, 13 billion, 34 billion, and 70 billion parameters.



# Interface of Chroma with LangChain


In [14]:
from langchain.schema.document import Document

# Wrap every text chunk in a LangChain Document. Note that this rebinds
# `documents`, which until now held the LlamaIndex Document objects.
documents = [Document(page_content=chunk) for chunk in chunks]

## Save to Chroma


In [15]:
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings

# Embed the documents with OpenAI and store them in a Chroma collection.
#
# The collection name is deliberately different from "mini-chunked-dataset",
# the collection the LlamaIndex section already filled in this same persist
# directory. Reusing that name made this call add the chunks to the populated
# collection a second time, so similarity search could return the same passage
# twice instead of k distinct passages.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

chroma_db = Chroma.from_documents(
    documents=documents,
    embedding=embeddings,
    persist_directory="./mini-chunked-dataset",
    collection_name="mini-chunked-dataset-langchain",
)

## Query the Dataset


In [16]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Gemini 1.5 Flash is the answering model for the LangChain pipeline.
# This rebinds `llm`, which previously held the LlamaIndex Gemini wrapper.
llm = ChatGoogleGenerativeAI(
    max_tokens=512,
    temperature=0,
    model="gemini-1.5-flash",
)

In [17]:
from langchain.chains import RetrievalQA

query = "How many parameters LLaMA 2 model has?"

# Retriever over the Chroma store, configured with k=4 results per query.
retriever = chroma_db.as_retriever(search_kwargs={"k": 4})

# RetrievalQA chain: fetches the related pieces of text through the retriever
# and uses the LLM to formulate the final answer.
chain = RetrievalQA.from_chain_type(
    retriever=retriever,
    chain_type="stuff",
    llm=llm,
)

response = chain.invoke(query)
print(response["result"])

The provided text gives the parameter range for the original LLaMA models as 7 billion to 65 billion.  It does not contain information about the parameter count for LLaMA 2.

