# Install Packages and Set Up Variables

In [1]:
!pip install -q llama-index==0.10.49 openai==1.35.3 llama-index-llms-vertex==0.2.0 google-cloud-aiplatform==1.56.0

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m327.4/327.4 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.4/15.4 MB[0m [31m24.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m19.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m38.3/38.3 MB[0m [31m10.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m130.8/130.8 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m41.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.9/77.9 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━

In [2]:
from google.cloud import aiplatform
from google.colab import auth

# Replace the placeholder with your own Google Cloud project id.
PROJECT_ID = "[your-project-id]"

# Sign in first (google.colab helper, so this cell expects a Colab runtime).
auth.authenticate_user()

# Point the Vertex AI SDK at the project and region used by the rest of the notebook.
aiplatform.init(location="us-central1", project=PROJECT_ID)

In [3]:
import os
from getpass import getpass

# The OpenAI client used later reads "OPENAI_API_KEY" from the environment.
# Keep a key that is already exported; otherwise prompt for it without echo so
# the secret is never written into the notebook source or its saved outputs.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("Enter your OpenAI API key: ")

# Load Dataset

## Download

The dataset includes several articles from the TowardsAI blog, which provide an in-depth explanation of the LLaMA2 model.

In [4]:
!curl -o ./mini-dataset.csv https://raw.githubusercontent.com/AlaFalaki/tutorial_notebooks/main/data/mini-llama-articles.csv

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  169k  100  169k    0     0   645k      0 --:--:-- --:--:-- --:--:--  647k


## Read File

In [5]:
import csv

# Load the CSV file. `newline=""` hands line-ending handling to the csv module,
# which is required for quoted fields that contain embedded newlines.
with open("./mini-dataset.csv", mode="r", encoding="utf-8", newline="") as file:
  csv_reader = csv.reader(file)

  next(csv_reader, None)  # Skip the header row (no-op if the file is empty).
  rows = list(csv_reader)  # Every remaining row is one article.

# The number of articles (data rows) in the dataset.
print( "number of articles:", len( rows ) )

number of articles: 14


# Generate Embedding

In [6]:
from llama_index.core import Document

# Wrap every article in a LlamaIndex Document so the framework can process it.
# The text is taken from the second CSV column (index 1) of each row.
documents = []
for record in rows:
  documents.append(Document(text=record[1]))

In [7]:
from llama_index.core import VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.openai import OpenAIEmbedding


# OpenAI embedding model used to vectorize the chunks.
embedder = OpenAIEmbedding(model="text-embedding-3-small")

# Splitter that cuts the documents into overlapping chunks before embedding.
splitter = SentenceSplitter(chunk_size=768, chunk_overlap=64)

# Chunk the documents, embed the chunks, and collect them in a vector index.
index = VectorStoreIndex.from_documents(
    documents,
    transformations=[splitter],
    embed_model=embedder,
    show_progress=True,
)

Parsing nodes:   0%|          | 0/14 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/56 [00:00<?, ?it/s]

# Query Dataset

In [9]:
# Build the query engine on top of the index and hand it the LLM below
# (a Gemini model served through Vertex AI) for answering queries.

from llama_index.llms.vertex import Vertex

llm = Vertex(
    model="gemini-1.5-flash-001",
    temperature=1,
    max_tokens=512,
)

query_engine = index.as_query_engine(llm=llm)

In [10]:
# Ask the query engine a question about LLaMA2 and print its answer.
question = "How many parameters LLaMA2 model has?"
response = query_engine.query(question)
print(response)

LLaMA 2 comes in four different model sizes, with 7 billion, 13 billion, 34 billion, and 70 billion parameters. 



In [11]:
# Ask the query engine about a Llama3 release date and print its answer.
question = "When will Llama3 will be released?"
response = query_engine.query(question)
print(response)

The context does not provide a release date for Llama 3. 

