In [40]:
%pip install --upgrade --user google-cloud-aiplatform pymupdf rich colorama

Note: you may need to restart the kernel to use updated packages.


In [18]:
# Restart kernel after installs so that your environment can access the new packages
import IPython

# Fetch the running IPython application and ask its kernel to shut down with
# restart enabled (the True argument); the call's result is the cell output.
IPython.Application.instance().kernel.do_shutdown(True)

{'status': 'ok', 'restart': True}

In [2]:
import os
# Point the Google client libraries at a local service-account key file, but only
# when the environment does not already name one, so credentials configured
# outside the notebook are not overwritten. Keep key.json out of version control.
# Alternative (user credentials instead of a key file):
# %system ~/google-cloud-sdk/bin/gcloud auth application-default login
if "GOOGLE_APPLICATION_CREDENTIALS" not in os.environ:
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "./key.json"

In [3]:
from dotenv import load_dotenv
import os

# Load settings from a .env file (if one is found) into the process environment.
load_dotenv()

# Region and project handed to vertexai.init in the next cell.
# google_project_id is None when GOOGLE_PROJECT_ID is not set.
google_location = "europe-west1"
google_project_id = os.environ.get("GOOGLE_PROJECT_ID")

In [4]:
import vertexai
# Initialise the Vertex AI SDK with the project and region chosen above.
vertexai.init(
    location=google_location,
    project=google_project_id,
)

In [5]:
from IPython.display import Markdown, display
from rich.markdown import Markdown as rich_Markdown
from vertexai.generative_models import GenerationConfig, GenerativeModel, Image

# Model handles used throughout the notebook: the text handle and the main
# multimodal handle both use gemini-1.5-pro; the flash handle uses gemini-1.5-flash.
multimodal_model_flash = GenerativeModel(model_name="gemini-1.5-flash")
multimodal_model = GenerativeModel(model_name="gemini-1.5-pro")
text_model = GenerativeModel(model_name="gemini-1.5-pro")


In [6]:
# Prompt sent along with each extracted image; one instruction per line,
# each line (including the last) terminated by a newline.
image_description_prompt = (
    "Explain what is going on in the image.\n"
    "If it's a table, extract all elements of the table.\n"
    "If it's a graph, explain the findings in the graph.\n"
    "Do not include any numbers that are not mentioned in the image.\n"
)

In [10]:
from helpers import get_document_metadata

# Run the metadata extraction helper over the PDF content under ./pdf.
# It returns two DataFrames: one for text and one for images; extracted
# images are saved under ./images.
text_metadata_df, image_metadata_df = get_document_metadata(
    multimodal_model,
    "./pdf",
    embedding_size=1408,
    image_description_prompt=image_description_prompt,
    image_save_dir="./images",
    # Optional arguments, currently disabled:
    # add_sleep_after_page = True, # Uncomment this if you are running into API quota issues
    # sleep_time_after_page = 5,
    # generation_config = # see next cell
    # safety_settings =  # see next cell
)



 Processing the file: --------------------------------- ./pdf/epam-report.pdf 


Processing page: 1
Extracting image from page: 1, saved as: ./images/epam-report.pdf_image_0_0_9.jpeg
Processing page: 2
Processing page: 3
Processing page: 4
Processing page: 5


In [11]:
# Preview the first five rows of the text metadata table.
text_metadata_df.head(5)

Unnamed: 0,file_name,page_num,text,text_embedding_page,chunk_number,chunk_text,text_embedding_chunk
0,epam-report.pdf,1,"V I E W A L L N E W S\nD ATE\nNovember 7, 20...","[-0.019254421815276146, -0.026206426322460175,...",1,"V I E W A L L N E W S\nD ATE\nNovember 7, 20...","[-0.01310722902417183, -0.013363610953092575, ..."
1,epam-report.pdf,1,"V I E W A L L N E W S\nD ATE\nNovember 7, 20...","[-0.019254421815276146, -0.026206426322460175,...",2,"PAM Systems, Inc. (NYSE: EPAM), a leading digi...","[-0.001168870716355741, -0.05384514853358269, ..."
2,epam-report.pdf,1,"V I E W A L L N E W S\nD ATE\nNovember 7, 20...","[-0.019254421815276146, -0.026206426322460175,...",3,uarter of 2023;\nWe recorded a benefit of $52....,"[-0.01423549372702837, -0.026544077321887016, ..."
3,epam-report.pdf,1,"V I E W A L L N E W S\nD ATE\nNovember 7, 20...","[-0.019254421815276146, -0.026206426322460175,...",4,"ion, an increase of $63.0 million, or 55.2%, c...","[-0.009012407623231411, -0.01399849634617567, ..."
4,epam-report.pdf,1,"V I E W A L L N E W S\nD ATE\nNovember 7, 20...","[-0.019254421815276146, -0.026206426322460175,...",5,ncrease to the effective tax rate;\nNon-GAAP d...,"[-0.005996230058372021, -0.01537022739648819, ..."


In [12]:
# Preview the first five rows of the image metadata table.
image_metadata_df.head(5)

Unnamed: 0,file_name,page_num,img_num,img_path,img_desc,mm_embedding_from_img_only,text_embedding_from_image_description
0,epam-report.pdf,1,1,./images/epam-report.pdf_image_0_0_9.jpeg,The image presents a financial report from EPA...,"[-0.00892232917, 0.015445943, 0.00693804584, -...","[-0.018329229205846786, -0.003188361646607518,..."


In [14]:
from helpers import (
    display_images,
    get_gemini_response,
    get_similar_image_from_query,
    get_similar_text_from_query,
    print_text_to_image_citation,
    print_text_to_text_citation,
)

# User question to answer from the extracted document text.
query = "I need details for basic and diluted net income for EPAM?"

# Ask the helper for the top 3 matches, comparing the query against the
# chunk-level embedding column and returning chunk text.
matching_results_text = get_similar_text_from_query(
    query, text_metadata_df, column_name="text_embedding_chunk", top_n=3, chunk_text=True
)

# Show the citation details for the matched text.
print_text_to_text_citation(matching_results_text, chunk_text=True, print_top=False)

[91mCitation 1: Matched text: 
[0m
[94mscore: [0m 0.76
[94mfile_name: [0m epam-report.pdf
[94mpage_number: [0m 2
[94mchunk_number: [0m 5
[94mchunk_text: [0m nderlying trends in EPAM's business and uses the measures to establish
budgets and operational goals, communicate internally and externally, for managing EPAM's business and evaluating its performance. Management also
believes these measures help investors compare EPAM's operating performance with its results in prior periods. EPAM anticipates that it will continue to
report both GAAP and certain non-GAAP financial measures in its financial results, including non-GAAP results that exclude stock-based compensation
expenses, acquisition-related costs including amortization of acquired intangible assets, impairment of assets, expenses associated with EPAM's
humanitarian commitment to its professionals in Ukraine, unbilled business continuity resources resulting from Russia's invasion of Ukraine, costs
associated with the g

In [15]:
print("\n **** Result: ***** \n")

# Join the chunk text of every match (one per line) into a single context string.
# Only the values of the results mapping are needed, so iterate over them directly.
context = "\n".join(
    result["chunk_text"] for result in matching_results_text.values()
)

# Prompt that asks the model to answer from the supplied context only and to
# say so explicitly when the context does not contain the answer.
instruction = f"""Answer the question with the given context.
If the information is not available in the context, just return "not available in the context".
Question: {query}
Context: {context}
Answer:
"""

# Prepare the model input
model_input = instruction

# Generate Gemini response with streaming output; the returned value is the
# cell's last expression and is therefore displayed as the cell output.
get_gemini_response(
    text_model,  # text_model is created as "gemini-1.5-pro" earlier in this notebook
    model_input=model_input,
    stream=True,
    generation_config=GenerationConfig(temperature=0.2),
)


 **** Result: ***** 



"The provided text gives information about EPAM's non-GAAP diluted EPS, but does not contain the values for basic and diluted net income. Thus, the answer is **not available in the context**. \n"