Copyright 2024 shins777@gmail.com

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

   https://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

## Gemini Pro - Grounding Service : PDF reading.

Feedback : shins777@gmail.com. 

* 이 Colab은 Google Gemini 사용 시 Multimodal 환경에서 PDF 파일을 직접 읽어서 Grounding service로 활용하기 위한 예제입니다.

In [1]:
%pip install --upgrade --quiet google-cloud-aiplatform

Note: you may need to restart the kernel to use updated packages.


In [2]:
from IPython.display import display, Markdown

### GCP 프로젝트 및 리전 설정
본인의 GCP 환경에 맞게 아래 설정을 구성하세요.  
* 구글의 최신버전인 gemini pro 사용을 권고드립니다.   
* 현재 Gemini는 한국리전(asia-northeast3)을 통해서 접근이 가능합니다. 다만 아래 예제 코드는 REGION을 us-central1으로 설정하고 있으므로, 사용하려는 모델이 제공되는 리전에 맞게 값을 변경하세요.

In [3]:
# Identifier of the Gemini model handed to GenerativeModel(...).
MODEL = "gemini-1.5-pro-preview-0409"

# GCP project and region handed to vertexai.init(...).
PROJECT_ID = "ai-hangsik"
REGION = "us-central1"

In [4]:
import base64

def get_obj_content(coding='utf-8', obj_loc=""):
    """Return the contents of a file as base64 text.

    Args:
        coding: Codec used to turn the base64 bytes into a ``str``.
        obj_loc: Path of the file to read (opened in binary mode).

    Returns:
        The base64 encoding of the file's bytes, decoded with ``coding``.
    """
    with open(obj_loc, mode='rb') as handle:
        raw_bytes = handle.read()

    encoded = base64.b64encode(raw_bytes)
    return encoded.decode(coding)

In [5]:
import base64
import vertexai
from vertexai.generative_models import GenerativeModel, Part, FinishReason
import vertexai.preview.generative_models as generative_models

def generate(file_path: str, query: str):
    """Ask the configured Gemini model a question about a local PDF file.

    The PDF is attached to the request as an inline ``application/pdf``
    part, followed by the text query.

    Args:
        file_path: Path of the PDF file on the local filesystem.
        query: Question or instruction sent together with the PDF.

    Returns:
        The result of ``model.generate_content(..., stream=True)``. Iterate
        over it and read ``.text`` on each item to get the answer piece by
        piece.

    Raises:
        OSError: If ``file_path`` cannot be opened or read.
    """
    vertexai.init(project=PROJECT_ID, location=REGION)
    model = GenerativeModel(MODEL)

    # Part.from_data is given the raw file bytes, so read them directly
    # instead of base64-encoding the file and immediately decoding it again.
    with open(file_path, "rb") as pdf_file:
        pdf_bytes = pdf_file.read()
    doc_obj = Part.from_data(data=pdf_bytes, mime_type="application/pdf")

    # Sampling settings forwarded unchanged to the model.
    generation_config = {
        "max_output_tokens": 8192,
        "temperature": 1,
        "top_p": 0.95,
    }

    # Every harm category listed here uses the same blocking threshold.
    harm_category = generative_models.HarmCategory
    threshold = generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE
    safety_settings = {
        harm_category.HARM_CATEGORY_HATE_SPEECH: threshold,
        harm_category.HARM_CATEGORY_DANGEROUS_CONTENT: threshold,
        harm_category.HARM_CATEGORY_SEXUALLY_EXPLICIT: threshold,
        harm_category.HARM_CATEGORY_HARASSMENT: threshold,
    }

    # The document goes first, then the question about it.
    responses = model.generate_content(
        [doc_obj, query],
        generation_config=generation_config,
        safety_settings=safety_settings,
        stream=True,
    )

    return responses


In [6]:
# Local PDF that is sent to the model together with the question.
file_path = "/home/admin_/google_gen_ai_sample/contents/pdfs/20211102083535653K.pdf"
# Alternative query kept for reference (disabled): it asks, based on "Table 2",
# what percentage the 30,000~50,000 price band accounts for at 29CM.
#query = "표 2. 온라인 패션 플랫폼별 구입 가격대 현황에 따르면 29CM 에서의 30,000~50,000 가격대는 몇퍼센트인가요 ?"
# Active query: asks the model to compare the 2019A and 2020A values of one
# series in the "Earnings & Investment Ratios" graph.
query = "Earnings & Investment Ratios 그래프에서 지배주주순이익률 (%) 의 경우 2019A 와 2020A 를 비교해서 설명해주세요. "

# generate() is called with stream=True, so the result is iterated item by item.
responses = generate(file_path, query)

# Print each piece as it arrives; end="" joins the pieces without extra newlines.
for response in responses:
    print(response.text, end="")

2019년 지배주주순이익률은 19.8%인데 반해, 2020년에는 11.3%로 8.5%p 하락했습니다. 
