In [None]:
# Copyright 2024 Forusone(shins777@gmail.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Context Caching
* [Gemini Context Caching](https://cloud.google.com/vertex-ai/generative-ai/docs/context-cache/context-cache-overview)
  * Use context caching to reduce the cost of requests that contain repeat content with high input token counts. Cached context items, such as a large amount of text, an audio file, or a video file, can be used in prompt requests to the Gemini API to generate output.
  * For example, each prompt request that composes a chat conversation might include the same context cache that references a video along with unique text that comprises each turn in the chat. The minimum size of a context cache is 32,768 tokens.

* [Supported MIME types](https://cloud.google.com/vertex-ai/generative-ai/docs/context-cache/context-cache-overview#supported-mime-types)


## Set configuration

### Install packages

In [1]:
%pip install --upgrade --user --quiet google-cloud-aiplatform

### Authentication to access to GCP
* Only for Colab in Google Drive
* No need to do this process if in Colab Enteprise on Vertex AI.

In [2]:
# To use markdown for output data from LLM
from IPython.display import display, Markdown

# Use OAuth to access the GCP environment.
import sys
if "google.colab" in sys.modules:
    from google.colab import auth
    auth.authenticate_user()

## Lab Execution

### Define constants

In [3]:

PROJECT_ID = "ai-hangsik"  # @param {type:"string"}
LOCATION = "us-central1"  # @param {type:"string"}
MODEL_NAME = "gemini-1.5-flash-002" # @param {type:"string"}

### Import libraries

In [5]:
import vertexai

from vertexai.generative_models import (
    GenerationConfig,
    # GenerativeModel, replace by prview version.
    HarmBlockThreshold,
    HarmCategory,
    GenerationResponse,
    Tool,
    Part,
    ChatSession
)

from vertexai.preview.generative_models import (
    grounding,
    GenerativeModel
)
from vertexai.preview import (
    caching
)

### Initialize Vertex AI

In [6]:

# https://cloud.google.com/python/docs/reference/aiplatform/latest#initialization
vertexai.init(project=PROJECT_ID, location=LOCATION)

# https://cloud.google.com/vertex-ai/generative-ai/docs/reference/python/latest/vertexai.generative_models.GenerativeModel
model = GenerativeModel(MODEL_NAME)

### Helper functions

### Considerations for preview version
* [Context cache limit](https://cloud.google.com/vertex-ai/generative-ai/docs/context-cache/context-cache-create#context_cache_limits)

In [12]:
#--------------------------------------------------------------------------------------------
def create_cache_pdf(pdfs:list, display_name:str):
  """
  Create pdf cache contents

  Args:
    pdfs: list
      pdf file list
    display_name: str
      display name of the cache content.

  Returns:
    cached_content: CachedContent
      cached content object

  """

  import datetime

  system_instruction = """
    당신은 체계적인 문헌 조사와 다양한 주제에 대한 메타 분석을 수행한 수년간의 경험을 가진 전문 연구자입니다.
    당신은 항상 제공된 출처의 사실을 고수하고 새로운 사실을 결코 만들어내지 않습니다.

  """

  contents = [Part.from_uri(pdf, mime_type="application/pdf") for pdf in pdfs ]

  cached_content = caching.CachedContent.create(
      model_name=MODEL_NAME,
      display_name=display_name,
      system_instruction=system_instruction,
      contents=contents,
      ttl=datetime.timedelta(minutes=60),
  )

  return cached_content

#--------------------------------------------------------------------------------------------
def generate_with_cache(query:str,
                        cache_content_name:str)-> GenerationResponse:

  """
  Generate a response from the model.

  Args:
    query: str
      query to send to the model.
    cache_content_name: str
      cached content name.

  Returns:
    resonse: GenerationResponse
      generated response.

  """

  cached_content = caching.CachedContent(cached_content_name=cache_content_name)
  model = GenerativeModel.from_cached_content(cached_content=cached_content)
  response = model.generate_content(query)

  return response

#--------------------------------------------------------------------------------------------
def multi_turn_with_cache(cha:ChatSession,
                          query:str, )->GenerationResponse:
  """
  Generate a response from the chat model.

  Args:
    chat:ChatSession
      chat session object.
    query: str
      query to send to the model.

  Returns:
    resonse: GenerationResponse
      generated response.

  """
  response = chat.send_message(query)
  return response


### Create context cache

In [13]:

pdfs = ["gs://cloud-samples-data/generative-ai/pdf/2312.11805v3.pdf",
        "gs://cloud-samples-data/generative-ai/pdf/2403.05530.pdf"
        ]
display_name = "Gemini-pdf-cache"

cached_content = create_cache_pdf(pdfs, display_name)

print(cached_content.name)
print(cached_content.display_name)


7771568087446323200
Gemini-pdf-cache


### Use context caching in single turn operation

In [14]:

query = "이 연구 논문들은 무슨 내용을 이야기하고 있나요?"
response = generate_with_cache(query, cached_content.name )

print(response.text)


이 연구 논문은 구글 딥마인드의 새로운 다중 모드 모델인 제미니(Gemini) 1.5 Pro에 관한 내용을 다룹니다. 제미니 1.5 Pro는 이미지, 오디오, 비디오 및 텍스트를 이해하는 데 탁월한 능력을 보이는 모델입니다. 이 논문은 제미니 1.5 Pro 모델의 아키텍처, 훈련 인프라, 데이터 세트, 성능 평가 결과 및 책임 있는 배포에 대한 접근 방식을 자세히 설명합니다. 또한, 제미니 1.5 Pro 모델의 특징인 긴 문맥을 처리하는 능력과 다양한 다중 모드 작업에서의 성능을 조명하고, 긴 문맥의 이해 능력에 대한 제한 사항과 이에 대한 후속 연구의 필요성을 지적합니다. 특히, 새로운 언어를 학습하여 번역하는 기능과 같은 놀라운 능력을 보이는 사례를 제시합니다.


### Use context caching in multi-turn chat

In [15]:
chat = model.start_chat()

while True:
  query = input('사용자: ')

  if query == '종료': break

  prompt = f"""
  당신은 Generatie AI 기술을 설명해주는 AI Assistant 입니다.
  아래 질문에 대해서 친절하게 10줄 이내로 답해주세요.

  질문 : {query}
  """

  response = multi_turn_with_cache(chat, prompt)
  display(Markdown(f"AI Agent : {response.text}"))
  print(f"------------------------------------ ")


사용자: 안녕하세요. 


AI Agent : 안녕하세요! 무엇을 도와드릴까요?  Generative AI 기술에 대해 궁금하신 점이 있으시면 언제든지 질문해주세요.  제가 아는 한도 내에서 최선을 다해 설명해 드리겠습니다.  어떤 주제에 대해 알고 싶으신가요? (예: 이미지 생성, 텍스트 생성, 코드 생성 등)


------------------------------------ 
사용자: 제미나이가 이미지 생성을 어떻게 하나요 ?


AI Agent : 구글의 Gemini는 방대한 이미지와 텍스트 데이터셋을 학습하여 이미지를 생성합니다.  텍스트 설명(프롬프트)을 입력하면, 학습된 정보를 바탕으로 이미지의 구성요소, 스타일, 색상 등을 예측하고 생성합니다.  이는 복잡한 수학적 모델(일반적으로는 확산 모델의 변형)을 사용하여 이루어지며,  픽셀 단위로 이미지를 점진적으로 생성해 나갑니다.  결과적으로 사용자의 프롬프트에 부합하는 이미지가 만들어지는 것입니다.  단순히 텍스트를 이미지로 변환하는 것이 아니라,  학습된 지식을 토대로 새로운 이미지를 생성하는 능력을 갖춘 것이죠.  즉,  새로운 창작물을 만들어내는 것입니다.


------------------------------------ 
사용자: 종료


## Cache context management

### Management helper functions

In [18]:
#--------------------------------------------------------------------------------------------
def print_cache_content_list():

  """
  Print cache content list in the current project and region that are specified when initializing vertex ai.

  Args: None
  Returns: None
  """

  cache_list = caching.CachedContent.list()
  for cached_content in cache_list:

    print_cache_content(cached_content.name)

#--------------------------------------------------------------------------------------------
def print_cache_content(cache_content_name:str):

  """
  Print a cache content corresponding to the given cache content name.

  Args:
    cache_content_name: str
      cache content name.

  Returns: None
  """

  cached_content = caching.CachedContent(cached_content_name=cache_content_name)

  print("-"*30)
  print(f"[cache_content]name : {cached_content.display_name}")
  print(f"[cache_content]name : {cached_content.name}")
  print(f"[cache_content]resource_name : {cached_content.resource_name}")
  print(f"[cache_content]model_name : {cached_content.model_name}")
  print(f"[cache_content]create_time : {cached_content.create_time}")
  print(f"[cache_content]update_time : {cached_content.update_time}")
  print(f"[cache_content]expire_time : {cached_content.expire_time}")

#--------------------------------------------------------------------------------------------
def update_cache_ttl(cache_content_name:str):

  """
  Update TTL of the cache content corresponding to the given cache content name.

  Args:
    cache_content_name: str
      cache content name.

  Returns: None
  """

  import datetime

  cached_content = caching.CachedContent(cached_content_name=cache_content_name)
  cached_content.update(ttl=datetime.timedelta(minutes=30))

  cached_content.refresh()
  print(f"Updated TTL: {cached_content.expire_time}")

  print_cache_content(cached_content.name)

#--------------------------------------------------------------------------------------------
def delete_cache(cache_content_name:str):
  """
  Delete a cache content corresponding to the given cache content name.

  Args:
    cache_content_name: str
      cache content name.

  Returns: None
  """

  cached_content = caching.CachedContent(cached_content_name=cache_content_name)
  cached_content.delete()
  print(f"Deleted {cached_content.name}")


### Get Cache contents

In [22]:
print_cache_content_list()

------------------------------
[cache_content]name : Gemini-pdf-cache
[cache_content]name : 7771568087446323200
[cache_content]resource_name : projects/ai-hangsik/locations/us-central1/cachedContents/7771568087446323200
[cache_content]model_name : projects/ai-hangsik/locations/us-central1/publishers/google/models/gemini-1.5-flash-002
[cache_content]create_time : 2024-12-25 23:22:17.655480+00:00
[cache_content]update_time : 2024-12-25 23:22:17.655480+00:00
[cache_content]expire_time : 2024-12-26 00:22:17.645143+00:00
------------------------------
[cache_content]name : Gemini-pdf-cache
[cache_content]name : 6535329989733122048
[cache_content]resource_name : projects/ai-hangsik/locations/us-central1/cachedContents/6535329989733122048
[cache_content]model_name : projects/ai-hangsik/locations/us-central1/publishers/google/models/gemini-1.5-flash-002
[cache_content]create_time : 2024-12-25 23:21:26.172392+00:00
[cache_content]update_time : 2024-12-25 23:21:26.172392+00:00
[cache_content]exp

### Update cache TTL

In [20]:
cached_content_name = "151477517935443968"

update_cache_ttl(cached_content_name)


Updated TTL: 2024-12-25 23:56:06.179969+00:00
------------------------------
[cache_content]name : Gemini-pdf-cache
[cache_content]name : 151477517935443968
[cache_content]resource_name : projects/ai-hangsik/locations/us-central1/cachedContents/151477517935443968
[cache_content]model_name : projects/ai-hangsik/locations/us-central1/publishers/google/models/gemini-1.5-flash-002
[cache_content]create_time : 2024-12-25 23:19:18.625584+00:00
[cache_content]update_time : 2024-12-25 23:26:06.181863+00:00
[cache_content]expire_time : 2024-12-25 23:56:06.179969+00:00


### Delete cache

In [21]:
cached_content_name = "151477517935443968"

delete_cache(cached_content_name)

INFO:google.cloud.aiplatform.base:Deleting CachedContent : projects/ai-hangsik/locations/us-central1/cachedContents/151477517935443968


Deleted 151477517935443968
