In [None]:
# Copyright 2024 Forus(shins777@gmail.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Image Recognition


### Install the Vertex AI SDK and the Cloud Vision client library for Python


In [1]:
!pip3 install --upgrade --quiet google-cloud-aiplatform \
                                google-cloud-vision

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/6.3 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.4/6.3 MB[0m [31m13.1 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━[0m [32m3.0/6.3 MB[0m [31m45.8 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m6.3/6.3 MB[0m [31m67.6 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m6.3/6.3 MB[0m [31m67.6 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.3/6.3 MB[0m [31m40.8 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/486.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m486.9/486.9 kB[0m [31m26.1 MB/s[0m eta [36m0:00:00[0m
[?25h

### Authenticate to access Google Cloud

In [2]:
import sys
from IPython.display import Markdown, display

if "google.colab" in sys.modules:
    from google.colab import auth
    auth.authenticate_user(project_id="ai-hangsik")

!gcloud config set project ai-hangsik

Updated property [core/project].


In [3]:
# Notebook-wide settings; the Vertex AI initialization cell reads all three.
MODEL_NAME = "gemini-1.5-flash"   # model identifier passed to GenerativeModel
PROJECT_ID = "ai-hangsik"         # project passed to vertexai.init
REGION = "asia-northeast3"        # location passed to vertexai.init

### Vertex AI initialization


In [4]:
from vertexai.generative_models import (
    GenerationConfig,
    GenerationResponse,
    GenerativeModel,
    HarmBlockThreshold,
    HarmCategory,
    Part,
    Tool
)

import vertexai
from vertexai.preview.generative_models import grounding

# Point the SDK at the configured project and region, then create the model
# handle that the helper functions below use as the global `model`.
vertexai.init(
    project=PROJECT_ID,
    location=REGION,
)
model = GenerativeModel(model_name=MODEL_NAME)

## Detect web entities and pages with the Vision API
* https://cloud.google.com/vision/docs/detecting-web

### Helper functions

In [13]:
def detect_landmarks(path):
    """Detect landmarks in a local image file and print what was found.

    Reads the file at ``path`` in binary mode, sends its bytes to the Vision
    client's ``landmark_detection`` call, and prints the raw annotation list
    followed by each landmark's description and the latitude/longitude of
    every location attached to it. Nothing is returned.

    Args:
        path: Filesystem path of the image to analyse.

    Raises:
        Exception: If the response carries an error message. The check runs
            before anything is printed, so a failed request is not first
            reported as an empty ``Landmarks:[]`` result.
    """
    from google.cloud import vision

    client = vision.ImageAnnotatorClient()

    with open(path, "rb") as image_file:
        content = image_file.read()

    image = vision.Image(content=content)

    response = client.landmark_detection(image=image)

    # Fail fast: inspect the error status before touching the annotations.
    if response.error.message:
        raise Exception(
            "{}\nFor more info on error messages, check: "
            "https://cloud.google.com/apis/design/errors".format(response.error.message)
        )

    landmarks = response.landmark_annotations
    print(f"Landmarks:{landmarks}")

    for landmark in landmarks:
        print(landmark.description)
        for location in landmark.locations:
            lat_lng = location.lat_lng
            print(f"Latitude {lat_lng.latitude}")
            print(f"Longitude {lat_lng.longitude}")


def web_detection(path:str):
    """Run Vision web detection on a local image file.

    Args:
        path: Filesystem path of the image to analyse; read in binary mode.

    Returns:
        A ``(annotations, best_guess)`` tuple. ``annotations`` is the
        ``web_detection`` field of the response. ``best_guess`` is the label
        of the first best-guess entry, or ``None`` when the response has no
        best-guess labels (previously this case raised ``IndexError``).

    Raises:
        Exception: If the response carries an error message.
    """
    from google.cloud import vision

    client = vision.ImageAnnotatorClient()  # create an ImageAnnotatorClient instance

    with open(path, "rb") as image_file:
        content = image_file.read()

    image = vision.Image(content=content)

    # [web_detection_requests] https://cloud.google.com/vision/docs/detecting-web#web_detection_requests
    response = client.web_detection(image=image)  # call web_detection() through the client instance

    # Report an API failure explicitly instead of failing later on an empty result.
    if response.error.message:
        raise Exception(
            "{}\nFor more info on error messages, check: "
            "https://cloud.google.com/apis/design/errors".format(response.error.message)
        )

    # Named `annotations` so the local does not shadow this function's own name.
    annotations = response.web_detection
    labels = annotations.best_guess_labels
    best_guess = labels[0].label if labels else None

    return annotations, best_guess

def search(prompt:str)->"GenerationResponse":
    """Ask the module-level ``model`` to answer ``prompt`` with Google Search grounding.

    Args:
        prompt: Text prompt sent to the model.

    Returns:
        The object returned by ``model.generate_content`` (not a plain
        string). The notebook passes it to ``print_grounding_response``,
        which reads its ``candidates`` and ``text``.
    """
    # Grounding tool that lets the model retrieve from Google Search.
    tool = Tool.from_google_search_retrieval(grounding.GoogleSearchRetrieval())

    response = model.generate_content(
        prompt,
        tools=[tool],
        generation_config=GenerationConfig(
            temperature=0.0,
        ),
    )

    return response


def print_grounding_response(response: GenerationResponse):
    """Build a Markdown view of a grounded Gemini response with citations.

    The response text is cut at the end index of every grounding support,
    and each piece is followed by ``[n]`` markers for the chunks that back
    it. A "Grounding Sources" section is appended with the search or
    retrieval queries and a numbered list of the grounding chunks.

    Despite the name, nothing is rendered here: the ``Markdown`` object is
    returned so the caller (or the notebook's last-expression display) can
    show it. A chunk with neither web nor retrieved context is reported via
    ``print`` and left out of the list.
    """
    # Citation indices are in byte units, so slice the encoded text.
    ENCODING = "utf-8"

    metadata = response.candidates[0].grounding_metadata
    raw = response.text.encode(ENCODING)

    pieces = []
    cursor = 0
    for support in metadata.grounding_supports:
        stop = support.segment.end_index
        segment = raw[cursor:stop].decode(ENCODING)
        marks = "".join(f"[{chunk_idx + 1}]" for chunk_idx in support.grounding_chunk_indices)
        pieces.append(f"{segment} {marks}\n")
        cursor = stop

    # Whatever follows the last cited segment is appended without markers.
    if cursor < len(raw):
        pieces.append(raw[cursor:].decode(ENCODING))

    pieces.append("\n----\n## Grounding Sources\n")

    if metadata.web_search_queries:
        pieces.append(f"\n**Web Search Queries:** {metadata.web_search_queries}\n")
        if metadata.search_entry_point:
            pieces.append(
                f"\n**Search Entry Point:**\n {metadata.search_entry_point.rendered_content}\n"
            )
    elif metadata.retrieval_queries:
        pieces.append(f"\n**Retrieval Queries:** {metadata.retrieval_queries}\n")

    pieces.append("### Grounding Chunks\n")

    for number, chunk in enumerate(metadata.grounding_chunks, start=1):
        source = chunk.web or chunk.retrieved_context
        if source:
            pieces.append(f"{number}. [{source.title}]({source.uri})\n")
            pieces.append(f"\n\n- {source.uri}\n\n")
        else:
            print(f"Skipping Grounding Chunk {chunk}")

    return Markdown("".join(pieces))

### Image Upload


In [11]:
# NOTE(review): google.colab is presumably only importable inside a Colab
# runtime (the authentication cell guards on it) — confirm before running elsewhere.
from google.colab import files
from IPython.display import Image
# Ask the user to upload a file and keep the result in `uploaded`. The recorded
# output shows it saved as gwang.jpg, the path the later cells read.
uploaded = files.upload()


Saving gwang.jpg to gwang.jpg


In [15]:
# Print the landmarks found in the uploaded image; the path is hard-coded to
# the file name shown in the upload cell's output.
detect_landmarks("./gwang.jpg")

Landmarks:[mid: "/m/0r8p49w"
description: "Gwanghwamun Square"
score: 0.656476438
bounding_poly {
  vertices {
  }
  vertices {
    x: 250
  }
  vertices {
    x: 250
    y: 167
  }
  vertices {
    y: 167
  }
}
locations {
  lat_lng {
    latitude: 37.571635199999996
    longitude: 126.97674349999998
  }
}
, mid: "/m/02pjss1"
description: "Gwanghwamun"
score: 0.645151436
bounding_poly {
  vertices {
  }
  vertices {
    x: 250
  }
  vertices {
    x: 250
    y: 167
  }
  vertices {
    y: 167
  }
}
locations {
  lat_lng {
    latitude: 37.5759607
    longitude: 126.97691239999999
  }
}
]
Gwanghwamun Square
Latitude 37.571635199999996
Longitude 126.97674349999998
Gwanghwamun
Latitude 37.5759607
Longitude 126.97691239999999


In [16]:

# Run web detection on the uploaded image; keep the full annotation and the best-guess label.
detection_results, best_guess_label = web_detection("./gwang.jpg")

print(f"Best Guess Label : {best_guess_label}")
print("-" * 100)

# Despite its name, top_10 holds the description of every returned web entity;
# only the first five are printed here, and a later cell reads this list.
top_10 = [f"{entity.description}" for entity in detection_results.web_entities]
print(top_10[:5])

print("-" * 100)

# Last expression of the cell: shows the full annotation as cell output.
detection_results

Best Guess Label : gwanghwamun gate
----------------------------------------------------------------------------------------------------
['Gwanghwamun Gate', 'Gyeongbokgung Palace', 'Gwanghwamun Square', 'Donhwamun Gate', 'Palace']
----------------------------------------------------------------------------------------------------


web_entities {
  entity_id: "/m/02pjss1"
  score: 1.7857337
  description: "Gwanghwamun Gate"
}
web_entities {
  entity_id: "/m/02v3t6"
  score: 1.74925566
  description: "Gyeongbokgung Palace"
}
web_entities {
  entity_id: "/m/0r8p49w"
  score: 1.40334
  description: "Gwanghwamun Square"
}
web_entities {
  entity_id: "/g/122mkvw_"
  score: 1.11983693
  description: "Donhwamun Gate"
}
web_entities {
  entity_id: "/m/05zp8"
  score: 0.734657943
  description: "Palace"
}
web_entities {
  entity_id: "/g/1232pj5h"
  score: 0.5539
  description: "Tourist attractiveness"
}
web_entities {
  entity_id: "/g/120yrv6h"
  score: 0.3734
  description: "Tourism"
}
web_entities {
  entity_id: "/m/014dsx"
  score: 0.3401
  description: "Travel"
}
web_entities {
  entity_id: "/m/07yr8h"
  score: 0.3323
  description: "Historic site"
}
web_entities {
  entity_id: "/m/03c79"
  score: 0.3146
  description: "Gate"
}
full_matching_images {
  url: "https://upload.wikimedia.org/wikipedia/commons/2/2f/Gwanghwa

### Search for information using the detected metadata

In [18]:
# Korean prompt: asks the model to act as an assistant that organizes the
# reference information in detail, focusing on the reference, preferring
# bullet points, and answering in Korean. Only the first web-entity
# description (top_10[:1]) is inserted as the reference information.
prompt = f"""
  당신은 주어진 참고정보를 바탕으로 정보를 자세하게 정리해주는 AI 어시스턴트 입니다.
  1. 참고자료에 초점을 두고 정리해주세요.
  2. 정리는 가급적 bullet point로 자세하게 정리해주세요.
  3. 한국어로 답변해주세요.

  참고정보 : {' '.join(top_10[:1])}

"""
# Send the prompt through the Google Search grounded model call.
response = search(prompt)
# Last expression of the cell: the returned Markdown object becomes the cell output.
print_grounding_response(response)

## 광화문에 대한 정보 정리

* **위치:** 서울의 경복궁 남쪽에 위치하며, 세종로 북쪽 끝의 삼거리에 자리 잡고 있습니다. [1]

* **역사:**
    * 조선 왕조의 첫 번째 왕인 태조가 1395년에 건립했습니다. [2]

    * 조선 시대 서울의 수도로서 오랜 역사를 지닌 랜드마크이자 상징입니다. [1]

    * 여러 차례 파괴와 훼손을 겪었습니다. [1]

    * 흥선대원군이 경복궁을 270년 만에 재건하면서 광화문의 위상을 높이기 위해 건립했습니다. [3]

* **특징:**
    * 경복궁의 정문이자 가장 큰 문입니다.
    * 세 개의 아치형 문으로 구성되어 있습니다. [2]

    * 넓은 플랫폼과 새로운 표지판이 설치되었습니다. [4]

    * 전통적으로 궁궐을 화재로부터 보호하는 신화 속 사자 같은 생물인 해치의 석조 조각상이 문 양쪽에 있습니다. [5]

* **최근:**
    * 2023년 10월에 조선 시대 원래 디자인으로 복원되었습니다. [4]

    * 복원 기념식이 열렸습니다. [4]
 

----
## Grounding Sources

**Web Search Queries:** ['Gwanghwamun Gate']

**Search Entry Point:**
 <style>
.container {
  align-items: center;
  border-radius: 8px;
  display: flex;
  font-family: Google Sans, Roboto, sans-serif;
  font-size: 14px;
  line-height: 20px;
  padding: 8px 12px;
}
.chip {
  display: inline-block;
  border: solid 1px;
  border-radius: 16px;
  min-width: 14px;
  padding: 5px 16px;
  text-align: center;
  user-select: none;
  margin: 0 8px;
  -webkit-tap-highlight-color: transparent;
}
.carousel {
  overflow: auto;
  scrollbar-width: none;
  white-space: nowrap;
  margin-right: -12px;
}
.headline {
  display: flex;
  margin-right: 4px;
}
.gradient-container {
  position: relative;
}
.gradient {
  position: absolute;
  transform: translate(3px, -9px);
  height: 36px;
  width: 9px;
}
@media (prefers-color-scheme: light) {
  .container {
    background-color: #fafafa;
    box-shadow: 0 0 0 1px #0000000f;
  }
  .headline-label {
    color: #1f1f1f;
  }
  .chip {
    background-color: #ffffff;
    border-color: #d2d2d2;
    color: #5e5e5e;
    text-decoration: none;
  }
  .chip:hover {
    background-color: #f2f2f2;
  }
  .chip:focus {
    background-color: #f2f2f2;
  }
  .chip:active {
    background-color: #d8d8d8;
    border-color: #b6b6b6;
  }
  .logo-dark {
    display: none;
  }
  .gradient {
    background: linear-gradient(90deg, #fafafa 15%, #fafafa00 100%);
  }
}
@media (prefers-color-scheme: dark) {
  .container {
    background-color: #1f1f1f;
    box-shadow: 0 0 0 1px #ffffff26;
  }
  .headline-label {
    color: #fff;
  }
  .chip {
    background-color: #2c2c2c;
    border-color: #3c4043;
    color: #fff;
    text-decoration: none;
  }
  .chip:hover {
    background-color: #353536;
  }
  .chip:focus {
    background-color: #353536;
  }
  .chip:active {
    background-color: #464849;
    border-color: #53575b;
  }
  .logo-light {
    display: none;
  }
  .gradient {
    background: linear-gradient(90deg, #1f1f1f 15%, #1f1f1f00 100%);
  }
}
</style>
<div class="container">
  <div class="headline">
    <svg class="logo-light" width="18" height="18" viewBox="9 9 35 35" fill="none" xmlns="http://www.w3.org/2000/svg">
      <path fill-rule="evenodd" clip-rule="evenodd" d="M42.8622 27.0064C42.8622 25.7839 42.7525 24.6084 42.5487 23.4799H26.3109V30.1568H35.5897C35.1821 32.3041 33.9596 34.1222 32.1258 35.3448V39.6864H37.7213C40.9814 36.677 42.8622 32.2571 42.8622 27.0064V27.0064Z" fill="#4285F4"/>
      <path fill-rule="evenodd" clip-rule="evenodd" d="M26.3109 43.8555C30.9659 43.8555 34.8687 42.3195 37.7213 39.6863L32.1258 35.3447C30.5898 36.3792 28.6306 37.0061 26.3109 37.0061C21.8282 37.0061 18.0195 33.9811 16.6559 29.906H10.9194V34.3573C13.7563 39.9841 19.5712 43.8555 26.3109 43.8555V43.8555Z" fill="#34A853"/>
      <path fill-rule="evenodd" clip-rule="evenodd" d="M16.6559 29.8904C16.3111 28.8559 16.1074 27.7588 16.1074 26.6146C16.1074 25.4704 16.3111 24.3733 16.6559 23.3388V18.8875H10.9194C9.74388 21.2072 9.06992 23.8247 9.06992 26.6146C9.06992 29.4045 9.74388 32.022 10.9194 34.3417L15.3864 30.8621L16.6559 29.8904V29.8904Z" fill="#FBBC05"/>
      <path fill-rule="evenodd" clip-rule="evenodd" d="M26.3109 16.2386C28.85 16.2386 31.107 17.1164 32.9095 18.8091L37.8466 13.8719C34.853 11.082 30.9659 9.3736 26.3109 9.3736C19.5712 9.3736 13.7563 13.245 10.9194 18.8875L16.6559 23.3388C18.0195 19.2636 21.8282 16.2386 26.3109 16.2386V16.2386Z" fill="#EA4335"/>
    </svg>
    <svg class="logo-dark" width="18" height="18" viewBox="0 0 48 48" xmlns="http://www.w3.org/2000/svg">
      <circle cx="24" cy="23" fill="#FFF" r="22"/>
      <path d="M33.76 34.26c2.75-2.56 4.49-6.37 4.49-11.26 0-.89-.08-1.84-.29-3H24.01v5.99h8.03c-.4 2.02-1.5 3.56-3.07 4.56v.75l3.91 2.97h.88z" fill="#4285F4"/>
      <path d="M15.58 25.77A8.845 8.845 0 0 0 24 31.86c1.92 0 3.62-.46 4.97-1.31l4.79 3.71C31.14 36.7 27.65 38 24 38c-5.93 0-11.01-3.4-13.45-8.36l.17-1.01 4.06-2.85h.8z" fill="#34A853"/>
      <path d="M15.59 20.21a8.864 8.864 0 0 0 0 5.58l-5.03 3.86c-.98-2-1.53-4.25-1.53-6.64 0-2.39.55-4.64 1.53-6.64l1-.22 3.81 2.98.22 1.08z" fill="#FBBC05"/>
      <path d="M24 14.14c2.11 0 4.02.75 5.52 1.98l4.36-4.36C31.22 9.43 27.81 8 24 8c-5.93 0-11.01 3.4-13.45 8.36l5.03 3.85A8.86 8.86 0 0 1 24 14.14z" fill="#EA4335"/>
    </svg>
    <div class="gradient-container"><div class="gradient"></div></div>
  </div>
  <div class="carousel">
    <a class="chip" href="https://vertexaisearch.cloud.google.com/grounding-api-redirect/AZnLMfyOWjGunmGsMaP6p9pQttS30KYfZjocwvtuZD7gBgni1Vei786t4oFkhh_zYjRKq3IN9G2pND8NjymN9FXajHRjXuoFeo_JVuUL9jOMDoseGaCAbyaSKmVuQH5HoVAWyTB7hpjvREDTIo5S4y-E85shMp74LrfpHsBtjdsqjnpZT0jvb4eBQRqC7-3k-e_ujR97CAI=">Gwanghwamun Gate</a>
  </div>
</div>

### Grounding Chunks
1. [wikipedia.org](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AZnLMfzSqBwYiPiNW_OL3hb79AEugiwbVf1ca-nAt5HWynGwfsteuJr02b3-me8nYpwV-k5g9mT6o8aTgKuYDgiperHPCenDfhD03NtXSqsaYsb0STMapX3xnKTyr4-biFHc_hFhijEk)


- https://vertexaisearch.cloud.google.com/grounding-api-redirect/AZnLMfzSqBwYiPiNW_OL3hb79AEugiwbVf1ca-nAt5HWynGwfsteuJr02b3-me8nYpwV-k5g9mT6o8aTgKuYDgiperHPCenDfhD03NtXSqsaYsb0STMapX3xnKTyr4-biFHc_hFhijEk

2. [visitkorea.or.kr](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AZnLMfxHCVhjmDXsodBbxbNnpGQbp0-iihTfIxsYtkKh4-YjWpvbTVMOLwDTBBrsNd4EXhz_02Bod5UmEBSa5Lavj-b87Ust0zHQdmtY2oj7L8HOBvdSWGQnj4Ags_KZpKVMaE5YewAkwde-FKjzUg-ZOHOmZlnHcl-uvM2WXumcQ9wSsb0qb1OwMJFKZDvqvOnuEO458uMnAmI=)


- https://vertexaisearch.cloud.google.com/grounding-api-redirect/AZnLMfxHCVhjmDXsodBbxbNnpGQbp0-iihTfIxsYtkKh4-YjWpvbTVMOLwDTBBrsNd4EXhz_02Bod5UmEBSa5Lavj-b87Ust0zHQdmtY2oj7L8HOBvdSWGQnj4Ags_KZpKVMaE5YewAkwde-FKjzUg-ZOHOmZlnHcl-uvM2WXumcQ9wSsb0qb1OwMJFKZDvqvOnuEO458uMnAmI=

3. [fab.com](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AZnLMfxvEcowhfzuaqMjTdvHRv_mtCa7B6DDDGROp7-exQBbiWkfHDlenHYvXDE1DqiFkF2WIdqSj-gwQ3RZhhR0iLIxBzFb7PDuirEgo3IsvBZ4NNr3RTiE3-g0F6to0CsN82dIFnzwOWrxt8JZ8uXitVazbR9WQ697cxlkzO8u)


- https://vertexaisearch.cloud.google.com/grounding-api-redirect/AZnLMfxvEcowhfzuaqMjTdvHRv_mtCa7B6DDDGROp7-exQBbiWkfHDlenHYvXDE1DqiFkF2WIdqSj-gwQ3RZhhR0iLIxBzFb7PDuirEgo3IsvBZ4NNr3RTiE3-g0F6to0CsN82dIFnzwOWrxt8JZ8uXitVazbR9WQ697cxlkzO8u

4. [joins.com](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AZnLMfyB37zQO0YvLc8-f8hvPFMksIyCZiiPpEreTTJb0VBTqBiUKbC2i-r1AAmBVZmF6gRG-t7OqyzDVbSc5CVJ-Hl-hXntkncWUwofeFkxcWb2d2LBbV9HU7w5HAgvPMaWaCySk_if0S8SVgBbCM0UPB6hjL_0Itc094o92Mlp6EPg-7CQtThe2QP4PY4sm8LY0gBE_q7KFyT2cpGadMNfJlaPFaZYGgu4HJSNb_uTlxeCvPFTlBD_au6Ig2WKQTtrFqEzAfqw1CM=)


- https://vertexaisearch.cloud.google.com/grounding-api-redirect/AZnLMfyB37zQO0YvLc8-f8hvPFMksIyCZiiPpEreTTJb0VBTqBiUKbC2i-r1AAmBVZmF6gRG-t7OqyzDVbSc5CVJ-Hl-hXntkncWUwofeFkxcWb2d2LBbV9HU7w5HAgvPMaWaCySk_if0S8SVgBbCM0UPB6hjL_0Itc094o92Mlp6EPg-7CQtThe2QP4PY4sm8LY0gBE_q7KFyT2cpGadMNfJlaPFaZYGgu4HJSNb_uTlxeCvPFTlBD_au6Ig2WKQTtrFqEzAfqw1CM=

5. [lonelyplanet.com](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AZnLMfyAGrvandYQoODti-QyioXZ32dFQcCaqnHy_Wo7qK3atp47KonzaWHb0PJH6mPa92J3eq1lTVSowTzUM9Y-7HA98ZGSmVVGyGC-PtNm0G4wHsewiJyLqqgsvdNHx7dzXW2r5EJ42vFpLeWk3OAbjJx1xUlQyulkZ0tftiY4iUaoh90VN3jBt43uDJwxVvoO8RtquK2qvxD2JvIlvzFq9d-lOntQzqm-Eg==)


- https://vertexaisearch.cloud.google.com/grounding-api-redirect/AZnLMfyAGrvandYQoODti-QyioXZ32dFQcCaqnHy_Wo7qK3atp47KonzaWHb0PJH6mPa92J3eq1lTVSowTzUM9Y-7HA98ZGSmVVGyGC-PtNm0G4wHsewiJyLqqgsvdNHx7dzXW2r5EJ42vFpLeWk3OAbjJx1xUlQyulkZ0tftiY4iUaoh90VN3jBt43uDJwxVvoO8RtquK2qvxD2JvIlvzFq9d-lOntQzqm-Eg==

