Copyright 2024 - Forusone : shins777@gmail.com

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

   https://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

# Search metadata - Enterprise Knowledge Graph

## Configuration
### Install python packages


In [1]:
%pip install --upgrade --quiet google-cloud-aiplatform \
                               google-cloud-enterpriseknowledgegraph

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.3/6.3 MB[0m [31m63.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[?25h

## Authenticate to access GCP
* Use OAuth to access the GCP environment.
 * Refer to the authentication methods in GCP : https://cloud.google.com/docs/authentication?hl=ko

In [2]:
import sys
from IPython.display import Markdown, display

# Only when the google.colab module is already loaded (i.e. the notebook runs
# inside Colab): sign the user in against the target project. In any other
# environment this branch is skipped entirely.
# NOTE(review): the project id is hard-coded here and repeated in the
# configuration cell's PROJECT_ID — keep the two in sync.
if "google.colab" in sys.modules:
    from google.colab import auth
    auth.authenticate_user(project_id="ai-hangsik")

!gcloud config set project ai-hangsik

Updated property [core/project].


## Set up the GCP project environment


In [3]:
# Notebook-wide settings. PROJECT_ID and LOCATION are read as globals by the
# Enterprise Knowledge Graph helpers; all three of PROJECT_ID, LOCATION and
# STAGING_BUCKET are passed to vertexai.init() below.
PROJECT_ID = "ai-hangsik"
LOCATION = "us-central1"
STAGING_BUCKET = "gs://reasoning_7424"
# Name of the generative model wrapped by the global `model` object.
MODEL = "gemini-1.5-flash-002"
#MODEL = "gemini-pro-experimental"

# NOTE(review): base64 is not used by any cell visible in this notebook.
import base64
import vertexai
from vertexai.generative_models import GenerativeModel

# Initialise the Vertex AI SDK, then create the shared model instance that
# generate() calls.
vertexai.init(project=PROJECT_ID, location=LOCATION, staging_bucket=STAGING_BUCKET)
model = GenerativeModel(MODEL)

## Metadata Search

### Helper functions to search metadata

In [4]:
from __future__ import annotations
from collections.abc import Sequence
from google.cloud import enterpriseknowledgegraph as ekg

def search_sample(
    search_query: str,
    languages: Sequence[str] | None = None,
    types: Sequence[str] | None = None,
    limit: int = 5,
):
    """Search the Enterprise Knowledge Graph and print each matching entity.

    The request is issued against the location built from the module-level
    ``PROJECT_ID`` and ``LOCATION``. Results are printed to stdout; nothing
    is returned.

    Args:
        search_query: Free-text query sent to the search endpoint.
        languages: Optional language codes forwarded to the request.
        types: Optional entity types (e.g. ``["Movie"]``) forwarded to the
            request.
        limit: Maximum number of results requested.
    """
    client = ekg.EnterpriseKnowledgeGraphServiceClient()
    parent = client.common_location_path(project=PROJECT_ID, location=LOCATION)

    request = ekg.SearchRequest(
        parent=parent,
        query=search_query,
        languages=languages,
        types=types,
        limit=limit,
    )

    response = client.search(request=request)

    print(f"Search Query: {search_query}\n")

    # Extract and print data from the response.
    for item in response.item_list_element:
        result = item.get("result")
        if not result:
            # An element without a "result" payload has nothing to print.
            continue

        print(f"Name: {result.get('name')}")
        print(f"- Description: {result.get('description')}")
        print(f"- Types: {result.get('@type')}\n")

        detailed_description = result.get("detailedDescription")
        if detailed_description:
            print("- Detailed Description:")
            print(f"\t- Article Body: {detailed_description.get('articleBody')}")
            print(f"\t- URL: {detailed_description.get('url')}")
            print(f"\t- License: {detailed_description.get('license')}\n")

        print(f"- Cloud MID: {result.get('@id')}")
        # "identifier" may be absent; fall back to an empty sequence so the
        # loop is skipped instead of raising TypeError on None.
        for identifier in result.get("identifier") or ():
            print(f"\t- {identifier.get('name')}: {identifier.get('value')}")

        print("\n")

#----------------------------------------------------------------------------------------------------

def lookup_with_mid(
    ids: Sequence[str],
    languages: Sequence[str] | None = None,
):
    """Look up Enterprise Knowledge Graph entities by id and print them.

    The request is issued against the location built from the module-level
    ``PROJECT_ID`` and ``LOCATION``. The raw response is printed first,
    followed by a formatted summary of every returned entity. Nothing is
    returned.

    Args:
        ids: Entity ids to look up (e.g. the Cloud MID ``"c-0290gwhzt"``).
        languages: Optional language codes forwarded to the request.
    """
    client = ekg.EnterpriseKnowledgeGraphServiceClient()
    parent = client.common_location_path(project=PROJECT_ID, location=LOCATION)

    request = ekg.LookupRequest(
        parent=parent,
        ids=ids,
        languages=languages,
    )

    # Make the request.
    response = client.lookup(request=request)

    print(f"Lookup IDs: {ids}\n")

    # Dump the raw response before the formatted summary.
    print(response)

    # Extract and print data from the response.
    for item in response.item_list_element:
        result = item.get("result")
        if not result:
            # An element without a "result" payload has nothing to print.
            continue

        print(f"Name: {result.get('name')}")
        print(f"- Description: {result.get('description')}")
        print(f"- Types: {result.get('@type')}\n")

        detailed_description = result.get("detailedDescription")
        if detailed_description:
            print("- Detailed Description:")
            print(f"\t- Article Body: {detailed_description.get('articleBody')}")
            print(f"\t- URL: {detailed_description.get('url')}")
            print(f"\t- License: {detailed_description.get('license')}\n")

        print(f"- Cloud MID: {result.get('@id')}")
        # "identifier" may be absent; fall back to an empty sequence so the
        # loop is skipped instead of raising TypeError on None.
        for identifier in result.get("identifier") or ():
            print(f"\t- {identifier.get('name')}: {identifier.get('value')}")

        print("\n")

In [5]:
# Example: search with a Korean query string, restricted to Movie entities.
search_query = "광해"
types = ["Movie"]
search_sample(search_query, types = types)

Search Query: 광해

Name: Masquerade
- Description: 2012 film
- Types: ['Thing', 'Movie']

- Detailed Description:
	- Article Body: Masquerade is a 2012 South Korean period drama film starring Lee Byung-hun in dual role as the bizarre King Gwanghae and the humble acrobat Ha-sun, who stands in for the monarch when he faces the threat of being poisoned.

	- URL: https://en.wikipedia.org/wiki/Masquerade_(2012_film)
	- License: https://en.wikipedia.org/wiki/Wikipedia:Text_of_Creative_Commons_Attribution-ShareAlike_3.0_Unported_License

- Cloud MID: c-0290gwhzt
	- googleKgMID: /m/0hhtb9l
	- wikidataQID: Q491957


Name: Warriors of the Dawn
- Description: 2017 film
- Types: ['Thing', 'Movie']

- Detailed Description:
	- Article Body: Warriors of the Dawn is a 2017 South Korean historical drama film by Jeong Yoon-cheol set during the 1592 Imjin War, starring Lee Jung-jae and Yeo Jin-goo. 
	- URL: https://en.wikipedia.org/wiki/Warriors_of_the_Dawn
	- License: https://en.wikipedia.org/wiki/Wikipe

In [6]:
# Example: look up a single entity by its Cloud MID (the value printed for
# "Masquerade" in the search output).
ids = ["c-0290gwhzt"]
lookup_with_mid(ids=ids)

Lookup IDs: ['c-0290gwhzt']

context {
  struct_value {
    fields {
      key: "@vocab"
      value {
        string_value: "http://schema.org/"
      }
    }
  }
}
type_ {
  string_value: "ItemList"
}
item_list_element {
  values {
    struct_value {
      fields {
        key: "result"
        value {
          struct_value {
            fields {
              key: "url"
              value {
                string_value: "http://becameking.jp/"
              }
            }
            fields {
              key: "name"
              value {
                string_value: "Masquerade"
              }
            }
            fields {
              key: "identifier"
              value {
                list_value {
                  values {
                    struct_value {
                      fields {
                        key: "value"
                        value {
                          string_value: "/m/0hhtb9l"
                        }
                      }
      

## Metadata build

### Helper functions to build metadata

In [7]:
def get_wiki_contents(url, timeout=30):
    """Fetch ``url`` with an HTTP GET and return the response body as text.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely on an
            unresponsive host.

    Returns:
        The response text when the status code is 200; otherwise ``None``
        (after printing the failing status code).

    Raises:
        requests.exceptions.RequestException: Propagated from ``requests``
            on connection errors or when the timeout expires.
    """
    import requests

    response = requests.get(url, timeout=timeout)

    if response.status_code != 200:
        print(f"Request failed with status code: {response.status_code}")
        return None

    return response.text

#--------------------------------------------

def generate(prompt:str,
             response_schema=None)->str:
    """Send ``prompt`` to the module-level ``model`` and return its JSON text.

    Args:
        prompt: Prompt text, sent to the model as a single content item.
        response_schema: Optional schema passed through unchanged to
            ``GenerationConfig(response_schema=...)``.

    Returns:
        The ``text`` of the (non-streaming) model response. The response is
        requested with MIME type ``application/json``.
    """
    from vertexai.generative_models import GenerationConfig

    # Configuration to control LLM generation.
    generation_config = GenerationConfig(
        max_output_tokens=8192,
        temperature=1,
        top_p=0.95,
        response_mime_type="application/json",
        response_schema=response_schema,
    )

    # stream=False yields one complete response object rather than chunks.
    response = model.generate_content(
        [prompt],
        generation_config=generation_config,
        stream=False,
    )

    return response.text

In [8]:
# Download the Wikipedia page for the film; get_wiki_contents returns the
# response text as-is (raw HTML, as the cell output shows).
url = "https://en.wikipedia.org/wiki/Masquerade_(2012_film)"
context = get_wiki_contents(url)
# Bare expression so the notebook displays the fetched text.
context

'<!DOCTYPE html>\n<html class="client-nojs vector-feature-language-in-header-enabled vector-feature-language-in-main-page-header-disabled vector-feature-sticky-header-disabled vector-feature-page-tools-pinned-disabled vector-feature-toc-pinned-clientpref-1 vector-feature-main-menu-pinned-disabled vector-feature-limited-width-clientpref-1 vector-feature-limited-width-content-enabled vector-feature-custom-font-size-clientpref-1 vector-feature-appearance-pinned-clientpref-1 vector-feature-night-mode-enabled skin-theme-clientpref-day vector-toc-available" lang="en" dir="ltr">\n<head>\n<meta charset="UTF-8">\n<title>Masquerade (2012 film) - Wikipedia</title>\n<script>(function(){var className="client-js vector-feature-language-in-header-enabled vector-feature-language-in-main-page-header-disabled vector-feature-sticky-header-disabled vector-feature-page-tools-pinned-disabled vector-feature-toc-pinned-clientpref-1 vector-feature-main-menu-pinned-disabled vector-feature-limited-width-clientpr

In [9]:
# Prompt (written in Korean) that asks the model to act as a metadata
# assistant and extract, in Korean: 1. director, 2. full cast list, 3. plot,
# 4. release date, 5. audience count, 6. poster link. The fetched page text
# is interpolated after "Context :".
# NOTE(review): `context` is interpolated unmodified; if the fetch cell
# returned None, the literal text "None" ends up in the prompt.
prompt = f"""
  당신은 주어진 영화 정보를 통해서 메타데이터 정보를 생성하는 AI Assiatant 입니다.
  아래 정보를 한국어로 추출해주세요.

  1. 영화 감독
  2. 영화배우 전체 리스트
  3. 줄거리
  4. 개봉일자
  5. 관객수
  6. 영화 포스터 링크

  Context : {context}

"""

# No response_schema is supplied, so generate() uses its default of None.
generate(prompt)


'{"영화감독": "추창민", "영화배우": ["이병헌", "류승룡", "한효주", "장광", "김인권", "심은경", "박지아", "신정근", "김명곤", "전국향", "양준모", "문창길", "전배수", "도용구", "유순웅", "이양희", "박경근", "신운섭", "김종구", "이엘", "이준혁", "서진원", "김혜원", "김학준", "김혜화", "김승훈", "이봉련", "권방현", "이란희", "이수용", "김길동", "권은수", "서은정", "주영호", "조성희", "민정기", "김비비"], "줄거리": "조선 15대 왕 광해군은 암살 위협에 시달리며 궁궐 밖으로 나가지 못한다. 이에, 광해군의 명을 받은 허균은 외모가 광해군과 똑같은 천민 광대 하선을 찾아내어, 그를 대신하여 왕의 역할을 하도록 한다. 하선은 처음에는 가짜 왕 노릇에 익숙하지 못하지만, 점차 백성들의 고통에 공감하며 정치에 대한 통찰력을 키워간다. 그는 광해군보다 더욱 인간적이고 백성을 위한 정치를 펼치면서, 궁궐 안의 분위기를 바꾸고 백성들의 마음을 얻는다. 하지만, 그의 행동에 의심을 품은 박충서 등의 반대 세력과 진짜 왕 광해군과의 관계 속에서 갈등을 겪게 된다. 결국, 하선은 궁궐을 떠나게 되고, 광해군이 다시 왕위에 복귀하여 반란을 다스린다.", "개봉일자": "2012년 9월 13일", "관객수": "1230만 명", "영화포스터링크": "https://upload.wikimedia.org/wikipedia/en/8/85/Gwanghae.jpg"}'