<a href="https://colab.research.google.com/github/karasu1982/DataCatalog/blob/main/DataCatalog.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 環境設定

In [None]:
!pip install --upgrade google-cloud-datacatalog

In [None]:
import os
# Set the GOOGLE_APPLICATION_CREDENTIALS environment variable; replace the
# placeholder with the location of the JSON file you created.
os.environ.update(
    {"GOOGLE_APPLICATION_CREDENTIALS": "<作成したJSONファイルの場所>"}
)

In [None]:
from google.cloud import datacatalog_v1

# Placeholders to fill in before running the rest of the notebook.
PROJECT = "<プロジェクト名>"  # project name
DATASET = "<データセット名>"  # dataset name
location = "us-central1"  # location

# ID of the tag template to use.
tag_template_id = "tag_test"

# Data Catalog client used by the cells below.
datacatalog_client = datacatalog_v1.DataCatalogClient()

# 処理関数

## タグテンプレートを新規作成

In [None]:
# Build the tag template definition that is submitted in the next cell.
tag_template = datacatalog_v1.types.TagTemplate()
tag_template.display_name = "Demo Tag Template"

# Fields with a primitive type: (field ID, display name, data type).
primitive_type = datacatalog_v1.types.FieldType.PrimitiveType
primitive_fields = (
    ("source", "Source of data asset", primitive_type.STRING),
    ("num_rows", "Number of rows in data asset", primitive_type.DOUBLE),
    ("has_pii", "Has PII", primitive_type.BOOL),
)
for field_id, field_label, field_type in primitive_fields:
    tag_template.fields[field_id] = datacatalog_v1.types.TagTemplateField()
    tag_template.fields[field_id].display_name = field_label
    tag_template.fields[field_id].type_.primitive_type = field_type

# Enum-typed field: list the values that may be selected for it.
tag_template.fields["pii_type"] = datacatalog_v1.types.TagTemplateField()
tag_template.fields["pii_type"].display_name = "PII type"
for allowed_name in ("EMAIL", "SOCIAL SECURITY NUMBER", "NONE"):
    tag_template.fields["pii_type"].type_.enum_type.allowed_values.append(
        datacatalog_v1.types.FieldType.EnumType.EnumValue(
            display_name=allowed_name
        )
    )

# Template resource path built from the configured project, location and
# template ID; only used in the failure message of the creation cell.
expected_template_name = datacatalog_v1.DataCatalogClient.tag_template_path(
    PROJECT, location, tag_template_id
)

In [None]:
# Create the template under the configured project and location.
template_parent = f"projects/{PROJECT}/locations/{location}"
try:
    tag_template = datacatalog_client.create_tag_template(
        parent=template_parent,
        tag_template_id=tag_template_id,
        tag_template=tag_template,
    )
    print(f"Created template: {tag_template.name}")
except OSError as err:
    # NOTE(review): only OSError is handled here — confirm that the failures
    # expected from this API call are actually raised as OSError.
    print(f"Cannot create template: {expected_template_name}")
    print(err)

## 作成したタグテンプレートを使ってテーブルにタグを付与

In [None]:
# Table to attach the tag to. TABLE must be assigned before the resource name
# below is built, otherwise running the notebook top to bottom fails with a
# NameError; replace the placeholder with a real table name.
TABLE = "<タグを付与するテーブル名>"

# Look up the Data Catalog entry linked to the BigQuery table.
resource_name = (
    f"//bigquery.googleapis.com/projects/{PROJECT}"
    f"/datasets/{DATASET}/tables/{TABLE}"
)
table_entry = datacatalog_client.lookup_entry(
    request={"linked_resource": resource_name}
)

In [None]:
# Values to enter into the tag, based on the template created above.
tag = datacatalog_v1.types.Tag()
tag.template = tag_template.name
# NOTE(review): confirm that assigning Tag.name on the client side is needed
# and accepted by the service.
tag.name = "my_super_cool_tag"

# Scalar fields: (field ID, TagField attribute to set, value).
scalar_values = (
    ("source", "string_value", "Copied from tlc_yellow_trips_2018"),
    ("num_rows", "double_value", 113496874),
    ("has_pii", "bool_value", False),
)
for field_id, value_attr, field_value in scalar_values:
    tag.fields[field_id] = datacatalog_v1.types.TagField()
    setattr(tag.fields[field_id], value_attr, field_value)

# Enum field: selected by the display name of one of the allowed values.
tag.fields["pii_type"] = datacatalog_v1.types.TagField()
tag.fields["pii_type"].enum_value.display_name = "NONE"

# Attach the tag to the table entry looked up in the previous cell.
tag = datacatalog_client.create_tag(parent=table_entry.name, tag=tag)

## 作成したタグを他のテーブルに付与

In [None]:
# Name of the table to attach the tag to; replace the placeholder.
TABLE = "<付与するテーブル名>"

In [None]:
# Look up the Data Catalog entry of the table that will receive the tag.
table_path = f"projects/{PROJECT}/datasets/{DATASET}/tables/{TABLE}"
resource_name = f"//bigquery.googleapis.com/{table_path}"

lookup_request = {"linked_resource": resource_name}
table_entry = datacatalog_client.lookup_entry(request=lookup_request)

In [None]:
# Values to enter into the tag that is attached to the other table.
tag = datacatalog_v1.types.Tag()

# Reference the template through the notebook's own configuration instead of
# a hard-coded path, so the tag uses the template in the configured project
# and location with the configured template ID.
tag.template = datacatalog_v1.DataCatalogClient.tag_template_path(
    PROJECT, location, tag_template_id
)
# NOTE(review): confirm that assigning Tag.name on the client side is needed
# and accepted by the service.
tag.name = "my_super_cool_tag"

tag.fields["source"] = datacatalog_v1.types.TagField()
tag.fields["source"].string_value = "Copied from tlc_yellow_trips_2018"

tag.fields["num_rows"] = datacatalog_v1.types.TagField()
tag.fields["num_rows"].double_value = 113496874

tag.fields["has_pii"] = datacatalog_v1.types.TagField()
tag.fields["has_pii"].bool_value = False

# Enum field: selected by the display name of one of the allowed values.
tag.fields["pii_type"] = datacatalog_v1.types.TagField()
tag.fields["pii_type"].enum_value.display_name = "NONE"

# Attach the tag to the table entry looked up in the previous cell.
tag = datacatalog_client.create_tag(parent=table_entry.name, tag=tag)