# Adding classification and tags

In this tutorial, we will:
- Create a Database Service, a Database, a Schema and one Table,
- Create a Classification,
- Create a Tag for the Classification and add it to the Table.

In [1]:
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import (OpenMetadataConnection, AuthProvider)
from metadata.generated.schema.security.client.openMetadataJWTClientConfig import OpenMetadataJWTClientConfig

## 1. Create a connexion to the Open Metadata server
 
In the server config clause, we must define the `server host and port`, the credential(in our case is a jwt token)

In [2]:
from creds import om_admin_token
server_config = OpenMetadataConnection(
    hostPort="http://datacatalog.casd.local/api",
    authProvider=AuthProvider.openmetadata,
    securityConfig=OpenMetadataJWTClientConfig(
        jwtToken=om_admin_token,
    ),
)
metadata = OpenMetadata(server_config)

In [3]:
# if it returns true, it means the connection is success 
metadata.health_check()

True

## 2. Create the required entities

In [4]:
from metadata.generated.schema.api.services.createDatabaseService import CreateDatabaseServiceRequest
from metadata.generated.schema.entity.services.connections.database.common.basicAuth import BasicAuth
from metadata.generated.schema.entity.services.connections.database.mysqlConnection import MysqlConnection
from metadata.generated.schema.entity.services.databaseService import (DatabaseConnection, DatabaseService, DatabaseServiceType,)
from metadata.generated.schema.api.data.createDatabase import CreateDatabaseRequest
from metadata.generated.schema.api.data.createDatabaseSchema import CreateDatabaseSchemaRequest
from metadata.generated.schema.api.data.createTable import CreateTableRequest
from metadata.generated.schema.entity.data.table import Column, DataType

# create db service
db_service = CreateDatabaseServiceRequest(
    name="test-db-service",
    serviceType=DatabaseServiceType.Mysql,
    connection=DatabaseConnection(
        config=MysqlConnection(
            username="db_login",
            authType=BasicAuth(password="db_name"),
            hostPort="http://db_url:1234",
        )
    ),
)

# when we create an entity by using function `create_or_update`, it returns the created instance of the query
db_service_entity = metadata.create_or_update(data=db_service)

# create db
db_entity_req = CreateDatabaseRequest(
    name="test-db",
    service=db_service_entity.fullyQualifiedName,
)

db_entity = metadata.create_or_update(data=db_entity_req)

# create schema
create_schema_req = CreateDatabaseSchemaRequest(
    name="test-schema", database=db_entity.fullyQualifiedName
)

# the create request will return the fqn(fully qualified name) of the created schema
schema_entity = metadata.create_or_update(data=create_schema_req)

# create table

table_a = CreateTableRequest(
    name="test_user",
    databaseSchema=schema_entity.fullyQualifiedName,
    columns=[Column(name="id", dataType=DataType.BIGINT,description="id of the user"),
             Column(name="age", dataType=DataType.INT,description="age of the user")],
)

table_a_entity = metadata.create_or_update(data=table_a)

## 3.create a classification entity

In `OM`, a `Classification entity` contains `hierarchical terms called tags` used for categorizing and classifying `data assets and other entities`. For example, a default classification called `PII(Personally Identifiable Information)`, it contains three `tags`:
- **None**: do not contain sensitive data
- **NonSensitive**: contain PII, but they are easily accessible from public sources and can include zip code, race, gender, and date of birth.
- **Sensitive**: contain PII, and if lost, compromised, or disclosed without authorization, could result in substantial harm, embarrassment, inconvenience, or unfairness to an individual.

In below example, we will try to create a 

In [9]:
from metadata.generated.schema.api.classification.createClassification import CreateClassificationRequest
from metadata.generated.schema.api.classification.createTag import CreateTagRequest

classification_request=CreateClassificationRequest(
    name="DataClassification",
    description="MAC data classification",
)

classification_entity=metadata.create_or_update(classification_request)

In [13]:
if classification_entity:
    print(f"type of the classification: {type(classification_entity)}")
    print(f"content of the classification: {classification_entity}")
    print(f"name of the classification: {classification_entity.name.__root__}")

type of the classification: <class 'metadata.generated.schema.entity.classification.classification.Classification'>
content of the classification: id=Uuid(__root__=UUID('d104b0ad-cd71-4029-bb82-5ece649fc0ee')) name=EntityName(__root__='DataClassification') fullyQualifiedName=FullyQualifiedEntityName(__root__='DataClassification') displayName=None description=Markdown(__root__='MAC data classification') version=EntityVersion(__root__=0.1) termCount=None updatedAt=Timestamp(__root__=1719327150298) updatedBy='ingestion-bot' href=Href(__root__=AnyUrl('http://datacatalog.casd.local/api/v1/classifications/d104b0ad-cd71-4029-bb82-5ece649fc0ee', scheme='http', host='datacatalog.casd.local', tld='local', host_type='domain', path='/api/v1/classifications/d104b0ad-cd71-4029-bb82-5ece649fc0ee')) usageCount=None changeDescription=None deleted=False provider=<ProviderType.user: 'user'> disabled=None mutuallyExclusive=False
name of the classification: DataClassification


## 4. Creating dags in a classification

Now we want to add the following tags to the `DataClassification` classification.
- **TopSecret**: Such material would cause "exceptionally grave damage" to national security if made publicly available
- **Secret**: Secret material would cause "serious damage" to national security if it were publicly available
- **Confidential**: Confidential material would cause "damage" or be prejudicial to national security if publicly available
- **Restricted**: Restricted material would cause "undesirable effects" if publicly available.
- **Official**: Official material forms the generality of government business, public service delivery and commercial activity. This includes a diverse range of information, of varying sensitivities, and with differing consequences resulting from compromise or loss.
- **Unclassified**: Unclassified information is low-impact, and therefore does not require any special protection, such as vetting of personnel.

In [20]:
ts_tag_request=CreateTagRequest(
    classification=classification_request.name,
    name="TopSecret",
    displayName="TopSecret",
    description="Such material would cause `exceptionally grave damage` to national security if made publicly available",
)

s_tag_request=CreateTagRequest(
    classification=classification_request.name,
    name="Secret",
    displayName="Secret",
    description="Secret material would cause `serious damage` to national security if it were publicly available",
)


conf_tag_request=CreateTagRequest(
    classification=classification_request.name,
    name="Confidential",
    displayName="Confidential",
    description="Confidential material would cause `damage` or be prejudicial to national security if publicly available",
)

res_tag_request=CreateTagRequest(
    classification=classification_request.name,
    name="Restricted",
    displayName="Restricted",
    description="Restricted material would cause `undesirable effects` if publicly available",
)

off_tag_request=CreateTagRequest(
    classification=classification_request.name,
    name="Official",
    displayName="Official",
    description="Official material forms the generality of government business, public service delivery and commercial activity. This includes a diverse range of information, of varying sensitivities, and with differing consequences resulting from compromise or loss",
)


un_tag_request=CreateTagRequest(
    classification=classification_request.name,
    name="Unclassified",
    displayName="Unclassified",
    description="Unclassified information is low-impact, and therefore does not require any special protection, such as vetting of personnel.",
)



ts_tag_entity=metadata.create_or_update(ts_tag_request)
s_tag_entity=metadata.create_or_update(s_tag_request)
conf_tag_entity=metadata.create_or_update(conf_tag_request)
metadata.create_or_update(res_tag_request)
metadata.create_or_update(off_tag_request)
metadata.create_or_update(un_tag_request)



Tag(id=Uuid(__root__=UUID('b5743997-fd6e-4941-9223-2a15a01b529f')), name=EntityName(__root__='Unclassified'), displayName='Unclassified', fullyQualifiedName='DataClassification.Unclassified', description=Markdown(__root__='Unclassified information is low-impact, and therefore does not require any special protection, such as vetting of personnel.'), style=None, classification=EntityReference(id=Uuid(__root__=UUID('d104b0ad-cd71-4029-bb82-5ece649fc0ee')), type='classification', name='DataClassification', fullyQualifiedName='DataClassification', description=Markdown(__root__='MAC data classification'), displayName='DataClassification', deleted=False, inherited=None, href=Href(__root__=AnyUrl('http://datacatalog.casd.local/api/v1/classifications/d104b0ad-cd71-4029-bb82-5ece649fc0ee', scheme='http', host='datacatalog.casd.local', tld='local', host_type='domain', path='/api/v1/classifications/d104b0ad-cd71-4029-bb82-5ece649fc0ee'))), parent=None, children=None, version=EntityVersion(__root__

In [22]:
print(type(ts_tag_entity))
print(ts_tag_entity)

<class 'metadata.generated.schema.entity.classification.tag.Tag'>
id=Uuid(__root__=UUID('e859a9c5-4eae-4ac8-8014-b38a54da3bec')) name=EntityName(__root__='TopSecret') displayName='TopSecret' fullyQualifiedName='DataClassification.TopSecret' description=Markdown(__root__='Such material would cause `exceptionally grave damage` to national security if made publicly available') style=None classification=EntityReference(id=Uuid(__root__=UUID('d104b0ad-cd71-4029-bb82-5ece649fc0ee')), type='classification', name='DataClassification', fullyQualifiedName='DataClassification', description=Markdown(__root__='MAC data classification'), displayName='DataClassification', deleted=False, inherited=None, href=Href(__root__=AnyUrl('http://datacatalog.casd.local/api/v1/classifications/d104b0ad-cd71-4029-bb82-5ece649fc0ee', scheme='http', host='datacatalog.casd.local', tld='local', host_type='domain', path='/api/v1/classifications/d104b0ad-cd71-4029-bb82-5ece649fc0ee'))) parent=None children=None version=

## Tagging a Table

The source code can be found here: https://github.com/open-metadata/OpenMetadata/tree/e02ead8133e279a60554747954e2f681171cf7a5/ingestion/src/metadata/ingestion/ometa/mixins

In [30]:
from metadata.generated.schema.entity.data.table import Table
from metadata.ingestion.ometa.mixins.patch_mixin_utils import PatchOperation
from metadata.generated.schema.type.tagLabel import TagLabel, TagSource, State, LabelType

# we need to create tag label first to be able to add tag to other entities
# We only need tag_fqn to build the tag label of type str.
ts_tag_label=TagLabel(tagFQN=s_tag_entity.fullyQualifiedName,source=TagSource.Classification,  state=State.Suggested, labelType=LabelType.Automated,)
s_tag_label=TagLabel(tagFQN=s_tag_entity.fullyQualifiedName,source=TagSource.Classification,  state=State.Suggested, labelType=LabelType.Automated,)
conf_tag_label=TagLabel(tagFQN=conf_tag_entity.fullyQualifiedName,source=TagSource.Classification,  state=State.Suggested, labelType=LabelType.Automated,)

# if we know the value of the fqn, we can create the tag label from string value directly
res_tag_label=TagLabel(tagFQN="DataClassification.Restricted",source=TagSource.Classification,  state=State.Suggested, labelType=LabelType.Automated,)
off_tag_label=TagLabel(tagFQN="DataClassification.Official",source=TagSource.Classification,  state=State.Suggested, labelType=LabelType.Automated,)
un_tag_label=TagLabel(tagFQN="DataClassification.Unclassified",source=TagSource.Classification,  state=State.Suggested, labelType=LabelType.Automated,)

metadata.patch_tags(
    entity=Table,
    source=table_a_entity,
    tag_labels=[s_tag_label],
    operation=PatchOperation.ADD
)

Table(id=Uuid(__root__=UUID('d6b35523-124d-476e-b863-eb016aca2178')), name=EntityName(__root__='test_user'), displayName=None, fullyQualifiedName=FullyQualifiedEntityName(__root__='test-db-service.test-db.test-schema.test_user'), description=None, version=EntityVersion(__root__=0.2), updatedAt=Timestamp(__root__=1719387988655), updatedBy='ingestion-bot', href=Href(__root__=AnyUrl('http://datacatalog.casd.local/api/v1/tables/d6b35523-124d-476e-b863-eb016aca2178', scheme='http', host='datacatalog.casd.local', tld='local', host_type='domain', path='/api/v1/tables/d6b35523-124d-476e-b863-eb016aca2178')), tableType=None, columns=[Column(name=ColumnName(__root__='id'), displayName=None, dataType=<DataType.BIGINT: 'BIGINT'>, arrayDataType=None, dataLength=None, precision=None, scale=None, dataTypeDisplay='bigint', description=Markdown(__root__='id of the user'), fullyQualifiedName=FullyQualifiedEntityName(__root__='test-db-service.test-db.test-schema.test_user.id'), tags=[], constraint=None, 

## Tagging a column

To tagga a column, we need to creat an object of type `ColumnTag`, it requires:
- column_fqn
- tag_label

The source code of ColumnTag can be found here: https://github.com/open-metadata/OpenMetadata/blob/e02ead8133e279a60554747954e2f681171cf7a5/ingestion/src/metadata/ingestion/models/table_metadata.py

In [33]:
from metadata.ingestion.models.table_metadata import ColumnTag

col_id_tag=ColumnTag(column_fqn="test-db-service.test-db.test-schema.test_user.id",tag_label=ts_tag_label)
col_age_tag=ColumnTag(column_fqn="test-db-service.test-db.test-schema.test_user.age",tag_label=conf_tag_label)

metadata.patch_column_tags(table=table_a_entity,
                          column_tags=[col_id_tag,col_age_tag],
                          operation=PatchOperation.ADD,)

Table(id=Uuid(__root__=UUID('d6b35523-124d-476e-b863-eb016aca2178')), name=EntityName(__root__='test_user'), displayName=None, fullyQualifiedName=FullyQualifiedEntityName(__root__='test-db-service.test-db.test-schema.test_user'), description=None, version=EntityVersion(__root__=0.3), updatedAt=Timestamp(__root__=1719389375052), updatedBy='ingestion-bot', href=Href(__root__=AnyUrl('http://datacatalog.casd.local/api/v1/tables/d6b35523-124d-476e-b863-eb016aca2178', scheme='http', host='datacatalog.casd.local', tld='local', host_type='domain', path='/api/v1/tables/d6b35523-124d-476e-b863-eb016aca2178')), tableType=None, columns=[Column(name=ColumnName(__root__='id'), displayName=None, dataType=<DataType.BIGINT: 'BIGINT'>, arrayDataType=None, dataLength=None, precision=None, scale=None, dataTypeDisplay='bigint', description=Markdown(__root__='id of the user'), fullyQualifiedName=FullyQualifiedEntityName(__root__='test-db-service.test-db.test-schema.test_user.id'), tags=[TagLabel(tagFQN=TagF

## Cleanup

In [37]:
from metadata.generated.schema.entity.classification.classification import Classification

# remove the classification and related tags
# It will also remove tags from the column and tables
classification_id = metadata.get_by_name(
    entity=Classification, fqn="DataClassification"
).id

metadata.delete(
    entity=Classification,
    entity_id=classification_id,
    recursive=True,
    hard_delete=True,
)


AttributeError: 'NoneType' object has no attribute 'id'

In [38]:
# remove db service and so on
service_id = metadata.get_by_name(
    entity=DatabaseService, fqn="test-db-service"
).id

metadata.delete(
    entity=DatabaseService,
    entity_id=service_id,
    recursive=True,
    hard_delete=True,
)

AttributeError: 'NoneType' object has no attribute 'id'