# Create a storage based service


In [1]:
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import (OpenMetadataConnection, AuthProvider)
from metadata.generated.schema.security.client.openMetadataJWTClientConfig import OpenMetadataJWTClientConfig

In [2]:
from creds import om_admin_token
# JWT-based security settings, using the token imported from the local `creds` module.
jwt_security = OpenMetadataJWTClientConfig(jwtToken=om_admin_token)

# Connection settings for the OpenMetadata server API.
server_config = OpenMetadataConnection(
    hostPort="http://datacatalog.casd.local/api",
    authProvider=AuthProvider.openmetadata,
    securityConfig=jwt_security,
)

# Client object used by all the following cells.
metadata = OpenMetadata(server_config)

In [3]:
# health_check() returning True means the connection to the server succeeded.
metadata.health_check()

True

In [5]:
from metadata.generated.schema.api.services.createStorageService import CreateStorageServiceRequest 
from metadata.generated.schema.entity.services.storageService import StorageServiceType, StorageConnection, StorageService
from metadata.generated.schema.entity.services.connections.storage.s3Connection import S3Connection
from metadata.generated.schema.security.credentials.awsCredentials import AWSCredentials

# S3 connection settings; only the AWS region is provided here.
s3_connection = S3Connection(awsConfig=AWSCredentials(awsRegion="casd-local"))

# Request describing an S3-type storage service named "test-datalake".
store_service_req = CreateStorageServiceRequest(
    name="test-datalake",
    serviceType=StorageServiceType.S3,
    connection=StorageConnection(config=s3_connection),
)

# Submit the request through the client and keep the returned entity.
storage_service_entity = metadata.create_or_update(data=store_service_req)

In [7]:
# Show the returned entity's class and its fully qualified name, one per line.
print(type(storage_service_entity), storage_service_entity.fullyQualifiedName, sep="\n")

<class 'metadata.generated.schema.entity.services.storageService.StorageService'>
__root__='test-datalake'


# Use Container to represent a bucket

`OM` has a predefined type called `Container`, which is an abstraction for any path (including the top level, e.g. a bucket in S3) that stores data in an object store such as S3, GCP, or Azure. It maps a tree-like structure, where each `Container` can have a parent and a list of sub-folders. A container is either structured (it contains structured data) or unstructured (no schema is defined for its data).
https://github.com/open-metadata/OpenMetadata/blob/main/openmetadata-spec/src/main/resources/json/schema/entity/data/container.json

In [9]:
from metadata.generated.schema.api.data.createContainer import CreateContainerRequest

# Request for a container attached to the "test-datalake" storage service.
# The same string is used for both the name and the display name.
container_name = 'test_container'
container_req = CreateContainerRequest(
    name=container_name,
    displayName=container_name,
    description='this is my first container test',
    # Explicitly None: no parent container, no prefix and no data model.
    parent=None,
    prefix=None,
    dataModel=None,
    numberOfObjects=3,
    size=123456.75,
    fileFormats=['parquet', 'csv', 'json'],
    service='test-datalake',
)

# Submit the request through the client and keep the returned entity.
container_entity = metadata.create_or_update(data=container_req)

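In the UI, the size of this container is displayed as 120.56 KB (123456.75 / 1024 ≈ 120.56, so the UI appears to interpret the `size` value as bytes).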