In [7]:
import os
from dotenv import load_dotenv
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.emitter.rest_emitter import DatahubRestEmitter
from datahub.metadata.schema_classes import (
    ChangeTypeClass,
    GlossaryTermInfoClass,
    GlossaryTermKeyClass,
)

# Load environment variables
load_dotenv()

# Constants
DATAHUB_SERVER_URL = os.getenv('DATAHUB_SERVER_URL')

# Define UK-localized data standards
UK_DATA_STANDARDS = [
    {
        "name": "Email",
        "description": "A valid email address as per RFC 5322 standards.",
        "examples": ["user@example.com", "test.user+alias@example.co.uk"],
    },
    {
        "name": "National Insurance Number",
        "description": "A unique identifier for individuals in the UK, formatted as two letters, six digits, and one letter (e.g., AB123456C).",
        "examples": ["AB123456C", "ZZ987654A"],
    },
    {
        "name": "Post Code",
        "description": "A UK postal code, which can be in one of the following formats: A9 9AA, A99 9AA, AA9 9AA, AA99 9AA, A9A 9AA, or AA9A 9AA.",
        "examples": ["SW1A 1AA", "M1 1AE", "EH12 5NY"],
    },
    {
        "name": "UPRN",
        "description": "Unique Property Reference Number, a unique identifier for every addressable location in the UK.",
        "examples": ["100023336956", "200010971123"],
    },
    {
        "name": "EPC Rating",
        "description": "Energy Performance Certificate rating, ranging from A (most efficient) to G (least efficient).",
        "examples": ["A", "C", "G"],
    },
    {
        "name": "Telephone Number",
        "description": "A UK telephone number, typically starting with +44 or 0, followed by 9-10 digits.",
        "examples": ["+441234567890", "01234567890"],
    },
    {
        "name": "Date of Birth",
        "description": "A date in the format DD/MM/YYYY, representing a person's date of birth.",
        "examples": ["01/01/1990", "31/12/1985"],
    },
    {
        "name": "VAT Number",
        "description": "A UK VAT registration number, typically 9 digits long, sometimes prefixed with 'GB'.",
        "examples": ["GB123456789", "123456789"],
    },
    {
        "name": "Bank Sort Code",
        "description": "A 6-digit code identifying the bank and branch for UK bank accounts.",
        "examples": ["40-04-15", "601613"],
    },
    {
        "name": "Bank Account Number",
        "description": "An 8-digit number identifying a UK bank account.",
        "examples": ["12345678", "87654321"],
    },
    {
        "name": "Driving License Number",
        "description": "A UK driving license number, typically 16 characters long, including letters and numbers.",
        "examples": ["SURRE123456AB9CD", "WALES987654ZY3XW"],
    },
    {
        "name": "NHS Number",
        "description": "A 10-digit unique identifier for patients in the UK National Health Service.",
        "examples": ["1234567890", "9876543210"],
    },
    {
        "name": "Vehicle Registration Number",
        "description": "A UK vehicle registration number, typically in the format AA99 AAA.",
        "examples": ["AB12 CDE", "XY99 ZZZ"],
    },
    {
        "name": "Passport Number",
        "description": "A UK passport number, typically 9 digits long.",
        "examples": ["123456789", "987654321"],
    },
    {
        "name": "Company Registration Number",
        "description": "A unique identifier for companies registered in the UK, typically 8 digits long.",
        "examples": ["01234567", "98765432"],
    },
    {
        "name": "Landline Number",
        "description": "A UK landline number, typically 11 digits, starting with 01 or 02.",
        "examples": ["01234567890", "02012345678"],
    },
    {
        "name": "Trust Registration Number",
        "description": "A unique identifier for trusts registered in the UK, used for tax and legal purposes.",
        "examples": ["12345678TR", "98765432TR"],
    },
    {
        "name": "Universal Credit Number",
        "description": "A unique identifier for individuals receiving Universal Credit in the UK.",
        "examples": ["UC1234567890", "UC9876543210"],
    },
    {
        "name": "Online Account Username",
        "description": "A username used to access online accounts, often associated with social media, banking, or e-commerce platforms.",
        "examples": ["john_doe123", "alice_smith456"],
    },
    {
        "name": "Pension Reference Number",
        "description": "A unique identifier for individuals in the UK receiving pension benefits.",
        "examples": ["P12345678", "P98765432"],
    },
]

def create_glossary_term_mcp(term_name: str, description: str, examples: list) -> MetadataChangeProposalWrapper:
    """Create a MetadataChangeProposalWrapper for a glossary term."""
    term_urn = f"urn:li:glossaryTerm:{term_name.replace(' ', '_')}"

    # Solution 2: Set the description attribute directly
    term_info = GlossaryTermInfoClass(name=term_name, examples=examples)
    term_info.description = description

    return MetadataChangeProposalWrapper(
        entityType="glossaryTerm",
        entityUrn=term_urn,
        aspectName="glossaryTermInfo",
        aspect=term_info,
        changeType=ChangeTypeClass.UPSERT,
    )

def emit_to_datahub(emitter: DatahubRestEmitter, mcp: MetadataChangeProposalWrapper) -> None:
    """Emit MCP to DataHub with error handling."""
    try:
        emitter.emit_mcp(mcp)
        print(f"Successfully created glossary term: {mcp.entityUrn}")
    except Exception as e:
        print(f"Failed to create glossary term: {mcp.entityUrn}")
        print(f"Error: {str(e)}")

def main():
    try:
        # Initialize emitter
        emitter = DatahubRestEmitter(
            gms_server=DATAHUB_SERVER_URL,
        )

        # Create glossary terms for each UK data standard
        for standard in UK_DATA_STANDARDS:
            term_mcp = create_glossary_term_mcp(
                term_name=standard["name"],
                description=standard["description"],
                examples=standard["examples"],
            )
            emit_to_datahub(emitter, term_mcp)

    except Exception as e:
        print(f"Unexpected error: {str(e)}")

if __name__ == "__main__":
    main()

Unexpected error: GlossaryTermInfoClass.__init__() got an unexpected keyword argument 'examples'
