In [None]:
import os
import yaml
from dotenv import load_dotenv
import datahub.emitter.mce_builder as builder
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.emitter.rest_emitter import DatahubRestEmitter
from datahub.metadata.com.linkedin.pegasus2avro.dataset import (
    DatasetLineageType,
    FineGrainedLineage,
    FineGrainedLineageDownstreamType,
    FineGrainedLineageUpstreamType,
    Upstream,
    UpstreamLineage,
)

# Load environment variables (via python-dotenv) before reading the settings below
load_dotenv()

# Connection settings; either value is None when the variable is not set.
DATAHUB_SERVER_URL = os.environ.get("DATAHUB_SERVER_URL")
DATAHUB_TOKEN = os.environ.get("DATAHUB_TOKEN")

# Initialize the DataHub REST emitter used for every MCP sent by this script
emitter = DatahubRestEmitter(
    gms_server=DATAHUB_SERVER_URL,
    token=DATAHUB_TOKEN,
)

# Load lineage data from the YAML file
def load_lineage(file_path):
    """Parse the YAML lineage definition at *file_path*.

    The file is decoded explicitly as UTF-8 so the result does not depend on
    the platform's locale encoding.

    Args:
        file_path: Path of the YAML file to read.

    Returns:
        Whatever ``yaml.safe_load`` produces for the document.

    Raises:
        OSError: If the file cannot be opened.
        yaml.YAMLError: If the content is not valid YAML.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        return yaml.safe_load(file)

# Parsed contents of lineage.yml (the path is relative to the current working
# directory); the emit loop below reads its 'lineages' key.
lineage_data = load_lineage("lineage.yml")

# Helper function to create URNs in the proper format
def datasetUrn(platform, name, env):
    """Build a dataset URN string from its three components.

    Args:
        platform: Platform identifier, placed after ``urn:li:dataPlatform:``.
        name: Dataset name as it should appear inside the URN.
        env: Environment label that closes the URN tuple.

    Returns:
        ``urn:li:dataset:(urn:li:dataPlatform:<platform>,<name>,<env>)``.
    """
    platform_urn = "urn:li:dataPlatform:{}".format(platform)
    return "urn:li:dataset:({},{},{})".format(platform_urn, name, env)

def fldUrn(dataset_urn, fld):
    """Return the schema-field URN for field *fld* of *dataset_urn*.

    Delegates to ``builder.make_schema_field_urn``.

    Args:
        dataset_urn: URN of the dataset that owns the field.
        fld: Field name/path within that dataset.
    """
    field_urn = builder.make_schema_field_urn(dataset_urn, fld)
    return field_urn

# Iterate through lineage data and emit lineage MCPs.
#
# Emitting an UpstreamLineage aspect replaces the target's whole aspect, so
# emitting once per YAML entry would let a later entry for the same target
# erase the upstreams written by an earlier one. Everything is therefore
# accumulated per target first and each target is emitted exactly once.
#
# Maps target URN -> (upstreams keyed by source URN, fine-grained lineages).
# Dicts keep insertion order, so targets are emitted in first-seen order.
lineage_by_target = {}

for lineage_entry in lineage_data['lineages']:
    # Correct URN formatting from the source and target
    source_urn = datasetUrn(lineage_entry['source']['platform'], lineage_entry['source']['dataset'], "PROD")
    target_urn = datasetUrn(lineage_entry['target']['platform'], lineage_entry['target']['dataset'], "PROD")

    target_upstreams, target_field_lineages = lineage_by_target.setdefault(target_urn, ({}, []))

    # Keying by source URN keeps a single Upstream per source dataset even
    # when the same source/target pair appears in several entries.
    if source_urn not in target_upstreams:
        target_upstreams[source_urn] = Upstream(dataset=source_urn, type=DatasetLineageType.TRANSFORMED)

    # Column-level mappings; an absent or empty 'field_mappings' yields
    # dataset-level lineage only.
    for field_map in lineage_entry.get('field_mappings') or []:
        target_field_lineages.append(
            FineGrainedLineage(
                upstreamType=FineGrainedLineageUpstreamType.FIELD_SET,
                upstreams=[fldUrn(source_urn, field_map['source_field'])],
                downstreamType=FineGrainedLineageDownstreamType.FIELD,
                downstreams=[fldUrn(target_urn, field_map['target_field'])],
            )
        )

# Create and emit one MCP per target dataset
for target_urn, (target_upstreams, target_field_lineages) in lineage_by_target.items():
    upstream_lineage = UpstreamLineage(
        upstreams=list(target_upstreams.values()),
        fineGrainedLineages=target_field_lineages,
    )
    lineage_mcp = MetadataChangeProposalWrapper(entityUrn=target_urn, aspect=upstream_lineage)
    emitter.emit_mcp(lineage_mcp)

print("Lineage emitted successfully!")
