In [1]:
## Copied from pipeline.prediction.components

from typing import Dict, List, NamedTuple, Optional

import logging

# Configure logging
logging.basicConfig(level=logging.INFO)  # Set the desired logging level

def build_input(
    global_entity_id: str,
    lead_sources: List[str],
    run_type: str,
    side: str,
    environment: str,
    region: str,
    version: str,
    right_source: str = "salesforce",
    lookback_days: Optional[int] = None,
    dry_run: bool = False,
    additional_labels: str = None,
    filter_right_objects: bool = False,
) -> str:
    # import logging

    from match.achilles import LeftInput, RightInput
    from match.achilles.io import extract_additional_labels, patch_labels
    from match.achilles.vertex.config import PipelineRuntimeConfigFactory

    labels = (
        extract_additional_labels(additional_labels)
        if additional_labels
        else {}
    )
    patch_labels(
        global_entity_id=global_entity_id,
        pipeline_name="vendor_match",
        pipeline_step="build_input",
        **labels,
    )
    # log = logging.getLogger()
    log = logging.getLogger(__name__)  # Get logger with current module name
    config = PipelineRuntimeConfigFactory(
        environment, region, global_entity_id, run_type, version
    ).get()
    is_delta = run_type == "delta"
    left_input_data_table = config.common.leads_input_data_table
    log.info(f"Environment Name: {environment}")
    log.info(
        f"Left Table Name: {config.prediction.input_left.output_table_id}"
    )
    log.info(
        f"Right Table Name: {config.prediction.input_left.output_table_id}"
    )

    if side == "left":
        output_table = config.prediction.input_left.output_table_id
        left_input = LeftInput(
            global_entity_id=global_entity_id,
            output_table=output_table,
            lead_source=lead_sources,
            delta=is_delta,
            leads_input_data_table=left_input_data_table,
        )
        left_input.build_input(dry_run=dry_run)
        log.info(
            f"Left input from sources {lead_sources} is written to: {output_table}"
        )
    elif side == "right":
        output_table = config.prediction.input_right.output_table_id
        right_input = RightInput(
            global_entity_id=global_entity_id,
            output_table=output_table,
            lead_source=right_source,
            delta=is_delta,
            filter_older_objects=filter_right_objects,
            lookback_days=lookback_days,
            leads_input_data_table=left_input_data_table,
        )
        right_input.build_input(dry_run=dry_run)
        log.info(
            f"Right input from source {right_source} is written to: {output_table}"
        )
    else:
        raise ValueError("Side must be either left or right")

    return output_table

In [8]:
# generate right input query with delta

global_entity_id = "PO_FI"
lead_sources = ["wolt","oiva","facebook","profinder","vainu","google","yelp","tripadvisor"]
environment = "dev"
region = "eur"
version = "latest"
lookback_days = None
dry_run = True
filter_right_objects = False

build_input(
    global_entity_id=global_entity_id,
    lead_sources=lead_sources,
    run_type="delta",
    side="right",
    environment=environment,
    region=region,
    version=version,
    lookback_days=lookback_days,
    dry_run=dry_run,
    filter_right_objects=filter_right_objects,
)

INFO:achilles_io:Labels after update: {'dh_app': 'vendor-matching', 'dh_cc_id': '1001025045', 'global_entity_id': 'po_fi', 'pipeline_name': 'vendor_match', 'pipeline_step': 'build_input'}
INFO:__main__:Environment Name: dev
INFO:__main__:Left Table Name: dh-global-sales-data-dev.leadgen_sf_match_vertex_delta_raw.left_leadgen_PO_FI
INFO:__main__:Right Table Name: dh-global-sales-data-dev.leadgen_sf_match_vertex_delta_raw.left_leadgen_PO_FI
INFO:match.achilles:Vendors Data Table: dh-global-sales-data.leadgen_cl.vendor_complete
INFO:match.achilles:Query:
CREATE TABLE `dh-global-sales-data-dev.leadgen_sf_match_vertex_delta_raw.right_sf_PO_FI` AS
(
WITH country_map AS (
    SELECT DISTINCT
        country_code AS country_iso,
        global_entity_id
    FROM
        `fulfillment-dwh-production.curated_data_shared_coredata.global_entities`
    WHERE global_entity_id in ('PO_FI')
)
,

sf_account AS (
    SELECT 
        global_entity_id, grid__c,account_status__c, 
        type as branch_typ

'dh-global-sales-data-dev.leadgen_sf_match_vertex_delta_raw.right_sf_PO_FI'

In [9]:
# generate left input query with delta

build_input(
    global_entity_id=global_entity_id,
    lead_sources=lead_sources,
    run_type="delta",
    side="left",
    environment=environment,
    region=region,
    version=version,
    lookback_days=lookback_days,
    dry_run=dry_run,
    filter_right_objects=filter_right_objects,
)

INFO:achilles_io:Labels after update: {'dh_app': 'vendor-matching', 'dh_cc_id': '1001025045', 'global_entity_id': 'po_fi', 'pipeline_name': 'vendor_match', 'pipeline_step': 'build_input'}
INFO:__main__:Environment Name: dev
INFO:__main__:Left Table Name: dh-global-sales-data-dev.leadgen_sf_match_vertex_delta_raw.left_leadgen_PO_FI
INFO:__main__:Right Table Name: dh-global-sales-data-dev.leadgen_sf_match_vertex_delta_raw.left_leadgen_PO_FI
INFO:match.achilles:Vendors Data Table: dh-global-sales-data.leadgen_cl.vendor_complete
INFO:match.achilles:Query:
CREATE TABLE `dh-global-sales-data-dev.leadgen_sf_match_vertex_delta_raw.left_leadgen_PO_FI` AS
(
WITH country_map AS (
    SELECT DISTINCT
        country_code AS country_iso,
        global_entity_id
    FROM
        `fulfillment-dwh-production.curated_data_shared_coredata.global_entities`
    WHERE global_entity_id in ('PO_FI')
)
,

sf_account AS (
    SELECT 
        global_entity_id, grid__c,account_status__c, 
        type as branch

'dh-global-sales-data-dev.leadgen_sf_match_vertex_delta_raw.left_leadgen_PO_FI'