In [1]:
import os

# Select the test configuration directory. This cell runs before the
# etl_pipeline imports — presumably the package reads the variable at import
# time; TODO confirm.
os.environ.update({"CONFIG_APP_DIR": "tests/test_custom/config_app/"})

In [3]:
from copy import deepcopy
from etl_pipeline.config import alert_agents_config
from etl_pipeline.config import columns_namespace as cn
from etl_pipeline.custom.ms.datatypes.field import InputRecordField
from etl_pipeline.custom.ms.payload_loader import PayloadLoader
from etl_pipeline.custom.ms.transformations import (
    create_agent_input_agg_col_config,
    prepend_agent_name_to_ap_or_wl_or_aliases_key,
)
from etl_pipeline.custom.ms.watchlist_extractor import WatchlistExtractor
from etl_pipeline.pipeline import ETLPipeline


class MSPipeline(ETLPipeline):
    """ETL pipeline that splits an alert payload per input-record version.

    The stage methods work on dict-based payloads and delegate the actual
    transformations to ``self.engine``; ``self.pipeline_config`` supplies the
    fuzziness level used when computing trigger reasons.
    """

    def convert_raw_to_standardized(self, df):
        """Return ``df`` unchanged."""
        return df

    def connect_input_record_with_match_record(self, payload):
        """Build one payload copy per (input record, match record) version pair.

        Every input record in ``payload`` first receives an ``'INPUT_FIELD'``
        mapping of field name to ``InputRecordField`` (this mutates ``payload``).
        Then, for each input record / match record pair whose ``versionId`` and
        ``inputVersionId`` are equal, ``payload`` is deep-copied and the copy is
        reduced to the input records and match records of that version.

        Args:
            payload: Alert payload dictionary; modified in place as described.

        Returns:
            list: The reduced deep copies, one per matching pair. A version with
            several match records therefore yields several equal copies.
        """
        alert = payload[cn.ALERT_FIELD]
        input_records = alert[cn.INPUT_RECORD_HIST]
        match_records = alert[cn.MATCH_RECORDS]

        for input_record in input_records:
            input_record['INPUT_FIELD'] = {
                field["name"]: InputRecordField(**field)
                for field in input_record['field']
            }

        new_payloads = []
        for input_record in input_records:
            version_id = input_record['versionId']
            for match_record in match_records:
                if match_record['inputVersionId'] != version_id:
                    continue

                pair_payload = deepcopy(payload)
                pair_alert = pair_payload[cn.ALERT_FIELD]

                # Filter instead of deleting by index: deleting while walking the
                # original indexes shifts the remaining elements, which removes the
                # wrong records or raises IndexError. Slice assignment keeps the
                # list objects of the copy in place.
                pair_inputs = pair_alert[cn.INPUT_RECORD_HIST]
                pair_inputs[:] = [
                    record for record in pair_inputs
                    if record['versionId'] == version_id
                ]
                pair_matches = pair_alert[cn.MATCH_RECORDS]
                pair_matches[:] = [
                    record for record in pair_matches
                    if record['inputVersionId'] == version_id
                ]

                new_payloads.append(pair_payload)

        return new_payloads

    def transform_standardized_to_cleansed(self, payload):
        """Split ``payload`` per version and enrich every resulting payload.

        Each related party is replaced by its ``"fields"`` entry (in place), the
        payload is split with ``connect_input_record_with_match_record``, and for
        every split payload the engine computes trigger reasons, beneficiary
        hits, party values, cleaned names, the concat residue and the triggered
        tokens.

        Args:
            payload: Alert payload dictionary; modified in place.

        Returns:
            list: The split payloads after enrichment.

        Raises:
            AttributeError: If a split payload has no ``cn.CONCAT_ADDRESS`` input
                field.
        """
        match_ids = payload[cn.MATCH_IDS]
        # NOTE(review): `matches` and `parties` belong to the ORIGINAL payload, not
        # to the deep copies produced below, so the per-match updates in the loop
        # land on the original match records — confirm this is intended.
        matches = payload[cn.ALERT_FIELD][cn.MATCH_RECORDS]
        parties = payload[cn.SUPPLEMENTAL_INFO][cn.RELATED_PARTIES][cn.PARTIES]

        # Flatten each party to its "fields" entry, keeping the same list object.
        parties[:] = [party["fields"] for party in parties]

        new_payloads = self.connect_input_record_with_match_record(payload)

        names_source_cols = [
            cn.ALL_PARTY_NAMES,
            cn.ALL_CONNECTED_PARTIES_NAMES,
        ]

        for pair_payload in new_payloads:
            fields = pair_payload[cn.ALERT_FIELD][cn.INPUT_RECORD_HIST][0]["INPUT_FIELD"]
            for match_id in match_ids:
                match = matches[match_id]
                WatchlistExtractor().update_match_with_wl_values(match)
                match[cn.TRIGGERED_BY] = self.engine.set_trigger_reasons(
                    match, self.pipeline_config.FUZZINESS_LEVEL
                )
                self.engine.set_beneficiary_hits(match)

            self.engine.connect_full_names(parties)

            self.engine.collect_party_values(parties, pair_payload)
            pair_payload[cn.ALL_CONNECTED_PARTY_TYPES] = pair_payload[cn.ALL_PARTY_TYPES]

            # Looked up once and reused for both the name cleaning and the
            # "no changes" comparison below.
            concat_address = fields.get(cn.CONCAT_ADDRESS, None).value

            pair_payload[cn.CLEANED_NAMES] = self.engine.get_clean_names_from_concat_name(
                concat_address,
                {key: pair_payload[key] for key in names_source_cols},
            )

            concat_residue = pair_payload[cn.CLEANED_NAMES][cn.CONCAT_RESIDUE]
            pair_payload[cn.CONCAT_RESIDUE] = concat_residue
            pair_payload[cn.CONCAT_ADDRESS_NO_CHANGES] = concat_residue == concat_address

            for match_id in match_ids:
                match = matches[match_id]
                match[cn.AP_TRIGGERS] = self.engine.set_triggered_tokens_discovery(
                    pair_payload, match, fields
                )

        return new_payloads

    def get_key(self, payload, match, conf):
        """Resolve every dotted path listed in ``conf`` to a value.

        Paths containing ``cn.MATCH_RECORDS`` are resolved against ``match``
        (after dropping their first segment); all other paths are resolved
        against ``payload``. An ``"INPUT_FIELD"`` segment short-circuits the walk
        and reads ``.value`` of the field named by the last path segment from the
        first element of the current value.

        Args:
            payload: Payload dictionary used for non-match paths.
            match: Match record used for match paths.
            conf: Mapping of agent name to a mapping of key to a list of dotted
                paths.

        Returns:
            dict: Last path segment mapped to the resolved value. Later paths
            with the same last segment overwrite earlier ones.
        """
        new_config = {}
        for agent_conf in dict(conf).values():
            for paths in dict(agent_conf).values():
                for element in paths:
                    elements = element.split(".")
                    if cn.MATCH_RECORDS in element:
                        value = match
                        elements = elements[1:]
                    else:
                        value = payload

                    for field_name in elements:
                        if field_name == "INPUT_FIELD":
                            value = value[0][field_name][elements[-1]].value
                            break
                        try:
                            value = value.get(field_name, None)
                        except TypeError:
                            # NOTE(review): a list or None here raises
                            # AttributeError, not TypeError — confirm this
                            # fallback is actually reachable.
                            list_key = PayloadLoader.LIST_ELEMENT_REGEX.sub("", field_name)
                            # `groups(0)` returned a tuple, so `int(...)` always
                            # failed. Assumes the first capture group holds the
                            # list index — TODO confirm against the regex.
                            ix = int(
                                PayloadLoader.LIST_ELEMENT_REGEX.match(field_name).group(1)
                            )
                            value = value[list_key][ix]
                    new_config[elements[-1]] = value
        return new_config

    def load_agent_config(self, alert_type="WM_ADDRESS"):
        """Load the agent configuration for ``alert_type``.

        Args:
            alert_type: Key into ``alert_agents_config``.

        Returns:
            tuple: ``(parsed_agent_config, alert_config)`` where ``alert_config``
            is the raw entry for ``alert_type`` and ``parsed_agent_config`` has
            the same agent/key structure with every dotted path reduced to its
            last segment.
        """
        alert_config = alert_agents_config[alert_type]
        parsed_agent_config = {
            agent_name: {
                key: [path.split(".")[-1] for path in paths]
                for key, paths in dict(agent_config).items()
            }
            for agent_name, agent_config in dict(alert_config).items()
        }
        return parsed_agent_config, alert_config

    def transform_cleansed_to_application(self, payload):
        """Merge agent input columns into every match of every payload.

        Args:
            payload: Iterable of payload dictionaries, as returned by
                ``transform_standardized_to_cleansed``.

        Returns:
            The same ``payload`` object that was passed in; its match records are
            handed to the engine for merging.
        """
        new_payloads = payload
        for pair_payload in new_payloads:
            match_ids = pair_payload[cn.MATCH_IDS]
            # NOTE(review): `match_ids` index into this payload's own match list,
            # which was filtered during the split — confirm the ids still line up.
            matches = pair_payload[cn.ALERT_FIELD][cn.MATCH_RECORDS]
            agent_config, yaml_conf = self.load_agent_config()
            agent_input_prepended_agent_name_config = prepend_agent_name_to_ap_or_wl_or_aliases_key(
                agent_config
            )

            agent_input_agg_col_config = create_agent_input_agg_col_config(
                agent_input_prepended_agent_name_config
            )

            for match_id in match_ids:
                match = matches[match_id]
                config = self.get_key(pair_payload, match, yaml_conf)
                self.engine.sql_to_merge_specific_columns_to_standardized(
                    agent_input_prepended_agent_name_config,
                    match,
                    config,
                    False,
                )
                # Make the freshly merged "*_ap" / "*_wl" columns of the match
                # available to the aggregation step.
                config.update(
                    {
                        key: match.get(key)
                        for key in match
                        if key.endswith(("_ap", "_wl"))
                    }
                )
                self.engine.sql_to_merge_specific_columns_to_standardized(
                    agent_input_agg_col_config, match, config, True
                )

        return new_payloads


In [4]:
def load_alert(path="API/alert.json"):
    """Build a sample ``Alert`` whose flat payload comes from a JSON file.

    Args:
        path: Location of the JSON document to load. Defaults to the sample
            alert used throughout this notebook.

    Returns:
        An ``Alert`` with two fixed matches (ids ``"0"`` and ``"1"``) whose
        ``flat_payload`` holds every top-level JSON key and value, both converted
        with ``str``.
    """
    # Read the file first so the handle is closed before the alert is assembled.
    with open(path, "r", encoding="utf-8") as f:
        raw_payload = json.load(f)

    matches = [
        Match(match_id="0", match_name="1"),
        Match(match_id="1", match_name="2"),
    ]
    alert = Alert(batch_id="1", alert_name="2", matches=matches)
    for key, value in raw_payload.items():
        alert.flat_payload[str(key)] = str(value)
    return alert

In [14]:
# Sample alert; its `matches` supply the match ids attached to the payload below.
alert = load_alert()

In [15]:
# Flat payload of a freshly loaded alert.
payload = load_alert().flat_payload

In [16]:
# Feed the flat payload to the loader in sorted-key order, then attach the
# integer ids of the alert's matches.
payload_json = PayloadLoader().load_payload_from_json(
    {name: payload[name] for name in sorted(payload)}
)
payload_json['match_ids'] = [int(found.match_id) for found in alert.matches]

In [17]:
# Inspect the top-level sections of the loaded alert.
payload_json['alert'].keys()

dict_keys(['headerInfo', 'inputRecordHist', 'matchRecords', 'supplementalInfo'])

In [18]:
# NOTE(review): `JsonProcessingEngine` and `pipeline_config` are not imported or
# defined in any cell shown above — they must come from a cell that is not
# visible here; confirm this still runs after Restart & Run All.
engine = JsonProcessingEngine(pipeline_config)
pipeline = MSPipeline(engine, config=pipeline_config)

In [19]:

# Run both stages back to back; `payload` ends up holding the result of
# `transform_cleansed_to_application`.
payload = pipeline.transform_cleansed_to_application(
    pipeline.transform_standardized_to_cleansed(payload_json)
)

### payload

In [20]:
# `transform_cleansed_to_application` returns a list of payloads, so indexing
# `payload` with "alert" directly raises TypeError; walk the list first.
for pair_payload in payload:
    for match in pair_payload["alert"]["matchRecords"]:
        print("++++++++")
        print(match["ap_all_dobs_aggregated"], match["wl_all_dobs_aggregated"])
        print(match["ap_all_nationalities_aggregated"], match["wl_all_nationalities_aggregated"])
        print(match["ap_all_residencies_aggregated"], match["wl_all_residencies_aggregated"])

++++++++
[['', '10/10/1969']] [[]]
[None] [[[], [], [], None, None]]
[None] [None]
++++++++
[['', '10/10/1969']] [['01/11/1924']]
[None] [[['CHIC'], [], [], None, None]]
[None] [None]


In [None]:
# new_json lists