In [43]:
import pandas as pd
from pprint import pprint

In [44]:
def dim_counterparty_dataframe(**dataframes):
    """
    Build the counterparty dimension by joining counterparties to their legal address.

    The 'address' frame's columns are renamed to the dimension's
    ``counterparty_legal_*`` names (and ``address_id`` to ``legal_address_id``),
    the renamed frame is merged onto 'counterparty' on ``legal_address_id``, and
    only the dimension columns are kept.

    Parameters:
        **dataframes: Extracted dataframes passed as keyword arguments.
            Keywords other than the two below are ignored.
            - counterparty (pd.DataFrame): must contain 'counterparty_id',
              'counterparty_legal_name' and 'legal_address_id'.
            - address (pd.DataFrame): must contain 'address_id',
              'address_line_1', 'address_line_2', 'district', 'city',
              'postal_code', 'country' and 'phone'.

    Returns:
        pd.DataFrame: One row per matched counterparty/address pair, with the
        columns 'counterparty_id', 'counterparty_legal_name',
        'counterparty_legal_address_line_1', 'counterparty_legal_address_line_2',
        'counterparty_legal_district', 'counterparty_legal_city',
        'counterparty_legal_postal_code', 'counterparty_legal_country' and
        'counterparty_legal_phone_number'. The merge uses pandas' default
        (inner) join, so counterparties whose 'legal_address_id' has no
        matching address row are not present in the result.

    Raises:
        ValueError: If 'counterparty' or 'address' is not supplied, or is None.
        KeyError: If a column needed for the join or for the final column
            selection is missing from the inputs.
    """
    required_keys = ("counterparty", "address")

    for key in required_keys:
        # A key passed explicitly as None is as unusable as an absent one;
        # report both the same way instead of failing later on `None.merge`.
        if dataframes.get(key) is None:
            raise ValueError(f"Error: Missing required dataframe '{key}'.")

    counterparty_df = dataframes["counterparty"]
    address_df = dataframes["address"]

    # Source address column -> dimension column. `address_id` becomes the join
    # key so it lines up with the counterparty table's `legal_address_id`.
    address_column_map = {
        "address_id": "legal_address_id",
        "address_line_1": "counterparty_legal_address_line_1",
        "address_line_2": "counterparty_legal_address_line_2",
        "district": "counterparty_legal_district",
        "city": "counterparty_legal_city",
        "postal_code": "counterparty_legal_postal_code",
        "country": "counterparty_legal_country",
        "phone": "counterparty_legal_phone_number",
    }

    dim_columns = [
        "counterparty_id",
        "counterparty_legal_name",
        "counterparty_legal_address_line_1",
        "counterparty_legal_address_line_2",
        "counterparty_legal_district",
        "counterparty_legal_city",
        "counterparty_legal_postal_code",
        "counterparty_legal_country",
        "counterparty_legal_phone_number",
    ]

    # `rename` ignores labels that are absent by default, so a missing address
    # column surfaces as a KeyError at the merge (join key) or at the final
    # column selection below rather than here.
    renamed_address_df = address_df.rename(columns=address_column_map)

    counterparty_address_merged_df = counterparty_df.merge(
        renamed_address_df,
        on="legal_address_id",
    )

    # Errors are left to propagate unchanged; no wrapping try/except is needed.
    return counterparty_address_merged_df[dim_columns]

In [45]:
# Load the counterparty and address seed extracts that feed the transform,
# then preview the first two rows of each frame.
counterparty_df = pd.read_parquet(
    "../sql_local_tests/seed_data/counterparty_2022-11-3_14-20-51_563000.parquet"
)
address_df = pd.read_parquet(
    "../sql_local_tests/seed_data/address_2022-11-3_14-20-49_962000.parquet"
)
# One print call with a newline separator emits the same text as two calls.
print(counterparty_df.head(2), address_df.head(2), sep="\n")

   counterparty_id      counterparty_legal_name  legal_address_id  \
0                1               Fahey and Sons                15   
1                2  Leannon, Predovic and Morar                28   

  commercial_contact         delivery_contact              created_at  \
0        Micheal Toy  Mrs. Lucy Runolfsdottir 2022-11-03 14:20:51.563   
1      Melba Sanford            Jean Hane III 2022-11-03 14:20:51.563   

             last_updated  
0 2022-11-03 14:20:51.563  
1 2022-11-03 14:20:51.563  
   address_id     address_line_1 address_line_2 district               city  \
0           1    6826 Herzog Via           None     Avon  New Patienceburgh   
1           2  179 Alexie Cliffs           None     None        Aliso Viejo   

  postal_code     country        phone              created_at  \
0       28441      Turkey  1803 637401 2022-11-03 14:20:49.962   
1  99305-7380  San Marino  9621 880720 2022-11-03 14:20:49.962   

             last_updated  
0 2022-11-03 14:20:49.9

In [46]:
# Run the transform on the loaded seed frames and print the resulting dimension.
dim_counterparty_df = dim_counterparty_dataframe(
    address=address_df,
    counterparty=counterparty_df,
)
pprint(dim_counterparty_df)

    counterparty_id         counterparty_legal_name  \
0                 1                  Fahey and Sons   
1                 2     Leannon, Predovic and Morar   
2                 3                   Armstrong Inc   
3                 4                      Kohler Inc   
4                 5     Frami, Yundt and Macejkovic   
5                 6                        Mraz LLC   
6                 7  Padberg, Lueilwitz and Johnson   
7                 8                   Grant - Lakin   
8                 9                       Price LLC   
9                10                   Bosco - Grant   
10               11                Corkery - Nienow   
11               12              Keeling - Emmerich   
12               13       Hand, Krajcik and Kuhlman   
13               14           Hintz, Hayes and Will   
14               15    Emmerich, Rohan and Reynolds   
15               16    Hartmann, Franecki and Ratke   
16               17                      Kihn Group   
17        