In [0]:
from pyspark.sql.functions import *
from pyspark.sql.window import *
from pyspark.sql.types import *

In [0]:
def MergeDeltaSingleKey(table_name, src_location, des_location, mode):
    """Upsert one bronze dimension extract into its silver Delta table.

    The bronze Parquet data is registered as the temp view ``src`` and merged
    into the Delta table on a single business-key column. The MERGE statement
    is generated from the destination schema, which is assumed to be laid out
    as follows (these positions are relied on, not checked):

    * column 0      - surrogate key, never inserted or updated here
    * column 1      - business key used in the ``ON`` clause
    * last 6 columns - IsActive, Source, CreatedBy, CreatedDate, ModifiedBy,
      ModifiedDate, in that order

    Args:
        table_name: Dimension name, e.g. ``"DimBrand"``.
        src_location: Path *prefix* of the bronze extract; the table name is
            appended directly (callers pass ``".../Dim/pre_tx_"``).
        des_location: Directory holding the silver Delta tables; a trailing
            slash is tolerated.
        mode: ``"dev"`` prints the generated MERGE without running it,
            ``"run"`` executes it and prints the row-count metrics. Any other
            value only loads the schemas and does nothing else.

    Returns:
        None.
    """
    print(f'Process: {table_name}')
    # Strip a trailing slash so the joined path does not contain "//".
    silver_dim_path = f"{des_location.rstrip('/')}/{table_name}"
    # src_location is a prefix, not a directory, so no separator is inserted.
    bronze_dim_path = f"{src_location}{table_name}"

    # Source
    src_df = spark.read.format("parquet").load(bronze_dim_path)
    src_df.createOrReplaceTempView("src")

    # Destination and structure
    des_schema = spark.read.format("delta").load(silver_dim_path).schema
    columns = [field.name for field in des_schema]
    primary_key = columns[1]  # Assuming the second column is the Primary Key

    # Columns that are either set explicitly below or must never be overwritten.
    managed_columns = {"CreatedBy", "CreatedDate", "ModifiedBy", "ModifiedDate", "IsActive"}
    update_assignments = [
        f"des.{name} = src.{name}"
        for name in columns
        if name != primary_key
        and name not in managed_columns
        and not name.endswith("Key")
    ]
    # Appending the audit assignments to the same list keeps the SET clause
    # valid even when the table has no plain data columns to copy.
    update_assignments += [
        "des.ModifiedDate = CURRENT_TIMESTAMP",
        "des.ModifiedBy = 'spark'",
        "des.IsActive = 1",
    ]
    update_clauses = ",\n        ".join(update_assignments)

    insert_columns = ", ".join(columns[1:])
    # Data columns come from the source; the six trailing audit columns get
    # fixed values (IsActive, Source, CreatedBy, CreatedDate, ModifiedBy,
    # ModifiedDate).
    insert_values = ", ".join(
        [f"src.{name}" for name in columns[1:-6]]
        + ["1", "src.Source", "'spark'", "CURRENT_TIMESTAMP", "'spark'", "CURRENT_TIMESTAMP"]
    )

    query = f"""
    MERGE INTO delta.`{silver_dim_path}` AS des
    USING src AS src
    ON des.{primary_key} = src.{primary_key}
    WHEN MATCHED THEN
    UPDATE SET
        {update_clauses}
    WHEN NOT MATCHED THEN
    INSERT (
        {insert_columns}
    )
    VALUES (
        {insert_values}
    );
    """

    # The DimMaterial source exposes the description as MaterialNameEN.
    if table_name == "DimMaterial":
        query = query.replace("src.MaterialDescription", "src.MaterialNameEN")

    if mode == "dev":
        print(query)

    if mode == "run":
        result = spark.sql(query)

        # Show the merge operation result summary
        for metric in (
            "num_affected_rows",
            "num_updated_rows",
            "num_deleted_rows",
            "num_inserted_rows",
        ):
            print(result.select(metric).collect()[0])
        


In [0]:
# Dimensions to process with the generic MergeDeltaSingleKey helper.
dimensions = [
    "DimBrand",
    "DimClassOfTrade",
    "DimCompany",
    # "DimCustomer",  # excluded: this one did not match either
    "DimCustomerGroup",
    "DimCustomerGroup5",
    "DimDistributionChannel",
    "DimDivision",
    # "DimMainProduct",  # excluded: needs an extra transformation
    "DimMaterial", # source uses a different column name, so it needs manual handling
    "DimProductBrand",
    "DimProductBrandGroup",
    "DimProductCategory",
    "DimProductHierarchy",
    # "DimProfitCenterData"
]

# Print one MergeDeltaSingleKey(...) call per dimension. Nothing is executed
# here; the loop only emits the call text.
for i in dimensions:
    table_name = i  # NOTE(review): not read anywhere in this loop
    print(f'MergeDeltaSingleKey("{i}","/mnt/dw01bronze/Dim/pre_tx_","/mnt/dw01silver/Dim/","run")')

MergeDeltaSingleKey("DimBrand","/mnt/dw01bronze/Dim/pre_tx_","/mnt/dw01silver/Dim/","run")
MergeDeltaSingleKey("DimClassOfTrade","/mnt/dw01bronze/Dim/pre_tx_","/mnt/dw01silver/Dim/","run")
MergeDeltaSingleKey("DimCompany","/mnt/dw01bronze/Dim/pre_tx_","/mnt/dw01silver/Dim/","run")
MergeDeltaSingleKey("DimCustomerGroup","/mnt/dw01bronze/Dim/pre_tx_","/mnt/dw01silver/Dim/","run")
MergeDeltaSingleKey("DimCustomerGroup5","/mnt/dw01bronze/Dim/pre_tx_","/mnt/dw01silver/Dim/","run")
MergeDeltaSingleKey("DimDistributionChannel","/mnt/dw01bronze/Dim/pre_tx_","/mnt/dw01silver/Dim/","run")
MergeDeltaSingleKey("DimDivision","/mnt/dw01bronze/Dim/pre_tx_","/mnt/dw01silver/Dim/","run")
MergeDeltaSingleKey("DimMaterial","/mnt/dw01bronze/Dim/pre_tx_","/mnt/dw01silver/Dim/","run")
MergeDeltaSingleKey("DimProductBrand","/mnt/dw01bronze/Dim/pre_tx_","/mnt/dw01silver/Dim/","run")
MergeDeltaSingleKey("DimProductBrandGroup","/mnt/dw01bronze/Dim/pre_tx_","/mnt/dw01silver/Dim/","run")
MergeDeltaSingleKey("DimP

In [0]:
# Run the generic merge for every dimension it supports, in a fixed order.
for dim_table in (
    "DimBrand",
    "DimClassOfTrade",
    "DimCompany",
    "DimCustomerGroup",
    "DimCustomerGroup5",
    "DimDistributionChannel",
    "DimDivision",
    "DimMaterial",
    "DimProductBrand",
    "DimProductBrandGroup",
    "DimProductCategory",
    "DimProductHierarchy",
):
    MergeDeltaSingleKey(dim_table, "/mnt/dw01bronze/Dim/pre_tx_", "/mnt/dw01silver/Dim/", "run")


Process: DimBrand
Row(num_affected_rows=2280)
Row(num_updated_rows=2280)
Row(num_deleted_rows=0)
Row(num_inserted_rows=0)
Process: DimClassOfTrade
Row(num_affected_rows=239)
Row(num_updated_rows=239)
Row(num_deleted_rows=0)
Row(num_inserted_rows=0)
Process: DimCompany
Row(num_affected_rows=120)
Row(num_updated_rows=120)
Row(num_deleted_rows=0)
Row(num_inserted_rows=0)
Process: DimCustomerGroup
Row(num_affected_rows=77)
Row(num_updated_rows=77)
Row(num_deleted_rows=0)
Row(num_inserted_rows=0)
Process: DimCustomerGroup5
Row(num_affected_rows=160)
Row(num_updated_rows=160)
Row(num_deleted_rows=0)
Row(num_inserted_rows=0)
Process: DimDistributionChannel
Row(num_affected_rows=95)
Row(num_updated_rows=95)
Row(num_deleted_rows=0)
Row(num_inserted_rows=0)
Process: DimDivision
Row(num_affected_rows=64)
Row(num_updated_rows=64)
Row(num_deleted_rows=0)
Row(num_inserted_rows=0)
Process: DimMaterial
Row(num_affected_rows=391520)
Row(num_updated_rows=391520)
Row(num_deleted_rows=0)
Row(num_inserted_


# Dimensions that the generic function cannot automate yet (handled manually below)

In [0]:
def read_fact_par(path):
    """Load the Parquet dataset at ``path``, print where it came from, and return it."""
    parquet_reader = spark.read.format("parquet")
    loaded = parquet_reader.load(path)
    print(f"Read from: {path}")
    return loaded
def gen_temp(DimTable):
    """Expose the bronze extract of ``DimTable`` as the temp view ``src``.

    Reads ``/mnt/dw01bronze/Dim/pre_tx_<DimTable>`` through ``read_fact_par``
    and registers it under the fixed view name ``src``, replacing any view of
    that name. The printed message shows the dimension name, not the view name.

    Args:
        DimTable: Dimension name, e.g. ``"DimCustomer"``.

    Returns:
        None.
    """
    path_src = f'/mnt/dw01bronze/Dim/pre_tx_{DimTable}'
    df = read_fact_par(path_src)
    df.createOrReplaceTempView("src")
    print(f'created temp view {DimTable}')


In [0]:
# Load the raw DimMainProduct extract and register it as `src_a`; the next
# cell builds the `src` view from it.
path_src = f'/mnt/dw01bronze/Dim/pre_tx_DimMainProduct'
path_des = path_src.replace('pre_tx_','')  # NOTE(review): not used by the visible cells
df = read_fact_par(path_src)
df.createOrReplaceTempView('src_a')


Read from: /mnt/dw01bronze/Dim/pre_tx_DimMainProduct


In [0]:
%sql
-- Build the `src` staging view from `src_a`: one row per
-- (MainProductGroupCode, ProfitSupplyChainCode, ProfitCategoryCode) combination.
CREATE OR REPLACE TEMP VIEW src AS
SELECT 
  -- Business key: the three grouping codes joined with underscores.
  CONCAT(MainProductGroupCode, '_', ProfitSupplyChainCode, '_', ProfitCategoryCode) AS MainProductCode,
  MainProductGroupCode,
  -- Non-key attributes are collapsed with MAX so each group yields one value.
  MAX(MainProductGroupName) AS MainProductGroupName,
  ProfitCategoryCode,
  ProfitSupplyChainCode,
  MAX(Source) AS Source,
  MAX(ModifiedDate) AS ModifiedDate
FROM src_a
GROUP BY MainProductGroupCode,ProfitSupplyChainCode,ProfitCategoryCode
-- NOTE(review): the following MERGE does not depend on row order; confirm whether this ORDER BY is needed.
ORDER BY MainProductCode;


In [0]:
%sql
-- Upsert the `src` view into the silver DimMainProduct Delta table, matching
-- rows on the MainProductCode business key.
MERGE INTO delta.`/mnt/dw01silver/Dim/DimMainProduct` AS des
USING src AS src
ON des.MainProductCode = src.MainProductCode
-- Existing key: refresh the attributes, reactivate the row and stamp the
-- modification audit columns. CreatedBy/CreatedDate are left untouched.
WHEN MATCHED THEN
  UPDATE
  SET
    des.MainProductGroupCode = src.MainProductGroupCode,
    des.MainProductGroupName = src.MainProductGroupName,
    des.ProfitCategoryCode = src.ProfitCategoryCode,
    des.ProfitSupplyChainCode = src.ProfitSupplyChainCode,
    des.IsActive = 1,
    des.Source = src.Source,
    des.ModifiedDate = CURRENT_TIMESTAMP,
    des.ModifiedBy = 'spark'
-- New key: insert as active, with created and modified audit columns both set
-- to 'spark' / the current timestamp.
WHEN NOT MATCHED THEN
  INSERT (
    MainProductCode,
    MainProductGroupCode,
    MainProductGroupName,
    ProfitCategoryCode,
    ProfitSupplyChainCode,
    IsActive,
    Source,
    CreatedBy,
    CreatedDate,
    ModifiedBy,
    ModifiedDate
  )
  VALUES (
    src.MainProductCode,
    src.MainProductGroupCode,
    src.MainProductGroupName,
    src.ProfitCategoryCode,
    src.ProfitSupplyChainCode,
    1,
    src.Source,
    'spark',
    CURRENT_TIMESTAMP,
    'spark',
    CURRENT_TIMESTAMP
  );


num_affected_rows,num_updated_rows,num_deleted_rows,num_inserted_rows
90,0,0,90


In [0]:
gen_temp('DimCustomer')

Read from: /mnt/dw01bronze/Dim/pre_tx_DimCustomer
created temp view DimCustomer


In [0]:
%sql
-- Upsert the `src` view (created by gen_temp('DimCustomer')) into the silver
-- DimCustomer Delta table, matching rows on the CustomerCode business key.
MERGE INTO delta.`/mnt/dw01silver/Dim/DimCustomer` AS des
USING src AS src
ON des.CustomerCode = src.CustomerCode
-- Existing key: refresh the name columns, reactivate the row and stamp the
-- modification audit columns. CreatedBy/CreatedDate are left untouched.
WHEN MATCHED THEN
  UPDATE
  SET
    des.CustomerName = src.CustomerName,
    des.CustomerNameTH = src.CustomerNameTH,
    des.CustomerNameEN = src.CustomerNameEN,
    des.IsActive = 1,
    des.Source = src.Source,
    des.ModifiedDate = CURRENT_TIMESTAMP,
    des.ModifiedBy = 'spark'
-- New key: insert as active, with created and modified audit columns both set
-- to 'spark' / the current timestamp.
WHEN NOT MATCHED THEN
  INSERT (
    CustomerCode,
    CustomerName,
    CustomerNameTH,
    CustomerNameEN,
    IsActive,
    Source,
    CreatedBy,
    CreatedDate,
    ModifiedBy,
    ModifiedDate
  )
  VALUES (
    src.CustomerCode,
    src.CustomerName,
    src.CustomerNameTH,
    src.CustomerNameEN,
    1,
    src.Source,
    'spark',
    CURRENT_TIMESTAMP,
    'spark',
    CURRENT_TIMESTAMP
  );

num_affected_rows,num_updated_rows,num_deleted_rows,num_inserted_rows
35240,35240,0,0


In [0]:
# List the entries under the bronze Dim folder whose name contains 'Profit'.
[i.name for i in dbutils.fs.ls('/mnt/dw01bronze/Dim/') if 'Profit' in i.name]

Out[10]: ['pre_tx_DimProfitCategory/',
 'pre_tx_DimProfitMap/',
 'vw_dm_DimProfitCategory/',
 'vw_dm_DimProfitMap/']

In [0]:
# Preview the raw DimProfitMap extract to inspect its columns and sample rows.
spark.read.format('parquet').load('/mnt/dw01bronze/Dim/pre_tx_DimProfitMap').display()

ControllingAreaCode,ProfitCenterCode,TradingPartner,CompanyCode,CompanyName,ProfitCenterName,SegmentCode,BusinessGroupCode,SupplyChainCode,ProfitSegmentCode,ProfitCategoryCode,ProfitBSCCode,ProfitCombineCode,ProfitSupplyChainCode,HFMCode0,HFMCode1,HFMCode2,HFMCode3,PurchasingOrgCode,SalesOrgCode,ProvisionSegmentCode,InventoryAgingTypeCode,ModifiedDate,Source
1000,1110,21001,1000,Thai Glass Industries PCL,Rajburana Glass,S01,PS,PB,SG101,CG101,BS101,CB101,SC1,1L0001,1L1001,1L2001,1L3001,4000,1000.0,TGI,,2024-12-27 17:14:01.417,SAP
1000,1210,21001,1000,Thai Glass Industries PCL,Bangplee Glass,S01,PS,PB,SG101,CG101,BS101,CB101,SC1,1L0001,1L1001,1L2001,1L3001,4000,1000.0,TGI,,2024-12-27 17:14:00.967,SAP
1000,1220,21001,1000,Thai Glass Industries PCL,Bangplee ACL,S01,PS,PB,SG101,CG101,BS101,CB101,SC1,1L0001,1L1001,1L2001,1L3001,4000,1000.0,TGI,,2024-12-27 17:14:00.967,SAP
1000,1910,21001,1000,Thai Glass Industries PCL,Central Profit Center,S01,PS,PB,SG101,CG101,BS101,CB101,SC1,1L0001,1L1001,1L2001,1L3001,4000,1000.0,TGI,,2024-12-27 17:14:00.970,SAP
1000,1910,25007,1400,Thai Malaya Glass Co Ltd,Central Profit Center,S01,PS,PB,SG101,CG101,BS101,CB101,SC1,1L0001,1L1001,1L2001,1L3001,4000,1400.0,TMG,,2024-12-27 17:14:01.463,SAP
1000,1940,25007,1400,Thai Malaya Glass Co Ltd,TMG Central Profit Center,S01,PS,PB,SG101,CG101,BS101,CB101,SC1,1L0001,1L1001,1L2001,1L3001,4000,1400.0,TMG,,2024-12-27 17:14:01.463,SAP
1000,2400,25011,2400,บีเจซี กลาส (ประเทศไทย),BJC Glass (Thailand),S01,PS,PB,SG101,CG101,BS101,CB101,SC1,1L0001,1L1001,1L2001,1L3001,,,,,2024-12-27 17:14:01.447,SAP
1000,2800,21008,2800,บีเจซี แพคเกจจิ้ง,BJC Packaging,S01,PS,PB,SG101,CG101,BS101,CB101,SC1,1L0001,1L1001,1L2001,1L3001,,,,,2024-12-27 17:14:00.973,SAP
1000,4010,25007,1400,Thai Malaya Glass Co Ltd,Thai Malaya Glass,S01,PS,PB,SG101,CG101,BS101,CB101,SC1,1L0001,1L1001,1L2001,1L3001,4000,1400.0,TMG,,2024-12-27 17:14:00.973,SAP
1000,4012,25007,1400,Thai Malaya Glass Co Ltd,TM2 BOI,S01,PS,PB,SG101,CG101,BS101,CB101,SC1,1L0001,1L1001,1L2001,1L3001,4000,1400.0,TMG,,2024-12-27 17:14:00.973,SAP


In [0]:
from pyspark.sql.functions import col, concat_ws, lit, current_timestamp, when

# Source attributes copied through unchanged, in the order they appear in the
# output. Both queries below share this list so their column layouts line up
# for the positional union.
profit_map_columns = [
    "ControllingAreaCode",
    "ProfitCenterCode",
    "TradingPartner",
    "CompanyCode",
    "CompanyName",
    "ProfitCenterName",
    "SegmentCode",
    "BusinessGroupCode",
    "SupplyChainCode",
    "ProfitSegmentCode",
    "ProfitCategoryCode",
    "ProfitBSCCode",
    "ProfitCombineCode",
    "ProfitSupplyChainCode",
    "HFMCode0",
    "HFMCode1",
    "HFMCode2",
    "HFMCode3",
    "PurchasingOrgCode",
    "SalesOrgCode",
    "ProvisionSegmentCode",
    "InventoryAgingTypeCode",
    "Source",
]

# Step 1: Read the source Parquet file
source_path = '/mnt/dw01bronze/Dim/pre_tx_DimProfitMap'
df = spark.read.format("parquet").load(source_path)

# Step 2: First query - rows that have a ModifiedDate, keyed by
# ControllingAreaCode_ProfitCenterCode_TradingPartner.
rows_with_modified_date = df.filter(col("ModifiedDate").isNotNull())
query1_df = rows_with_modified_date.select(
    concat_ws("_", col("ControllingAreaCode"), col("ProfitCenterCode"), col("TradingPartner")).alias("ProfitCenterDataCode"),
    *[col(name) for name in profit_map_columns],
    lit("system").alias("ModifiedBy"),
    current_timestamp().alias("ModifiedDate"),
    col("ModifiedDate").alias("lastupdateDate"),
)

# Step 3: Second query (self-join standing in for CROSS APPLY) - the key is
# ControllingAreaCode_ProfitCenterCode and the attributes come from the
# matching Map2 rows.
map1 = df.alias("Map1")
map2 = df.alias("Map2")
same_profit_center = (
    (col("Map1.ControllingAreaCode") == col("Map2.ControllingAreaCode"))
    & (col("Map1.ProfitCenterCode") == col("Map2.ProfitCenterCode"))
    & (col("Map1.ControllingAreaCode").isNotNull() & (col("Map1.ControllingAreaCode") != ""))
)
query2_df = (
    map1.join(map2, same_profit_center, "inner")
    .select(
        concat_ws("_", col("Map1.ControllingAreaCode"), col("Map1.ProfitCenterCode")).alias("ProfitCenterDataCode"),
        *[col(f"Map2.{name}") for name in profit_map_columns],
        lit("system").alias("ModifiedBy"),
        current_timestamp().alias("ModifiedDate"),
        col("Map2.ModifiedDate").alias("lastupdateDate"),
    )
    .distinct()
)

# Steps 4 and 5: Stack both result sets (matched by position) and sort by key
final_df = query1_df.union(query2_df).orderBy("ProfitCenterDataCode")

# Show the result
final_df.show(truncate=False)


+--------------------+-------------------+----------------+--------------+-----------+-------------------------+-------------------------+-----------+-----------------+---------------+-----------------+------------------+-------------+-----------------+---------------------+--------+--------+--------+--------+-----------------+------------+--------------------+----------------------+------+----------+-----------------------+-----------------------+
|ProfitCenterDataCode|ControllingAreaCode|ProfitCenterCode|TradingPartner|CompanyCode|CompanyName              |ProfitCenterName         |SegmentCode|BusinessGroupCode|SupplyChainCode|ProfitSegmentCode|ProfitCategoryCode|ProfitBSCCode|ProfitCombineCode|ProfitSupplyChainCode|HFMCode0|HFMCode1|HFMCode2|HFMCode3|PurchasingOrgCode|SalesOrgCode|ProvisionSegmentCode|InventoryAgingTypeCode|Source|ModifiedBy|ModifiedDate           |lastupdateDate         |
+--------------------+-------------------+----------------+--------------+-----------+--------

In [0]:
from pyspark.sql.functions import col, max

# Columns collapsed to a single value per ProfitCenterDataCode. `lastupdateDate`
# is intentionally not listed, so it is dropped from the result.
collapsed_columns = [
    "ControllingAreaCode",
    "ProfitCenterCode",
    "TradingPartner",
    "CompanyCode",
    "CompanyName",
    "ProfitCenterName",
    "SegmentCode",
    "BusinessGroupCode",
    "SupplyChainCode",
    "ProfitSegmentCode",
    "ProfitCategoryCode",
    "ProfitBSCCode",
    "ProfitCombineCode",
    "ProfitSupplyChainCode",
    "HFMCode0",
    "HFMCode1",
    "HFMCode2",
    "HFMCode3",
    "PurchasingOrgCode",
    "SalesOrgCode",
    "ProvisionSegmentCode",
    "InventoryAgingTypeCode",
    "Source",
    "ModifiedBy",
    "ModifiedDate",
]

# Deduplicate final_df: one row per key, taking the maximum of every column.
# NOTE(review): each column's max is computed independently, so a key with
# several source rows can end up combining values from different rows -
# confirm this is acceptable.
deduplicated_df = final_df.groupBy("ProfitCenterDataCode").agg(
    *[max(col(name)).alias(name) for name in collapsed_columns]
)


In [0]:
# Upsert the deduplicated staging rows into the silver DimProfitCenterData
# Delta table, matching on the ProfitCenterDataCode business key.
delta_table_path = "/mnt/dw01silver/Dim/DimProfitCenterData"

# Read the Delta table (destination table)
# NOTE(review): `delta_table` is not used by the MERGE below or by the visible
# cells that follow - confirm whether this read is still needed.
delta_table = spark.read.format("delta").load(delta_table_path)

# Perform the MERGE operation
# The staging DataFrame is exposed to SQL under the view name `staging_data`.
deduplicated_df.createOrReplaceTempView("staging_data")

# Matched keys have every staged column overwritten; new keys are inserted
# with the staged columns only.
# NOTE(review): unlike the other dimension merges in this notebook, the insert
# does not set IsActive, CreatedBy or CreatedDate, and the update does not set
# IsActive - confirm these are populated elsewhere.
merge_query = f"""
MERGE INTO delta.`{delta_table_path}` AS target
USING staging_data AS source
ON target.ProfitCenterDataCode = source.ProfitCenterDataCode
WHEN MATCHED THEN
  UPDATE SET
    target.ControllingAreaCode = source.ControllingAreaCode,
    target.ProfitCenterCode = source.ProfitCenterCode,
    target.TradingPartner = source.TradingPartner,
    target.CompanyCode = source.CompanyCode,
    target.CompanyName = source.CompanyName,
    target.ProfitCenterName = source.ProfitCenterName,
    target.SegmentCode = source.SegmentCode,
    target.BusinessGroupCode = source.BusinessGroupCode,
    target.SupplyChainCode = source.SupplyChainCode,
    target.ProfitSegmentCode = source.ProfitSegmentCode,
    target.ProfitCategoryCode = source.ProfitCategoryCode,
    target.ProfitBSCCode = source.ProfitBSCCode,
    target.ProfitCombineCode = source.ProfitCombineCode,
    target.ProfitSupplyChainCode = source.ProfitSupplyChainCode,
    target.HFMCode0 = source.HFMCode0,
    target.HFMCode1 = source.HFMCode1,
    target.HFMCode2 = source.HFMCode2,
    target.HFMCode3 = source.HFMCode3,
    target.PurchasingOrgCode = source.PurchasingOrgCode,
    target.SalesOrgCode = source.SalesOrgCode,
    target.ProvisionSegmentCode = source.ProvisionSegmentCode,
    target.InventoryAgingTypeCode = source.InventoryAgingTypeCode,
    target.Source = source.Source,
    target.ModifiedBy = source.ModifiedBy,
    target.ModifiedDate = source.ModifiedDate
WHEN NOT MATCHED THEN
  INSERT (
    ProfitCenterDataCode,
    ControllingAreaCode,
    ProfitCenterCode,
    TradingPartner,
    CompanyCode,
    CompanyName,
    ProfitCenterName,
    SegmentCode,
    BusinessGroupCode,
    SupplyChainCode,
    ProfitSegmentCode,
    ProfitCategoryCode,
    ProfitBSCCode,
    ProfitCombineCode,
    ProfitSupplyChainCode,
    HFMCode0,
    HFMCode1,
    HFMCode2,
    HFMCode3,
    PurchasingOrgCode,
    SalesOrgCode,
    ProvisionSegmentCode,
    InventoryAgingTypeCode,
    Source,
    ModifiedBy,
    ModifiedDate
  ) VALUES (
    source.ProfitCenterDataCode,
    source.ControllingAreaCode,
    source.ProfitCenterCode,
    source.TradingPartner,
    source.CompanyCode,
    source.CompanyName,
    source.ProfitCenterName,
    source.SegmentCode,
    source.BusinessGroupCode,
    source.SupplyChainCode,
    source.ProfitSegmentCode,
    source.ProfitCategoryCode,
    source.ProfitBSCCode,
    source.ProfitCombineCode,
    source.ProfitSupplyChainCode,
    source.HFMCode0,
    source.HFMCode1,
    source.HFMCode2,
    source.HFMCode3,
    source.PurchasingOrgCode,
    source.SalesOrgCode,
    source.ProvisionSegmentCode,
    source.InventoryAgingTypeCode,
    source.Source,
    source.ModifiedBy,
    source.ModifiedDate
  )
"""

# Execute the MERGE statement
# As the last expression of the cell, the returned metrics DataFrame is what
# the notebook displays.
spark.sql(merge_query)


Out[16]: DataFrame[num_affected_rows: bigint, num_updated_rows: bigint, num_deleted_rows: bigint, num_inserted_rows: bigint]

In [0]:

# Display the merged Delta table to inspect the result.
spark.sql(f"SELECT * FROM delta.`{delta_table_path}`").display()


ProfitCenterDataKey,ProfitCenterDataCode,ControllingAreaCode,ProfitCenterCode,TradingPartner,CompanyCode,CompanyName,ProfitCenterName,SegmentCode,BusinessGroupCode,SupplyChainCode,ProfitSegmentCode,ProfitCategoryCode,ProfitBSCCode,ProfitCombineCode,ProfitSupplyChainCode,HFMCode0,HFMCode1,HFMCode2,HFMCode3,PurchasingOrgCode,SalesOrgCode,ProvisionSegmentCode,InventoryAgingTypeCode,BusinessGroupName,SupplyChainName,ProfitSegmentShortName,ProfitSegmentName,ProfitCategoryShortName,ProfitCategoryName,ProfitBSCShortName,ProfitBSCName,ProfitCombineShortName,ProfitCombineName,ProfitSupplyChainName,ProfitSupplyChainShortName,ProvisionSegmentShortName,ProvisionSegmentName,HFMShortName0,HFMName0,HFMShortName1,HFMName1,HFMShortName2,HFMName2,HFMShortName3,HFMName3,ProfitCenterFilter,ProfitPnLFlag,IsActive,Source,CreatedBy,CreatedDate,ModifiedBy,ModifiedDate
0.0,1000_1110,1000,1110,21001.0,1000,Thai Glass Industries PCL,Rajburana Glass,S01,PS,PB,SG101,CG101,BS101,CB101,SC1,1L0001,1L1001,1L2001,1L3001,4000,1000.0,TGI,,Packaging Supply Chain (PSC),Packaging Supply Chain,GLASS TH,GLASS TH,GLASS TH,GLASS THAILAND,GLASS TH,GLASS TH,GLASS,GLASS,PACKAGING BUSINESS,PB,TGI,GLASS TH,PSC_TOT,PSC_TOT,GLASS,GLASS,GLASS,GLASS,GLASS,GLASS,1110 - Rajburana Glass,False,True,SAP,system,2567-07-31T15:48:05.007+0000,system,2025-01-18T07:40:21.823+0000
1.0,1000_1110_21001,1000,1110,21001.0,1000,Thai Glass Industries PCL,Rajburana Glass,S01,PS,PB,SG101,CG101,BS101,CB101,SC1,1L0001,1L1001,1L2001,1L3001,4000,1000.0,TGI,,Packaging Supply Chain (PSC),Packaging Supply Chain,GLASS TH,GLASS TH,GLASS TH,GLASS THAILAND,GLASS TH,GLASS TH,GLASS,GLASS,PACKAGING BUSINESS,PB,TGI,GLASS TH,PSC_TOT,PSC_TOT,GLASS,GLASS,GLASS,GLASS,GLASS,GLASS,1110 - Rajburana Glass,False,True,SAP,system,2567-07-31T15:48:05.007+0000,system,2025-01-18T07:40:21.823+0000
2.0,1000_1210,1000,1210,21001.0,1000,Thai Glass Industries PCL,Bangplee Glass,S01,PS,PB,SG101,CG101,BS101,CB101,SC1,1L0001,1L1001,1L2001,1L3001,4000,1000.0,TGI,,Packaging Supply Chain (PSC),Packaging Supply Chain,GLASS TH,GLASS TH,GLASS TH,GLASS THAILAND,GLASS TH,GLASS TH,GLASS,GLASS,PACKAGING BUSINESS,PB,TGI,GLASS TH,PSC_TOT,PSC_TOT,GLASS,GLASS,GLASS,GLASS,GLASS,GLASS,1210 - Bangplee Glass,False,True,SAP,system,2567-07-31T15:48:05.007+0000,system,2025-01-18T07:40:21.823+0000
3.0,1000_1210_21001,1000,1210,21001.0,1000,Thai Glass Industries PCL,Bangplee Glass,S01,PS,PB,SG101,CG101,BS101,CB101,SC1,1L0001,1L1001,1L2001,1L3001,4000,1000.0,TGI,,Packaging Supply Chain (PSC),Packaging Supply Chain,GLASS TH,GLASS TH,GLASS TH,GLASS THAILAND,GLASS TH,GLASS TH,GLASS,GLASS,PACKAGING BUSINESS,PB,TGI,GLASS TH,PSC_TOT,PSC_TOT,GLASS,GLASS,GLASS,GLASS,GLASS,GLASS,1210 - Bangplee Glass,False,True,SAP,system,2567-07-31T15:48:05.007+0000,system,2025-01-18T07:40:21.823+0000
4.0,1000_1220,1000,1220,21001.0,1000,Thai Glass Industries PCL,Bangplee ACL,S01,PS,PB,SG101,CG101,BS101,CB101,SC1,1L0001,1L1001,1L2001,1L3001,4000,1000.0,TGI,,Packaging Supply Chain (PSC),Packaging Supply Chain,GLASS TH,GLASS TH,GLASS TH,GLASS THAILAND,GLASS TH,GLASS TH,GLASS,GLASS,PACKAGING BUSINESS,PB,TGI,GLASS TH,PSC_TOT,PSC_TOT,GLASS,GLASS,GLASS,GLASS,GLASS,GLASS,1220 - Bangplee ACL,False,True,SAP,system,2567-07-31T15:48:05.007+0000,system,2025-01-18T07:40:21.823+0000
5.0,1000_1220_21001,1000,1220,21001.0,1000,Thai Glass Industries PCL,Bangplee ACL,S01,PS,PB,SG101,CG101,BS101,CB101,SC1,1L0001,1L1001,1L2001,1L3001,4000,1000.0,TGI,,Packaging Supply Chain (PSC),Packaging Supply Chain,GLASS TH,GLASS TH,GLASS TH,GLASS THAILAND,GLASS TH,GLASS TH,GLASS,GLASS,PACKAGING BUSINESS,PB,TGI,GLASS TH,PSC_TOT,PSC_TOT,GLASS,GLASS,GLASS,GLASS,GLASS,GLASS,1220 - Bangplee ACL,False,True,SAP,system,2567-07-31T15:48:05.007+0000,system,2025-01-18T07:40:21.823+0000
6.0,1000_1910,1000,1910,25007.0,1400,Thai Malaya Glass Co Ltd,Central Profit Center,S01,PS,PB,SG101,CG101,BS101,CB101,SC1,1L0001,1L1001,1L2001,1L3001,4000,1400.0,TMG,,Packaging Supply Chain (PSC),Packaging Supply Chain,GLASS TH,GLASS TH,GLASS TH,GLASS THAILAND,GLASS TH,GLASS TH,GLASS,GLASS,PACKAGING BUSINESS,PB,TGI,GLASS TH,PSC_TOT,PSC_TOT,GLASS,GLASS,GLASS,GLASS,GLASS,GLASS,1910 - Central Profit Center,False,True,SAP,system,2567-07-31T15:48:05.007+0000,system,2025-01-18T07:40:21.823+0000
7.0,1000_1910_21001,1000,1910,21001.0,1000,Thai Glass Industries PCL,Central Profit Center,S01,PS,PB,SG101,CG101,BS101,CB101,SC1,1L0001,1L1001,1L2001,1L3001,4000,1000.0,TGI,,Packaging Supply Chain (PSC),Packaging Supply Chain,GLASS TH,GLASS TH,GLASS TH,GLASS THAILAND,GLASS TH,GLASS TH,GLASS,GLASS,PACKAGING BUSINESS,PB,TGI,GLASS TH,PSC_TOT,PSC_TOT,GLASS,GLASS,GLASS,GLASS,GLASS,GLASS,1910 - Central Profit Center,False,True,SAP,system,2567-07-31T15:48:05.007+0000,system,2025-01-18T07:40:21.823+0000
8.0,1000_1910_25007,1000,1910,25007.0,1400,Thai Malaya Glass Co Ltd,Central Profit Center,S01,PS,PB,SG101,CG101,BS101,CB101,SC1,1L0001,1L1001,1L2001,1L3001,4000,1400.0,TMG,,Packaging Supply Chain (PSC),Packaging Supply Chain,GLASS TH,GLASS TH,GLASS TH,GLASS THAILAND,GLASS TH,GLASS TH,GLASS,GLASS,PACKAGING BUSINESS,PB,TMG,GLASS TH,PSC_TOT,PSC_TOT,GLASS,GLASS,GLASS,GLASS,GLASS,GLASS,1910 - Central Profit Center,False,True,SAP,system,2567-07-31T15:48:05.007+0000,system,2025-01-18T07:40:21.823+0000
9.0,1000_1940,1000,1940,25007.0,1400,Thai Malaya Glass Co Ltd,TMG Central Profit Center,S01,PS,PB,SG101,CG101,BS101,CB101,SC1,1L0001,1L1001,1L2001,1L3001,4000,1400.0,TMG,,Packaging Supply Chain (PSC),Packaging Supply Chain,GLASS TH,GLASS TH,GLASS TH,GLASS THAILAND,GLASS TH,GLASS TH,GLASS,GLASS,PACKAGING BUSINESS,PB,TMG,GLASS TH,PSC_TOT,PSC_TOT,GLASS,GLASS,GLASS,GLASS,GLASS,GLASS,1940 - TMG Central Profit Center,False,True,SAP,system,2567-07-31T15:48:05.007+0000,system,2025-01-18T07:40:21.823+0000
