In [1]:
# Parameter Cell
# Default values for an interactive run of this notebook.
# NOTE(review): if an orchestrator injects parameters after this cell, overriding only
# `instance_name` will not re-derive the *_path values below — confirm how this cell is driven.
object_name = "INB_ORDER_DF"   # name of the object written under silver_path
instance_name = "czpoh_2"      # selects the per-instance sub-folder in bronze / silver / gold

# Single definition of the storage root so an environment switch touches one line
# instead of four separate literals.
adls_root = 'abfss://development@dscglblceedadlstpldnsazr.dfs.core.windows.net'

config_path = f'{adls_root}/config'
bronze_path = f'{adls_root}/bronze/blueyonder_wms/{instance_name}'
silver_path = f'{adls_root}/silver/{instance_name}'
gold_path = f'{adls_root}/gold/{instance_name}'

## Imports


In [2]:
#imports
import delta.tables as DT
import pyspark.sql.functions as F
import pyspark.sql.types as T
from datetime import *
from pyspark.sql.types import StringType,ArrayType

## Loading Data


In [3]:
# Load every bronze Delta table this notebook reads:
# TRLR, RCVTRK, RCVINV, RCVLIN, USR_RCPT_LINE_NOTE, USR_RCPT_NOTE, ADRMST, CARHDR, SUPMST, INBQLT.
# Base tables
TRLR_string = "TRLR"
RCVTRK_string = "RCVTRK" #1:1
RCVINV_string = "RCVINV" # 1:n
RCVLIN_string = "RCVLIN"
USR_RCPT_LINE_NOTE_string = "USR_RCPT_LINE_NOTE" #link with rcvinv
USR_RCPT_NOTE_string = 'USR_RCPT_NOTE'

#Additional information tables
ADRMST_string = "ADRMST"
CARHDR_string = "CARHDR"
SUPMST_string = 'SUPMST'
INBQLT_string = 'INBQLT'


def load_bronze_table(table_name, columns=None, condition=None):
    """Read one Delta table from ``bronze_path`` and optionally filter and project it.

    Args:
        table_name: Folder name of the table below ``bronze_path``.
        columns: Column names to keep; ``None`` keeps every column.
        condition: SQL filter expression; ``None`` applies no filter.

    Returns:
        The resulting Spark DataFrame (lazy; nothing is read until an action runs).
    """
    frame = spark.read.format("delta").load(f"{bronze_path}/{table_name}")
    # Filter before projecting: the filter columns (e.g. DW_VALID_FLAG, NOTTYP)
    # are usually not part of the projection.
    if condition is not None:
        frame = frame.where(condition)
    if columns is not None:
        frame = frame.select(*columns)
    return frame


# NOTE(review): only the note and master-data tables are filtered on DW_VALID_FLAG;
# TRLR, RCVTRK, RCVINV, RCVLIN and INBQLT are read unfiltered — confirm this is intended.

# Only trailers with TRLR_COD = 'RCV' are kept.
df_TRLR = load_bronze_table(
    TRLR_string,
    columns=['TRLR_ID', 'CARCOD', 'TRLR_NUM', 'MODDTE', 'MOD_USR_ID', 'DW_SKID_CARHDR', 'DW_BKID_TRLR'],  # 'DW_REFFERENCE_DATE'
    condition="TRLR_COD='RCV'",
)

df_RCVTRK = load_bronze_table(
    RCVTRK_string,
    columns=['WH_ID', 'TRKNUM', 'TRLR_ID', 'TRKREF', 'RCVTRK_STAT'],
)

df_RCVINV = load_bronze_table(
    RCVINV_string,
    columns=['CLIENT_ID', 'COMPLETED_DATE', 'CSTMS_TYP', 'DOC_NUM', 'INVNUM', 'INVTYP', 'ORGREF', "PO_NUM",
             'SADNUM', 'TRKNUM', 'WAYBIL', 'WH_ID', 'DW_BKID_RCVINV', 'DW_SKID_SUPMST'],
)

# All columns are kept; the aggregation further down picks what it needs.
df_RCVLIN = load_bronze_table(RCVLIN_string)

# Line-level notes of type REFNR2POS (valid rows only).
df_USR_RCPT_LINE_NOTE = load_bronze_table(
    USR_RCPT_LINE_NOTE_string,
    columns=['WH_ID', 'NOTTXT', 'CLIENT_ID', 'INVNUM'],
    condition="NOTTYP = 'REFNR2POS' and DW_VALID_FLAG = TRUE",
)

# Header-level notes of type ABSLRNAME (valid rows only); the note text becomes DW_VENDOR_NAME.
df_USR_RCPT_NOTE = (
    load_bronze_table(
        USR_RCPT_NOTE_string,
        columns=['WH_ID', 'NOTTXT', 'CLIENT_ID', 'INVNUM'],
        condition="NOTTYP = 'ABSLRNAME' and DW_VALID_FLAG = TRUE",
    )
    .withColumnRenamed('NOTTXT', 'DW_VENDOR_NAME')
)

df_ADRMST = load_bronze_table(
    ADRMST_string,
    columns=['ADR_ID', 'CTRY_NAME', 'ADRNAM'],
    condition="DW_VALID_FLAG = TRUE",
)

df_CARHDR = load_bronze_table(
    CARHDR_string,
    columns=['DW_SKID_CARHDR', 'CARNAM'],
    condition="DW_VALID_FLAG = TRUE",
)

df_SUPMST = load_bronze_table(
    SUPMST_string,
    columns=['DW_SKID_SUPMST', 'ADR_ID', 'SUPNUM'],
    condition="DW_VALID_FLAG = TRUE",
)

# Only issue codes starting with 'DCP'; de-duplicated per (WH_ID, TRKNUM, IB_ISSUE).
df_INBQLT = (
    load_bronze_table(
        INBQLT_string,
        columns=['WH_ID', 'TRKNUM', 'IB_ISSUE'],
        condition="IB_ISSUE like 'DCP%'",
    )
    .distinct()
)


## Columns to select

In [4]:
# Final projection of the silver object, in output column order.
# Entries may be plain column names or Column expressions.
_identifier_cols = [
    'DW_BKID_INBOUND_ORDER',
    'WH_ID',
    'INVNUM',
    'INVTYP',
    'TRLR_ID',
    'RCVTRK_STAT',
]
_descriptive_cols = [
    'DW_CHANNEL',
    'DW_CATEGORY',
    'DW_VENDOR_NAME',
    F.col('COMPLETED_DATE').alias('END_DATE'),  # COMPLETED_DATE is exposed as END_DATE
    'DW_REFERENCE_DTE',
    'DW_IB_ISSUE_SET',
    'DW_NOTTXT_SET',
    'KPI_RELEVANT_FLG',
]
_fact_cols = [
    'DW_NBR_LINES',
    'DW_NBR_ITEMS',
    'DW_QTY_EXP',
    'DW_QTY_REC',
]
columns = _identifier_cols + _descriptive_cols + _fact_cols

In [5]:
# Comma-separated SQL string literals; each value is spliced into an
# `array(...)` expression when the derived columns are computed.

# Inbound issue codes compared against DW_IB_ISSUE_SET for KPI_RELEVANT_FLG.
KPI_relevant_list = (
    "'DCP01', 'DCP02', 'DCP04', 'DCP14', "
    "'DCP15', 'DCP16', 'DCP18', 'DCP19', "
    "'DCP20', 'DCP21', 'DCP50', 'DCP99'"
)
# Note texts that map to DW_CATEGORY = 'NORMAL'.
NORMAL_list = (
    "'05',"
    "'06' "
)
# Note texts that map to DW_CATEGORY = 'PRIO'.
PRIO_list = (
    "'07',"
    "'08', "
    "'09' "
)

## Joins


In [6]:
# Collapse INBQLT and USR_RCPT_LINE_NOTE to one row per join key, so the left joins
# onto the inbound-order frame cannot multiply rows. The multi-valued column is
# gathered into an array instead.
# Both arrays use collect_set so each "*_SET" column is duplicate-free by construction
# rather than relying on an upstream .distinct().
joinable_df_INBQLT = (
    df_INBQLT
    .groupBy('TRKNUM', 'WH_ID')
    .agg(F.collect_set('IB_ISSUE').alias("DW_IB_ISSUE_SET"))
)
joinable_USR_RCPT_LINE_NOTE = (
    df_USR_RCPT_LINE_NOTE
    .groupBy('WH_ID', 'INVNUM', "CLIENT_ID")
    .agg(F.collect_set('NOTTXT').alias("DW_NOTTXT_SET"))
)

In [7]:
# Build the inbound-order frame: start from RCVINV and left-join truck, trailer,
# issue codes, line notes, supplier, address, carrier and vendor-name note.
# Every join is a left join, so RCVINV rows are never dropped here.
df_INBOUND_ORDER=(df_RCVINV
.join(df_RCVTRK, ['TRKNUM','WH_ID'], 'left')
.join(df_TRLR, ['TRLR_ID'], 'left')
# The alias lets the selectExpr below keep only the left-hand columns ('INBOUND.*')
# after an expression join, which would otherwise carry the right side's key columns too.
.join(joinable_df_INBQLT,['TRKNUM','WH_ID'], 'left').alias("INBOUND")
# NOTE(review): the note's INVNUM is matched against RCVINV.PO_NUM (not RCVINV.INVNUM) — confirm.
.join(joinable_USR_RCPT_LINE_NOTE,[df_RCVINV.WH_ID == joinable_USR_RCPT_LINE_NOTE.WH_ID, df_RCVINV.CLIENT_ID == joinable_USR_RCPT_LINE_NOTE.CLIENT_ID, df_RCVINV.PO_NUM == joinable_USR_RCPT_LINE_NOTE.INVNUM],'left')
.selectExpr('INBOUND.*', 'DW_NOTTXT_SET')
# Supplier -> address -> carrier lookups. The columns they add (SUPNUM, CTRY_NAME,
# ADRNAM, CARNAM) are not part of the final `columns` projection.
.join(df_SUPMST, ['DW_SKID_SUPMST'], 'left')
.join(df_ADRMST, ['ADR_ID'], 'left')
# Re-alias the widened frame so the second 'INBOUND.*' includes the columns added since.
.join(df_CARHDR, ['DW_SKID_CARHDR'],'left').alias("INBOUND")
# NOTE(review): df_USR_RCPT_NOTE is not aggregated per key; several ABSLRNAME notes for the
# same (WH_ID, CLIENT_ID, INVNUM) would duplicate order rows — confirm the key is unique.
.join(df_USR_RCPT_NOTE,[df_RCVINV.WH_ID == df_USR_RCPT_NOTE.WH_ID, df_RCVINV.CLIENT_ID == df_USR_RCPT_NOTE.CLIENT_ID, df_RCVINV.PO_NUM == df_USR_RCPT_NOTE.INVNUM],'left')
.selectExpr('INBOUND.*', 'DW_VENDOR_NAME')
# Business key = TRKNUM + INVNUM + WH_ID concatenated without a separator.
# NOTE(review): Spark's concat yields NULL if any part is NULL — confirm the parts are never NULL.
.withColumn('DW_BKID_INBOUND_ORDER', F.concat(*['TRKNUM', 'INVNUM',"WH_ID"])))

## Calculations and derived columns



In [8]:
# Add the derived columns to the inbound-order frame.
#
# Every column is added with withColumn, which replaces an existing column of the
# same name. The cell can therefore be re-run safely even though it reassigns
# df_INBOUND_ORDER; `select('*', ...)` would append duplicate columns on a second run.
_empty_array = F.expr("array()")
_normal_hits = F.array_intersect(F.col('DW_NOTTXT_SET'), F.expr(f"array({NORMAL_list})"))
_prio_hits = F.array_intersect(F.col('DW_NOTTXT_SET'), F.expr(f"array({PRIO_list})"))
_kpi_hits = F.array_intersect(F.col('DW_IB_ISSUE_SET'), F.expr(f"array({KPI_relevant_list})"))

df_INBOUND_ORDER = (
    df_INBOUND_ORDER
    # Constant channel label.
    .withColumn('DW_CHANNEL', F.lit("ALL"))
    # Date part of COMPLETED_DATE.
    .withColumn('DW_REFERENCE_DTE', F.col('COMPLETED_DATE').cast(T.DateType()))
    # 'NORMAL' wins over 'PRIO' when both match. There is no otherwise(), so rows
    # with no matching note text (or no notes at all) keep a NULL category.
    .withColumn(
        'DW_CATEGORY',
        F.when(_normal_hits != _empty_array, 'NORMAL')
         .when(_prio_hits != _empty_array, 'PRIO'),
    )
    # 1 when the order has no issue codes, or none from KPI_relevant_list; 0 otherwise.
    .withColumn(
        'KPI_RELEVANT_FLG',
        F.when((_kpi_hits == _empty_array) | F.expr('DW_IB_ISSUE_SET IS NULL'), 1).otherwise(0),
    )
)



## Getting the Facts using RCVLIN

In [9]:
# Aggregate RCVLIN to one row per (TRKNUM, INVNUM, WH_ID) with the count and quantity measures.
_receipt_key = ['TRKNUM', 'INVNUM', 'WH_ID']

# NOTE(review): DW_NBR_LINES is the number of DISTINCT PRTNUM values while DW_NBR_ITEMS is the
# number of non-null PRTNUM rows; the names suggest these may be swapped — confirm with the data owner.
_receipt_measures = [
    F.countDistinct('PRTNUM').alias('DW_NBR_LINES'),
    F.count('PRTNUM').alias('DW_NBR_ITEMS'),
    F.sum('EXPQTY').cast(T.IntegerType()).alias('DW_QTY_EXP'),
    F.sum('RCVQTY').cast(T.IntegerType()).alias('DW_QTY_REC'),
]

df_inord = (
    df_RCVLIN
    .groupBy(*_receipt_key)
    .agg(*_receipt_measures)
    # Same business-key construction as on the inbound-order frame (no separator).
    .withColumn('DW_BKID_INBOUND_ORDER', F.concat(*_receipt_key))
)


In [10]:
# Attach the RCVLIN measures to the inbound orders and project the output columns.
# Inner join: orders without any RCVLIN rows are dropped from the result.
_order_join_keys = ['DW_BKID_INBOUND_ORDER', 'WH_ID', 'INVNUM', 'TRKNUM']

df_INBOUND = (
    df_INBOUND_ORDER
    .join(df_inord, on=_order_join_keys, how='inner')
    .select(*columns)
)

## Saving to silver

In [11]:
# Persist the result as a Delta table under the silver path.
# Each run fully replaces the data; overwriteSchema also lets the schema change.
_silver_target = f"{silver_path}/{object_name}"
_silver_writer = (
    df_INBOUND.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
)
_silver_writer.save(_silver_target)