# OUTBOUND 
#### DATA: ALL ORDERS - KPI WITH INFO ABOUT SHIPMENT ALLOCATED BEFORE SCHEDULED TRANSPORT
#### GRANULARITY LEVEL: ORDER

## Parameters

In [56]:
# Parameter Cell
# Target object written by this notebook and the source WMS instance it reads from.
object_name = "OUT_ORD_SALC"
instance_name = "czpoh_2"

# Root of the production container; every layer path below is built from it.
adls_root = 'abfss://production@dscglblceedadlsppldnsazr.dfs.core.windows.net'

config_path = f'{adls_root}/config'
bronze_path = f'{adls_root}/bronze/blueyonder_wms/{instance_name}'
silver_path = f'{adls_root}/silver/{instance_name}'
gold_path = f'{adls_root}/gold/{instance_name}'

## Data Calculation Parameters

In [57]:
# cond_dict_KPI_FLG = {'NEW_COLUMN':'DW_KPI_RELEVANT_FLAG',
#                     'SOURCE_COLUMN':'USR_ID',
#                      'VALUE_O_LIST':[],
#                      'VALUE_1_LIST':['NOUSER','slInAdapter_111'],
#                     'DEFAULT_VALUE':0}

In [58]:
# column_order = ['WH_ID',
#                 'CLIENT_ID',
#                 'ORDNUM',
#                 'ADDDTE',
#                  'DW_MIN_TRNDTE',
#                  'DW_MAX_TRNDTE',
#                 'DW_CHANNEL',
#                 'DW_CATEGORY',
#                 'DW_SUBCATEGORY',
#                 'ORDTYP',
#                 'CPOTYP',
#                 'CPONUM',
#                 'STORE_TYP'
#                 ]

## Imports and Configuration

In [59]:
## Set the Parquet rebase modes to CORRECTED so that timestamps below 1900-01-01 work.
for rebase_option in (
    "spark.sql.legacy.parquet.int96RebaseModeInRead",
    "spark.sql.legacy.parquet.int96RebaseModeInWrite",
    "spark.sql.legacy.parquet.datetimeRebaseModeInRead",
):
    spark.conf.set(rebase_option, "CORRECTED")

In [60]:
import delta.tables as DT
import pyspark.sql.functions as F
import pyspark.sql.types as T
from datetime import datetime
from pyspark.sql.window import Window
from pyspark.sql.functions import row_number

##  Loading Source Tables from Bronze Layer

In [61]:
def _read_bronze(table_name, row_filter):
    """Load one Delta table from the bronze layer and filter its rows.

    Args:
        table_name: Folder name of the table under ``bronze_path``.
        row_filter: SQL predicate string handed to ``DataFrame.where``.

    Returns:
        The filtered Spark DataFrame.
    """
    return (
        spark.read.format("delta")
        .load(f"{bronze_path}/{table_name}")
        .where(row_filter)
    )


## ORDER related tables
# Non-deleted order activity rows with activity code SALC.
df_ORDACT = _read_bronze("ORDACT", "DW_DELETED_FLAG = FALSE AND ACTCOD IN ('SALC')")

# Non-deleted orders with CPOTYP "50".
df_ORD = _read_bronze("ORD", 'DW_DELETED_FLAG = FALSE AND CPOTYP = "50"')

# Non-deleted order notes whose text ends with the store-limit cancellation message.
df_ORD_NOTE = _read_bronze(
    "ORD_NOTE",
    'DW_DELETED_FLAG = FALSE AND NOTTXT like "%Cancelled due to store limit"',
)

# Non-deleted shipments whose status passes SHPSTS <> "B".
df_SHIPMENT = _read_bronze("SHIPMENT", 'DW_DELETED_FLAG = FALSE AND SHPSTS <> "B"')

# Valid, non-deleted customer master rows; CSTNUM is exposed as STCUST so it
# shares its name with the customer column selected from ORD.
df_CSTMST = (
    _read_bronze("CSTMST", 'DW_DELETED_FLAG = FALSE AND DW_VALID_FLAG = TRUE')
    .select(F.col('CSTNUM').alias('STCUST'), 'CLIENT_ID', 'STORE_TYP')
)
# TRLR DISPATCH related tables


### 1. Take all orders with cpotyp = 50 and add info about store_typ

In [62]:
# Order header columns enriched with the customer's STORE_TYP.
# The left join keeps orders without a matching customer row (STORE_TYP stays null).
df_ORD_basic_info = (
    df_ORD
    .select('WH_ID', 'CLIENT_ID', 'ORDNUM', 'ADDDTE', 'CPONUM', 'ORDTYP', 'CPOTYP', 'STCUST')
    .join(df_CSTMST, on=['STCUST', 'CLIENT_ID'], how='left')
)

In [63]:
## comments
## df_ORD_basic_info.count()  2890295
## df_ORD_basic_info.where('STORE_TYP IS NULL').count() 101

### 2. Take shipment allocation for shipments that were not cancelled

In [64]:
# Earliest and latest TRNDTE per order, computed only over ORDACT rows whose
# (SHIP_ID, WH_ID) is present in df_SHIPMENT; the inner join drops the rest.
df_ORDACT_SALC_time = (
    df_ORDACT
    .join(df_SHIPMENT, on=['SHIP_ID', 'WH_ID'], how='inner')
    .groupBy('ORDNUM', 'CLIENT_ID', 'WH_ID')
    .agg(
        F.min('TRNDTE').alias('DW_MIN_TRNDTE'),
        F.max('TRNDTE').alias('DW_MAX_TRNDTE'),
    )
)

### 3. Take info about the order cancellations due to store limit

In [65]:
## For every order that has a store-limit cancellation note in ORD_NOTE we keep:
## ORDNUM, WH_ID, CLIENT_ID, the earliest INS_DT, the concatenated distinct note
## texts, and a constant flag (1) marking the order as cancelled due to store limit.
df_notes = (
    df_ORD_NOTE
    .groupBy('ORDNUM', 'WH_ID', 'CLIENT_ID')
    .agg(
        F.min('INS_DT').alias('DW_CANC_STORE_LIMIT_MIN_INS_DT'),
        # collect_set returns its elements in no guaranteed order, so the joined
        # text could change between runs; sorting the set first keeps it stable.
        F.concat_ws(", ", F.sort_array(F.collect_set("NOTTXT"))).alias("DW_FULL_NOTTXT"),
    )
    .withColumn('DW_CANC_STORE_LIMIT_FLAG', F.lit(1))
)


### 4. Joining the info about order and allocation

In [67]:
# Keys shared by the order, allocation-time and cancellation-note frames.
order_keys = ['ORDNUM', 'CLIENT_ID', 'WH_ID']
cpotyp = F.col("CPOTYP")

# DW_CHANNEL: sales channel derived from CPOTYP.
dw_channel = (
    F.when(cpotyp.isin(["40", "45"]), "B2C")
    .when(cpotyp.isin(["50", "66", "30", "20"]), "B2B")
    .otherwise('NA')
    .alias("DW_CHANNEL")
)

# DW_CATEGORY: priority class derived from CPOTYP.
dw_category = (
    F.when(cpotyp.isin(["66", "30", "20"]), "PRIO")
    .when(cpotyp.isin(["50"]), "NORMAL")
    .otherwise('NA')
    .alias("DW_CATEGORY")
)

# DW_SUBCATEGORY: named only for CPOTYP 66, 30 and 20; every other value maps to 'NA'.
dw_subcategory = (
    F.when(cpotyp == "66", "Two-Step-Cross-Docking")
    .when(cpotyp == "30", "Replenishment")
    .when(cpotyp == "20", "KEP-Deliveries")
    .otherwise('NA')
    .alias("DW_SUBCATEGORY")
)

# DW_PARTITION: ADDDTE formatted with the pattern 'yQQQ' (year plus quarter).
# date_sub with 0 days leaves the date itself unchanged.
dw_partition = F.date_format(F.date_sub(F.col('ADDDTE'), 0), 'yQQQ').alias('DW_PARTITION')

# Left joins keep every order, including those without an allocation or a note;
# orders without a cancellation note get DW_CANC_STORE_LIMIT_FLAG = 0.
df_OUT_ORD_SALC = (
    df_ORD_basic_info
    .join(df_ORDACT_SALC_time, on=order_keys, how='left')
    .join(df_notes, on=order_keys, how='left')
    .select(
        'WH_ID',
        'CLIENT_ID',
        'ORDNUM',
        'ADDDTE',
        'DW_MIN_TRNDTE',
        'DW_MAX_TRNDTE',
        dw_channel,
        dw_category,
        dw_subcategory,
        'ORDTYP',
        'CPOTYP',
        'CPONUM',
        'STORE_TYP',
        "DW_FULL_NOTTXT",
        'DW_CANC_STORE_LIMIT_MIN_INS_DT',
        "DW_CANC_STORE_LIMIT_FLAG",
        dw_partition,
    )
    .fillna({"DW_CANC_STORE_LIMIT_FLAG": 0})
)


In [68]:
# DW_KPI_RELEVANT_FLAG is 1 exactly when DW_CANC_STORE_LIMIT_FLAG equals 1, else 0.
# NOTE(review): as written the flag mirrors the cancellation flag - confirm that
# store-limit cancellations are really the orders meant to be KPI relevant.
is_store_limit_cancel = F.col("DW_CANC_STORE_LIMIT_FLAG") == 1
df_OUT_ORD_SALC = df_OUT_ORD_SALC.withColumn(
    'DW_KPI_RELEVANT_FLAG',
    F.when(is_store_limit_cancel, F.lit(1)).otherwise(F.lit(0)),
)

## Saving to Silver

In [69]:
# Overwrite the silver Delta table (schema included), partitioned by DW_PARTITION.
# repartition('DW_PARTITION') routes all rows of one partition value to a single
# task, so each partition directory still receives one file. Unlike coalesce(1),
# it does not force the upstream joins and aggregations to run in a single task.
(
    df_OUT_ORD_SALC
    .repartition('DW_PARTITION')
    .write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .partitionBy('DW_PARTITION')
    .save(f"{silver_path}/{object_name}")
)