In [0]:
%sql
-- Refresh the cached entries for the source tables before the notebook queries them.
REFRESH TABLE dp_inventory.plant_dim;
REFRESH TABLE dp_inventory.inv_fct_na;
REFRESH TABLE dp_inventory.inv_fct_la;
REFRESH TABLE dp_inventory.inv_fct_asia;
REFRESH TABLE dp_inventory.inv_fct_emea;
REFRESH TABLE dp_inventory.conversion_dim;
REFRESH TABLE dp_inventory.matl_dim;
REFRESH TABLE dp_inventory.val_class_dim;
REFRESH TABLE dp_masterdata_g11.marm_units_of_measure_for_material;
REFRESH TABLE dp_inventory.tdc_val_dim;

In [0]:
# Enable Adaptive Query Execution together with its partition-coalescing and
# skew-join options for the rest of the session.
for _aqe_setting in (
    "spark.sql.adaptive.enabled",
    "spark.sql.adaptive.coalescePartitions.enabled",
    "spark.sql.adaptive.skewJoin.enabled",
):
    spark.conf.set(_aqe_setting, "true")

In [0]:
from pyspark import StorageLevel
from pyspark.sql.functions import col

In [0]:
# Columns read from every regional inventory fact table. Kept in one place so the
# four regional loads cannot drift apart.
INV_FACT_COLUMNS = [
    "calendar_date",
    "calendar_year_month",
    "complex_decomposition",
    "region",
    "msu_total_plant_stock",
    "msu_mrp_available_csp_agg",
    "msu_safety_stock",
    "msu_quality_inspection",
    "msu_stock_in_transit",
    "usd_total_plant_stock",
    "usd_mrp_available_csp_agg",
    "usd_safety_stock",
    "usd_quality_inspection",
    "usd_stock_in_transit",
    "raw_total_plant_stock",
    "raw_mrp_available_csp_agg",
    "raw_safety_stock",
    "raw_quality_inspection",
    "raw_stock_in_transit",
    "buom_total_plant_stock",
    "tdc_val",
    "material",
    "valuation_class",
    "currency_key",
    "plant",
]


def _load_inv_fact(table_name, lookback_years=2):
    """Read one regional fact table from dp_inventory, limited to a recent date window.

    Args:
        table_name: Table name inside the dp_inventory schema (e.g. "inv_fct_na").
            It is also written to the `fct_type` column so each row records which
            table it came from.
        lookback_years: Size of the window; rows are kept when calendar_date lies
            between (current_date - lookback_years) and current_date, inclusive.

    Returns:
        A DataFrame with INV_FACT_COLUMNS plus the literal `fct_type` column.
    """
    column_list = ",\n            ".join(INV_FACT_COLUMNS)
    query = f"""
        SELECT
            {column_list},
            '{table_name}' AS fct_type
        FROM dp_inventory.{table_name}
        WHERE calendar_date BETWEEN CAST(DATEADD(YEAR, {-int(lookback_years)}, current_date) AS DATE)
                                AND current_date
    """
    return spark.sql(query)


# One frame per region; the variable names are referenced by the union cell below.
inv_fct_na_df = _load_inv_fact("inv_fct_na")
inv_fct_la_df = _load_inv_fact("inv_fct_la")
inv_fct_asia_df = _load_inv_fact("inv_fct_asia")
inv_fct_emea_df = _load_inv_fact("inv_fct_emea")

In [0]:
from functools import reduce
from pyspark.sql import DataFrame

# Stack the four regional frames into one, matching columns by name; a column
# missing from any frame is filled with nulls instead of raising.
dfs = [inv_fct_na_df, inv_fct_la_df, inv_fct_asia_df, inv_fct_emea_df]
union_df = dfs[0]
for _regional_df in dfs[1:]:
    union_df = union_df.unionByName(_regional_df, allowMissingColumns=True)

In [0]:
# Number of partitions the unioned fact rows are shuffled into.
UNION_PARTITIONS = 200

# Mark the repartitioned frame for caching BEFORE the first action runs on it.
# Previously count() ran first and persist() afterwards, so the union + shuffle was
# computed once for the count and then a second time when the cache was first filled.
union_df = union_df.repartition(UNION_PARTITIONS).persist(StorageLevel.MEMORY_AND_DISK)

# Expose the cached rows to the SQL cells that follow.
union_df.createOrReplaceTempView("union_table")

# Action: populates the cache and shows the row count as the cell output.
union_df.count()

In [0]:
# Filter the unioned fact rows (temp view `union_table`) down to the rows used by
# the daily and monthly queries:
#   * complex_decomposition is "C" or "S";
#   * at least one of the six listed msu_/usd_ stock measures is non-zero;
#   * calendar_date falls within the last two years up to current_date.
inv_fct_df= spark.sql("""SELECT * FROM union_table fact WHERE fact.complex_decomposition in ("C", "S") AND (
    fact.msu_total_plant_stock != 0
    or fact.msu_mrp_available_csp_agg != 0
    or fact.msu_safety_stock != 0
    or fact.usd_total_plant_stock != 0
    or fact.usd_mrp_available_csp_agg != 0
    or fact.usd_safety_stock != 0
  ) 
    AND calendar_date BETWEEN CAST( DATEADD(YEAR, -2, current_date) AS DATE) AND current_date
  """)

In [0]:
# Row count of the filtered fact rows; count() is an action, so this runs a Spark job.
inv_fct_df.count()

In [0]:
# Test 

In [0]:
# Register the filtered fact rows as temp view `stg_inv_fct`, the fact source that
# the daily and monthly SQL queries below select from.
inv_fct_df.createOrReplaceTempView("stg_inv_fct") 

In [0]:
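# Daily view: stock measures summed per calendar_date, with the material and plant columns left blank (data_type = "isc_daily").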

In [0]:
# Daily view (data_type = "isc_daily").
#
# Sums the stock measures of `stg_inv_fct` per calendar_date and per material /
# valuation-class / TDC attributes, for the FABRICCARE and HOMECARE subsectors only,
# excluding rows whose plant is flagged "JV" in plant_dim. The *_py_june_30_rate
# measures are the raw_* measures multiplied by conversion_dim.pyjune_factor, falling
# back to a factor of 1 when no conversion row matches the currency key.
#
# Material- and plant-level columns are emitted as empty strings so that this frame
# lines up by column name with the monthly output when the two are unioned.
#
# Fix: the blank material columns used to be aliased `fact.material` and
# `matl.material_text`. Inside backticks those are literal column names containing a
# dot, so unionByName never matched them with the monthly `material` /
# `material_text` columns and the combined output carried two extra columns. They are
# now aliased `material` and `material_text`, like the blank plant columns.
#
# NOTE(review): dp_inventory.matl_dim is joined twice (aliases `matl` and `matl_dim`)
# on the same key. That is only harmless if `material` is unique in matl_dim - confirm,
# then collapse into a single join.
daily_stg_output_df = spark.sql("""select
  fact.calendar_date,
  fact.calendar_year_month,
  "" as `material`,
  "" as `material_text`,
  matl.material_group,
  matl.material_group_text,
  matl.material_type,
  matl.segment,
  matl.global_market_segment,
  matl_dim.apo_logical_product_as,
  matl_dim.apo_logical_product_eu,
  matl_dim.apo_logical_product_la,
  matl_dim.apo_logical_product_na,
  matl_dim.sop_family_as,
  matl_dim.sop_family_eu,
  matl_dim.sop_family_la,
  matl_dim.sop_family_na,
  val.material_type_derived,
  val.material_type_derived_text,
  -- rds.prod_7_long_name,
  tdc.tdc_val_text,
  tdc.category,
  fact.complex_decomposition,
  fact.region,
  "" as `plant`,
  "" as `plant_text`,
  "" as `country`,
  "" as `country_text`,
  "" as `geographic_group`,
  "isc_daily" as `data_type`,

  sum(fact.msu_total_plant_stock) as `msu_total_plant_stock`,
  sum(fact.msu_mrp_available_csp_agg) as `msu_mrp_available_csp_agg`,
  sum(fact.msu_safety_stock) as `msu_safety_stock`,
  sum(fact.msu_quality_inspection) as `msu_quality_inspection`,
  sum(fact.msu_stock_in_transit) as `msu_stock_in_transit`,

  sum(fact.usd_total_plant_stock) as `usd_total_plant_stock`,
  sum(fact.usd_mrp_available_csp_agg) as `usd_mrp_available_csp_agg`,
  sum(fact.usd_safety_stock) as `usd_safety_stock`,
  sum(fact.usd_quality_inspection) as `usd_quality_inspection`,
  sum(fact.usd_stock_in_transit) as `usd_stock_in_transit`,

  sum(fact.raw_total_plant_stock * ifnull(conv.pyjune_factor, 1)) as `usd_total_plant_stock_py_june_30_rate`,
  sum(fact.raw_mrp_available_csp_agg * ifnull(conv.pyjune_factor, 1)) as `usd_mrp_available_csp_agg_py_june_30_rate`,
  sum(fact.raw_safety_stock * ifnull(conv.pyjune_factor, 1)) as `usd_safety_stock_py_june_30_rate`,
  sum(fact.raw_quality_inspection * ifnull(conv.pyjune_factor, 1)) as `usd_quality_inspection_py_june_30_rate`,
  sum(fact.raw_stock_in_transit * ifnull(conv.pyjune_factor, 1)) as `usd_stock_in_transit_py_june_30_rate`,

  sum(fact.buom_total_plant_stock) as `buom_total_plant_stock`,
  0 as `target`
  , 0 as `actual_usd`
  , 0 as `ff_target_usd`
  , "" as `ff_version`,
  tdc.subsector,
  fact.fct_type
from
  stg_inv_fct fact
  join dp_inventory.tdc_val_dim tdc on fact.tdc_val = tdc.tdc_val
  and tdc.subsector in ("FABRICCARE","HOMECARE")
  join dp_inventory.matl_dim matl on fact.material = matl.material
  join dp_inventory.val_class_dim val on fact.valuation_class = val.valuation_class
  join dp_inventory.matl_dim matl_dim on fact.material = matl_dim.material
  left join dp_inventory.conversion_dim conv on fact.currency_key =conv.currency_key
  -- left join rds.prod_hier_dim rds on rds.prod_19_id = right(fact.material, 8)
  --and rds.prod_hier_id = "710"
  -- and rds.curr_ind = "Y"
  left join dp_inventory.plant_dim plant_d on fact.plant =plant_d.plant
where
  -- fact.calendar_year_month >= "2021-07"
  -- and fact.complex_decomposition in ("C", "S")
  -- and (
  --   fact.msu_total_plant_stock != 0
  --   or fact.msu_mrp_available_csp_agg != 0
  --   or fact.msu_safety_stock != 0
  --   or fact.usd_total_plant_stock != 0
  --   or fact.usd_mrp_available_csp_agg != 0
  --   or fact.usd_safety_stock != 0
  -- )
  -- and
  ifnull(plant_d.joint_venture_flag, "") <> "JV"
group by
  fact.calendar_date,
  fact.calendar_year_month,
  matl.material_group,
  matl.material_group_text,
  matl.material_type,
  matl.segment,
  matl.global_market_segment,
  matl_dim.apo_logical_product_as,
  matl_dim.apo_logical_product_eu,
  matl_dim.apo_logical_product_la,
  matl_dim.apo_logical_product_na,
  matl_dim.sop_family_as,
  matl_dim.sop_family_eu,
  matl_dim.sop_family_la,
  matl_dim.sop_family_na,
  val.material_type_derived,
  val.material_type_derived_text,
  -- rds.prod_7_long_name,
  fact.tdc_val,
  tdc.tdc_val_text,
  tdc.category,
  fact.complex_decomposition,
  fact.region,
  tdc.subsector,
  fact.fct_type """)

In [0]:
# Monthly view: material- and plant-level rows tagged isc_monthly / isc_current_month / isc_previous_month.

In [0]:
# Monthly staging query: un-aggregated fact rows from `stg_inv_fct` joined to the
# TDC, material, valuation-class, conversion and plant dimensions, restricted to the
# FABRICCARE / HOMECARE subsectors and excluding rows whose plant is flagged "JV".
#
# `data_type` is derived per row by the CASE expression, first match wins:
#   1. 'isc_monthly'        - calendar_date is the latest calendar_date present in
#                             stg_inv_fct for some calendar_year_month;
#   2. 'isc_current_month'  - calendar_year_month equals the largest
#                             calendar_year_month in stg_inv_fct;
#   3. 'isc_previous_month' - calendar_year_month equals the largest value of
#                             "<year>-<zero-padded month>" built from the day before
#                             the first day of each row's month (i.e. the month
#                             preceding the latest month in stg_inv_fct).
# There is no ELSE branch, so rows matching none of the three get a NULL data_type.
#
# NOTE(review): dp_inventory.matl_dim is joined twice (aliases `matl` and `matl_dim`)
# on the same key - presumably redundant; confirm `material` is unique in matl_dim.
monthly_stg_df=spark.sql("""select
  fact.calendar_date,
  fact.calendar_year_month,
  fact.material,
  matl.material_text,
  matl.material_group_text,
  matl.material_type,
  matl.segment,
  matl.global_market_segment,
  matl_dim.apo_logical_product_as,
  matl_dim.apo_logical_product_eu,
  matl_dim.apo_logical_product_la,
  matl_dim.apo_logical_product_na,
  matl_dim.sop_family_as,
  matl_dim.sop_family_eu,
  matl_dim.sop_family_la,
  matl_dim.sop_family_na,
  val.material_type_derived,
  val.material_type_derived_text,
  -- rds.prod_7_long_name,
  tdc.tdc_val_text,
  tdc.category,
  fact.complex_decomposition,
  fact.region,
  plant_d.plant,
  plant_d.plant_text,
  plant_d.country,
  plant_d.country_text,
  plant_d.geographic_group,
  -- "isc_monthly" as `data_type`,
  CASE WHEN fact.calendar_date IN (
  select
      max(calendar_date)
    from
      stg_inv_fct
    group by
      calendar_year_month
  ) THEN 'isc_monthly'
  WHEN fact.calendar_year_month IN (
    select
      max(calendar_year_month)
    from
      stg_inv_fct
  ) THEN 'isc_current_month'
  WHEN fact.calendar_year_month in (
    select
      max( concat( year( date_trunc('MM', date_add( date_trunc('MM', calendar_date), -1))),
      '-',
      case
      when month(date_trunc('MM', date_add( date_trunc('MM', calendar_date), -1))) < 10 then '0'
            else ''
          end,
          month( date_trunc('MM', date_add( date_trunc('MM', calendar_date), -1)))))
    from
      stg_inv_fct
  ) THEN 'isc_previous_month'
  END as `data_type`,
fact.msu_total_plant_stock,
fact.msu_mrp_available_csp_agg,
fact.msu_safety_stock,
fact.msu_quality_inspection,
fact.msu_stock_in_transit,
fact.usd_total_plant_stock,
fact.usd_mrp_available_csp_agg,
fact.usd_safety_stock,
fact.usd_quality_inspection,
fact.usd_stock_in_transit,
fact.raw_total_plant_stock,
fact.raw_mrp_available_csp_agg,
fact.raw_safety_stock,
fact.raw_quality_inspection,
fact.raw_stock_in_transit,
fact.buom_total_plant_stock,
conv.pyjune_factor,
matl.material_group,
fact.tdc_val,
tdc.subsector,
fact.fct_type
from
  stg_inv_fct fact
  join dp_inventory.tdc_val_dim tdc on fact.tdc_val = tdc.tdc_val
  and tdc.subsector in ("FABRICCARE","HOMECARE") 
  join dp_inventory.matl_dim matl on fact.material = matl.material
  join dp_inventory.val_class_dim val on fact.valuation_class = val.valuation_class
  join dp_inventory.matl_dim matl_dim on fact.material = matl_dim.material
  left join dp_inventory.conversion_dim conv on fact.currency_key = conv.currency_key
  -- left join rds.prod_hier_dim rds on rds.prod_19_id = right(fact.material, 8)
  --and rds.prod_hier_id = "710"
  -- and rds.curr_ind = "Y"
  left join dp_inventory.plant_dim plant_d on fact.plant = plant_d.plant
where
  -- and fact.complex_decomposition in ("C", "S")
  -- and (
  --   fact.msu_total_plant_stock != 0
  --   or fact.msu_mrp_available_csp_agg != 0
  --   or fact.msu_safety_stock != 0
  --   or fact.usd_total_plant_stock != 0
  --   or fact.usd_mrp_available_csp_agg != 0
  --   or fact.usd_safety_stock != 0
  -- )
  --and 
  ifnull(plant_d.joint_venture_flag, "") <> "JV"
""")

In [0]:
# Register the monthly staging rows as temp view `stg_ouput`. The (misspelled) view
# name is what the monthly aggregation query selects from, so the two must stay in sync.
monthly_stg_df.createOrReplaceTempView("stg_ouput") 

In [0]:
# Monthly aggregation: sums the stock measures of the `stg_ouput` staging view per
# calendar_date, material, plant and the descriptive attributes listed in GROUP BY.
# The *_py_june_30_rate measures are the raw_* measures multiplied by pyjune_factor,
# falling back to a factor of 1 when the staging row has no conversion factor.
# target / actual_usd / ff_target_usd / ff_version are constant placeholder columns.
#
# Fix: `material_group` was part of the GROUP BY (and is present in the daily
# output) but was missing from the SELECT list, so after unionByName every monthly
# row carried a NULL material_group. It is now selected; it is placed last so the
# positions of the previously existing columns do not move.
#
# NOTE(review): the staging CASE has no ELSE branch and nothing here filters on
# data_type, so dates matching none of its branches are emitted with a NULL
# data_type - confirm that is intended.
monthly_stg_ouput_df = spark.sql("""select
  calendar_date,
  calendar_year_month,
  material,
  material_text,
  material_group_text,
  material_type,
  segment,
  global_market_segment,
  apo_logical_product_as,
  apo_logical_product_eu,
  apo_logical_product_la,
  apo_logical_product_na,
  sop_family_as,
  sop_family_eu,
  sop_family_la,
  sop_family_na,
  material_type_derived,
  material_type_derived_text,
  -- prod_7_long_name,
  tdc_val_text,
  category,
  complex_decomposition,
  region,
  plant,
  plant_text,
  country,
  country_text,
  geographic_group,
  max(data_type) as data_type,

  sum(msu_total_plant_stock) as `msu_total_plant_stock`,
  sum(msu_mrp_available_csp_agg) as `msu_mrp_available_csp_agg`,
  sum(msu_safety_stock) as `msu_safety_stock`,
  sum(msu_quality_inspection) as `msu_quality_inspection`,
  sum(msu_stock_in_transit) as `msu_stock_in_transit`,

  sum(usd_total_plant_stock) as `usd_total_plant_stock`,
  sum(usd_mrp_available_csp_agg) as `usd_mrp_available_csp_agg`,
  sum(usd_safety_stock) as `usd_safety_stock`,
  sum(usd_quality_inspection) as `usd_quality_inspection`,
  sum(usd_stock_in_transit) as `usd_stock_in_transit`,

  sum(raw_total_plant_stock * ifnull(pyjune_factor, 1)) as `usd_total_plant_stock_py_june_30_rate`,
  sum(raw_mrp_available_csp_agg * ifnull(pyjune_factor, 1)) as `usd_mrp_available_csp_agg_py_june_30_rate`,
  sum(raw_safety_stock * ifnull(pyjune_factor, 1)) as `usd_safety_stock_py_june_30_rate`,
  sum(raw_quality_inspection * ifnull(pyjune_factor, 1)) as `usd_quality_inspection_py_june_30_rate`,
  sum(raw_stock_in_transit * ifnull(pyjune_factor, 1)) as `usd_stock_in_transit_py_june_30_rate`,
  sum(buom_total_plant_stock) as `buom_total_plant_stock`,
  0 as `target`,
  0 as `actual_usd`,
  0 as `ff_target_usd`,
  "" as `ff_version`,
  subsector,
  fct_type,
  material_group
from
stg_ouput
group by
  calendar_date,
  calendar_year_month,
  material,
  material_text,
  material_group,
  material_group_text,
  material_type,
  segment,
  global_market_segment,
  apo_logical_product_as,
  apo_logical_product_eu,
  apo_logical_product_la,
  apo_logical_product_na,
  sop_family_as,
  sop_family_eu,
  sop_family_la,
  sop_family_na,
  material_type_derived,
  material_type_derived_text,
  -- prod_7_long_name,
  tdc_val,
  tdc_val_text,
  category,
  complex_decomposition,
  region,
  plant,
  plant_text,
  country,
  country_text,
  geographic_group,
  subsector,
  fct_type
  """)

In [0]:
# from pyspark import *
# from pyspark.sql.functions import *

In [0]:
from functools import reduce
from pyspark.sql import DataFrame

# Combine the monthly and daily outputs into one frame, matching columns by name;
# a column present in only one of them is null-filled for the other's rows.
dfs = [monthly_stg_ouput_df, daily_stg_output_df]
stg_ouput_df = dfs[0]
for _stage_df in dfs[1:]:
    stg_ouput_df = stg_ouput_df.unionByName(_stage_df, allowMissingColumns=True)

In [0]:
# Repartition the combined output by fct_type / subsector / data_type - the same
# columns the commented-out Delta writes below pass to partitionBy.
output_df=stg_ouput_df.repartition("fct_type", "subsector","data_type")

In [0]:
# output_df.write \
#   .format("delta") \
#   .mode("overwrite") \
#   .option("overwriteSchema", "true") \
#   .partitionBy("fct_type", "subsector","data_type") \
#   .saveAsTable("userdb_saravana_sk.fabric_care_isc_global")

In [0]:
# output_df.write \
#   .format("delta") \
#   .mode("overwrite") \
#   .option("overwriteSchema", "true") \
#   .partitionBy("fct_type", "subsector","data_type") \
#   .saveAsTable("groupdb_fhc_eng_analytics.fhc_isc_global") 

In [0]:
%sql
-- Row count of the shared target table.
SELECT count(*)
FROM groupdb_fhc_eng_analytics.fhc_isc_global

In [0]:
#Testing

In [0]:
# from pyspark.sql.functions import *
# #& (col("subsector")=='HOMECARE')
# daily_stg_output_df.filter((col("fct_type")== 'inv_fct_na') & (col('calendar_year_month') == "2023-04"))\
#   .groupBy('calendar_date','subsector').sum('msu_total_plant_stock')\
#     .withColumn('su_total_plant_stock',col('sum(msu_total_plant_stock)')*1000)\
#       .selectExpr('calendar_date','su_total_plant_stock','subsector').sort(col('calendar_date')).display()

In [0]:
%sql
-- Spot check: per calendar_date and subsector, the summed msu_total_plant_stock
-- multiplied by 1000, for the isc_daily rows of inv_fct_na in 2023-04.
SELECT
  calendar_date,
  sum(msu_total_plant_stock) * 1000 AS su_total_plant_stock,
  subsector
FROM groupdb_fhc_eng_analytics.fhc_isc_global
WHERE fct_type = 'inv_fct_na'
  AND calendar_year_month = "2023-04"
  AND data_type = "isc_daily"
GROUP BY calendar_date, subsector
ORDER BY calendar_date


In [0]:
# Row count of the combined monthly + daily output; count() is an action, so this
# runs a Spark job.
output_df.count()

In [0]:
# Render the combined output in the notebook for visual inspection.
display(output_df)

In [0]:
%sql
-- Row count of the userdb_saravana_sk copy of the output table.
SELECT count(*)
FROM userdb_saravana_sk.fabric_care_isc_global

In [0]:
%sql
-- Inspect the isc_monthly FABRICCARE rows that came from inv_fct_na.
SELECT *
FROM userdb_nigam_an.fabric_care_isc_global
WHERE data_type = 'isc_monthly'
  AND subsector = 'FABRICCARE'
  AND fct_type = 'inv_fct_na'

In [0]:
%sql
-- Row count of userdb_nigam_an.fabric_care_isc_monthly.
SELECT count(*)
FROM userdb_nigam_an.fabric_care_isc_monthly

In [0]:
%sql
-- Latest calendar_date present in userdb_nigam_an.fabric_care_isc_global.
SELECT max(calendar_date)
FROM userdb_nigam_an.fabric_care_isc_global