In [2]:
from pyspark.sql.functions import lit
from datetime import datetime
from pyspark.sql.types import *
from pyspark.sql.functions import col, unix_timestamp, to_date,col,year,quarter,month,to_timestamp
from pyspark.sql.types import DateType
from pyspark.sql.functions import col, unix_timestamp, to_date,col,year,quarter,month
from pyspark.sql.functions import col
from pyspark.sql.types import IntegerType
from pyspark.sql.types import DoubleType
from pyspark.sql.types import DateType
from pyspark.sql import functions as F

StatementMeta(, ec991221-8c1e-47bb-bf35-f0d9e8e033e2, 6, Finished, Available)

## Set input parameters

In [3]:

# Reporting window that is interpolated into the DenormalizedPartyWaste query.
# Both bounds are parsed from "YYYY-MM-DD" text into datetime objects, so
# their time component is 00:00:00.
report_start_date, report_end_date = (
    datetime.strptime(iso_day, '%Y-%m-%d')
    for iso_day in ("2022-01-01", "2022-12-31")
)

StatementMeta(, ec991221-8c1e-47bb-bf35-f0d9e8e033e2, 7, Finished, Available)

## Import the required silver layer tables

In [4]:
# Preview up to 1000 rows from two silver-layer tables.
# `#LAKEHOUSE_SILVER_NAME#` appears to be a deployment-time placeholder for the
# lakehouse name (TODO confirm); it is spelled identically in every table
# reference so that a single substitution resolves all of them. The first
# query previously used a mangled variant of the token.
# NOTE(review): Water_WaterUtilization is the only Water_* table referenced in
# the visible part of this notebook - confirm this preview is still wanted.
df = spark.sql("SELECT * FROM #LAKEHOUSE_SILVER_NAME#.Water_WaterUtilization LIMIT 1000")
display(df)

# `df` is rebound here, so only this second preview remains in scope afterwards.
df = spark.sql("SELECT * FROM #LAKEHOUSE_SILVER_NAME#.Waste_Party LIMIT 1000")
display(df)

StatementMeta(, ec991221-8c1e-47bb-bf35-f0d9e8e033e2, 8, Finished, Available)

SynapseWidget(Synapse.DataFrame, bc1a871e-3a6d-4b2f-ac79-ac6ed4b2a192)

SynapseWidget(Synapse.DataFrame, 5acf44b5-cf55-4140-a87f-579df5ba3cb6)

## Silver to Gold Transformation: `DenormalizedPartyWaste`

In [5]:
# Build the denormalized waste dataset that the metric cells aggregate.
#
# The query joins Waste_PartyWaste to its party, party type, unit of measure,
# waste material type, waste category, waste stream and diversion method
# tables, and stamps every row with the report window.
#
# Filters:
#   * the row's period must overlap the report window (period start inside the
#     window, period end inside the window, or either window bound inside the
#     period);
#   * only parties whose PartyTypeName is 'Facility' are kept.
#
# Notes:
#   * report_start_date / report_end_date are datetime objects, so the f-string
#     renders them as 'YYYY-MM-DD 00:00:00' inside the SQL text.
#   * Waste_MetricPurpose (mp) is joined but none of its columns are selected
#     or filtered on. Because it is an inner join it still removes PartyWaste
#     rows that have no matching MetricPurposeId.
sql_query = f"""
SELECT
    cast('{report_start_date}'as date) AS ReportStartDate,
    cast('{report_end_date}' as date) AS ReportEndDate,
    pw.PeriodStartDate AS PeriodStartDate,
    pw.PeriodEndDate AS PeriodEndDate,
    p.PartyId,
    p.PartyName,
    pt.PartyTypeName,
    wmt.WasteMaterialTypeName,
    wmt.HazardousWasteIndicator,
    wmt.RadioactiveWasteIndicator,
    wdm.WasteDiversionMethodName,
    wdm.WasteDiversionFromIndicator,
    wc.WasteCategoryName,
    ws.WasteStreamName,
    pw.WasteWeightUnits,
    um.UnitOfMeasureName
FROM
    #LAKEHOUSE_SILVER_NAME#.Waste_PartyWaste pw
    JOIN #LAKEHOUSE_SILVER_NAME#.Waste_Party p ON pw.PartyId = p.PartyId
    JOIN #LAKEHOUSE_SILVER_NAME#.Waste_PartyType pt ON p.PartyTypeId = pt.PartyTypeId
    JOIN #LAKEHOUSE_SILVER_NAME#.Waste_UnitOfMeasure um ON pw.WasteWeightUomId = um.UnitOfMeasureId
    JOIN #LAKEHOUSE_SILVER_NAME#.Waste_WasteMaterialType wmt ON pw.WasteMaterialTypeId = wmt.WasteMaterialTypeId
    JOIN #LAKEHOUSE_SILVER_NAME#.Waste_WasteCategory wc ON wc.WasteCategoryId = wmt.WasteCategoryId
    JOIN #LAKEHOUSE_SILVER_NAME#.Waste_WasteStream ws ON ws.WasteStreamId = pw.WasteStreamId
    JOIN #LAKEHOUSE_SILVER_NAME#.Waste_WasteDiversionMethod wdm ON wdm.WasteDiversionMethodId = pw.WasteDiversionMethodId
    JOIN #LAKEHOUSE_SILVER_NAME#.Waste_MetricPurpose mp ON mp.MetricPurposeId = pw.MetricPurposeId
WHERE
    ((pw.PeriodStartDate BETWEEN '{report_start_date}' AND '{report_end_date}')
    OR (pw.PeriodEndDate BETWEEN '{report_start_date}' AND '{report_end_date}')
    OR ('{report_start_date}' BETWEEN pw.PeriodStartDate AND pw.PeriodEndDate)
    OR ('{report_end_date}' BETWEEN pw.PeriodStartDate AND pw.PeriodEndDate))
    AND pt.PartyTypeName = 'Facility'
"""

# Create the DataFrame for the query above.
DenormalizedPartyWaste_table = spark.sql(sql_query)

# Register it as a temporary view so the metric cells can select from
# "DenormalizedPartyWaste" by name.
DenormalizedPartyWaste_table.createOrReplaceTempView("DenormalizedPartyWaste")

StatementMeta(, ec991221-8c1e-47bb-bf35-f0d9e8e033e2, 9, Finished, Available)

## Silver to Gold Transformation: `E5-5-3-a`

In [6]:
# Metric 'E5-5-3-a': total waste weight per party.
# Reads the DenormalizedPartyWaste temporary view and sums WasteWeightUnits for
# each combination of report window, party, party type and unit of measure.
# Because UnitOfMeasureName is a grouping column, weights recorded in
# different units end up in separate rows rather than being added together.
sql_query = """
SELECT
    ReportStartDate,
    ReportEndDate,
    'E5-5-3-a' AS MetricId,
    PartyId,
    PartyName,
    PartyTypeName,
    UnitOfMeasureName,
    SUM(WasteWeightUnits) AS MetricValue    
FROM
    DenormalizedPartyWaste
GROUP BY
    ReportStartDate,
    ReportEndDate,
    PartyId,
    PartyName,
    PartyTypeName,
    UnitOfMeasureName
"""

# Create the DataFrame for this metric.
E553a = spark.sql(sql_query)


StatementMeta(, ec991221-8c1e-47bb-bf35-f0d9e8e033e2, 10, Finished, Available)

## Silver to Gold Transformation: `E5-5-3-b-1`

In [7]:
# Metric 'E5-5-3-b-1': waste weight for rows whose diversion method is 'Reuse'
# and whose WasteDiversionFromIndicator is true.
# Reads the DenormalizedPartyWaste temporary view and sums WasteWeightUnits per
# report window, party, party type and unit of measure. The two filter columns
# are also grouping columns but are constant after the WHERE clause, so the
# only extra breakdown comes from HazardousWasteIndicator.
sql_query = """
SELECT
    ReportStartDate,
    ReportEndDate,
    'E5-5-3-b-1' AS MetricId,
    PartyId,
    PartyName,
    PartyTypeName,
    HazardousWasteIndicator,
    WasteDiversionMethodName,
    WasteDiversionFromIndicator,
    UnitOfMeasureName,
    SUM(WasteWeightUnits) AS MetricValue    
FROM
    DenormalizedPartyWaste
WHERE (WasteDiversionMethodName = 'Reuse' AND WasteDiversionFromIndicator = true)
GROUP BY
    ReportStartDate,
    ReportEndDate,
    PartyId,
    PartyName,
    PartyTypeName,
    HazardousWasteIndicator,
    WasteDiversionMethodName,
    WasteDiversionFromIndicator,
    UnitOfMeasureName
"""

# Create the DataFrame for this metric.
E553b1 = spark.sql(sql_query)


StatementMeta(, ec991221-8c1e-47bb-bf35-f0d9e8e033e2, 11, Finished, Available)

## Silver to Gold Transformation: `E5-5-3-b-2`

In [8]:
# Metric 'E5-5-3-b-2': waste weight for rows whose diversion method is
# 'Recycling' and whose WasteDiversionFromIndicator is true.
# Reads the DenormalizedPartyWaste temporary view and sums WasteWeightUnits per
# report window, party, party type and unit of measure. The two filter columns
# are also grouping columns but are constant after the WHERE clause, so the
# only extra breakdown comes from HazardousWasteIndicator.
sql_query = """
SELECT
    ReportStartDate,
    ReportEndDate,
    'E5-5-3-b-2' AS MetricId,
    PartyId,
    PartyName,
    PartyTypeName,
    HazardousWasteIndicator,
    WasteDiversionMethodName,
    WasteDiversionFromIndicator,
    UnitOfMeasureName,
    SUM(WasteWeightUnits) AS MetricValue    
FROM
    DenormalizedPartyWaste
WHERE (WasteDiversionMethodName = 'Recycling' AND WasteDiversionFromIndicator = true)
GROUP BY
    ReportStartDate,
    ReportEndDate,
    PartyId,
    PartyName,
    PartyTypeName,
    HazardousWasteIndicator,
    WasteDiversionMethodName,
    WasteDiversionFromIndicator,
    UnitOfMeasureName
"""

# Create the DataFrame for this metric.
E553b2 = spark.sql(sql_query)


StatementMeta(, ec991221-8c1e-47bb-bf35-f0d9e8e033e2, 12, Finished, Available)

## Silver to Gold Transformation: `E5-5-3-b-3`

In [9]:
# Metric 'E5-5-3-b-3': waste weight for rows whose diversion method is 'Others'
# and whose WasteDiversionFromIndicator is true.
# Reads the DenormalizedPartyWaste temporary view and sums WasteWeightUnits per
# report window, party, party type and unit of measure. The two filter columns
# are also grouping columns but are constant after the WHERE clause, so the
# only extra breakdown comes from HazardousWasteIndicator.
sql_query = """
SELECT
    ReportStartDate,
    ReportEndDate,
    'E5-5-3-b-3' AS MetricId,
    PartyId,
    PartyName,
    PartyTypeName,
    HazardousWasteIndicator,
    WasteDiversionMethodName,
    WasteDiversionFromIndicator,
    UnitOfMeasureName,
    SUM(WasteWeightUnits) AS MetricValue    
FROM
    DenormalizedPartyWaste
WHERE (WasteDiversionMethodName = 'Others' AND WasteDiversionFromIndicator = true)
GROUP BY
    ReportStartDate,
    ReportEndDate,
    PartyId,
    PartyName,
    PartyTypeName,
    HazardousWasteIndicator,
    WasteDiversionMethodName,
    WasteDiversionFromIndicator,
    UnitOfMeasureName
"""

# Create the DataFrame for this metric.
E553b3 = spark.sql(sql_query)


StatementMeta(, ec991221-8c1e-47bb-bf35-f0d9e8e033e2, 13, Finished, Available)

## Silver to Gold Transformation: `E5-5-3-c-1`

In [10]:
# Metric 'E5-5-3-c-1': waste weight for rows whose diversion method is
# 'Incineration' and whose WasteDiversionFromIndicator is false.
# Reads the DenormalizedPartyWaste temporary view and sums WasteWeightUnits per
# report window, party, party type and unit of measure. The two filter columns
# are also grouping columns but are constant after the WHERE clause, so the
# only extra breakdown comes from HazardousWasteIndicator.
sql_query = """
SELECT
    ReportStartDate,
    ReportEndDate,
    'E5-5-3-c-1' AS MetricId,
    PartyId,
    PartyName,
    PartyTypeName,
    HazardousWasteIndicator,
    WasteDiversionMethodName,
    WasteDiversionFromIndicator,
    UnitOfMeasureName,
    SUM(WasteWeightUnits) AS MetricValue    
FROM
    DenormalizedPartyWaste
WHERE (WasteDiversionMethodName = 'Incineration' AND WasteDiversionFromIndicator = false)
GROUP BY
    ReportStartDate,
    ReportEndDate,
    PartyId,
    PartyName,
    PartyTypeName,
    HazardousWasteIndicator,
    WasteDiversionMethodName,
    WasteDiversionFromIndicator,
    UnitOfMeasureName
"""

# Create the DataFrame for this metric.
E553c1 = spark.sql(sql_query)


StatementMeta(, ec991221-8c1e-47bb-bf35-f0d9e8e033e2, 14, Finished, Available)

## Silver to Gold Transformation: `E5-5-3-c-2`

In [11]:
# Metric 'E5-5-3-c-2': waste weight for rows whose diversion method is
# 'Landfilling' and whose WasteDiversionFromIndicator is false.
# Reads the DenormalizedPartyWaste temporary view and sums WasteWeightUnits per
# report window, party, party type and unit of measure. The two filter columns
# are also grouping columns but are constant after the WHERE clause, so the
# only extra breakdown comes from HazardousWasteIndicator.
sql_query = """
SELECT
    ReportStartDate,
    ReportEndDate,
    'E5-5-3-c-2' AS MetricId,
    PartyId,
    PartyName,
    PartyTypeName,
    HazardousWasteIndicator,
    WasteDiversionMethodName,
    WasteDiversionFromIndicator,
    UnitOfMeasureName,
    SUM(WasteWeightUnits) AS MetricValue    
FROM
    DenormalizedPartyWaste
WHERE (WasteDiversionMethodName = 'Landfilling' AND WasteDiversionFromIndicator = false)
GROUP BY
    ReportStartDate,
    ReportEndDate,
    PartyId,
    PartyName,
    PartyTypeName,
    HazardousWasteIndicator,
    WasteDiversionMethodName,
    WasteDiversionFromIndicator,
    UnitOfMeasureName
"""

# Create the DataFrame for this metric.
E553c2 = spark.sql(sql_query)


StatementMeta(, ec991221-8c1e-47bb-bf35-f0d9e8e033e2, 15, Finished, Available)

## Silver to Gold Transformation: `E5-5-3-c-3`

In [12]:
# Metric 'E5-5-3-c-3': waste weight for rows whose diversion method is 'Others'
# and whose WasteDiversionFromIndicator is false.
# Reads the DenormalizedPartyWaste temporary view and sums WasteWeightUnits per
# report window, party, party type and unit of measure. The two filter columns
# are also grouping columns but are constant after the WHERE clause, so the
# only extra breakdown comes from HazardousWasteIndicator.
sql_query = """
SELECT
    ReportStartDate,
    ReportEndDate,
    'E5-5-3-c-3' AS MetricId,
    PartyId,
    PartyName,
    PartyTypeName,
    HazardousWasteIndicator,
    WasteDiversionMethodName,
    WasteDiversionFromIndicator,
    UnitOfMeasureName,
    SUM(WasteWeightUnits) AS MetricValue    
FROM
    DenormalizedPartyWaste
WHERE (WasteDiversionMethodName = 'Others' AND WasteDiversionFromIndicator = false)
GROUP BY
    ReportStartDate,
    ReportEndDate,
    PartyId,
    PartyName,
    PartyTypeName,
    HazardousWasteIndicator,
    WasteDiversionMethodName,
    WasteDiversionFromIndicator,
    UnitOfMeasureName
"""

# Create the DataFrame for this metric.
E553c3 = spark.sql(sql_query)


StatementMeta(, ec991221-8c1e-47bb-bf35-f0d9e8e033e2, 16, Finished, Available)

## Silver to Gold Transformation: `E5-5-4-a`

In [13]:
# Metric 'E5-5-4-a': waste weight broken down by waste category and stream.
# Reads the DenormalizedPartyWaste temporary view (no row filter) and sums
# WasteWeightUnits per report window, party, party type, WasteCategoryName,
# WasteStreamName and unit of measure.
sql_query = """
SELECT
    ReportStartDate,
    ReportEndDate,
    'E5-5-4-a' AS MetricId,
    PartyId,
    PartyName,
    PartyTypeName,
    WasteCategoryName,
    WasteStreamName,
    UnitOfMeasureName,
    SUM(WasteWeightUnits) AS MetricValue    
FROM
    DenormalizedPartyWaste
GROUP BY
    ReportStartDate,
    ReportEndDate,
    PartyId,
    PartyName,
    PartyTypeName,
    WasteCategoryName,
    WasteStreamName,
    UnitOfMeasureName
"""

# Create the DataFrame for this metric.
E554a = spark.sql(sql_query)


StatementMeta(, ec991221-8c1e-47bb-bf35-f0d9e8e033e2, 17, Finished, Available)

## Silver to Gold Transformation: `E5-5-4-b`

In [14]:
# Metric 'E5-5-4-b': waste weight broken down by waste category, stream and
# material type.
# Reads the DenormalizedPartyWaste temporary view (no row filter) and sums
# WasteWeightUnits per report window, party, party type, WasteCategoryName,
# WasteStreamName, WasteMaterialTypeName and unit of measure.
sql_query = """
SELECT
    ReportStartDate,
    ReportEndDate,
    'E5-5-4-b' AS MetricId,
    PartyId,
    PartyName,
    PartyTypeName,
    WasteCategoryName,
    WasteStreamName,
    WasteMaterialTypeName,
    UnitOfMeasureName,
    SUM(WasteWeightUnits) AS MetricValue    
FROM
    DenormalizedPartyWaste
GROUP BY
    ReportStartDate,
    ReportEndDate,
    PartyId,
    PartyName,
    PartyTypeName,
    WasteCategoryName,
    WasteStreamName,
    WasteMaterialTypeName,
    UnitOfMeasureName
"""

# Create the DataFrame for this metric.
E554b = spark.sql(sql_query)


StatementMeta(, ec991221-8c1e-47bb-bf35-f0d9e8e033e2, 18, Finished, Available)

## Silver to Gold Transformation: `E5-5-5`

In [15]:
# Metric 'E5-5-5': waste weight for rows flagged as hazardous or radioactive.
# Reads the DenormalizedPartyWaste temporary view, keeps rows where
# HazardousWasteIndicator or RadioactiveWasteIndicator is true, and sums
# WasteWeightUnits per report window, party, party type, both indicator
# columns and unit of measure. Grouping by both indicators means each
# hazardous/radioactive combination gets its own row.
sql_query = """
SELECT
    ReportStartDate,
    ReportEndDate,
    'E5-5-5' AS MetricId,
    PartyId,
    PartyName,
    PartyTypeName,
    HazardousWasteIndicator,
    RadioactiveWasteIndicator,
    UnitOfMeasureName,
    SUM(WasteWeightUnits) AS MetricValue    
FROM
    DenormalizedPartyWaste
WHERE (HazardousWasteIndicator = true OR RadioactiveWasteIndicator = true)
GROUP BY
    ReportStartDate,
    ReportEndDate,
    PartyId,
    PartyName,
    PartyTypeName,
    HazardousWasteIndicator,
    RadioactiveWasteIndicator,
    UnitOfMeasureName
"""

# Create the DataFrame for this metric.
E555 = spark.sql(sql_query)


StatementMeta(, ec991221-8c1e-47bb-bf35-f0d9e8e033e2, 19, Finished, Available)

## Merge the metrics into the final output tables

In [16]:
# Group the per-metric DataFrames into the five output frames.
# Four metrics map one-to-one onto an output frame.
wastedata1, wastedata3, wastedata4, wastedata5 = E553a, E554a, E554b, E555

# The six diversion-method metrics (E5-5-3-b-* and E5-5-3-c-*) select the same
# columns in the same order, so they are stacked into one frame. unionAll
# matches columns by position, not by name.
wastedata2 = (
    E553b1
    .unionAll(E553b2)
    .unionAll(E553b3)
    .unionAll(E553c1)
    .unionAll(E553c2)
    .unionAll(E553c3)
)


StatementMeta(, ec991221-8c1e-47bb-bf35-f0d9e8e033e2, 20, Finished, Available)

In [17]:
# Give each output frame the name of the table it is destined for.
# The names are also used verbatim as table-name suffixes in the export cell,
# so their spelling is kept exactly as it is.
(
    TotalWaste_PerParty_ByPartyType,
    TotalWaste_PerPartyName_Type_ByDiversionMethod_Indicator,
    TotalWaste_PerPartyName_Type_ByWasteCategory_Stream,
    TotalWaste_PerPartyName_Type_ByWasteCategory_Stream_WasteMaterialType,
    TotalWaste_PerPartyName_Type_ByWasteIndicator_Hazardous_Redioactive,
) = (wastedata1, wastedata2, wastedata3, wastedata4, wastedata5)

StatementMeta(, ec991221-8c1e-47bb-bf35-f0d9e8e033e2, 21, Finished, Available)

In [18]:
# Preview the first output frame (the 'E5-5-3-a' per-party totals).
display(wastedata1)

StatementMeta(, ec991221-8c1e-47bb-bf35-f0d9e8e033e2, 22, Finished, Available)

SynapseWidget(Synapse.DataFrame, c96afb66-370d-46cc-88ec-e971b3901441)

In [19]:
# df4=wastedata5.select(col("ReportStartDate"),col("ReportEndDate"))
# df4.write.mode("append").format("delta").saveAsTable("#LAKEHOUSE_SILVER_NAME#.Waste_Reporting_Year")

StatementMeta(, ec991221-8c1e-47bb-bf35-f0d9e8e033e2, 23, Finished, Available)

In [20]:
# Export step: persist the aggregated results as Delta tables.
#
# `outputs` and `TableList` are parallel lists: entry i of `TableList` is the
# table-name suffix for entry i of `outputs`.
#
# NOTE(review): only entry 0 (TotalWaste_PerParty_ByPartyType) is actually
# written; entries 1-4 are iterated over but skipped. Confirm whether the
# remaining tables are meant to be exported as well.
# NOTE(review): the target is built from the `#LAKEHOUSE_SILVER_NAME#` token
# although this step is described as a gold-layer export - confirm the
# intended destination.
#
# Column orders drafted earlier for the currently disabled exports (same
# write call, different select list):
#   1: HazardousWasteIndicator, MetricId, MetricValue, PartyId, PartyName,
#      PartyTypeName, ReportEndDate, ReportStartDate, UnitOfMeasureName,
#      WasteDiversionFromIndicator, WasteDiversionMethodName
#   2: MetricId, MetricValue, PartyId, PartyName, PartyTypeName,
#      ReportEndDate, ReportStartDate, UnitOfMeasureName, WasteCategoryName,
#      WasteStreamName
#   3: MetricId, MetricValue, PartyId, PartyName, PartyTypeName,
#      ReportEndDate, ReportStartDate, UnitOfMeasureName, WasteCategoryName,
#      WasteMaterialTypeName, WasteStreamName
#   4: HazardousWasteIndicator, MetricId, MetricValue, PartyId, PartyName,
#      PartyTypeName, RadioactiveWasteIndicator, ReportEndDate,
#      ReportStartDate, UnitOfMeasureName
outputs = [
    TotalWaste_PerParty_ByPartyType,
    TotalWaste_PerPartyName_Type_ByDiversionMethod_Indicator,
    TotalWaste_PerPartyName_Type_ByWasteCategory_Stream,
    TotalWaste_PerPartyName_Type_ByWasteCategory_Stream_WasteMaterialType,
    TotalWaste_PerPartyName_Type_ByWasteIndicator_Hazardous_Redioactive,
]

TableList = [
    "TotalWaste_PerParty_ByPartyType",
    "TotalWaste_PerPartyName_Type_ByDiversionMethod_Indicator",
    "TotalWaste_PerPartyName_Type_ByWasteCategory_Stream",
    "TotalWaste_PerPartyName_Type_ByWasteCategory_Stream_WasteMaterialType",
    "TotalWaste_PerPartyName_Type_ByWasteIndicator_Hazardous_Redioactive",
]

for i, output in enumerate(outputs):
    # Everything except the first entry is skipped (see the review note above).
    if i != 0:
        continue

    # Prints the DataFrame's schema summary, not its rows.
    print(output)

    # Fix the column order of the stored table.
    output = output.select(
        "MetricValue",
        "MetricId",
        "PartyId",
        "PartyName",
        "PartyTypeName",
        "ReportEndDate",
        "ReportStartDate",
        "UnitOfMeasureName",
    )

    # "overwrite" replaces any existing contents of the target table.
    (
        output.write
        .format("delta")
        .mode("overwrite")
        .saveAsTable('#LAKEHOUSE_SILVER_NAME#.Waste_' + TableList[i])
    )

StatementMeta(, ec991221-8c1e-47bb-bf35-f0d9e8e033e2, 24, Finished, Available)

DataFrame[ReportStartDate: date, ReportEndDate: date, MetricId: string, PartyId: bigint, PartyName: string, PartyTypeName: string, UnitOfMeasureName: string, MetricValue: decimal(28,8)]
