
#### Run the cell below to install the required packages for Copilot


In [1]:

# #Run this cell to install the required packages for Copilot
# %pip install https://aka.ms/chat-magics-0.0.0-py3-none-any.whl
# %load_ext chat_magics


StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 5, Finished, Available)

### Import required libraries

In [2]:
from pyspark.sql.functions import lit
from datetime import datetime
from pyspark.sql.types import *
from pyspark.sql.functions import col, unix_timestamp, to_date,col,year,quarter,month,to_timestamp
from pyspark.sql.types import DateType
from pyspark.sql.functions import col, unix_timestamp, to_date,col,year,quarter,month
from pyspark.sql.functions import col
from pyspark.sql.types import IntegerType
from pyspark.sql.types import DoubleType
from pyspark.sql.types import DateType
from pyspark.sql import functions as F

StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 6, Finished, Available)

## Set input parameters

In [3]:

# # Set the report start and end dates
report_start_date = "2022-01-01"
report_end_date = "2022-12-31"
report_start_date = datetime.strptime(report_start_date, '%Y-%m-%d')
report_end_date = datetime.strptime(report_end_date, '%Y-%m-%d')

StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 7, Finished, Available)

## Silver to Gold Transformation

### `IncidentInReportDuration`

In [4]:
# Filter the incidents based on the given report duration and join with IncidentType, IncidentRelatedParty, and IncidentPartyRelationshipType
query = f"""
    SELECT i.IncidentId, pt.PartyTypeName, i.IncidentPeriodStartTimestamp, i.IncidentPeriodEndTimestamp, it.IncidentTypeName, i.NumberOfInjuries, i.NumberOfFatalities, i.TotalWorkerHoursLost, i.TotalOutageHoursLost, i.TotalIncidentCost
    FROM #LAKEHOUSE_SILVER_NAME#.SGESG_Incident AS i
    JOIN #LAKEHOUSE_SILVER_NAME#.SGESG_IncidentType AS it ON i.IncidentTypeId = it.IncidentTypeId
    JOIN #LAKEHOUSE_SILVER_NAME#.SGESG_IncidentRelatedParty AS irp ON i.IncidentId = irp.IncidentId AND i.IncidentPeriodStartTimestamp = irp.PeriodStartTimestamp
    JOIN #LAKEHOUSE_SILVER_NAME#.SGESG_IncidentPartyRelationshipType AS iprt ON irp.IncidentPartyRelationshipTypeId = iprt.IncidentPartyRelationshipTypeId
    JOIN #LAKEHOUSE_SILVER_NAME#.SGESG_Party As p on p.PartyId = irp.PartyId
    JOIN #LAKEHOUSE_SILVER_NAME#.SGESG_PartyType AS pt ON pt.PartyTypeId = p.PartyTypeId
    WHERE (iprt.IncidentPartyRelationshipTypeName = 'Injured')
      AND (i.IncidentPeriodStartTimestamp BETWEEN '{report_start_date}' AND '{report_end_date}'
           OR i.IncidentPeriodEndTimestamp BETWEEN '{report_start_date}' AND '{report_end_date}'
           OR '{report_start_date}' BETWEEN i.IncidentPeriodStartTimestamp AND i.IncidentPeriodEndTimestamp
           OR '{report_end_date}' BETWEEN i.IncidentPeriodStartTimestamp AND i.IncidentPeriodEndTimestamp)
"""

# Execute the query
result = spark.sql(query)
result.createOrReplaceTempView("IncidentInReportDuration")
display(result)


StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 8, Finished, Available)

SynapseWidget(Synapse.DataFrame, 80522e06-bfc9-4ba2-bc00-ec8721f35ffd)

### `S1-14-84-b`

In [5]:
# Filter the incidents based on the given criteria in S1-14-84-b.
query = f"""
    SELECT 'S1-14-84-b' AS MetricId, PartyTypeName, SUM(NumberOfFatalities) AS MetricValue
    FROM IncidentInReportDuration
    WHERE (IncidentTypeName = 'work-related injury' OR IncidentTypeName = 'work-related ill health')
      AND (PartyTypeName = 'Employee' OR PartyTypeName = 'Non-employee')
    GROUP BY PartyTypeName
"""

# Execute the query
S11484b = spark.sql(query)
S11484b.createOrReplaceTempView("S11484b")
# S11484b.show()

StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 9, Finished, Available)

### `S1-14-84-c`

In [6]:
# Filter the incidents based on the given criteria in S1-14-84-c.
query = f"""
    SELECT 'S1-14-84-c' AS MetricId, PartyTypeName, COUNT(IncidentId) AS MetricValue
    FROM IncidentInReportDuration
    WHERE (IncidentTypeName = 'recordable work-related accident')
      AND (PartyTypeName = 'Employee' OR PartyTypeName = 'Non-employee')
    GROUP BY PartyTypeName
"""


# Execute the query
S11484c = spark.sql(query)
S11484c.createOrReplaceTempView("S11484c")
# S11484c.show()

StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 10, Finished, Available)

### `S1-14-84-d`

In [7]:
# Filter the incidents based on the given criteria in S1-14-84-d.
query = f"""
    SELECT 'S1-14-84-d' AS MetricId, PartyTypeName, COUNT(IncidentId) AS MetricValue
    FROM IncidentInReportDuration
    WHERE (IncidentTypeName = 'work-related ill health')
      AND (PartyTypeName = 'Employee' OR PartyTypeName = 'Non-employee')
    GROUP BY PartyTypeName
"""


# Execute the query
S11484d = spark.sql(query)
S11484d.createOrReplaceTempView("S11484d")
# S11484d.show()

StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 11, Finished, Available)

### `S1-14-84-e`

In [8]:
# Filter the incidents based on the given criteria in S1-14-84-e.
query = f"""
    SELECT 'S1-14-84-e' AS MetricId, PartyTypeName, SUM(TotalWorkerHoursLost)/24 AS MetricValue
    FROM IncidentInReportDuration
    WHERE (IncidentTypeName = 'work-related injury' OR  IncidentTypeName = 'work-related accident' OR IncidentTypeName = 'work-related ill health' OR IncidentTypeName = 'ill health')
      AND (PartyTypeName = 'Employee' OR PartyTypeName = 'Non-employee')
    GROUP BY PartyTypeName
"""


# Execute the query
S11484e = spark.sql(query)
S11484e.createOrReplaceTempView("S11484e")
# S11484e.show()

StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 12, Finished, Available)

### `S1-17-98-a`

In [9]:
# Filter the incidents based on the given criteria in S1-17-98-a.
query = f"""
    SELECT 'S1-17-98-a' AS MetricId, PartyTypeName, COUNT(IncidentId) AS MetricValue
    FROM IncidentInReportDuration
    WHERE (IncidentTypeName = 'discrimination' OR  IncidentTypeName = 'harassment')
      AND (PartyTypeName = 'Employee' OR PartyTypeName = 'Non-employee')
    GROUP BY PartyTypeName
"""


# Execute the query
S11798a = spark.sql(query)
S11798a.createOrReplaceTempView("S11798a")
# S11798a.show()

StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 13, Finished, Available)

### `S1-17-98-b`

In [10]:
# Filter the incidents based on the given criteria in S1-17-98-b.
query = f"""
    SELECT 'S1-17-98-b' AS MetricId, PartyTypeName, COUNT(IncidentId) AS MetricValue
    FROM IncidentInReportDuration
    WHERE (IncidentTypeName != 'discrimination' AND  IncidentTypeName != 'harassment')
      AND (PartyTypeName = 'Employee' OR PartyTypeName = 'Non-employee')
    GROUP BY PartyTypeName
"""


# Execute the query
S11798b = spark.sql(query)
S11798b.createOrReplaceTempView("S11798b")
# S11798b.show()

StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 14, Finished, Available)

### `S1-17-98-c`

In [11]:
# Filter the incidents based on the given criteria in S1-17-98-c.
query = """
    SELECT 'S1-17-98-c' AS MetricId, ict.IncidentCostTypeName, SUM(ic.IncidentCostAmount) AS MetricValue
    FROM IncidentInReportDuration i
    JOIN #LAKEHOUSE_SILVER_NAME#.SGESG_IncidentCost ic ON i.IncidentId = ic.IncidentId
    JOIN #LAKEHOUSE_SILVER_NAME#.SGESG_IncidentCostType ict ON ic.IncidentCostTypeId = ict.IncidentCostTypeId
    WHERE (i.IncidentTypeName = 'work-related grievance' OR i.IncidentTypeName = 'social and human right matter')
        AND ict.IncidentCostTypeName IN ('Material fine', 'Penality', 'Compensation for damage')
    GROUP BY ict.IncidentCostTypeName
"""

# Execute the query
S11798c = spark.sql(query)
S11798c.createOrReplaceTempView("S11798c")
# S11798c.show()

StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 15, Finished, Available)

### `S1-17-99-a`

In [12]:
# Filter the incidents based on the given criteria in S1-17-99-a.
query = f"""
    SELECT 'S1-17-99-a' AS MetricId, PartyTypeName, COUNT(IncidentId) AS MetricValue
    FROM IncidentInReportDuration
    WHERE (IncidentTypeName = 'severe human right issues')
      AND (PartyTypeName = 'Employee' OR PartyTypeName = 'Non-employee')
    GROUP BY PartyTypeName
"""

# Execute the query
S11799a = spark.sql(query)
S11799a.createOrReplaceTempView("S11799a")
# S11799a.show()

StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 16, Finished, Available)

### `S1-17-99-b`

In [13]:
# Filter the incidents based on the given criteria in S1-17-99-b.
query = """
    SELECT 'S1-17-99-b' AS MetricId, ict.IncidentCostTypeName, SUM(ic.IncidentCostAmount) AS MetricValue
    FROM IncidentInReportDuration i
    JOIN #LAKEHOUSE_SILVER_NAME#.SGESG_IncidentCost ic ON i.IncidentId = ic.IncidentId
    JOIN #LAKEHOUSE_SILVER_NAME#.SGESG_IncidentCostType ict ON ic.IncidentCostTypeId = ict.IncidentCostTypeId
    WHERE (i.IncidentTypeName = 'severe human right issues')
        AND ict.IncidentCostTypeName IN ('Material fine', 'Penality', 'Compensation for damage')
    GROUP BY ict.IncidentCostTypeName
"""

# Execute the query
S11799b = spark.sql(query)
S11799b.createOrReplaceTempView("S11799b")
# S11799b.show()

StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 17, Finished, Available)

### `S1-9-65-a`

In [14]:
# Query to calculate the number of employees for each GenderName with KeyEmployeeIndicator as true
query = """
    SELECT 'S1-9-65-a' AS MetricId, g.GenderName, COUNT(*) AS MetricValue
    FROM #LAKEHOUSE_SILVER_NAME#.SGESG_Employee e
    JOIN #LAKEHOUSE_SILVER_NAME#.SGESG_Gender g ON g.GenderId = e.GenderId
    WHERE e.KeyEmployeeIndicator = true
    GROUP BY g.GenderName
"""
# Execute the query
S1965a = spark.sql(query)
S1965a.createOrReplaceTempView("S1965a")
# S1965a.show()

StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 18, Finished, Available)

### `S1-9-65-b-1`

In [15]:
# Query to count the number of employees under 30 years old
query = """
    SELECT 'S1-9-65-b-1' AS MetricId, 'Under 30 years' AS AgeGroup, COUNT(*) AS MetricValue
    FROM #LAKEHOUSE_SILVER_NAME#.SGESG_Employee
    WHERE DATEDIFF(CURRENT_DATE(), DateOfBirth) / 365 < 30
"""
# Execute the query
S1965b1 = spark.sql(query)
S1965b1.createOrReplaceTempView("S1965b1")
# S1965b1.show()

StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 19, Finished, Available)

### `S1-9-65-b-2`

In [16]:
# Query to count the number of employees in 30-50 years old
query = """
    SELECT 'S1-9-65-b-2' AS MetricId, '30-50 years' AS AgeGroup, COUNT(*) AS MetricValue
    FROM #LAKEHOUSE_SILVER_NAME#.SGESG_Employee
    WHERE DATEDIFF(CURRENT_DATE(), DateOfBirth) / 365 BETWEEN 30 AND 50
"""
# Execute the query
S1965b2 = spark.sql(query)
S1965b2.createOrReplaceTempView("S1965b2")
# S1965b2.show()

StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 20, Finished, Available)

### `S1-9-65-b-3`

In [17]:
# Query to count the number of employees above 50 years old
query = """
    SELECT 'S1-9-65-b-3' AS MetricId, 'Above 50 years' AS AgeGroup, COUNT(*) AS MetricValue
    FROM #LAKEHOUSE_SILVER_NAME#.SGESG_Employee
    WHERE DATEDIFF(CURRENT_DATE(), DateOfBirth) / 365 > 50
"""
# Execute the query
S1965b3 = spark.sql(query)
S1965b3.createOrReplaceTempView("S1965b3")
# S1965b3.show()

StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 21, Finished, Available)

### `S1-6-51-a`

In [18]:
query = f"""
    SELECT
    'S1-6-51-a' AS MetricId,
    Country.IsoCountryName,
    Gender.GenderName,
    COUNT(DISTINCT Employee.EmployeeId) AS MetricValue
FROM
    #LAKEHOUSE_SILVER_NAME#.SGESG_Employee AS Employee
JOIN
    #LAKEHOUSE_SILVER_NAME#.SGESG_Gender AS Gender ON Employee.GenderId = Gender.GenderId
JOIN
    #LAKEHOUSE_SILVER_NAME#.SGESG_EmployeeLocation  AS EmployeeLocation ON Employee.EmployeeId = EmployeeLocation.EmployeeId
JOIN
    #LAKEHOUSE_SILVER_NAME#.SGESG_Location AS Location ON EmployeeLocation.LocationId = Location.LocationId
JOIN
    #LAKEHOUSE_SILVER_NAME#.SGESG_Country As Country ON Location.CountryId = Country.CountryId
WHERE
    (EmployeeLocation.PeriodStartTimestamp BETWEEN '{report_start_date}' AND '{report_end_date}'
    OR EmployeeLocation.PeriodEndTimestamp BETWEEN '{report_start_date}' AND '{report_end_date}'
    OR '{report_start_date}' BETWEEN EmployeeLocation.PeriodStartTimestamp AND EmployeeLocation.PeriodEndTimestamp
    OR '{report_end_date}' BETWEEN EmployeeLocation.PeriodStartTimestamp AND EmployeeLocation.PeriodEndTimestamp)
GROUP BY
   Gender.GenderName,
    Country.IsoCountryName;
"""

# Execute the query
S1651a = spark.sql(query)
S1651a.createOrReplaceTempView("S1651a")
# S1651a.show()

####################################################################################################################
# Note: Uncomment below code when you have enough data. Since, currently number of employees in sample data is very less, 
# we will get empty output as we are filtering data for countries having more than 50 employees.
# query = """
# SELECT *
# FROM S1651a
# WHERE IsoCountryName IN (
#     SELECT IsoCountryName
#     FROM S1651a
#     GROUP BY IsoCountryName
#     HAVING SUM(EmployeeCount) > 50
# );

# """
# S1651a = spark.sql(query)
# S1651a.createOrReplaceTempView("S1651a")
# S1651a.show()
#######################################################################################################################

StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 22, Finished, Available)

### `EmployeeByPartyTypeAndRegionAndGender`

In [19]:
query = f"""
    SELECT
    SubdivisionCategory.SubdivisionCategoryName,
    Gender.GenderName,
    PartyType.PartyTypeName,
    COUNT(DISTINCT Employee.EmployeeId) AS EmployeeCount
FROM
    #LAKEHOUSE_SILVER_NAME#.SGESG_Employee AS Employee
JOIN
    #LAKEHOUSE_SILVER_NAME#.SGESG_Gender As Gender ON Employee.GenderId = Gender.GenderId
JOIN
    #LAKEHOUSE_SILVER_NAME#.SGESG_EmployeeLocation As EmployeeLocation ON Employee.EmployeeId = EmployeeLocation.EmployeeId
JOIN
    #LAKEHOUSE_SILVER_NAME#.SGESG_Location As Location ON EmployeeLocation.LocationId = Location.LocationId
JOIN
    #LAKEHOUSE_SILVER_NAME#.SGESG_CountrySubdivision AS CountrySubdivision ON Location.SubdivisionId = CountrySubdivision.SubdivisionId
JOIN
    #LAKEHOUSE_SILVER_NAME#.SGESG_SubdivisionCategory As SubdivisionCategory ON CountrySubdivision.SubdivisionCategoryId = SubdivisionCategory.SubdivisionCategoryId
JOIN
    #LAKEHOUSE_SILVER_NAME#.SGESG_Party AS Party ON Party.PartyId = Employee.PartyId
JOIN
    #LAKEHOUSE_SILVER_NAME#.SGESG_PartyType AS PartyType ON Party.PartyTypeId = PartyType.PartyTypeId
WHERE
    (EmployeeLocation.PeriodStartTimestamp BETWEEN '{report_start_date}' AND '{report_end_date}'
    OR EmployeeLocation.PeriodEndTimestamp BETWEEN '{report_start_date}' AND '{report_end_date}'
    OR '{report_start_date}' BETWEEN EmployeeLocation.PeriodStartTimestamp AND EmployeeLocation.PeriodEndTimestamp
    OR '{report_end_date}' BETWEEN EmployeeLocation.PeriodStartTimestamp AND EmployeeLocation.PeriodEndTimestamp)
GROUP BY
    Gender.GenderName,
    SubdivisionCategory.SubdivisionCategoryName,
    PartyType.PartyTypeName;
"""

# Execute the query
result = spark.sql(query)
result.createOrReplaceTempView("EmployeeByPartyTypeAndRegionAndGender")
# result.show()

StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 23, Finished, Available)

### `S1-6-51-b-1`

In [20]:
query = """
SELECT 'S1-6-51-b-1' AS MetricId, SubdivisionCategoryName, GenderName, PartyTypeName, EmployeeCount as MetricValue
FROM EmployeeByPartyTypeAndRegionAndGender
WHERE PartyTypeName = 'Permanent employee'
"""

S1651b1 = spark.sql(query)
S1651b1.createOrReplaceTempView("S1651b1")
# S1651b1.show()

StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 24, Finished, Available)

### `S1-6-51-b-2`

In [21]:
query = """
SELECT 'S1-6-51-b-2' AS MetricId, SubdivisionCategoryName, GenderName, PartyTypeName, EmployeeCount as MetricValue
FROM EmployeeByPartyTypeAndRegionAndGender
WHERE PartyTypeName = 'Temporary employee'
"""
S1651b2 = spark.sql(query)
S1651b2.createOrReplaceTempView("S1651b2")
# S1651b2.show()

StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 25, Finished, Available)

### `S1-6-51-b-3`

In [22]:
query = """
SELECT 'S1-6-51-b-3' AS MetricId, SubdivisionCategoryName, GenderName, PartyTypeName, EmployeeCount as MetricValue
FROM EmployeeByPartyTypeAndRegionAndGender
WHERE PartyTypeName = 'Non-guaranteed hours employee'
"""

S1651b3 = spark.sql(query)
S1651b3.createOrReplaceTempView("S1651b3")
# S1651b3.show()

StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 26, Finished, Available)

### `S1-6-52-a`

In [23]:
query = """
SELECT 'S1-6-52-a' AS MetricId, SubdivisionCategoryName, GenderName, PartyTypeName, EmployeeCount as MetricValue
FROM EmployeeByPartyTypeAndRegionAndGender
WHERE PartyTypeName = 'Full-time employee'
"""

S1652a = spark.sql(query)
S1652a.createOrReplaceTempView("S1652a")
# S1652a.show()

StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 27, Finished, Available)

### `S1-6-52-b`

In [24]:
query = """
SELECT 'S1-6-52-b' AS MetricId, SubdivisionCategoryName, GenderName, PartyTypeName, EmployeeCount as MetricValue
FROM EmployeeByPartyTypeAndRegionAndGender
WHERE PartyTypeName = 'Part-time employee'
"""

S1652b = spark.sql(query)
S1652b.createOrReplaceTempView("S1652b")
# S1652b.show()

StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 28, Finished, Available)

### `S1-12-76`

In [25]:
total_number_of_employees = spark.sql("""SELECT COUNT(DISTINCT e.EmployeeId) AS EmployeeCount FROM #LAKEHOUSE_SILVER_NAME#.SGESG_Employee e""").collect()[0]["EmployeeCount"]
# print(total_number_of_employees)

StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 29, Finished, Available)

In [26]:
# Query to calculate the percentage of employees with disability
query = f"""
    SELECT 'S1-12-76' AS MetricId, (COUNT(DISTINCT e.EmployeeId))*100/'{total_number_of_employees}' AS MetricValue
    FROM #LAKEHOUSE_SILVER_NAME#.SGESG_Employee e
    JOIN #LAKEHOUSE_SILVER_NAME#.SGESG_PartyDisability pd ON e.PartyId = pd.PartyId
    WHERE
    (pd.PartyDisabilityPeriodStartDate BETWEEN '{report_start_date}' AND '{report_end_date}'
    OR pd.PartyDisabilityPeriodEndDate BETWEEN '{report_start_date}' AND '{report_end_date}'
    OR '{report_start_date}' BETWEEN pd.PartyDisabilityPeriodStartDate AND pd.PartyDisabilityPeriodEndDate
    OR '{report_end_date}' BETWEEN pd.PartyDisabilityPeriodStartDate AND pd.PartyDisabilityPeriodEndDate)
"""

# Execute the query
S11276 = spark.sql(query)
S11276.createOrReplaceTempView("S11276")
# S11276.show()

StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 30, Finished, Available)

### `S1-12-77`

In [27]:
# Query to calculate the percentage of employees with disability for each GenderName
query = f"""
    SELECT 'S1-12-77' AS MetricId, g.GenderName, (COUNT(DISTINCT e.EmployeeId))*100/'{total_number_of_employees}' AS MetricValue
    FROM #LAKEHOUSE_SILVER_NAME#.SGESG_Employee e
    JOIN #LAKEHOUSE_SILVER_NAME#.SGESG_Gender g ON g.GenderId = e.GenderId
    JOIN #LAKEHOUSE_SILVER_NAME#.SGESG_PartyDisability pd ON e.PartyId = pd.PartyId
    WHERE
    (pd.PartyDisabilityPeriodStartDate BETWEEN '{report_start_date}' AND '{report_end_date}'
    OR pd.PartyDisabilityPeriodEndDate BETWEEN '{report_start_date}' AND '{report_end_date}'
    OR '{report_start_date}' BETWEEN pd.PartyDisabilityPeriodStartDate AND pd.PartyDisabilityPeriodEndDate
    OR '{report_end_date}' BETWEEN pd.PartyDisabilityPeriodStartDate AND pd.PartyDisabilityPeriodEndDate)
    GROUP BY g.GenderName
"""

# Execute the query
S11277 = spark.sql(query)
S11277.createOrReplaceTempView("S11277")
# S11277.show()

StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 31, Finished, Available)

### `S1-13-80-a`

In [28]:
# Query to calculate the percentage of employees that participated in regular performance and career development reviews
query = f"""
SELECT 
    'S1-13-80-a' AS MetricId, PartyType.PartyTypeName, Gender.GenderName, (COUNT(DISTINCT e.EmployeeId))*100/'{total_number_of_employees}' AS MetricValue
FROM 
    #LAKEHOUSE_SILVER_NAME#.SGESG_Employee e
JOIN
    #LAKEHOUSE_SILVER_NAME#.SGESG_Gender AS Gender ON e.GenderId = Gender.GenderId
JOIN
    #LAKEHOUSE_SILVER_NAME#.SGESG_Party AS Party ON Party.PartyId = e.PartyId
JOIN
    #LAKEHOUSE_SILVER_NAME#.SGESG_PartyType AS PartyType ON Party.PartyTypeId = PartyType.PartyTypeId
JOIN 
    #LAKEHOUSE_SILVER_NAME#.SGESG_PartyEvent pe ON e.PartyId = pe.PartyId
JOIN 
    #LAKEHOUSE_SILVER_NAME#.SGESG_EventType et ON pe.EventTypeId = pe.EventTypeId
WHERE
    (et.EventTypeName = 'Regular performance and career development reviews')
    AND
    (
        pe.PartyEventStartTimestamp BETWEEN '{report_start_date}' AND '{report_end_date}'
        OR pe.PartyEventEndTimestamp BETWEEN '{report_start_date}' AND '{report_end_date}'
        OR '{report_start_date}' BETWEEN pe.PartyEventStartTimestamp AND pe.PartyEventEndTimestamp
        OR '{report_end_date}' BETWEEN pe.PartyEventStartTimestamp AND pe.PartyEventEndTimestamp
    )
GROUP BY
    Gender.GenderName,
    PartyType.PartyTypeName;
"""

# Execute the query
S11380a = spark.sql(query)
S11380a.createOrReplaceTempView("S11380a")
# S11380a.show()

StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 32, Finished, Available)

### `S1-15-88-a`

In [29]:
# Query to calculate the percentage of employees entitled to take family-related leaves
query = f"""
SELECT 
    'S1-15-88-a' AS MetricId, (COUNT(DISTINCT e.EmployeeId))*100/'{total_number_of_employees}' AS MetricValue
FROM 
    #LAKEHOUSE_SILVER_NAME#.SGESG_Employee e
JOIN 
    #LAKEHOUSE_SILVER_NAME#.SGESG_PartyEvent pe ON e.PartyId = pe.PartyId
JOIN 
    #LAKEHOUSE_SILVER_NAME#.SGESG_EventType et ON pe.EventTypeId = pe.EventTypeId
WHERE
    (et.EventTypeName = 'Family-related leave')
    AND
    (
        pe.PartyEventStartTimestamp BETWEEN '{report_start_date}' AND '{report_end_date}'
        OR pe.PartyEventEndTimestamp BETWEEN '{report_start_date}' AND '{report_end_date}'
        OR '{report_start_date}' BETWEEN pe.PartyEventStartTimestamp AND pe.PartyEventEndTimestamp
        OR '{report_end_date}' BETWEEN pe.PartyEventStartTimestamp AND pe.PartyEventEndTimestamp
    )
"""

# Execute the query
S11588a = spark.sql(query)
S11588a.createOrReplaceTempView("S11588a")
# S11588a.show()

StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 33, Finished, Available)

### `S1-15-88-b`

In [30]:
# Query to calculate the percentage of employees entitled to take family-related leaves
query = f"""
SELECT 
    'S1-15-88-b' AS MetricId,
    g.GenderName, 
    (COUNT(DISTINCT e.EmployeeId))*100/'{total_number_of_employees}' AS MetricValue
FROM 
    #LAKEHOUSE_SILVER_NAME#.SGESG_Employee e
JOIN 
    #LAKEHOUSE_SILVER_NAME#.SGESG_Gender g ON g.GenderId = e.GenderId
JOIN 
    #LAKEHOUSE_SILVER_NAME#.SGESG_PartyEvent pe ON e.PartyId = pe.PartyId
JOIN 
    #LAKEHOUSE_SILVER_NAME#.SGESG_EventType et ON pe.EventTypeId = pe.EventTypeId
WHERE
    (et.EventTypeName = 'Family-related leave')
    AND
    (
        pe.PartyEventStartTimestamp BETWEEN '{report_start_date}' AND '{report_end_date}'
        OR pe.PartyEventEndTimestamp BETWEEN '{report_start_date}' AND '{report_end_date}'
        OR '{report_start_date}' BETWEEN pe.PartyEventStartTimestamp AND pe.PartyEventEndTimestamp
        OR '{report_end_date}' BETWEEN pe.PartyEventStartTimestamp AND pe.PartyEventEndTimestamp
    )
GROUP BY 
    g.GenderName
"""

# Execute the query
S11588b = spark.sql(query)
S11588b.createOrReplaceTempView("S11588b")
# S11588b.show()

StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 34, Finished, Available)

### `S1-13-80-b`

In [31]:
query = f"""
    SELECT
    'S1-13-80-b' AS MetricId,
    Gender.GenderName,
    PartyType.PartyTypeName,
    SUM(PartyHealthSafetyTrainingQuantity)/SUM(DISTINCT Employee.EmployeeId) AS MetricValue
FROM
    #LAKEHOUSE_SILVER_NAME#.SGESG_Employee As Employee
JOIN
    #LAKEHOUSE_SILVER_NAME#.SGESG_Gender As Gender ON Employee.GenderId = Gender.GenderId
JOIN
    #LAKEHOUSE_SILVER_NAME#.SGESG_Party As Party ON Party.PartyId = Employee.PartyId
JOIN
    #LAKEHOUSE_SILVER_NAME#.SGESG_PartyType As PartyType ON Party.PartyTypeId = PartyType.PartyTypeId
JOIN
    #LAKEHOUSE_SILVER_NAME#.SGESG_PartyHealthSafetyTrainingMetric As PartyHealthSafetyTrainingMetric ON Employee.PartyId = PartyHealthSafetyTrainingMetric.PartyId
JOIN
    #LAKEHOUSE_SILVER_NAME#.SGESG_HealthSafetyTrainingMetricType AS HealthSafetyTrainingMetricType ON PartyHealthSafetyTrainingMetric.HealthSafetyTrainingMetricTypeId = HealthSafetyTrainingMetricType.HealthSafetyTrainingMetricTypeId
JOIN
    #LAKEHOUSE_SILVER_NAME#.SGESG_MetricPurpose As MetricPurpose ON MetricPurpose.MetricPurposeId = PartyHealthSafetyTrainingMetric.MetricPurposeId
WHERE
    HealthSafetyTrainingMetricType.HealthSafetyTrainingMetricTypeName = 'Number of training hours'
    AND MetricPurpose.MetricPurposeName = 'Actual'
    AND (PartyHealthSafetyTrainingMetric.PeriodStartDate BETWEEN '{report_start_date}' AND '{report_end_date}'
    OR PartyHealthSafetyTrainingMetric.PeriodEndDate BETWEEN '{report_start_date}' AND '{report_end_date}'
    OR '{report_start_date}' BETWEEN PartyHealthSafetyTrainingMetric.PeriodStartDate AND PartyHealthSafetyTrainingMetric.PeriodEndDate
    OR '{report_end_date}' BETWEEN PartyHealthSafetyTrainingMetric.PeriodStartDate AND PartyHealthSafetyTrainingMetric.PeriodEndDate)
GROUP BY
    Gender.GenderName,
    PartyType.PartyTypeName;
"""

# Execute the query
S11380b = spark.sql(query)
S11380b.createOrReplaceTempView("S11380b")
# S11380b.show()

StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 35, Finished, Available)

### `PartyBusinessMetricNameAndValue`

In [32]:
query = f"""
    SELECT bm.BusinessMetricName AS BusinessMetricName, pbm.PartyBusinessMetricValue AS BusinessMetricValue
    FROM #LAKEHOUSE_SILVER_NAME#.SGESG_PartyBusinessMetric pbm
    JOIN #LAKEHOUSE_SILVER_NAME#.SGESG_MetricPurpose mp ON mp.MetricPurposeId = pbm.MetricPurposeId
    JOIN #LAKEHOUSE_SILVER_NAME#.SGESG_BusinessMetric bm ON bm.BusinessMetricId = pbm.BusinessMetricId
    JOIN #LAKEHOUSE_SILVER_NAME#.SGESG_Party As Party ON Party.PartyId = pbm.PartyId
    JOIN #LAKEHOUSE_SILVER_NAME#.SGESG_PartyType AS PartyType ON Party.PartyTypeId = PartyType.PartyTypeId
    WHERE mp.MetricPurposeName = 'Actual' 
    AND PartyType.PartyTypeName = 'LegalEntity'
    AND YEAR('{report_start_date}') = YEAR(pbm.PeriodStartDate)
    AND YEAR('{report_end_date}') = YEAR(pbm.PeriodEndDate)
"""

# Execute the query
result = spark.sql(query)
result.createOrReplaceTempView("PartyBusinessMetricNameAndValue")
# result.show()

StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 36, Finished, Available)

### `S1-6-51-c`

In [33]:
query = """
    SELECT 'S1-6-51-c' AS MetricId, BusinessMetricName, BusinessMetricValue as MetricValue
    FROM PartyBusinessMetricNameAndValue
    WHERE BusinessMetricName = 'Total number of own employee turnover'
"""

# Execute the query
S1651c = spark.sql(query)
S1651c.createOrReplaceTempView("S1651c")
# S1651c.show()

StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 37, Finished, Available)

### `S1-8-60-a`

In [34]:
query = """
    SELECT 'S1-8-60-a' AS MetricId, BusinessMetricName, BusinessMetricValue as MetricValue
    FROM PartyBusinessMetricNameAndValue
    WHERE BusinessMetricName = 'Percentage of total employees covered by collective bargaining agreements'
"""

# Execute the query
S1860a = spark.sql(query)
S1860a.createOrReplaceTempView("S1860a")
# S1860a.show()

StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 38, Finished, Available)

### `S1-16-92-a`

In [35]:
query = """
    SELECT 'S1-16-92-a' AS MetricId, BusinessMetricName, BusinessMetricValue as MetricValue
    FROM PartyBusinessMetricNameAndValue
    WHERE BusinessMetricName = 'Male-female pay gap'
"""

# Execute the query
S11692a = spark.sql(query)
S11692a.createOrReplaceTempView("S11692a")
# S11692a.show()

StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 39, Finished, Available)

### `S1-15-92-b`

In [36]:
query = """
    SELECT 'S1-15-92-b' AS MetricId, BusinessMetricName, BusinessMetricValue as MetricValue
    FROM PartyBusinessMetricNameAndValue
    WHERE BusinessMetricName = 'Ratio of the annual total compensation ratio of the highest paid individual to the median annual total compensation for all employees'
"""

# Execute the query
S11592b = spark.sql(query)
S11592b.createOrReplaceTempView("S11592b")
# S11592b.show()

StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 40, Finished, Available)

### `S1-14-84-a`

In [37]:
query = """
    SELECT 'S1-14-84-a' AS MetricId, BusinessMetricName, BusinessMetricValue as MetricValue
    FROM PartyBusinessMetricNameAndValue
    WHERE 
    BusinessMetricName = 'Percentage of employees covered by health and safety management system'
    OR BusinessMetricName = 'Percentage of non-employees covered by health and safety management system'
"""

# Execute the query
S11484a = spark.sql(query)
S11484a.createOrReplaceTempView("S11484a")
# S11484a.show()

StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 41, Finished, Available)

### `S1-8-62-a`

In [38]:
query = f"""
    SELECT 'S1-8-62-a' AS MetricId, bm.BusinessMetricName AS BusinessMetricName, Country.IsoCountryName, SUM(pbm.PartyBusinessMetricValue) AS MetricValue
    FROM #LAKEHOUSE_SILVER_NAME#.SGESG_PartyBusinessMetric pbm
    JOIN #LAKEHOUSE_SILVER_NAME#.SGESG_MetricPurpose mp ON mp.MetricPurposeId = pbm.MetricPurposeId
    JOIN #LAKEHOUSE_SILVER_NAME#.SGESG_BusinessMetric bm ON bm.BusinessMetricId = pbm.BusinessMetricId
    JOIN #LAKEHOUSE_SILVER_NAME#.SGESG_Party AS Party ON Party.PartyId = pbm.PartyId
    JOIN #LAKEHOUSE_SILVER_NAME#.SGESG_PartyType AS PartyType ON Party.PartyTypeId = PartyType.PartyTypeId
    JOIN #LAKEHOUSE_SILVER_NAME#.SGESG_PartyOrganization po ON po.PartyOrganizationPartyId = pbm.PartyId
    JOIN #LAKEHOUSE_SILVER_NAME#.SGESG_Location AS Location ON Location.LocationId = po.LocationId
    JOIN #LAKEHOUSE_SILVER_NAME#.SGESG_Country AS Country ON Country.CountryId = Location.CountryId
    WHERE mp.MetricPurposeName = 'Actual' 
    AND PartyType.PartyTypeName = 'OrganizationalUnit'
    AND bm.BusinessMetricName = 'Global percentage of employees covered at the establishment level by workers representatives'
    AND YEAR('{report_start_date}') = YEAR(pbm.PeriodStartDate)
    AND YEAR('{report_end_date}') = YEAR(pbm.PeriodEndDate)
    GROUP BY Country.IsoCountryName, bm.BusinessMetricName
"""

# Execute the query
S1862a = spark.sql(query)
S1862a.createOrReplaceTempView("S1862a")
# S1862a.show()

StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 42, Finished, Available)

### `G1-4-1-a`

In [39]:
# Filter the incidents based on the given criteria in G1-4-1-a.
query = f"""
    SELECT 'G1-4-1-a' AS MetricId, PartyTypeName, COUNT(IncidentId) AS MetricValue
    FROM IncidentInReportDuration
    WHERE (IncidentTypeName = 'Corruption' OR IncidentTypeName = 'Bribery')
      AND (PartyTypeName = 'Employee' OR PartyTypeName = 'Non-employee')
    GROUP BY PartyTypeName
"""


# Execute the query
G141a = spark.sql(query)
G141a.createOrReplaceTempView("G141a")
# G141a.show()

StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 43, Finished, Available)

### `G1-4-1-b`

In [40]:
# Filter the incidents based on the given criteria in G1-4-1-b.
query = f"""
    SELECT 'G1-4-1-b' AS MetricId, ict.IncidentCostTypeName, SUM(ic.IncidentCostAmount) AS MetricValue
    FROM IncidentInReportDuration i
    JOIN #LAKEHOUSE_SILVER_NAME#.SGESG_IncidentCost ic ON i.IncidentId = ic.IncidentId
    JOIN #LAKEHOUSE_SILVER_NAME#.SGESG_IncidentCostType ict ON ic.IncidentCostTypeId = ict.IncidentCostTypeId
    WHERE (IncidentTypeName = 'Corruption' OR IncidentTypeName = 'Bribery')
      AND (ict.IncidentCostTypeName = 'Fine')
    GROUP BY IncidentCostTypeName
"""


# Execute the query
G141b = spark.sql(query)
G141b.createOrReplaceTempView("G141b")
# G141b.show()

StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 44, Finished, Available)

### `G1-4-1-c`

In [41]:
# Filter the incidents based on the given criteria in G1-4-1-c.
query = f"""
    SELECT 'G1-4-1-c' AS MetricId, PartyTypeName, COUNT(IncidentId) AS MetricValue
    FROM IncidentInReportDuration
    WHERE (IncidentTypeName = 'Public/legal case related to corruption or bribery' OR IncidentTypeName = 'Public/legal case related to corruption' OR IncidentTypeName = 'Public/legal case related to bribery')
      AND (PartyTypeName = 'Employee' OR PartyTypeName = 'Non-employee')
    GROUP BY PartyTypeName
"""


# Execute the query
G141c = spark.sql(query)
G141c.createOrReplaceTempView("G141c")
# G141c.show()

StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 45, Finished, Available)

### `G1-4-1-d`

In [42]:
# Filter the incidents based on the given criteria in G1-4-1-d.
query = f"""
    SELECT 'G1-4-1-d' AS MetricId, PartyTypeName, COUNT(IncidentId) AS MetricValue
    FROM IncidentInReportDuration
    WHERE (IncidentTypeName = 'Workers disciplined' OR IncidentTypeName = 'Workers dismissed' OR IncidentTypeName = 'Workers disciplined/dismissed')
      AND (PartyTypeName = 'Employee' OR PartyTypeName = 'Non-employee')
    GROUP BY PartyTypeName
"""


# Execute the query
G141d = spark.sql(query)
G141d.createOrReplaceTempView("G141d")
# G141d.show()

StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 46, Finished, Available)

### `G1-4-1-e`

In [43]:
# Filter the incidents based on the given criteria in G1-4-1-e.
query = f"""
    SELECT 'G1-4-1-e' AS MetricId, PartyTypeName, COUNT(IncidentId) AS MetricValue
    FROM IncidentInReportDuration
    WHERE (IncidentTypeName = 'contractors/business partners terminated or not renewed due to corruption or bribery')
      AND (PartyTypeName = 'Employee' OR PartyTypeName = 'Non-employee')
    GROUP BY PartyTypeName
"""


# Execute the query
G141e = spark.sql(query)
G141e.createOrReplaceTempView("G141e")
# G141e.show()

StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 47, Finished, Available)

### `G1-6-1-a`

In [44]:
query = """
    SELECT 'G1-6-1-a' AS MetricId, BusinessMetricName, BusinessMetricValue AS MetricValue
    FROM PartyBusinessMetricNameAndValue
    WHERE BusinessMetricName = 'Average time the undertaking takes to pay an invoice from the date when the contractual or statutory term of payment starts to be calculated, in number of days'
"""
# Execute the query
G161a = spark.sql(query)
G161a.createOrReplaceTempView("G161a")
# G161a.show()

StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 48, Finished, Available)

### `G1-6-1-c`

In [45]:
query = """
    SELECT 'G1-6-1-c' AS MetricId, BusinessMetricName, BusinessMetricValue AS MetricValue
    FROM PartyBusinessMetricNameAndValue
    WHERE BusinessMetricName = 'Number of legal proceedings (currently outstanding) for late payments'
"""
# Execute the query
G161c = spark.sql(query)
G161c.createOrReplaceTempView("G161c")
# G161c.show()

StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 49, Finished, Available)

## Combine the individual metrics dataFrames

In [46]:
# Combine the individual metrics dataFrames
output1 = S11484b.unionAll(S11484c)\
    .unionAll(S11484d).unionAll(S11484e)\
    .unionAll(S11798a).unionAll(S11798b)\
    .unionAll(S11799a).unionAll(G141a)\
    .unionAll(G141c).unionAll(G141d)\
    .unionAll(G141e)
output1 = output1.withColumn("ReportStartDate", lit(report_start_date)) \
    .withColumn("ReportEndDate", lit(report_end_date))

output2 = S11798c.unionAll(S11799b).unionAll(G141b)
output2 = output2.withColumn("ReportStartDate", lit(report_start_date)) \
    .withColumn("ReportEndDate", lit(report_end_date))

output3 = S1965a.withColumn("ReportStartDate", lit(report_start_date)) \
    .withColumn("ReportEndDate", lit(report_end_date))

output4 = S1965b1.unionAll(S1965b2)\
    .unionAll(S1965b3)
output4 = output4.withColumn("ReportStartDate", lit(report_start_date)) \
    .withColumn("ReportEndDate", lit(report_end_date))

output5 = S1651a.withColumn("ReportStartDate", lit(report_start_date)) \
    .withColumn("ReportEndDate", lit(report_end_date))

output6 = S1651b1.unionAll(S1651b2)\
    .unionAll(S1651b3).unionAll(S1652a)\
    .unionAll(S1652b)
output6 = output6.withColumn("ReportStartDate", lit(report_start_date)) \
    .withColumn("ReportEndDate", lit(report_end_date))

output7 = S11276.unionAll(S11588a)
output7 = output7.withColumn("ReportStartDate", lit(report_start_date)) \
    .withColumn("ReportEndDate", lit(report_end_date))

output8 = S11277.unionAll(S11588b)
output8 = output8.withColumn("ReportStartDate", lit(report_start_date)) \
    .withColumn("ReportEndDate", lit(report_end_date))

output9 = S11380a.unionAll(S11380b)
output9 = output9.withColumn("ReportStartDate", lit(report_start_date)) \
    .withColumn("ReportEndDate", lit(report_end_date))

output10 = S1651c.unionAll(S1860a)\
    .unionAll(S11692a).unionAll(S11592b)\
    .unionAll(S11484a).unionAll(G161a).unionAll(G161c)
output10 = output10.withColumn("ReportStartDate", lit(report_start_date)) \
    .withColumn("ReportEndDate", lit(report_end_date))

output11 = S1862a.withColumn("ReportStartDate", lit(report_start_date)) \
    .withColumn("ReportEndDate", lit(report_end_date))

# Show the final output tables
#output1.show()
# output2.show()
# output3.show()
# output4.show()
# output5.show()
# output6.show()
# output7.show()
# output8.show()
# output9.show()
# output10.show()
# output11.show()

StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 50, Finished, Available)

In [47]:
TotalFatalities_PerPartyType=output1 
TotalIncidentCost_PerIncidentType=output2 
TotalNumber_OfEmp_WithKeyEmployeeIndicator_PerGender=output3  
TotalNumver_OfEmployee_PerAgeGroup=output4
TotalNumber_OfEmployee_PerCountry_ByGender=output5
TotalNumber_OfEmployee_PerSubdivisionCategory_ByGenderAndPartyType=output6
TotalPercentage_OfEmployees_WithDisability=output7
TotalPercentage_OfEmployees_EntitledToTakeFamilyRelatedLeaves=output8
TotalPercentage_OfEmployeeParticipated_PerParty_ByGender=output9
TotalPercentage_PerBusiness=output10
TotalPercentage_PerBusinessByCountry=output11

StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 51, Finished, Available)

In [48]:


df = spark.sql("SELECT * FROM #LAKEHOUSE_GOLD_NAME#.sgesg_totalpercentage_perbusiness LIMIT 1000")
display(df)

StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 52, Finished, Available)

AnalysisException: Table or view not found: Lakehouse_Gold.SGESG_totalpercentage_perbusiness; line 1 pos 14;
'GlobalLimit 1000
+- 'LocalLimit 1000
   +- 'Project [*]
      +- 'UnresolvedRelation [Lakehouse_Gold, SGESG_totalpercentage_perbusiness], [], false


In [None]:
# df4=output11.select(col("ReportStartDate"),col("ReportEndDate"))

# df4.write.format("delta").mode("append").saveAsTable('#LAKEHOUSE_SILVER_NAME#.SGESG_Reporting_Year')



In [51]:
TotalFatalities_PerPartyType=output1 
TotalIncidentCost_PerIncidentType=output2 
TotalNumber_OfEmp_WithKeyEmployeeIndicator_PerGender=output3  
TotalNumber_OfEmployee_PerAgeGroup=output4
TotalNumber_OfEmployee_PerCountry_ByGender=output5
TotalNumber_OfEmployee_PerSubdivisionCategory_ByGenderAndPartyType=output6
TotalPercentage_OfEmployees_WithDisability=output7
TotalPercentage_OfEmployees_EntitledToTakeFamilyRelatedLeaves=output8
TotalPercentage_OfEmployeeParticipated_PerParty_ByGender=output9
TotalPercentage_PerBusiness=output10
TotalPercentage_PerBusinessByCountry=output11

StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 55, Finished, Available)

In [52]:
# Export the final output tables to gold layer in data lake.
outputs = [TotalFatalities_PerPartyType, TotalIncidentCost_PerIncidentType,TotalNumber_OfEmp_WithKeyEmployeeIndicator_PerGender,TotalNumber_OfEmployee_PerAgeGroup,TotalNumber_OfEmployee_PerCountry_ByGender
,TotalNumber_OfEmployee_PerSubdivisionCategory_ByGenderAndPartyType,TotalPercentage_OfEmployees_WithDisability,TotalPercentage_OfEmployees_EntitledToTakeFamilyRelatedLeaves,TotalPercentage_OfEmployeeParticipated_PerParty_ByGender,
TotalPercentage_PerBusiness,TotalPercentage_PerBusinessByCountry]
TableName =["TotalFatalities_PerPartyType", "TotalIncidentCost_PerIncidentType","TotalNumber_OfEmp_WithKeyEmployeeIndicator_PerGender","TotalNumber_OfEmployee_PerAgeGroup","TotalNumber_OfEmployee_PerCountry_ByGender"
,"TotalNumber_OfEmployee_PerSubdivisionCategory_ByGenderAndPartyType","TotalPercentage_OfEmployees_WithDisability","TotalPercentage_OfEmployees_EntitledToTakeFamilyRelatedLeaves","TotalPercentage_OfEmployeeParticipated_PerParty_ByGender",
"TotalPercentage_PerBusiness","TotalPercentage_PerBusinessByCountry"]


for i, output in enumerate(outputs):
    if i==0:
        output.write.format("delta").mode("overwrite").saveAsTable('#LAKEHOUSE_GOLD_NAME#.SGESG_'+TableName[i]) 
    
        print(TableName[i])
    elif i==1:
        output.write.format("delta").mode("overwrite").saveAsTable('#LAKEHOUSE_GOLD_NAME#.SGESG_'+TableName[i]) 
             
        print(TableName[i])
    elif i==2:
        # output=output.withColumn("ReportStartDate",to_date(unix_timestamp(col('ReportStartDate'), 'MM-dd-yyyy').cast("timestamp"))).withColumn("ReportEndDate",to_date(unix_timestamp(col('ReportEndDate'), 'MM-dd-yyyy').cast("timestamp")))
        output.write.format("delta").mode("overwrite").saveAsTable('#LAKEHOUSE_GOLD_NAME#.SGESG_'+TableName[i])  
        print(TableName[i])
    elif i==3:
        # output=output.withColumn("ReportStartDate",to_date(unix_timestamp(col('ReportStartDate'), 'MM-dd-yyyy').cast("timestamp"))).withColumn("ReportEndDate",to_date(unix_timestamp(col('ReportEndDate'), 'MM-dd-yyyy').cast("timestamp")))
        output.write.format("delta").mode("overwrite").saveAsTable('#LAKEHOUSE_GOLD_NAME#.SGESG_'+TableName[i]) 
        print(TableName[i])
    elif i==4:
        # output=output.withColumn("ReportStartDate",to_date(unix_timestamp(col('ReportStartDate'), 'MM-dd-yyyy').cast("timestamp"))).withColumn("ReportEndDate",to_date(unix_timestamp(col('ReportEndDate'), 'MM-dd-yyyy').cast("timestamp")))
        output.write.format("delta").mode("overwrite").saveAsTable('#LAKEHOUSE_GOLD_NAME#.SGESG_'+TableName[i])  
        print(TableName[i])
    elif i==5:
        # output=output.withColumn("ReportStartDate",to_date(unix_timestamp(col('ReportStartDate'), 'MM-dd-yyyy').cast("timestamp"))).withColumn("ReportEndDate",to_date(unix_timestamp(col('ReportEndDate'), 'MM-dd-yyyy').cast("timestamp")))
        output.write.format("delta").mode("overwrite").saveAsTable('#LAKEHOUSE_GOLD_NAME#.SGESG_'+TableName[i])
        print(TableName[i])
    elif i==6:
        # output=output.withColumn("ReportStartDate",to_date(unix_timestamp(col('ReportStartDate'), 'MM-dd-yyyy').cast("timestamp"))).withColumn("ReportEndDate",to_date(unix_timestamp(col('ReportEndDate'), 'MM-dd-yyyy').cast("timestamp")))
        output.write.format("delta").mode("overwrite").saveAsTable('#LAKEHOUSE_GOLD_NAME#.SGESG_'+TableName[i])  
              
        print(TableName[i])
    elif i==7:
        # output=output.withColumn("ReportStartDate",to_date(unix_timestamp(col('ReportStartDate'), 'MM-dd-yyyy').cast("timestamp"))).withColumn("ReportEndDate",to_date(unix_timestamp(col('ReportEndDate'), 'MM-dd-yyyy').cast("timestamp")))
        output.write.format("delta").mode("overwrite").saveAsTable('#LAKEHOUSE_GOLD_NAME#.SGESG_'+TableName[i])        
        print(TableName[i])
    elif i==8:
        # output=output.withColumn("ReportStartDate",to_date(unix_timestamp(col('ReportStartDate'), 'MM-dd-yyyy').cast("timestamp"))).withColumn("ReportEndDate",to_date(unix_timestamp(col('ReportEndDate'), 'MM-dd-yyyy').cast("timestamp")))
        output.write.format("delta").mode("overwrite").saveAsTable('#LAKEHOUSE_GOLD_NAME#.SGESG_'+TableName[i])        
        print(TableName[i])
    elif i==9:
        # output=output.withColumn("ReportStartDate",to_date(unix_timestamp(col('ReportStartDate'), 'MM-dd-yyyy').cast("timestamp"))).withColumn("ReportEndDate",to_date(unix_timestamp(col('ReportEndDate'), 'MM-dd-yyyy').cast("timestamp")))
        output.write.format("delta").mode("overwrite").saveAsTable('#LAKEHOUSE_GOLD_NAME#.SGESG_'+TableName[i])        
        print(TableName[i])
    elif i==10:
        # output=output.withColumn("ReportStartDate",to_date(unix_timestamp(col('ReportStartDate'), 'MM-dd-yyyy').cast("timestamp"))).withColumn("ReportEndDate",to_date(unix_timestamp(col('ReportEndDate'), 'MM-dd-yyyy').cast("timestamp")))
        output.write.format("delta").mode("overwrite").saveAsTable('#LAKEHOUSE_GOLD_NAME#.SGESG_'+TableName[i])        
        print(TableName[i])
    elif i==11:
        # output=output.withColumn("ReportStartDate",to_date(unix_timestamp(col('ReportStartDate'), 'MM-dd-yyyy').cast("timestamp"))).withColumn("ReportEndDate",to_date(unix_timestamp(col('ReportEndDate'), 'MM-dd-yyyy').cast("timestamp")))
        output.write.format("delta").mode("overwrite").saveAsTable('#LAKEHOUSE_GOLD_NAME#.SGESG_'+TableName[i])        
        print(TableName[i])

print("Data Loaded in Gold Layer")
    
    #output.write.format("delta").mode("overwrite").saveAsTable('#LAKEHOUSE_GOLD_NAME#.SGESG_'+table.name)
#     print(output)
    #exportData(output, destinationFolderUrl + "/sgdata" + str(i+1))


StatementMeta(, 6f401221-c0ba-4eac-a9de-af20153c8d6f, 56, Finished, Available)

TotalFatalities_PerPartyType
TotalIncidentCost_PerIncidentType
TotalNumber_OfEmp_WithKeyEmployeeIndicator_PerGender
TotalNumber_OfEmployee_PerAgeGroup
TotalNumber_OfEmployee_PerCountry_ByGender
TotalNumber_OfEmployee_PerSubdivisionCategory_ByGenderAndPartyType
TotalPercentage_OfEmployees_WithDisability
TotalPercentage_OfEmployees_EntitledToTakeFamilyRelatedLeaves
TotalPercentage_OfEmployeeParticipated_PerParty_ByGender
TotalPercentage_PerBusiness
TotalPercentage_PerBusinessByCountry
Data Loaded in Gold Layer
