In [0]:
# Import modules and dependencies
from pyspark.sql.functions import col

In [0]:
# Load the data from the database
# Selects only the eight columns this notebook works with from the
# `spark_catalog`.`warehousedb2`.`All_Sales` table.
# NOTE: despite its name, `grouped_df` holds the un-aggregated rows at this
# point; a later cell rebinds the same name to the grouped result.
grouped_df = spark.sql("""
    SELECT `spark_catalog`.`warehousedb2`.`All_Sales`.`Year`,
         `spark_catalog`.`warehousedb2`.`All_Sales`.`Month`,
         `spark_catalog`.`warehousedb2`.`All_Sales`.`Category`,
         `spark_catalog`.`warehousedb2`.`All_Sales`.`Ship_Qty`,
         `spark_catalog`.`warehousedb2`.`All_Sales`.`Profit`,
         `spark_catalog`.`warehousedb2`.`All_Sales`.`Extended_price`,
         `spark_catalog`.`warehousedb2`.`All_Sales`.`Margin_Percentage_Current`,
         `spark_catalog`.`warehousedb2`.`All_Sales`.`Current_Cost`
     FROM `spark_catalog`.`warehousedb2`.`All_Sales`
""")

In [0]:
# Display the dataframe
# At this point `grouped_df` still contains the rows exactly as selected from
# All_Sales (no aggregation has been applied yet).
display(grouped_df)

Year,Month,Category,Ship_Qty,Profit,Extended_price,Margin_Percentage_Current,Current_Cost
2018,1,PVC Chair Mat,1,12.901000000000002,33.95,38.0,21.049
2018,1,Entrance Mat,1,59.102,101.9,58.0,42.79800000000001
2018,1,Anti-Fatigue Mat,2,58.232,100.4,58.0,42.168000000000006
2018,1,Polycarbonate Chair Mat,2,117.0,260.0,45.0,143.0
2018,1,Polycarbonate Chair Mat,4,138.908,338.8,41.0,199.892
2018,1,Polycarbonate Chair Mat,2,94.92,226.0,42.0,131.07999999999998
2018,1,Polycarbonate Chair Mat,5,129.375,287.5,45.0,158.125
2018,1,Polycarbonate Chair Mat,1,30.36,69.0,44.0,38.64
2018,1,Recycled Chair Mat,6,141.12,294.0,48.0,152.88
2018,1,Recycled Chair Mat,3,102.0,204.0,50.0,102.0


In [0]:
# Aggregate per (Year, Month, Category) and order the result by the same keys.
# NOTE: this rebinds `grouped_df`, so the cell expects the un-aggregated frame
# produced by the load cell; re-run that cell before re-running this one,
# because the summed columns are renamed (e.g. `sum(Ship_Qty)`).
group_keys = ["Year", "Month", "Category"]

grouped_df = (
    grouped_df
    .groupBy(*group_keys)
    .sum("Ship_Qty", "Profit", "Current_Cost")  # numeric totals per group
    .orderBy(*group_keys)
)

In [0]:
# Display the grouped dataframe
# One row per (Year, Month, Category) with the summed Ship_Qty, Profit and
# Current_Cost columns, ordered by Year, Month, Category.
display(grouped_df)

Year,Month,Category,sum(Ship_Qty),sum(Profit),sum(Current_Cost)
2018,1,Anti-Fatigue Mat,149,4769.465000000001,4071.434999999998
2018,1,Desk Pad,138,935.0400000000002,833.7599999999999
2018,1,Entrance Mat,30,1434.7155000000002,1107.5845
2018,1,PVC Chair Mat,209,5654.6841,4266.165900000002
2018,1,Polycarbonate Chair Mat,96,4758.329599999999,4483.0104
2018,1,Porcelain Whiteboard,31,2539.577,2553.423000000001
2018,1,Recycled Chair Mat,16,552.0582000000002,462.9618
2018,1,Steel Whiteboard,86,5896.4535000000005,2932.4964999999984
2018,1,Tempered Glass Whiteboard,4,166.1635,119.4865
2018,2,Anti-Fatigue Mat,315,10632.959,7618.190999999998


# USE WIDGETS TO CREATE DROPDOWN FILTERS

In [0]:
# Remove any existing widgets
# Clears every widget currently attached to the notebook so the dropdowns
# created further down are defined from a clean state.
dbutils.widgets.removeAll()

In [0]:
# Bring the distinct "Year" values back to the driver as a plain Python list
year_rows = grouped_df.select('Year').distinct().collect()
years = [row['Year'] for row in year_rows]

# Show the values on one comma-separated line
print(','.join(str(year) for year in years))

2018,2019,2020,2021,2022


In [0]:
# Bring the distinct "Category" values back to the driver as a plain Python list
category_rows = grouped_df.select('Category').distinct().collect()
categories = [row['Category'] for row in category_rows]

# Show the values on one comma-separated line
print(','.join(str(category) for category in categories))

Desk Pad,Entrance Mat,PVC Chair Mat,Recycled Chair Mat,Tempered Glass Whiteboard,Anti-Fatigue Mat,Porcelain Whiteboard,Polycarbonate Chair Mat,Steel Whiteboard,Tempered Glass Chair Mat


In [0]:
# Create the Dropdown Filters and Visualizations for the Dashboard

# Sentinel choices that mean "do not filter on this column"
ALL_YEARS = 'All'
ALL_CATEGORIES = 'All categories'

# Selectable values, defined once so the widgets and the filter logic below
# cannot drift apart.
year_choices = ['2018', '2019', '2020', '2021', '2022', ALL_YEARS]
category_choices = [
    'Anti-Fatigue Mat',
    'Desk Pad',
    'Entrance Mat',
    'Polycarbonate Chair Mat',
    'Porcelain Whiteboard',
    'PVC Chair Mat',
    'Recycled Chair Mat',
    'Steel Whiteboard',
    'Tempered Glass Chair Mat',
    'Tempered Glass Whiteboard',
    ALL_CATEGORIES,
]

# Create the filter for Years
dbutils.widgets.dropdown(name='year_filter', defaultValue='2018', choices=year_choices, label='Select Year')

# Create the filter for Categories
dbutils.widgets.dropdown(name='category_filter', defaultValue='Anti-Fatigue Mat', choices=category_choices, label='Select Product Category')

# Save the dropdown widget values into variables
dropdown_filter_value_year = dbutils.widgets.get("year_filter")
dropdown_filter_value_category = dbutils.widgets.get("category_filter")

# Start from the full grouped frame and narrow it only for the dimensions
# where a specific value (not the "all" sentinel) was selected. A dropdown can
# only return one of its choices, so this covers the same four cases as an
# explicit if/elif chain without re-listing every choice.
filtered_df = grouped_df
if dropdown_filter_value_year != ALL_YEARS:
    # Widget values are strings; Spark coerces the comparison against Year.
    filtered_df = filtered_df.filter(col('Year') == dropdown_filter_value_year)
if dropdown_filter_value_category != ALL_CATEGORIES:
    filtered_df = filtered_df.filter(col('Category') == dropdown_filter_value_category)

# Single display call so the cell's visualizations always bind to one output
display(filtered_df)

Year,Month,Category,sum(Ship_Qty),sum(Profit),sum(Current_Cost)
2018,1,Anti-Fatigue Mat,149,4769.465000000001,4071.434999999998
2018,1,Desk Pad,138,935.0400000000002,833.7599999999999
2018,1,Entrance Mat,30,1434.7155000000002,1107.5845
2018,1,PVC Chair Mat,209,5654.6841,4266.165900000002
2018,1,Polycarbonate Chair Mat,96,4758.329599999999,4483.0104
2018,1,Porcelain Whiteboard,31,2539.577,2553.423000000001
2018,1,Recycled Chair Mat,16,552.0582000000002,462.9618
2018,1,Steel Whiteboard,86,5896.4535000000005,2932.4964999999984
2018,1,Tempered Glass Whiteboard,4,166.1635,119.4865
2018,2,Anti-Fatigue Mat,315,10632.959,7618.190999999998


Databricks visualization. Run in Databricks to view.

Databricks visualization. Run in Databricks to view.

Databricks visualization. Run in Databricks to view.

Databricks visualization. Run in Databricks to view.

Databricks visualization. Run in Databricks to view.

Databricks visualization. Run in Databricks to view.

Databricks visualization. Run in Databricks to view.

Databricks visualization. Run in Databricks to view.

Databricks visualization. Run in Databricks to view.

Databricks visualization. Run in Databricks to view.

In [0]:
# Create Visualizations for Product Category

# Give the grouped dataframe a second handle named "product_categories".
# NOTE(review): DataFrame.alias() returns a DataFrame with an alias set; it
# does not appear to materialize an independent copy of the data — confirm if
# a true copy was intended.
product_categories = grouped_df.alias("product_categories")
 
# Display the data
display(product_categories)

Year,Month,Category,sum(Ship_Qty),sum(Profit),sum(Current_Cost)
2018,1,Anti-Fatigue Mat,149,4769.465000000001,4071.434999999998
2018,1,Desk Pad,138,935.0400000000002,833.7599999999999
2018,1,Entrance Mat,30,1434.7155000000002,1107.5845
2018,1,PVC Chair Mat,209,5654.6841,4266.165900000002
2018,1,Polycarbonate Chair Mat,96,4758.329599999999,4483.0104
2018,1,Porcelain Whiteboard,31,2539.577,2553.423000000001
2018,1,Recycled Chair Mat,16,552.0582000000002,462.9618
2018,1,Steel Whiteboard,86,5896.4535000000005,2932.4964999999984
2018,1,Tempered Glass Whiteboard,4,166.1635,119.4865
2018,2,Anti-Fatigue Mat,315,10632.959,7618.190999999998


Databricks visualization. Run in Databricks to view.

Databricks visualization. Run in Databricks to view.