### Create Custom Mapping Dimension

In [6]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import (col, monotonically_increasing_id, count as sparkCount)

In [2]:
# Create the Spark session for this notebook, or reuse one that already exists.
spark = (
    SparkSession.builder
    .appName("fact_mapping")
    .getOrCreate()
)

In [26]:
# Load the custom-mapping CSV, taking column names from the header row and
# letting Spark infer the column types, then preview the result.
df_custom_mapping = (
    spark.read
    .option("header", True)
    .option("inferSchema", True)
    .csv("source/Custom_Mapping_DIM.csv")
)
df_custom_mapping.show()

+---------------+-----------+------------+-----------------+
|ProductCategory|StoreRegion|CustomerType|     MappingLabel|
+---------------+-----------+------------+-----------------+
|         Tablet|       East|      Retail|        Side Head|
|         Laptop|       West|      Retail|      Premium Box|
|         Mobile|      North|   Wholesale|North Dist Mobile|
|         Tablet|      South|      Retail|      Tablet Push|
|         Laptop|      North|   Wholesale|    Laptop Supply|
+---------------+-----------+------------+-----------------+



#### Ensure that mapping combinations are unique

In [27]:
# Count the rows for each (ProductCategory, StoreRegion, CustomerType) combination.
# count("*") counts every row, including rows whose MappingLabel is null, so a
# duplicated key combination is detected even when a label is missing.
df_custom_mapping_check = (
    df_custom_mapping
    .groupBy(col("ProductCategory"), col("StoreRegion"), col("CustomerType"))
    .agg(sparkCount("*").alias("count"))
)

In [28]:
# Number of key combinations whose row count is greater than one.
count_duplicate = df_custom_mapping_check.where(col("count") > 1).count()

In [29]:
# Duplicates are only reported here; no rows are removed and nothing is raised.
if count_duplicate >= 1:
    print("Handle Duplicate Mapping combinations")

#### Add a surrogate key and an N/A row to the mapping table

In [30]:
# Attach a generated 64-bit surrogate key to every mapping row. The generated
# IDs are unique and increasing, but Spark does not guarantee they are consecutive.
df_custom_mapping3 = df_custom_mapping.withColumn(
    "CustomMappingKey",
    monotonically_increasing_id(),
)

In [31]:
# Single placeholder row: every descriptive column is "N/A" and the surrogate key is -1.
# Note that the column names are listed with MappingLabel first, which is not the
# order of the columns in the CSV-based frame.
df_mapping_na = spark.createDataFrame(
    data=[("N/A", "N/A", "N/A", "N/A", -1)],
    schema=["MappingLabel", "ProductCategory", "StoreRegion", "CustomerType", "CustomMappingKey"],
)

In [32]:
# Append the N/A placeholder row to the keyed mapping rows.
# unionByName matches columns by name rather than by position: df_mapping_na
# declares MappingLabel as its first column while df_custom_mapping3 has it
# fourth, so a positional union (union / unionAll) would put values under the
# wrong column as soon as the placeholder values differ from one another.
dim_custom_mapping = df_custom_mapping3.unionByName(df_mapping_na)

In [33]:
# Preview the combined dimension (mapping rows plus the N/A placeholder row).
dim_custom_mapping.show()

+---------------+-----------+------------+-----------------+----------------+
|ProductCategory|StoreRegion|CustomerType|     MappingLabel|CustomMappingKey|
+---------------+-----------+------------+-----------------+----------------+
|         Tablet|       East|      Retail|        Side Head|               0|
|         Laptop|       West|      Retail|      Premium Box|               1|
|         Mobile|      North|   Wholesale|North Dist Mobile|               2|
|         Tablet|      South|      Retail|      Tablet Push|               3|
|         Laptop|      North|   Wholesale|    Laptop Supply|               4|
|            N/A|        N/A|         N/A|              N/A|              -1|
+---------------+-----------+------------+-----------------+----------------+



In [39]:
# Persist the dimension as Parquet, replacing any data already at the target path.
(
    dim_custom_mapping
    .write
    .mode("overwrite")
    .parquet("warehouse/dim_custom_mapping")
)

In [40]:
# Read the dimension back from the path it was written to
# ("warehouse/dim_custom_mapping") so this check shows the data just persisted.
spark.read.parquet("warehouse/dim_custom_mapping").show()

+---------------+-----------+------------+-----------------+----------------+
|ProductCategory|StoreRegion|CustomerType|     MappingLabel|CustomMappingKey|
+---------------+-----------+------------+-----------------+----------------+
|         Tablet|       East|      Retail|        Side Head|               0|
|         Laptop|       West|      Retail|      Premium Box|               1|
|         Mobile|      North|   Wholesale|North Dist Mobile|               2|
|         Tablet|      South|      Retail|      Tablet Push|               3|
|         Laptop|      North|   Wholesale|    Laptop Supply|               4|
|            N/A|        N/A|         N/A|              N/A|              -1|
+---------------+-----------+------------+-----------------+----------------+

