### Create Custom Mapping Dimension

In [1]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import (col, monotonically_increasing_id)

In [2]:
# Start (or reuse) the Spark session for this notebook under the app name
# "fact_mapping".
spark = (
    SparkSession.builder
    .appName("fact_mapping")
    .getOrCreate()
)

In [3]:
# Load the custom-mapping source file. The first CSV row is treated as the
# header and column types are inferred from the data.
df_custom_mapping = spark.read.csv(
    "source/Custom_Mapping_DIM.csv",
    header=True,
    inferSchema=True,
)
df_custom_mapping.show()

+---------------+-----------+------------+-----------------+
|ProductCategory|StoreRegion|CustomerType|     MappingLabel|
+---------------+-----------+------------+-----------------+
|         Tablet|       East|      Retail|        Side Head|
|         Laptop|       West|      Retail|      Premium Box|
|         Mobile|      North|   Wholesale|North Dist Mobile|
|         Tablet|      South|      Retail|      Tablet Push|
|         Laptop|      North|   Wholesale|    Laptop Supply|
+---------------+-----------+------------+-----------------+



In [5]:
# Tag every mapping row with a generated "MappingKey" id.
# The column is derived from df_custom_mapping itself: the previous source,
# `df_category`, is not defined anywhere in the visible notebook, so the cell
# failed on a fresh kernel (and, had it existed, would have replaced the
# mapping frame that the later joins rely on).
# withColumn replaces an existing column of the same name, so re-running this
# cell does not add duplicate columns.
df_custom_mapping = df_custom_mapping.withColumn(
    "MappingKey", monotonically_increasing_id()
)

#### Add Dimension Keys for Mapping Columns

In [11]:
# Read the three previously written dimension tables (parquet) whose keys are
# attached to the mapping rows in the join below.
df_dim_product_category = spark.read.format("parquet").load(
    "spark-warehouse/dim_product_category"
)
df_dim_store_region = spark.read.format("parquet").load(
    "spark-warehouse/dim_store_region"
)
df_dim_customer_type = spark.read.format("parquet").load(
    "spark-warehouse/dim_customer_type"
)

In [19]:
# Swap each descriptive mapping column for the key of the matching dimension.
# All three joins are inner joins, so a mapping row is kept only when it finds
# a match in every dimension. Only the label and the three keys are retained.
df_custom_mapping2 = (
    df_custom_mapping
    .join(
        df_dim_product_category,
        df_custom_mapping.ProductCategory == df_dim_product_category.ProductCategory,
        "inner",
    )
    .join(
        df_dim_store_region,
        df_custom_mapping.StoreRegion == df_dim_store_region.StoreRegion,
        "inner",
    )
    .join(
        df_dim_customer_type,
        df_custom_mapping.CustomerType == df_dim_customer_type.CustomerType,
        "inner",
    )
    .select(
        [
            col("MappingLabel"),
            col("ProductCategoryKey"),
            col("StoreRegionKey"),
            col("CustomerTypeKey"),
        ]
    )
)

In [28]:
# Print the joined mapping rows (label plus the three dimension keys) to
# check the result of the joins.
df_custom_mapping2.show()

+-----------------+------------------+--------------+---------------+
|     MappingLabel|ProductCategoryKey|StoreRegionKey|CustomerTypeKey|
+-----------------+------------------+--------------+---------------+
|    Laptop Supply|                 0|             0|              1|
|      Premium Box|                 0|             3|              0|
|North Dist Mobile|                 1|             0|              1|
|      Tablet Push|                 2|             1|              0|
|        Side Head|                 2|             2|              0|
+-----------------+------------------+--------------+---------------+



#### Add Surrogate Key and N/A Row for Mapping Table

In [29]:
# Attach the surrogate key column for the mapping dimension.
# NOTE(review): monotonically_increasing_id() is documented as unique and
# increasing but not necessarily consecutive; the 0..4 sequence shown in the
# output below is not guaranteed for other inputs or partition layouts.
df_custom_mapping3 = df_custom_mapping2.withColumn(
    "CustomMappingKey",
    monotonically_increasing_id(),
)

In [30]:
# Single-row frame holding the "N/A" placeholder member: label "N/A" with -1
# in every key column, laid out with the same column names as the mapping
# frame it is combined with.
df_mapping_na = spark.createDataFrame(
    [("N/A", -1, -1, -1, -1)],
    schema=[
        "MappingLabel",
        "ProductCategoryKey",
        "StoreRegionKey",
        "CustomerTypeKey",
        "CustomMappingKey",
    ],
)

In [31]:
# Append the "N/A" placeholder row to the keyed mapping rows.
# unionByName matches columns by name rather than by position, so a change in
# column order in either frame cannot silently put values in the wrong
# column. The result keeps the column order of df_custom_mapping3.
dim_custom_mapping = df_custom_mapping3.unionByName(df_mapping_na)

In [33]:
# Print the finished dimension, including the appended "N/A" row.
dim_custom_mapping.show()

+-----------------+------------------+--------------+---------------+----------------+
|     MappingLabel|ProductCategoryKey|StoreRegionKey|CustomerTypeKey|CustomMappingKey|
+-----------------+------------------+--------------+---------------+----------------+
|    Laptop Supply|                 0|             0|              1|               0|
|      Premium Box|                 0|             3|              0|               1|
|North Dist Mobile|                 1|             0|              1|               2|
|      Tablet Push|                 2|             1|              0|               3|
|        Side Head|                 2|             2|              0|               4|
|              N/A|                -1|            -1|             -1|              -1|
+-----------------+------------------+--------------+---------------+----------------+



In [35]:
# Persist the dimension as the parquet table "dim_custom_mapping", replacing
# any existing table contents (overwrite mode).
dim_custom_mapping.write.saveAsTable(
    "dim_custom_mapping",
    format="parquet",
    mode="overwrite",
)

In [36]:
# Read the written parquet files back from the warehouse directory and print
# them to confirm what was saved.
(
    spark.read
    .parquet("spark-warehouse/dim_custom_mapping")
    .show()
)

+-----------------+------------------+--------------+---------------+----------------+
|     MappingLabel|ProductCategoryKey|StoreRegionKey|CustomerTypeKey|CustomMappingKey|
+-----------------+------------------+--------------+---------------+----------------+
|    Laptop Supply|                 0|             0|              1|               0|
|      Premium Box|                 0|             3|              0|               1|
|North Dist Mobile|                 1|             0|              1|               2|
|      Tablet Push|                 2|             1|              0|               3|
|        Side Head|                 2|             2|              0|               4|
|              N/A|                -1|            -1|             -1|              -1|
+-----------------+------------------+--------------+---------------+----------------+

