In [1]:
# Script to initialize the Data Warehouse/Lakehouse and create the required tables

In [2]:
# Import required libraries
from pyspark.sql import SparkSession
from lib.spark_session import get_spark_session

In [3]:
# Generate SparkSession
# NOTE(review): the app name "Drop Databases" does not match what this notebook
# does (it creates databases and tables) — confirm whether it was copied from
# another script before renaming it.
spark: SparkSession = get_spark_session("Drop Databases")

# uiWebUrl is None when the Spark UI is disabled; formatting it (instead of
# concatenating with "+") avoids a TypeError that would abort the notebook
# before any database or table is created.
print(f"SPARK_APP: Spark Session UI - {spark.sparkContext.uiWebUrl}")

SPARK_APP: Spark Session UI - http://46346aae9d54:4040


In [4]:
# Ensure the edw, edw_stg and edw_ld databases exist in the catalog
# ("if not exists" makes this safe to re-run), then list all databases.
for database_name in ("edw", "edw_stg", "edw_ld"):
    spark.sql("create database if not exists " + database_name)

spark.sql("show databases").show()

+---------+
|namespace|
+---------+
|  default|
|      edw|
|   edw_ld|
|  edw_stg|
+---------+



In [5]:
# Store dimension (edw.dim_store).
# Any existing table is dropped first, so re-running this cell recreates the
# table from scratch with the column list below.
spark.sql("drop table if exists edw.dim_store")

# DDL for the Delta table: a row_wid key, the store's id/name/contact columns,
# and insert/update timestamps.
dim_store_ddl = """
create table edw.dim_store (
    row_wid string,
    store_id string,
    store_name string,
    address string,
    city string,
    state string,
    zip_code string,
    phone_number string,
    insert_dt timestamp,
    update_dt timestamp
)
USING delta
;
"""
spark.sql(dim_store_ddl)

print("SPARK-APP: Store dimension created")

SPARK-APP: Store dimension created


In [6]:
# Plan type dimension (edw.dim_plan_type).
# Any existing table is dropped first, so re-running this cell recreates it.
spark.sql("drop table if exists edw.dim_plan_type")

# DDL for the Delta table: a plan code, its name, and insert/update timestamps.
dim_plan_type_ddl = """
create table edw.dim_plan_type (
    plan_type_code string,
    plan_name string,
    insert_dt timestamp,
    update_dt timestamp
)
USING delta
;
"""
spark.sql(dim_plan_type_ddl)

print("SPARK-APP: Plan Type dimension created")

SPARK-APP: Plan Type dimension created


In [7]:
# Date dimension (edw.dim_date).
# Any existing table is dropped first, so re-running this cell recreates it.
spark.sql("drop table if exists edw.dim_date")

# DDL for the Delta table: a row_wid key, the calendar date with its
# day/month/year/day_of_week parts, and insert/update timestamps.
dim_date_ddl = """
create table edw.dim_date (
    row_wid string,
    date date,
    day int,
    month int,
    year int,
    day_of_week string,
    insert_dt timestamp,
    update_dt timestamp
)
USING delta
;
"""
spark.sql(dim_date_ddl)

print("SPARK-APP: Date dimension created")

SPARK-APP: Date dimension created


In [8]:
# Product dimension (edw.dim_product).
# Any existing table is dropped first, so re-running this cell recreates it.
spark.sql("drop table if exists edw.dim_product")

# DDL for the Delta table: a row_wid key, product attributes, and
# insert/update timestamps.
# NOTE(review): effective_start_dt / effective_end_dt / active_flg look like
# row-versioning columns for tracking history — confirm against the load job.
# NOTE(review): price is declared bigint; confirm the unit the loader writes.
dim_product_ddl = """
create table edw.dim_product (
    row_wid string,
    product_id string,
    product_name string,
    brand string,
    type string,
    flavor string,
    size string,
    price bigint,
    expiration_dt date,
    image_url string,
    effective_start_dt date,
    effective_end_dt date,
    active_flg int,
    insert_dt timestamp,
    update_dt timestamp
)
USING delta
;
"""
spark.sql(dim_product_ddl)

print("SPARK-APP: Product dimension created")

SPARK-APP: Product dimension created


In [9]:
# Customer dimension (edw.dim_customer).
# Any existing table is dropped first, so re-running this cell recreates it.
spark.sql("drop table if exists edw.dim_customer")

# DDL for the Delta table: a row_wid key, customer identity/contact columns,
# the customer's plan_type, and insert/update timestamps.
# NOTE(review): effective_start_dt / effective_end_dt / active_flg look like
# row-versioning columns for tracking history — confirm against the load job.
dim_customer_ddl = """
create table edw.dim_customer (
    row_wid string,
    customer_id string,
    first_name string,
    last_name string,
    address string,
    city string,
    state string,
    zip_code string,
    phone_number string,
    email string,
    date_of_birth date,
    plan_type string,
    effective_start_dt date,
    effective_end_dt date,
    active_flg int,
    insert_dt timestamp,
    update_dt timestamp
)
USING delta
;
"""
spark.sql(dim_customer_ddl)

print("SPARK-APP: Customer dimension created")

SPARK-APP: Customer dimension created


In [10]:
# Sales fact (edw.fact_sales).
# Any existing table is dropped first, so re-running this cell recreates it.
spark.sql("drop table if exists edw.fact_sales")

# DDL for the Delta table: one *_wid column per dimension (date, product,
# store, customer), order/invoice identifiers, the numeric measures, an
# integration_key, and an insert timestamp (this table has no update_dt).
fact_sales_ddl = """
create table edw.fact_sales (
    date_wid string,
    product_wid string,
    store_wid string,
    customer_wid string,
    order_id string,
    invoice_num string,
    qty int,
    tax double,
    discount double,
    line_total double,
    integration_key string,
    insert_dt timestamp
)
USING delta
;
"""
spark.sql(fact_sales_ddl)

print("SPARK-APP: Sales Fact created")

SPARK-APP: Sales Fact created


In [11]:
# Job control / audit table (edw.job_control).
# Any existing table is dropped first, so re-running this cell recreates it.
spark.sql("drop table if exists edw.job_control")

# DDL for the Delta table: a (schema_name, table_name) pair with a
# max_timestamp and an insert timestamp.
# NOTE(review): presumably max_timestamp is a per-table load watermark —
# confirm against the jobs that read and write this table.
job_control_ddl = """
create table edw.job_control (
    schema_name string,
    table_name string,
    max_timestamp timestamp,
    insert_dt timestamp
)
USING delta
;
"""
spark.sql(job_control_ddl)

print("SPARK-APP: JOB Control table created")

SPARK-APP: JOB Control table created


In [12]:
# List every table registered in the edw database so the run output shows
# which tables exist after the setup cells above.
edw_tables = spark.sql("show tables in edw")
edw_tables.show()

+---------+-------------+-----------+
|namespace|    tableName|isTemporary|
+---------+-------------+-----------+
|      edw| dim_customer|      false|
|      edw|     dim_date|      false|
|      edw|dim_plan_type|      false|
|      edw|  dim_product|      false|
|      edw|    dim_store|      false|
|      edw|   fact_sales|      false|
|      edw|  job_control|      false|
+---------+-------------+-----------+



In [13]:
# Stop the SparkSession now that all setup statements have run.
spark.stop()