In [0]:
%run "../0 - SETUP/0 - Setup"

### 1. Create The Sales Table Schema

In [0]:
# Source side: catalog/schema that the inventory_items table is read from.
# CATALOG_NAME and POSTGRES_SCHEMA are not defined in this notebook
# (presumably they come from the setup notebook run above — confirm).
lakebase_catalog_name = CATALOG_NAME
lakebase_schema_name = POSTGRES_SCHEMA

# Target side: catalog/schema in which this notebook creates its tables.
catalog_name = ANALYTICS_CATALOG_NAME
schema_name = "inventory_analytics"

In [0]:

# Build the four bootstrap statements up front so the execution order below
# reads as a plain checklist.
create_catalog_stmt = f"CREATE CATALOG IF NOT EXISTS {catalog_name}"
use_catalog_stmt = f"USE CATALOG {catalog_name}"
create_schema_stmt = f"CREATE SCHEMA IF NOT EXISTS {schema_name}"
use_schema_stmt = f"USE SCHEMA {schema_name}"

# The catalog must exist and be current before the schema is created in it;
# after this cell, unqualified table names resolve to {catalog_name}.{schema_name}.
spark.sql(create_catalog_stmt)
spark.sql(use_catalog_stmt)
spark.sql(create_schema_stmt)
spark.sql(use_schema_stmt)


In [0]:
%sql
-- Working copy of the TPC-DS sample sales table; later cells modify it in
-- place and finally drop it.
CREATE OR REPLACE TABLE store_sales_temp AS
SELECT *
FROM samples.tpcds_sf1.store_sales

In [0]:
# Snapshot the distinct item names from the Lakebase inventory_items table
# into a staging table in the current schema.
items_temp_ddl = f"""CREATE OR REPLACE TABLE items_temp
          AS SELECT DISTINCT item_name from {lakebase_catalog_name}.{lakebase_schema_name}.inventory_items"""
spark.sql(items_temp_ddl)

### 2. Update Date Surrogate Keys to Current Date

In [0]:
%sql
-- Re-date the sales so that the most recent sale lands on yesterday: each
-- sold-date surrogate key is replaced by the key of the date_dim row whose
-- calendar date sits at the same offset inside the new window as the original
-- date did inside the old one.
WITH sold_range AS (
  -- Earliest and latest calendar dates currently referenced by the sales rows.
  SELECT
    MIN(dates.d_date) AS old_start,
    MAX(dates.d_date) AS old_end
  FROM store_sales_temp AS sales
  INNER JOIN samples.tpcds_sf1.date_dim AS dates
    ON sales.ss_sold_date_sk = dates.d_date_sk
),
shift_params AS (
  SELECT
    old_start,
    old_end,
    DATEDIFF(DAY, old_start, old_end) AS old_span,  -- length of the original window, in days
    current_date() - 1 AS new_end                   -- the latest sale should fall on yesterday
  FROM sold_range
),
date_map AS (
  -- One row per sold date in use: its key, its date, and the date it moves to.
  SELECT DISTINCT
    dates.d_date_sk AS original_date_sk,
    dates.d_date AS original_date,
    CAST(
      DATEADD(
        DAY,
        DATEDIFF(DAY, p.old_start, dates.d_date),  -- offset of this date within the old window
        DATEADD(DAY, -p.old_span, p.new_end)       -- first day of the new window
      ) AS DATE
    ) AS shifted_date
  FROM store_sales_temp AS sales
  INNER JOIN samples.tpcds_sf1.date_dim AS dates
    ON sales.ss_sold_date_sk = dates.d_date_sk
  CROSS JOIN shift_params AS p
)
MERGE INTO store_sales_temp AS ss
USING (
  -- Translate each shifted calendar date back into a date_dim surrogate key.
  SELECT
    m.original_date_sk,
    target_dates.d_date_sk AS new_date_sk
  FROM date_map AS m
  INNER JOIN samples.tpcds_sf1.date_dim AS target_dates
    ON m.shifted_date = target_dates.d_date
) AS src
ON ss.ss_sold_date_sk = src.original_date_sk
WHEN MATCHED THEN
  UPDATE SET ss.ss_sold_date_sk = src.new_date_sk

### 2.1 Validate if the update worked as expected

In [0]:
%sql
-- Sanity check for the key shift: the latest sold date should now be
-- yesterday (current_date() - 1), which is what max_is_yesterday reports.
-- It can read false if this cell runs on a later calendar day than the shift.
-- The query is a global aggregate and always returns exactly one row, so no
-- ORDER BY is needed.
SELECT
  MAX(dd.d_date) AS max_sold_date,
  MIN(dd.d_date) AS min_sold_date,
  MAX(dd.d_date) = current_date() - 1 AS max_is_yesterday
FROM store_sales_temp ss
JOIN samples.tpcds_sf1.date_dim dd
  ON ss.ss_sold_date_sk = dd.d_date_sk

In [0]:
# Build the final store_sales table: every sale row (with its shifted date and
# its item's category id) is repeated once per item name in items_temp via the
# CROSS JOIN, and each output row gets a random warehouse_id in 1..22.
# NOTE(review): 22 is presumably the number of warehouses — confirm and
# consider promoting it to a named constant.
#
# No SELECT DISTINCT here: the projection contains a per-row rand() value, so
# DISTINCT could never deduplicate reliably (identical rows would only collapse
# when they happened to draw the same warehouse_id) and it forced a full
# shuffle of the cross-joined result.
# rand() is unseeded, so warehouse_id values differ from run to run.
# The query has no Python placeholders, so a plain (non-f) string is used.
spark.sql("""
CREATE OR REPLACE TABLE store_sales
AS
SELECT
  dd.d_date AS date,
  i.i_category_id AS category_id,
  CAST(FLOOR(rand() * 22) + 1 AS INT) AS warehouse_id,
  itms.item_name,
  ss.*
FROM samples.tpcds_sf1.item i
JOIN store_sales_temp ss
  ON ss.ss_item_sk = i.i_item_sk
JOIN samples.tpcds_sf1.date_dim dd
  ON ss.ss_sold_date_sk = dd.d_date_sk
CROSS JOIN items_temp itms
WHERE i.i_category IS NOT NULL AND i.i_category_id IS NOT NULL
"""
)

In [0]:
%sql
-- Peek at a handful of rows from the finished table.
SELECT *
FROM store_sales
LIMIT 10

In [0]:
%sql
-- The staging tables were only needed while building store_sales.
DROP TABLE IF EXISTS items_temp;
DROP TABLE IF EXISTS store_sales_temp

### Next Steps
- Synthetic data generation is now complete. Proceed to the DemandForecasting notebook.