## Retail Data Management with Unity Catalog

This notebook demonstrates how to build a mini project for retail data management using Databricks Unity Catalog.

We create a structured data lakehouse with the following steps:

- **Create a catalog** (retail_uc) - acts as the project's top-level container.
- **Create schemas** (bronze, silver, gold) - to organize data by stages of processing.
- **Create and manage tables** such as customers and transactions in the bronze schema.
- **Insert and query data** to validate the setup.
- Check existing catalogs, schemas, and tables in Unity Catalog.

This setup forms the foundation of a retail analytics platform, where:

- **Bronze layer** holds raw ingested data,
- **Silver layer** stores cleaned and enriched data,
- **Gold layer** powers business reporting and dashboards.

Create a new catalog for the project

In [0]:
%sql
CREATE CATALOG IF NOT EXISTS retail_uc
COMMENT 'Mini project: retail data governed by Unity Catalog';

Give yourself ownership so you can proceed (replace with your user or group if needed)

In [0]:
%sql
ALTER CATALOG retail_uc OWNER TO `ragavivenugopal123@gmail.com`;

Create Schemas (Bronze/Silver/Gold + Secure)

In [0]:
%sql
CREATE SCHEMA IF NOT EXISTS retail_uc.bronze COMMENT 'Raw-ish landing zone';

In [0]:
%sql
CREATE SCHEMA IF NOT EXISTS retail_uc.silver COMMENT 'Cleaned/curated';

In [0]:
%sql
CREATE SCHEMA IF NOT EXISTS retail_uc.gold COMMENT 'Business-ready marts';

In [0]:
%sql
CREATE SCHEMA IF NOT EXISTS retail_uc.secure COMMENT 'Dynamic views for masking/row-level security';

Create sample source tables (Bronze)

In [0]:
%sql
USE CATALOG retail_uc;
USE SCHEMA bronze;

In [0]:
SELECT current_catalog(), current_schema();

current_catalog(),current_schema()
retail_uc,bronze


In [0]:
SHOW CATALOGS;

catalog
retail_uc
samples
system
workspace


In [0]:
SHOW SCHEMAS IN retail_uc;

databaseName
bronze
default
gold
information_schema
secure
silver


In [0]:
SHOW TABLES IN retail_uc.bronze;

database,tableName,isTemporary
bronze,customers,False
,_sqldf,True


In [0]:
USE CATALOG retail_uc;
USE SCHEMA bronze;

Customers with PII (email, phone).

In [0]:
%sql
CREATE OR REPLACE TABLE customers AS
SELECT * FROM VALUES
  (1, 'Asha',     'APAC', 'asha@example.com',     '+91-90000-00001'),
  (2, 'Ravi',     'APAC', 'ravi@example.com',     '+91-90000-00002'),
  (3, 'Chloe',    'EMEA', 'chloe@example.co.uk',  '+44-7000-000003'),
  (4, 'Diego',    'AMER', 'diego@example.com',    '+1-202-000-0004'),
  (5, 'Meera',    'APAC', 'meera@example.com',    '+91-90000-00005')
AS t(customer_id, customer_name, region, email, phone);

num_affected_rows,num_inserted_rows


In [0]:
SELECT * FROM retail_uc.bronze.customers;

customer_id,customer_name,region,email,phone
1,Asha,APAC,asha@example.com,+91-90000-00001
2,Ravi,APAC,ravi@example.com,+91-90000-00002
3,Chloe,EMEA,chloe@example.co.uk,+44-7000-000003
4,Diego,AMER,diego@example.com,+1-202-000-0004
5,Meera,APAC,meera@example.com,+91-90000-00005


In [0]:
COMMENT ON TABLE customers IS 'Bronze customers (contains PII).';

In [0]:
ALTER TABLE customers SET TBLPROPERTIES ('classification'='pii', 'owner_team'='data_stewards');

Transactions 

In [0]:
USE CATALOG retail_uc;
USE SCHEMA bronze;

In [0]:
CREATE OR REPLACE TABLE transactions AS
SELECT * FROM VALUES
  (101, 1, '2025-08-01',  120.50, 'online',  'APAC'),
  (102, 1, '2025-08-02',   75.00, 'store',   'APAC'),
  (103, 2, '2025-08-01',  210.00, 'online',  'APAC'),
  (104, 3, '2025-08-01',  330.00, 'store',   'EMEA'),
  (105, 4, '2025-08-03',   55.90, 'online',  'AMER'),
  (106, 5, '2025-08-03',  499.00, 'store',   'APAC')
AS t(txn_id, customer_id, txn_date, amount, channel, region);

num_affected_rows,num_inserted_rows


In [0]:
SELECT * FROM retail_uc.bronze.customers;

customer_id,customer_name,region,email,phone
1,Asha,APAC,asha@example.com,+91-90000-00001
2,Ravi,APAC,ravi@example.com,+91-90000-00002
3,Chloe,EMEA,chloe@example.co.uk,+44-7000-000003
4,Diego,AMER,diego@example.com,+1-202-000-0004
5,Meera,APAC,meera@example.com,+91-90000-00005


In [0]:
SELECT * FROM retail_uc.bronze.transactions;

txn_id,customer_id,txn_date,amount,channel,region
101,1,2025-08-01,120.5,online,APAC
102,1,2025-08-02,75.0,store,APAC
103,2,2025-08-01,210.0,online,APAC
104,3,2025-08-01,330.0,store,EMEA
105,4,2025-08-03,55.9,online,AMER
106,5,2025-08-03,499.0,store,APAC


In [0]:
COMMENT ON TABLE transactions IS 'Bronze transactions.';

In [0]:
ALTER TABLE transactions SET TBLPROPERTIES ('quality'='raw');

Quality constraint example (no negative amounts)

In [0]:
ALTER TABLE transactions ADD CONSTRAINT chk_amount_nonneg CHECK (amount >= 0);

In [0]:
GRANT SELECT ON TABLE customers   TO `ragavivenugopal123@gmail.com`;
GRANT SELECT ON TABLE transactions TO `ragavivenugopal123@gmail.com`; 
GRANT MODIFY ON TABLE transactions TO `ragavivenugopal123@gmail.com`;

Curate to Silver (cleansed/denormalized)

In [0]:
USE CATALOG retail_uc;
USE SCHEMA bronze;

In [0]:
SELECT current_catalog(), current_schema();

current_catalog(),current_schema()
retail_uc,silver


In [0]:
SHOW CATALOGS;

catalog
retail_uc
samples
system
workspace


In [0]:
SHOW SCHEMAS IN retail_uc;

databaseName
bronze
default
gold
information_schema
secure
silver


In [0]:
SHOW TABLES IN retail_uc.bronze;

database,tableName,isTemporary
bronze,transactions,False
,_sqldf,True


In [0]:
USE CATALOG retail_uc;
USE SCHEMA silver;

In [0]:
CREATE OR REPLACE TABLE sales AS
SELECT
  t.txn_id,
  t.txn_date,
  t.amount,
  t.channel,
  t.region,
  c.customer_id,
  c.customer_name
FROM retail_uc.bronze.transactions t
JOIN retail_uc.bronze.customers c
  ON c.customer_id = t.customer_id;

num_affected_rows,num_inserted_rows


In [0]:
SELECT current_catalog(), current_schema();


current_catalog(),current_schema()
retail_uc,silver


In [0]:
SHOW TABLES IN retail_uc.bronze;


database,tableName,isTemporary
bronze,customers,False
,_sqldf,True


In [0]:

COMMENT ON TABLE sales IS 'Silver: joined and cleaned transactions with customer names.';

Aggregate to Gold (business-ready marts)

In [0]:
USE CATALOG retail_uc;
USE SCHEMA gold;

Daily totals

In [0]:
CREATE OR REPLACE TABLE daily_sales AS
SELECT
  DATE(txn_date) AS date,
  SUM(amount)    AS total_amount,
  COUNT(*)       AS txn_count
FROM retail_uc.silver.sales
GROUP BY DATE(txn_date);

num_affected_rows,num_inserted_rows


In [0]:
COMMENT ON TABLE daily_sales IS 'Gold: daily rollups';

Region KPI

In [0]:
CREATE OR REPLACE TABLE region_kpis AS
SELECT
  region,
  SUM(amount)         AS total_amount,
  AVG(amount)         AS avg_ticket,
  COUNT(DISTINCT customer_id) AS unique_customers
FROM retail_uc.silver.sales
GROUP BY region;

num_affected_rows,num_inserted_rows


In [0]:
COMMENT ON TABLE region_kpis IS 'Gold: region-level KPIs';

Data masking & row-level security (Secure schema)

In [0]:
USE CATALOG retail_uc;
USE SCHEMA secure;

Column masking dynamic view

In [0]:
CREATE OR REPLACE VIEW customers_masked AS
SELECT
  customer_id,
  customer_name,
  region,
  CASE
    WHEN is_account_group_member('data_stewards') OR is_account_group_member('data_scientists')
      THEN email
    ELSE 'REDACTED'
  END AS email,
  CASE
    WHEN is_account_group_member('data_stewards') OR is_account_group_member('data_scientists')
      THEN phone
    ELSE 'REDACTED'
  END AS phone
FROM retail_uc.bronze.customers;

In [0]:
COMMENT ON VIEW customers_masked IS 'Masks PII for non-privileged users';

Row-level security dynamic view on sales

In [0]:
CREATE OR REPLACE VIEW sales_rls AS
SELECT *
FROM retail_uc.silver.sales
WHERE
  -- Admins and stewards see all
  is_account_group_member('data_stewards')
  OR
  -- APAC analysts see APAC rows, EMEA → EMEA, AMER → AMER
  (is_account_group_member('analysts') AND region IN (
      CASE
        WHEN is_account_group_member('region_apac') THEN 'APAC'
        WHEN is_account_group_member('region_emea') THEN 'EMEA'
        WHEN is_account_group_member('region_amer') THEN 'AMER'
      END
  ))
  OR
  -- Data scientists see all regions
  is_account_group_member('data_scientists');

In [0]:
COMMENT ON VIEW sales_rls IS 'Row-level security by region using account groups';

Lineage (create a fresh derived asset)

In [0]:
USE CATALOG retail_uc;
USE SCHEMA gold;

In [0]:
CREATE OR REPLACE TABLE top_customers AS
SELECT
  customer_id,
  customer_name,
  SUM(amount) AS lifetime_value
FROM retail_uc.silver.sales
GROUP BY customer_id, customer_name
ORDER BY lifetime_value DESC;

num_affected_rows,num_inserted_rows


In [0]:
COMMENT ON TABLE top_customers IS 'Gold: top customers by LTV';

Discovery & audit examples (INFORMATION_SCHEMA)

List all tables in the catalog

In [0]:
SELECT table_catalog, table_schema, table_name, table_type
FROM retail_uc.INFORMATION_SCHEMA.TABLES
ORDER BY table_schema, table_name;

table_catalog,table_schema,table_name,table_type
retail_uc,bronze,customers,MANAGED
retail_uc,bronze,sales,MANAGED
retail_uc,bronze,transactions,MANAGED
retail_uc,gold,daily_sales,MANAGED
retail_uc,gold,region_kpis,MANAGED
retail_uc,gold,top_customers,MANAGED
retail_uc,information_schema,catalog_privileges,VIEW
retail_uc,information_schema,catalog_tags,VIEW
retail_uc,information_schema,catalogs,VIEW
retail_uc,information_schema,check_constraints,VIEW


See column definitions

In [0]:
SELECT table_schema, table_name, column_name, data_type
FROM retail_uc.INFORMATION_SCHEMA.COLUMNS
ORDER BY table_schema, table_name, ordinal_position;

table_schema,table_name,column_name,data_type
bronze,customers,customer_id,INT
bronze,customers,customer_name,STRING
bronze,customers,region,STRING
bronze,customers,email,STRING
bronze,customers,phone,STRING
bronze,sales,txn_id,INT
bronze,sales,txn_date,STRING
bronze,sales,amount,DECIMAL
bronze,sales,channel,STRING
bronze,sales,region,STRING


Show who has what on a specific table

In [0]:
SHOW GRANTS ON TABLE retail_uc.bronze.customers;

Principal,ActionType,ObjectType,ObjectKey


Find objects marked as PII

In [0]:
SELECT table_schema, table_name, table_type
FROM retail_uc.INFORMATION_SCHEMA.TABLES
WHERE table_catalog = 'retail_uc';

table_schema,table_name,table_type
information_schema,tables,VIEW
information_schema,schema_tags,VIEW
information_schema,table_constraints,VIEW
information_schema,routine_privileges,VIEW
information_schema,information_schema_catalog_name,VIEW
gold,daily_sales,MANAGED
bronze,transactions,MANAGED
bronze,sales,MANAGED
information_schema,row_filters,VIEW
information_schema,routine_columns,VIEW
