In [0]:
%sql
-- Step 1: the silver schema must exist before any silver table is created.
CREATE SCHEMA IF NOT EXISTS retail_chain_catalog.silver_schema;

-- Step 2: cleaned customers Delta table (no-op when it already exists).
CREATE TABLE IF NOT EXISTS retail_chain_catalog.silver_schema.silver_customers_cleaned (
    CustomerID     BIGINT,
    FirstName      STRING,
    LastName       STRING,
    DateOfBirth    DATE,
    AddressID      BIGINT,
    CustomerTypeID INT,
    _updated_at    TIMESTAMP
)
USING DELTA;

-- Step 3: upsert cleaned bronze customers into silver, keyed on CustomerID.
--   * names are trimmed and title-cased
--   * DateOfBirth goes through TRY_CAST so unparseable values such as 'NaT'
--     become NULL instead of failing the statement
--   * bronze rows with a NULL CustomerID are skipped
MERGE INTO retail_chain_catalog.silver_schema.silver_customers_cleaned AS tgt
USING (
    SELECT
        CAST(b.CustomerID AS BIGINT)                       AS CustomerID,
        INITCAP(TRIM(b.FirstName))                         AS FirstName,
        INITCAP(TRIM(b.LastName))                          AS LastName,
        CAST(TRY_CAST(b.DateOfBirth AS TIMESTAMP) AS DATE) AS DateOfBirth,
        CAST(b.AddressID AS BIGINT)                        AS AddressID,
        CAST(b.CustomerTypeID AS INT)                      AS CustomerTypeID,
        CURRENT_TIMESTAMP()                                AS _updated_at
    FROM retail_chain_catalog.bronze_schema.customers AS b
    WHERE b.CustomerID IS NOT NULL
) AS src
    ON tgt.CustomerID = src.CustomerID
WHEN MATCHED THEN UPDATE SET
    tgt.FirstName      = src.FirstName,
    tgt.LastName       = src.LastName,
    tgt.DateOfBirth    = src.DateOfBirth,
    tgt.AddressID      = src.AddressID,
    tgt.CustomerTypeID = src.CustomerTypeID,
    tgt._updated_at    = src._updated_at
WHEN NOT MATCHED THEN INSERT (
    CustomerID,
    FirstName,
    LastName,
    DateOfBirth,
    AddressID,
    CustomerTypeID,
    _updated_at
) VALUES (
    src.CustomerID,
    src.FirstName,
    src.LastName,
    src.DateOfBirth,
    src.AddressID,
    src.CustomerTypeID,
    src._updated_at
);

In [0]:

%sql
-- Step 1: cleaned transactions Delta table (no-op when it already exists).
-- Amount is stored as DECIMAL(18,2) so monetary values stay exact.
CREATE TABLE IF NOT EXISTS retail_chain_catalog.silver_schema.silver_transactions_cleaned (
    TransactionID        STRING,
    AccountOriginID      BIGINT,
    AccountDestinationID BIGINT,
    TransactionTypeID    INT,
    Amount               DECIMAL(18,2),
    TransactionDate      TIMESTAMP,
    BranchID             BIGINT,
    Description          STRING,
    _updated_at          TIMESTAMP
)
USING DELTA;

-- Step 2: upsert cleaned bronze transactions into silver, keyed on the
-- trimmed TransactionID.
--   * TransactionDate goes through TRY_CAST so values such as 'NaT' become NULL
--   * Description is trimmed and title-cased; a NULL description is replaced
--     with a fixed placeholder
--   * bronze rows with a NULL TransactionID are skipped
MERGE INTO retail_chain_catalog.silver_schema.silver_transactions_cleaned AS tgt
USING (
    SELECT
        TRIM(b.TransactionID)                  AS TransactionID,
        CAST(b.AccountOriginID AS BIGINT)      AS AccountOriginID,
        CAST(b.AccountDestinationID AS BIGINT) AS AccountDestinationID,
        CAST(b.TransactionTypeID AS INT)       AS TransactionTypeID,
        CAST(b.Amount AS DECIMAL(18,2))        AS Amount,
        TRY_CAST(b.TransactionDate AS TIMESTAMP) AS TransactionDate,
        CAST(b.BranchID AS BIGINT)             AS BranchID,
        COALESCE(
            INITCAP(TRIM(b.Description)),
            'No Description Provided'
        )                                      AS Description,
        CURRENT_TIMESTAMP()                    AS _updated_at
    FROM retail_chain_catalog.bronze_schema.transactions AS b
    WHERE b.TransactionID IS NOT NULL
) AS src
    ON tgt.TransactionID = src.TransactionID
WHEN MATCHED THEN UPDATE SET
    tgt.AccountOriginID      = src.AccountOriginID,
    tgt.AccountDestinationID = src.AccountDestinationID,
    tgt.TransactionTypeID    = src.TransactionTypeID,
    tgt.Amount               = src.Amount,
    tgt.TransactionDate      = src.TransactionDate,
    tgt.BranchID             = src.BranchID,
    tgt.Description          = src.Description,
    tgt._updated_at          = src._updated_at
WHEN NOT MATCHED THEN INSERT (
    TransactionID,
    AccountOriginID,
    AccountDestinationID,
    TransactionTypeID,
    Amount,
    TransactionDate,
    BranchID,
    Description,
    _updated_at
) VALUES (
    src.TransactionID,
    src.AccountOriginID,
    src.AccountDestinationID,
    src.TransactionTypeID,
    src.Amount,
    src.TransactionDate,
    src.BranchID,
    src.Description,
    src._updated_at
);

In [0]:
%sql
--- 1. Create the Silver Accounts table (no-op when it already exists).
---    Balance is DECIMAL(18,2) so monetary values keep their cents.
CREATE TABLE IF NOT EXISTS retail_chain_catalog.silver_schema.silver_accounts_cleaned (
  AccountID BIGINT,
  CustomerID BIGINT,
  AccountTypeID INT,
  AccountStatusID INT,
  Balance DECIMAL(18,2), -- Ensure this is NOT BIGINT
  OpeningDate DATE,
  _updated_at TIMESTAMP
) USING DELTA;

--- 2. Upsert cleaned bronze accounts into silver, keyed on AccountID.
---    * IDs are cast through DECIMAL first so strings like '1001.0' convert
---    * an unparseable Balance falls back to 0
---    * OpeningDate goes through try_cast so values such as 'NaT' become NULL
---    * bronze rows with a NULL AccountID are skipped
MERGE INTO retail_chain_catalog.silver_schema.silver_accounts_cleaned AS target
USING (
  SELECT 
    -- Double cast IDs just in case they have ".0"
    -- NOTE(review): DECIMAL without precision is DECIMAL(10,0) in Databricks
    -- SQL, so IDs longer than 10 digits will not survive this cast; widen
    -- (e.g. DECIMAL(38,0)) if such IDs can occur.
    CAST(CAST(AccountID AS DECIMAL) AS BIGINT) AS AccountID,
    CAST(CAST(CustomerID AS DECIMAL) AS BIGINT) AS CustomerID,
    CAST(CAST(AccountTypeID AS DECIMAL) AS INT) AS AccountTypeID,
    CAST(CAST(AccountStatusID AS DECIMAL) AS INT) AS AccountStatusID,
    
    -- Balance is cast to DECIMAL(18,2); values that cannot be parsed become 0
    CAST(COALESCE(try_cast(Balance AS DECIMAL(18,2)), 0) AS DECIMAL(18,2)) AS Balance,
    
    -- Safe dating
    CAST(try_cast(OpeningDate AS TIMESTAMP) AS DATE) AS OpeningDate,
    CURRENT_TIMESTAMP() AS _updated_at
  FROM retail_chain_catalog.bronze_schema.accounts
  WHERE AccountID IS NOT NULL
) AS source
-- Match account to account. AccountID is the key column: it is the one
-- filtered NOT NULL above and the only column not rewritten by UPDATE SET.
-- Comparing against source.CustomerID would pair rows with unrelated accounts.
ON target.AccountID = source.AccountID
WHEN MATCHED THEN
  UPDATE SET 
    target.CustomerID = source.CustomerID,
    target.AccountTypeID = source.AccountTypeID,
    target.AccountStatusID = source.AccountStatusID,
    target.Balance = source.Balance,
    target.OpeningDate = source.OpeningDate,
    target._updated_at = source._updated_at
WHEN NOT MATCHED THEN
  INSERT (AccountID, CustomerID, AccountTypeID, AccountStatusID, Balance, OpeningDate, _updated_at)
  VALUES (source.AccountID, source.CustomerID, source.AccountTypeID, source.AccountStatusID, source.Balance, source.OpeningDate, source._updated_at);

In [0]:
%sql
-- Step 1: cleaned loans Delta table (no-op when it already exists).
CREATE TABLE IF NOT EXISTS retail_chain_catalog.silver_schema.silver_loans_cleaned (
    LoanID           BIGINT,
    AccountID        BIGINT,
    LoanStatusID     INT,
    PrincipalAmount  DECIMAL(18,2),
    InterestRate     DECIMAL(5,2),   -- e.g. 12.55
    StartDate        DATE,
    EstimatedEndDate DATE,
    _updated_at      TIMESTAMP
)
USING DELTA;

-- Step 2: upsert cleaned bronze loans into silver, keyed on LoanID.
--   * IDs are cast through DECIMAL first so strings like '1001.0' convert
--   * unparseable PrincipalAmount / InterestRate values fall back to 0
--   * both dates go through TRY_CAST so values such as 'NaT' become NULL
--   * bronze rows with a NULL LoanID are skipped
MERGE INTO retail_chain_catalog.silver_schema.silver_loans_cleaned AS tgt
USING (
    SELECT
        CAST(CAST(b.LoanID AS DECIMAL) AS BIGINT)    AS LoanID,
        CAST(CAST(b.AccountID AS DECIMAL) AS BIGINT) AS AccountID,
        CAST(CAST(b.LoanStatusID AS DECIMAL) AS INT) AS LoanStatusID,
        CAST(
            COALESCE(TRY_CAST(b.PrincipalAmount AS DECIMAL(18,2)), 0)
            AS DECIMAL(18,2)
        )                                            AS PrincipalAmount,
        CAST(
            COALESCE(TRY_CAST(b.InterestRate AS DECIMAL(5,2)), 0)
            AS DECIMAL(5,2)
        )                                            AS InterestRate,
        CAST(TRY_CAST(b.StartDate AS TIMESTAMP) AS DATE)        AS StartDate,
        CAST(TRY_CAST(b.EstimatedEndDate AS TIMESTAMP) AS DATE) AS EstimatedEndDate,
        CURRENT_TIMESTAMP()                          AS _updated_at
    FROM retail_chain_catalog.bronze_schema.loans AS b
    WHERE b.LoanID IS NOT NULL
) AS src
    ON tgt.LoanID = src.LoanID
WHEN MATCHED THEN UPDATE SET
    tgt.AccountID        = src.AccountID,
    tgt.LoanStatusID     = src.LoanStatusID,
    tgt.PrincipalAmount  = src.PrincipalAmount,
    tgt.InterestRate     = src.InterestRate,
    tgt.StartDate        = src.StartDate,
    tgt.EstimatedEndDate = src.EstimatedEndDate,
    tgt._updated_at      = src._updated_at
WHEN NOT MATCHED THEN INSERT (
    LoanID,
    AccountID,
    LoanStatusID,
    PrincipalAmount,
    InterestRate,
    StartDate,
    EstimatedEndDate,
    _updated_at
) VALUES (
    src.LoanID,
    src.AccountID,
    src.LoanStatusID,
    src.PrincipalAmount,
    src.InterestRate,
    src.StartDate,
    src.EstimatedEndDate,
    src._updated_at
);

In [0]:
%sql
-- Step 1: cleaned address Delta table (no-op when it already exists).
CREATE TABLE IF NOT EXISTS retail_chain_catalog.silver_schema.silver_address_cleaned (
    AddressID   BIGINT,
    Street      STRING,
    City        STRING,
    Country     STRING,
    _updated_at TIMESTAMP
)
USING DELTA;

-- Step 2: upsert cleaned bronze addresses into silver, keyed on AddressID.
--   * AddressID is cast through DECIMAL first so strings like '101.0' convert
--   * Street and City are trimmed and title-cased (" new york " -> "New York")
--   * Country is trimmed and upper-cased
--   * bronze rows with a NULL AddressID are skipped
MERGE INTO retail_chain_catalog.silver_schema.silver_address_cleaned AS tgt
USING (
    SELECT
        CAST(CAST(b.AddressID AS DECIMAL) AS BIGINT) AS AddressID,
        INITCAP(TRIM(b.Street))                      AS Street,
        INITCAP(TRIM(b.City))                        AS City,
        UPPER(TRIM(b.Country))                       AS Country,
        CURRENT_TIMESTAMP()                          AS _updated_at
    FROM retail_chain_catalog.bronze_schema.addresses AS b
    WHERE b.AddressID IS NOT NULL
) AS src
    ON tgt.AddressID = src.AddressID
WHEN MATCHED THEN UPDATE SET
    tgt.Street      = src.Street,
    tgt.City        = src.City,
    tgt.Country     = src.Country,
    tgt._updated_at = src._updated_at
WHEN NOT MATCHED THEN INSERT (
    AddressID,
    Street,
    City,
    Country,
    _updated_at
) VALUES (
    src.AddressID,
    src.Street,
    src.City,
    src.Country,
    src._updated_at
);

In [0]:
%sql
-- Step 1: cleaned branches Delta table (no-op when it already exists).
CREATE TABLE IF NOT EXISTS retail_chain_catalog.silver_schema.silver_branches_cleaned (
    BranchID    BIGINT,
    BranchName  STRING,
    AddressID   BIGINT,
    _updated_at TIMESTAMP
)
USING DELTA;

-- Step 2: upsert cleaned bronze branches into silver, keyed on BranchID.
--   * IDs are cast through DECIMAL first so strings like '1.0' convert
--   * BranchName is trimmed and title-cased
--     (" downtown branch " -> "Downtown Branch")
--   * bronze rows with a NULL BranchID are skipped
MERGE INTO retail_chain_catalog.silver_schema.silver_branches_cleaned AS tgt
USING (
    SELECT
        CAST(CAST(b.BranchID AS DECIMAL) AS BIGINT)  AS BranchID,
        INITCAP(TRIM(b.BranchName))                  AS BranchName,
        CAST(CAST(b.AddressID AS DECIMAL) AS BIGINT) AS AddressID,
        CURRENT_TIMESTAMP()                          AS _updated_at
    FROM retail_chain_catalog.bronze_schema.branches AS b
    WHERE b.BranchID IS NOT NULL
) AS src
    ON tgt.BranchID = src.BranchID
WHEN MATCHED THEN UPDATE SET
    tgt.BranchName  = src.BranchName,
    tgt.AddressID   = src.AddressID,
    tgt._updated_at = src._updated_at
WHEN NOT MATCHED THEN INSERT (
    BranchID,
    BranchName,
    AddressID,
    _updated_at
) VALUES (
    src.BranchID,
    src.BranchName,
    src.AddressID,
    src._updated_at
);