#### Why Snowpark?
Snowpark provides more flexibility, scalability, and integration for data processing and orchestration tasks.

Orchestrating Jobs and Pipelines: We can automate Snowpark-based pipelines (UDFs, views, and other transformations) in a more flexible way. Snowpark code can be versioned and tested, making it easier to maintain and extend in your pipeline.

#### Note:
This notebook is to validate, clean and update data in tables of lesser priority in terms of updating speed.

Data tables that do not need a faster updating speed will be processed here.

#### Desired Flow
1. Grab data from raw tables
2. Validate the raw data, clean it where needed, and update the cleaned tables with the result
3. Join tables via foreign keys and make hybrid master table
4. Create Schema Views

#### 1. Loading Data In

Since we already push our data onto Snowflake, we can load it in this notebook and process it with Snowpark. This is the first step of the data flow for establishing the CI/CD deployment and finalizing the ELT pipeline.

In [1]:
import time
from snowflake.snowpark import Session

# Registry of the tables handled by this notebook, grouped by business domain.
# Every domain currently lives in the same Snowflake schema, so the schema name
# is stated once and attached to each group below.
_RAW_SCHEMA = "KN_LOGISTICS.SNOWSQL"

_TABLES_BY_DOMAIN = (
    (
        "application",
        [
            "APPLICATION_CITIES",
            "APPLICATION_COUNTRIES_SEA",
            "APPLICATION_DELIVERYMETHODS",
            "APPLICATION_PAYMENTMETHODS",
            "APPLICATION_TRANSACTIONTYPES",
        ],
    ),
    (
        "purchasing",
        [
            "PURCHASING_SUPPLIERCATEGORIES",
            "PURCHASING_SUPPLIERS",
        ],
    ),
    (
        "sales",
        [
            "SALES_BUYINGGROUPS",
        ],
    ),
    (
        "warehouse",
        [
            "WAREHOUSE_COLORS",
            "WAREHOUSE_PACKAGETYPES",
        ],
    ),
)

# Shape: {domain: {"schema": <schema name>, "tables": [<table name>, ...]}}
TABLE_DICT = {
    domain: {"schema": _RAW_SCHEMA, "tables": list(table_names)}
    for domain, table_names in _TABLES_BY_DOMAIN
}

def load_raw_table(session, tname=None, schema=None):
    """Switch to ``schema``, read ``tname`` and show its content.

    The table is read directly from Snowflake (no S3 staging step).

    Args:
        session: Snowpark session; only ``use_schema`` and ``table`` are used.
        tname: Name of the table to read, resolved against ``schema``.
        schema: Schema the session is switched to before reading.

    Returns:
        The DataFrame returned by ``session.table(tname)``, so callers can
        keep working with the table after the preview.

    Raises:
        ValueError: If ``tname`` or ``schema`` is missing or empty.
    """
    # Both arguments default to None only for keyword-style calls; passing
    # None on to Snowflake would fail with a much less helpful error.
    if not tname or not schema:
        raise ValueError(
            f"load_raw_table needs both a table name and a schema "
            f"(got tname={tname!r}, schema={schema!r})"
        )

    session.use_schema(schema)
    print(f"Loading table: {tname}")
    # If additional logic for transformations/loading is needed, add it here
    df = session.table(tname)
    df.show()  # Example action to verify table content
    return df

def load_all_tables(session):
    """Run ``load_raw_table`` for every table registered in ``TABLE_DICT``.

    Args:
        session: Snowpark session forwarded unchanged to ``load_raw_table``.
    """
    for group in TABLE_DICT.values():
        target_schema = group['schema']
        for table_name in group['tables']:
            load_raw_table(session, tname=table_name, schema=target_schema)

def validate_tables(session):
    """Print the column names of every table registered in ``TABLE_DICT``.

    Args:
        session: Snowpark session; only ``use_schema`` and ``table`` are used.
    """
    for group in TABLE_DICT.values():
        # The schema is the same for every table of a group, so switch once
        # per group instead of once per table.
        session.use_schema(group['schema'])
        for tname in group['tables']:
            print(f"Validating table: {tname}")
            print(f"Columns: {session.table(tname).columns}")

In [None]:
# Make the parent of the current working directory importable.
# NOTE(review): presumably the project's utils package (snowpark_utils) lives
# there; nothing is imported from it in this cell.
import os
import sys

current_dir = os.getcwd()
parent_dir = os.path.dirname(current_dir)
# Notebook cells get re-run; do not append the same directory again.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

In [None]:
# The import must be active: without it the call below raises NameError.
from snowflake.snowpark.context import get_active_session

# Reuse the session that is already active in the notebook.
session = get_active_session()

In [None]:
# Load and show every table registered in TABLE_DICT.
load_all_tables(session)

In [None]:
# Print the column names of every table registered in TABLE_DICT.
validate_tables(session)

#### 2. Validate the raw data, clean it, and update the cleaned tables
After pulling in the raw data, we validate it to make sure it is in a format eligible to be pushed over to the cleaned tables.

If not, we will clean the tables accordingly then update them over to the cleaned tables.

Some of the validation we can do is:
- Check for null values
- Check for duplicates in PK and UQ
- Check for invalid data types & text formats

After cleaning the data, we need to validate ONE MORE TIME to make sure the data has not been imported into the cleaned tables before. If validation succeeds, we update the current records over to the cleaned tables.
- Check if the data records already exist in the cleaned tables (check if they are identical)

#### Cleaning APPLICATION_CITIES

In [None]:
-- Create a clean table with the same structure as the raw table
CREATE OR REPLACE TABLE KN_LOGISTICS.SNOWSQL.APPLICATION_CITIES AS
SELECT
  CAST(CASE WHEN CITYID = 'NULL' THEN NULL ELSE CITYID END AS VARCHAR(38)) AS CITYID,
  CAST(CASE WHEN CITYNAME = 'NULL' THEN NULL ELSE CITYNAME END AS VARCHAR(50)) AS CITYNAME,
  CAST(CASE WHEN COUNTRYID = 'NULL' THEN NULL ELSE COUNTRYID END AS VARCHAR(38)) AS COUNTRYID,
  CAST(CASE WHEN LATITUDE = 'NULL' THEN NULL ELSE LATITUDE END AS VARCHAR(38)) AS LATITUDE,
  CAST(CASE WHEN LONGITUDE = 'NULL' THEN NULL ELSE LONGITUDE END AS VARCHAR(38)) AS LONGITUDE,
  CAST(CASE WHEN LATESTRECORDEDPOPULATION = 'NULL' THEN NULL ELSE LATESTRECORDEDPOPULATION END AS VARCHAR(38)) AS LATESTRECORDEDPOPULATION
FROM KN_LOGISTICS.SNOWSQL.APPLICATION_CITIES_RAW;

-- Verify the clean table
SELECT *
FROM KN_LOGISTICS.SNOWSQL.APPLICATION_CITIES;

--------------------------------------------------------------
--CONVERSION OF DATATYPES 
--------------------------------------------------------------
// CITYID
ALTER TABLE KN_LOGISTICS.SNOWSQL.APPLICATION_CITIES
ADD COLUMN CITYID_NUM NUMBER(38,0);

UPDATE KN_LOGISTICS.SNOWSQL.APPLICATION_CITIES
SET CITYID_NUM = TO_NUMBER(CITYID);

SELECT CITYID, CITYID_NUM
FROM KN_LOGISTICS.SNOWSQL.APPLICATION_CITIES;

ALTER TABLE KN_LOGISTICS.SNOWSQL.APPLICATION_CITIES
DROP COLUMN CITYID;

ALTER TABLE KN_LOGISTICS.SNOWSQL.APPLICATION_CITIES
RENAME COLUMN CITYID_NUM TO CITYID;
// Check 10 rows of CITYID after updating the data type
SELECT CITYID FROM KN_LOGISTICS.SNOWSQL.APPLICATION_CITIES LIMIT 10;

// Check the whole table after updating the data type (no LIMIT is applied)
SELECT * FROM KN_LOGISTICS.SNOWSQL.APPLICATION_CITIES;

// COUNTRYID
ALTER TABLE KN_LOGISTICS.SNOWSQL.APPLICATION_CITIES
ADD COLUMN COUNTRYID_NUM NUMBER(38,0);

UPDATE KN_LOGISTICS.SNOWSQL.APPLICATION_CITIES
SET COUNTRYID_NUM = TO_NUMBER(COUNTRYID);

SELECT COUNTRYID, COUNTRYID_NUM
FROM KN_LOGISTICS.SNOWSQL.APPLICATION_CITIES;

ALTER TABLE KN_LOGISTICS.SNOWSQL.APPLICATION_CITIES
DROP COLUMN COUNTRYID;

ALTER TABLE KN_LOGISTICS.SNOWSQL.APPLICATION_CITIES
RENAME COLUMN COUNTRYID_NUM TO COUNTRYID;
// Check 10 rows of COUNTRYID after updating the data type
SELECT COUNTRYID FROM KN_LOGISTICS.SNOWSQL.APPLICATION_CITIES LIMIT 10;

// LATITUDE
-- Latitude values are expected to carry decimals, so the numeric column needs
-- a non-zero scale. With NUMBER(38,0) and a bare TO_NUMBER() (scale 0) every
-- value would be rounded to a whole degree.
ALTER TABLE KN_LOGISTICS.SNOWSQL.APPLICATION_CITIES
ADD COLUMN LATITUDE_NUM NUMBER(12,8);

UPDATE KN_LOGISTICS.SNOWSQL.APPLICATION_CITIES
SET LATITUDE_NUM = TO_NUMBER(LATITUDE, 12, 8);

-- Compare the original text value with the converted number side by side
SELECT LATITUDE, LATITUDE_NUM
FROM KN_LOGISTICS.SNOWSQL.APPLICATION_CITIES;

-- Swap the converted column in under the original name
ALTER TABLE KN_LOGISTICS.SNOWSQL.APPLICATION_CITIES
DROP COLUMN LATITUDE;

ALTER TABLE KN_LOGISTICS.SNOWSQL.APPLICATION_CITIES
RENAME COLUMN LATITUDE_NUM TO LATITUDE;
// Check 10 rows of LATITUDE after updating the data type
SELECT LATITUDE FROM KN_LOGISTICS.SNOWSQL.APPLICATION_CITIES LIMIT 10;

// LONGITUDE
-- Longitude values are expected to carry decimals, so the numeric column needs
-- a non-zero scale. With NUMBER(38,0) and a bare TO_NUMBER() (scale 0) every
-- value would be rounded to a whole degree.
ALTER TABLE KN_LOGISTICS.SNOWSQL.APPLICATION_CITIES
ADD COLUMN LONGITUDE_NUM NUMBER(12,8);

UPDATE KN_LOGISTICS.SNOWSQL.APPLICATION_CITIES
SET LONGITUDE_NUM = TO_NUMBER(LONGITUDE, 12, 8);

-- Compare the original text value with the converted number side by side
SELECT LONGITUDE, LONGITUDE_NUM
FROM KN_LOGISTICS.SNOWSQL.APPLICATION_CITIES;

-- Swap the converted column in under the original name
ALTER TABLE KN_LOGISTICS.SNOWSQL.APPLICATION_CITIES
DROP COLUMN LONGITUDE;

ALTER TABLE KN_LOGISTICS.SNOWSQL.APPLICATION_CITIES
RENAME COLUMN LONGITUDE_NUM TO LONGITUDE;
// Check 10 rows of LONGITUDE after updating the data type
SELECT LONGITUDE FROM KN_LOGISTICS.SNOWSQL.APPLICATION_CITIES LIMIT 10;

// LATESTRECORDEDPOPULATION
ALTER TABLE KN_LOGISTICS.SNOWSQL.APPLICATION_CITIES
ADD COLUMN LATESTRECORDEDPOPULATION_NUM NUMBER(38,0);

UPDATE KN_LOGISTICS.SNOWSQL.APPLICATION_CITIES
SET LATESTRECORDEDPOPULATION_NUM = TO_NUMBER(LATESTRECORDEDPOPULATION);

SELECT LATESTRECORDEDPOPULATION, LATESTRECORDEDPOPULATION_NUM
FROM KN_LOGISTICS.SNOWSQL.APPLICATION_CITIES;

ALTER TABLE KN_LOGISTICS.SNOWSQL.APPLICATION_CITIES
DROP COLUMN LATESTRECORDEDPOPULATION;

ALTER TABLE KN_LOGISTICS.SNOWSQL.APPLICATION_CITIES
RENAME COLUMN LATESTRECORDEDPOPULATION_NUM TO LATESTRECORDEDPOPULATION;
// Check 10 rows of LATESTRECORDEDPOPULATION after updating the data type
SELECT LATESTRECORDEDPOPULATION FROM KN_LOGISTICS.SNOWSQL.APPLICATION_CITIES LIMIT 10;

--------------------------------------------------------------
--ADDING OF PRIMARY KEYS TO TABLE
--------------------------------------------------------------
ALTER TABLE KN_LOGISTICS.SNOWSQL.APPLICATION_CITIES
ADD CONSTRAINT PK_APPLICATION_CITIES_CITYID
PRIMARY KEY (CITYID);

--------------------------------------------------------------
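--ERROR HANDLING (preview only: shows NULL CITYIDs replaced by previous CITYID + 1; this SELECT does not modify the table)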
--------------------------------------------------------------
WITH CTE AS (
    SELECT 
        CITYID, 
        CITYNAME, 
        COUNTRYID,
        LATITUDE,
        LONGITUDE,
        LATESTRECORDEDPOPULATION,
        LAG(CITYID) OVER (ORDER BY CITYID) AS prev_cityid,
        ROW_NUMBER() OVER (ORDER BY CITYID) AS row_num
    FROM KN_LOGISTICS.SNOWSQL.APPLICATION_CITIES
)
SELECT 
        CITYNAME, 
        COUNTRYID,
        LATITUDE,
        LONGITUDE,
        LATESTRECORDEDPOPULATION,
    CASE
        WHEN CITYID IS NULL THEN prev_cityid + 1
        ELSE CITYID
    END AS CITYID
FROM CTE
ORDER BY row_num;


-------------------------------------------------------
--Adding of foreign key to table
-------------------------------------------------------

-- Foreign Key: APPLICATION_CITIES.COUNTRYID -> APPLICATION_COUNTRIES_SEA.COUNTRYID
-- NOTE(review): the referenced clean table is (re)created by the
-- APPLICATION_COUNTRIES_SEA cell further down; confirm the intended run order.
ALTER TABLE KN_LOGISTICS.SNOWSQL.APPLICATION_CITIES
ADD CONSTRAINT FK_APPLICATION_CITIES_COUNTRYID_APPLICATION_COUNTRIES_SEA
FOREIGN KEY (COUNTRYID)
REFERENCES KN_LOGISTICS.SNOWSQL.APPLICATION_COUNTRIES_SEA(COUNTRYID);

-- Final preview of the cleaned table. Fully qualified like every other
-- statement in this cell, so it does not depend on the session's current schema.
SELECT * FROM KN_LOGISTICS.SNOWSQL.APPLICATION_CITIES LIMIT 20;


#### Cleaning APPLICATION_COUNTRIES_SEA

In [None]:
-- Create a clean table with the same structure as the raw table
CREATE OR REPLACE TABLE KN_LOGISTICS.SNOWSQL.APPLICATION_COUNTRIES_SEA AS
SELECT
  CAST(CASE WHEN COUNTRYNAME = 'NULL' THEN NULL ELSE COUNTRYNAME END AS VARCHAR(60)) AS COUNTRYNAME,
  CAST(CASE WHEN FORMALNAME = 'NULL' THEN NULL ELSE FORMALNAME END AS VARCHAR(60)) AS FORMALNAME,
  CAST(CASE WHEN CONTINENT = 'NULL' THEN NULL ELSE CONTINENT END AS VARCHAR(30)) AS CONTINENT,
  CAST(CASE WHEN REGION = 'NULL' THEN NULL ELSE REGION END AS VARCHAR(30)) AS REGION,
  CAST(CASE WHEN SUBREGION = 'NULL' THEN NULL ELSE SUBREGION END AS VARCHAR(30)) AS SUBREGION,
  CAST(CASE WHEN COUNTRYID = 'NULL' THEN NULL ELSE COUNTRYID END AS VARCHAR(38)) AS COUNTRYID,
  CAST(CASE WHEN LATESTRECORDEDPOPULATION = 'NULL' THEN NULL ELSE LATESTRECORDEDPOPULATION END AS VARCHAR(38)) AS LATESTRECORDEDPOPULATION
FROM KN_LOGISTICS.SNOWSQL.APPLICATION_COUNTRIES_SEA_RAW;

-- Verify the clean table
SELECT *
FROM KN_LOGISTICS.SNOWSQL.APPLICATION_COUNTRIES_SEA;

--------------------------------------------------------------
--CONVERSION OF DATATYPES 
--------------------------------------------------------------
//COUNTRYID
ALTER TABLE KN_LOGISTICS.SNOWSQL.APPLICATION_COUNTRIES_SEA
ADD COLUMN COUNTRYID_NUM NUMBER(38,0);

UPDATE KN_LOGISTICS.SNOWSQL.APPLICATION_COUNTRIES_SEA
SET COUNTRYID_NUM = TO_NUMBER(COUNTRYID);

SELECT COUNTRYID, COUNTRYID_NUM
FROM KN_LOGISTICS.SNOWSQL.APPLICATION_COUNTRIES_SEA;

ALTER TABLE KN_LOGISTICS.SNOWSQL.APPLICATION_COUNTRIES_SEA
DROP COLUMN COUNTRYID;

ALTER TABLE KN_LOGISTICS.SNOWSQL.APPLICATION_COUNTRIES_SEA
RENAME COLUMN COUNTRYID_NUM TO COUNTRYID;
// Check 10 rows of COUNTRYID after updating the data type
SELECT COUNTRYID FROM KN_LOGISTICS.SNOWSQL.APPLICATION_COUNTRIES_SEA LIMIT 10;

// Check the whole table after updating the data type (no LIMIT is applied)
SELECT * FROM KN_LOGISTICS.SNOWSQL.APPLICATION_COUNTRIES_SEA;

// LATESTRECORDEDPOPULATION
ALTER TABLE KN_LOGISTICS.SNOWSQL.APPLICATION_COUNTRIES_SEA
ADD COLUMN LATESTRECORDEDPOPULATION_NUM NUMBER(38,0);

UPDATE KN_LOGISTICS.SNOWSQL.APPLICATION_COUNTRIES_SEA
SET LATESTRECORDEDPOPULATION_NUM = TO_NUMBER(LATESTRECORDEDPOPULATION);

SELECT LATESTRECORDEDPOPULATION, LATESTRECORDEDPOPULATION_NUM
FROM KN_LOGISTICS.SNOWSQL.APPLICATION_COUNTRIES_SEA;

ALTER TABLE KN_LOGISTICS.SNOWSQL.APPLICATION_COUNTRIES_SEA
DROP COLUMN LATESTRECORDEDPOPULATION;

ALTER TABLE KN_LOGISTICS.SNOWSQL.APPLICATION_COUNTRIES_SEA
RENAME COLUMN LATESTRECORDEDPOPULATION_NUM TO LATESTRECORDEDPOPULATION;
// Check 10 rows of LATESTRECORDEDPOPULATION after updating the data type
SELECT LATESTRECORDEDPOPULATION FROM KN_LOGISTICS.SNOWSQL.APPLICATION_COUNTRIES_SEA LIMIT 10;


--------------------------------------------------------------
--ADDING OF PRIMARY KEYS TO TABLE
--------------------------------------------------------------

ALTER TABLE KN_LOGISTICS.SNOWSQL.APPLICATION_COUNTRIES_SEA
ADD CONSTRAINT PK_APPLICATION_COUNTRIES_SEA_COUNTRYID
PRIMARY KEY (COUNTRYID);

--------------------------------------------------------------
--Adding of unique key to table
--------------------------------------------------------------
ALTER TABLE KN_LOGISTICS.SNOWSQL.APPLICATION_COUNTRIES_SEA
ADD CONSTRAINT UK_APPLICATION_COUNTRIES_SEA_COUNTRYNAME
UNIQUE (COUNTRYNAME);

ALTER TABLE KN_LOGISTICS.SNOWSQL.APPLICATION_COUNTRIES_SEA
ADD CONSTRAINT UK_APPLICATION_COUNTRIES_SEA_FORMALNAME
UNIQUE (FORMALNAME);

-------------------------------------------------------
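--ERROR HANDLING (preview only: shows NULL COUNTRYIDs replaced by previous COUNTRYID + 1; this SELECT does not modify the table)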
-------------------------------------------------------
WITH CTE AS (
    SELECT 
        COUNTRYNAME,
        FORMALNAME,
        CONTINENT,
        REGION,
        SUBREGION,
        COUNTRYID,
        LATESTRECORDEDPOPULATION,
        LAG(COUNTRYID) OVER (ORDER BY COUNTRYID) AS prev_countryid,
        ROW_NUMBER() OVER (ORDER BY COUNTRYID) AS row_num
    FROM KN_LOGISTICS.SNOWSQL.APPLICATION_COUNTRIES_SEA
)
SELECT 
    COUNTRYNAME,
    FORMALNAME,
    CONTINENT,
    REGION,
    SUBREGION,
    CASE
        WHEN COUNTRYID IS NULL THEN prev_countryid + 1
        ELSE COUNTRYID
    END AS COUNTRYID,
    LATESTRECORDEDPOPULATION
FROM CTE
ORDER BY row_num;

-------------------------------------------------------
--Adding of foreign key to table
-------------------------------------------------------
--THERE ARE NO FOREIGN KEYS FOR THIS TABLE


#### Cleaning APPLICATION_DELIVERYMETHODS

In [None]:
-- Create a clean table with the same structure as the raw table but we pull data from the raw table into this table (final table)
CREATE OR REPLACE TABLE KN_LOGISTICS.SNOWSQL.APPLICATION_DELIVERYMETHODS AS
SELECT
  CAST(CASE WHEN DELIVERYMETHODID = 'NULL' THEN NULL ELSE DELIVERYMETHODID END AS VARCHAR(38)) AS DELIVERYMETHODID,
  CAST(CASE WHEN DELIVERYMETHODNAME = 'NULL' THEN NULL ELSE DELIVERYMETHODNAME END AS VARCHAR(50)) AS DELIVERYMETHODNAME
FROM KN_LOGISTICS.SNOWSQL.APPLICATION_DELIVERYMETHODS_RAW;

-- ADD A NEW DELIVERYMETHOD for those without a delivery method
INSERT INTO KN_LOGISTICS.SNOWSQL.APPLICATION_DELIVERYMETHODS (DELIVERYMETHODID, DELIVERYMETHODNAME)
VALUES
    (11, 'None');

-- Total number of rows with a NULL in any column (the 'NULL' strings were converted to real NULLs above)
SELECT COUNT(*) FROM KN_LOGISTICS.SNOWSQL.APPLICATION_DELIVERYMETHODS
WHERE DELIVERYMETHODID IS NULL
OR DELIVERYMETHODNAME IS NULL;
    
-- Per-column NULL counts for all columns
SELECT 
    COUNT(CASE WHEN DELIVERYMETHODID IS NULL THEN 1 END) AS count_DELIVERYMETHODID_NULL,
    COUNT(CASE WHEN DELIVERYMETHODNAME IS NULL THEN 1 END) AS count_DELIVERYMETHODNAME_NULL
FROM KN_LOGISTICS.SNOWSQL.APPLICATION_DELIVERYMETHODS;


--DELIVERYMETHODID
ALTER TABLE KN_LOGISTICS.SNOWSQL.APPLICATION_DELIVERYMETHODS
ADD COLUMN DELIVERYMETHODID_NUM NUMBER(38,0);

UPDATE KN_LOGISTICS.SNOWSQL.APPLICATION_DELIVERYMETHODS
SET DELIVERYMETHODID_NUM = TO_NUMBER(DELIVERYMETHODID);

SELECT DELIVERYMETHODID, DELIVERYMETHODID_NUM
FROM KN_LOGISTICS.SNOWSQL.APPLICATION_DELIVERYMETHODS;

ALTER TABLE KN_LOGISTICS.SNOWSQL.APPLICATION_DELIVERYMETHODS
DROP COLUMN DELIVERYMETHODID;

ALTER TABLE KN_LOGISTICS.SNOWSQL.APPLICATION_DELIVERYMETHODS
RENAME COLUMN DELIVERYMETHODID_NUM TO DELIVERYMETHODID;

--check the number of rows of DELIVERYMETHODID = 1(random DELIVERYMETHODID from data), cross checked with excel sheet
SELECT COUNT(*) AS row_count
FROM KN_LOGISTICS.SNOWSQL.APPLICATION_DELIVERYMETHODS
WHERE DELIVERYMETHODID = 1;


-- primary key
ALTER TABLE KN_LOGISTICS.SNOWSQL.APPLICATION_DELIVERYMETHODS
ADD CONSTRAINT PK_APPLICATION_DELIVERYMETHODS_DELIVERYMETHODID
PRIMARY KEY (DELIVERYMETHODID);

-- unique key
ALTER TABLE KN_LOGISTICS.SNOWSQL.APPLICATION_DELIVERYMETHODS
ADD CONSTRAINT UK_APPLICATION_DELIVERYMETHODS_DELIVERYMETHODNAME
UNIQUE (DELIVERYMETHODNAME);

-- null error handling for primary key: previews NULL primary keys replaced by the previous record's key + 1 (this SELECT does not modify the table)
WITH CTE AS (
    SELECT 
        DELIVERYMETHODID,
        DELIVERYMETHODNAME,
        LAG(DELIVERYMETHODID) OVER (ORDER BY DELIVERYMETHODID) AS prev_DELIVERYMETHODID,
        ROW_NUMBER() OVER (ORDER BY DELIVERYMETHODID) AS row_num
    FROM KN_LOGISTICS.SNOWSQL.APPLICATION_DELIVERYMETHODS
)
SELECT 
        DELIVERYMETHODNAME,
    CASE
        WHEN DELIVERYMETHODID IS NULL THEN prev_DELIVERYMETHODID + 1
        ELSE DELIVERYMETHODID
    END AS DELIVERYMETHODID
FROM CTE
ORDER BY row_num;


-- foreign keys
ALTER TABLE KN_LOGISTICS.SNOWSQL.SALES_CUSTOMERS
ADD CONSTRAINT FK_Sales_Customers_DeliveryMethodID_Application_DeliveryMethods
FOREIGN KEY (DeliveryMethodID)
REFERENCES KN_LOGISTICS.SNOWSQL.APPLICATION_DELIVERYMETHODS(DeliveryMethodID);

ALTER TABLE KN_LOGISTICS.SNOWSQL.Purchasing_Suppliers
ADD CONSTRAINT FK_Purchasing_Suppliers_DeliveryMethodID_Application_DeliveryMethods
FOREIGN KEY (DELIVERYMETHODID)
REFERENCES KN_LOGISTICS.SNOWSQL.APPLICATION_DELIVERYMETHODS(DeliveryMethodID);

ALTER TABLE KN_LOGISTICS.SNOWSQL.Sales_Invoices
ADD CONSTRAINT FK_Sales_Invoices_DeliveryMethodID_Application_DeliveryMethods
FOREIGN KEY (DELIVERYMETHODID)
REFERENCES KN_LOGISTICS.SNOWSQL.APPLICATION_DELIVERYMETHODS(DeliveryMethodID);

ALTER TABLE KN_LOGISTICS.SNOWSQL.Purchasing_PurchaseOrders
ADD CONSTRAINT FK_Purchasing_PurchaseOrders_DeliveryMethodID_Application_DeliveryMethods
FOREIGN KEY (DELIVERYMETHODID)
REFERENCES KN_LOGISTICS.SNOWSQL.APPLICATION_DELIVERYMETHODS(DeliveryMethodID);

#### Cleaning APPLICATION_PAYMENTMETHODS

In [None]:
-- Create a clean table with the same structure as the raw table
CREATE OR REPLACE TABLE KN_LOGISTICS.SNOWSQL.APPLICATION_PAYMENTMETHODS AS
SELECT
  CAST(CASE WHEN PAYMENTMETHODID = 'NULL' THEN NULL ELSE PAYMENTMETHODID END AS VARCHAR(38)) AS PAYMENTMETHODID,
  CAST(CASE WHEN PAYMENTMETHODNAME = 'NULL' THEN NULL ELSE PAYMENTMETHODNAME END AS VARCHAR(50)) AS PAYMENTMETHODNAME
FROM KN_LOGISTICS.SNOWSQL.APPLICATION_PAYMENTMETHODS_RAW;

-- Verify the clean table
SELECT *
FROM KN_LOGISTICS.SNOWSQL.APPLICATION_PAYMENTMETHODS;


---------------------------------------------
-- Data type conversion (to number)
---------------------------------------------
ALTER TABLE KN_LOGISTICS.SNOWSQL.APPLICATION_PAYMENTMETHODS
ADD COLUMN PAYMENTMETHODID_NUM NUMBER(38,0);

UPDATE KN_LOGISTICS.SNOWSQL.APPLICATION_PAYMENTMETHODS
SET PAYMENTMETHODID_NUM = TO_NUMBER(PAYMENTMETHODID);

SELECT PAYMENTMETHODID, PAYMENTMETHODID_NUM
FROM KN_LOGISTICS.SNOWSQL.APPLICATION_PAYMENTMETHODS;

ALTER TABLE KN_LOGISTICS.SNOWSQL.APPLICATION_PAYMENTMETHODS
DROP COLUMN PAYMENTMETHODID;

ALTER TABLE KN_LOGISTICS.SNOWSQL.APPLICATION_PAYMENTMETHODS
RENAME COLUMN PAYMENTMETHODID_NUM TO PAYMENTMETHODID;


---------------------------------------------
---Adding of primary key to table
---------------------------------------------
ALTER TABLE KN_LOGISTICS.SNOWSQL.APPLICATION_PAYMENTMETHODS
ADD CONSTRAINT PK_APPLICATION_PAYMENTMETHODS_PAYMENTMETHODID
PRIMARY KEY (PAYMENTMETHODID);


----------------------------------------------
--Adding of unique key to table
----------------------------------------------
ALTER TABLE KN_LOGISTICS.SNOWSQL.APPLICATION_PAYMENTMETHODS
ADD CONSTRAINT UK_APPLICATION_PAYMENTMETHODS_PAYMENTMETHODNAME
UNIQUE (PAYMENTMETHODNAME);

-------------------------------------------------------
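--ERROR HANDLING (preview only: shows NULL PAYMENTMETHODIDs replaced by previous PAYMENTMETHODID + 1; this SELECT does not modify the table)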
-------------------------------------------------------
WITH CTE AS (
    SELECT 
        PAYMENTMETHODID,
        PAYMENTMETHODNAME,
        LAG(PAYMENTMETHODID) OVER (ORDER BY PAYMENTMETHODID) AS prev_paymentmethodid,
        ROW_NUMBER() OVER (ORDER BY PAYMENTMETHODID) AS row_num
    FROM KN_LOGISTICS.SNOWSQL.APPLICATION_PAYMENTMETHODS
)
SELECT 
    PAYMENTMETHODNAME,
    CASE
        WHEN PAYMENTMETHODID IS NULL THEN prev_paymentmethodid + 1
        ELSE PAYMENTMETHODID
    END AS PAYMENTMETHODID
FROM CTE
ORDER BY row_num;


-------------------------------------------------------
--Adding of foreign key to table
-------------------------------------------------------
--THERE ARE NO FOREIGN KEYS IN THIS TABLE




#### Cleaning APPLICATION_TRANSACTIONTYPES

In [None]:
-- Create a clean table with the same structure as the raw table
CREATE OR REPLACE TABLE KN_LOGISTICS.SNOWSQL.APPLICATION_TRANSACTIONTYPES AS
SELECT
  CAST(CASE WHEN TRANSACTIONTYPEID = 'NULL' THEN NULL ELSE TRANSACTIONTYPEID END AS VARCHAR(38)) AS TRANSACTIONTYPEID,
  CAST(CASE WHEN TRANSACTIONTYPENAME = 'NULL' THEN NULL ELSE TRANSACTIONTYPENAME END AS VARCHAR(50)) AS TRANSACTIONTYPENAME
FROM KN_LOGISTICS.SNOWSQL.APPLICATION_TRANSACTIONTYPES_RAW;

-- Verify the clean table
SELECT *
FROM KN_LOGISTICS.SNOWSQL.APPLICATION_TRANSACTIONTYPES;

---------------------------------------------
// Data type conversion (to number): TRANSACTIONTYPEID
---------------------------------------------
// TRANSACTIONTYPEID
ALTER TABLE KN_LOGISTICS.SNOWSQL.APPLICATION_TRANSACTIONTYPES
ADD COLUMN TRANSACTIONTYPEID_NUM NUMBER(38,0);

UPDATE KN_LOGISTICS.SNOWSQL.APPLICATION_TRANSACTIONTYPES
SET TRANSACTIONTYPEID_NUM = TO_NUMBER(TRANSACTIONTYPEID);

SELECT TRANSACTIONTYPEID, TRANSACTIONTYPEID_NUM
FROM KN_LOGISTICS.SNOWSQL.APPLICATION_TRANSACTIONTYPES;

ALTER TABLE KN_LOGISTICS.SNOWSQL.APPLICATION_TRANSACTIONTYPES
DROP COLUMN TRANSACTIONTYPEID;

ALTER TABLE KN_LOGISTICS.SNOWSQL.APPLICATION_TRANSACTIONTYPES
RENAME COLUMN TRANSACTIONTYPEID_NUM TO TRANSACTIONTYPEID;
// Check 10 rows of TRANSACTIONTYPEID after updating the data type
SELECT TRANSACTIONTYPEID FROM KN_LOGISTICS.SNOWSQL.APPLICATION_TRANSACTIONTYPES LIMIT 10;

// Check the whole table after updating the data type (no LIMIT is applied)
SELECT * FROM KN_LOGISTICS.SNOWSQL.APPLICATION_TRANSACTIONTYPES;

---------------------------------------------------------------
--Adding of primary key to table
---------------------------------------------------------------
ALTER TABLE KN_LOGISTICS.SNOWSQL.APPLICATION_TRANSACTIONTYPES
ADD CONSTRAINT PK_APPLICATION_TRANSACTIONTYPES_TRANSACTIONTYPEID
PRIMARY KEY (TRANSACTIONTYPEID);

----------------------------------------------------------------
--Adding of unique key to table
----------------------------------------------------------------
ALTER TABLE KN_LOGISTICS.SNOWSQL.APPLICATION_TRANSACTIONTYPES
ADD CONSTRAINT UK_APPLICATION_TRANSACTIONTYPES_TRANSACTIONTYPENAME
UNIQUE (TRANSACTIONTYPENAME);

-------------------------------------------------------
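-- ERROR HANDLING (preview only: shows NULL TRANSACTIONTYPEIDs replaced by previous TRANSACTIONTYPEID + 1; this SELECT does not modify the table)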
-------------------------------------------------------
WITH CTE AS (
    SELECT 
        TRANSACTIONTYPEID,
        TRANSACTIONTYPENAME,
        LAG(TRANSACTIONTYPEID) OVER (ORDER BY TRANSACTIONTYPEID) AS prev_transactiontypeid,
        ROW_NUMBER() OVER (ORDER BY TRANSACTIONTYPEID) AS row_num
    FROM KN_LOGISTICS.SNOWSQL.APPLICATION_TRANSACTIONTYPES
)
SELECT 
    TRANSACTIONTYPENAME,
    CASE
        WHEN TRANSACTIONTYPEID IS NULL THEN prev_transactiontypeid + 1
        ELSE TRANSACTIONTYPEID
    END AS TRANSACTIONTYPEID
FROM CTE
ORDER BY row_num;



-------------------------------------------------------
--Adding of foreign key to table
-------------------------------------------------------
-- THERE IS NO FOREIGN KEY IN THIS TABLE


#### Cleaning PURCHASING_SUPPLIERCATEGORIES

In [None]:
-- Create a clean table with the same structure as the raw table
CREATE OR REPLACE TABLE KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERCATEGORIES AS
SELECT
  CAST(CASE WHEN SUPPLIERCATEGORYID = 'NULL' THEN NULL ELSE SUPPLIERCATEGORYID END AS VARCHAR(38)) AS SUPPLIERCATEGORYID,
  CAST(CASE WHEN SUPPLIERCATEGORYNAME = 'NULL' THEN NULL ELSE SUPPLIERCATEGORYNAME END AS VARCHAR(50)) AS SUPPLIERCATEGORYNAME
FROM KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERCATEGORIES_RAW;

-- Total Number of Null values for all columns
SELECT COUNT(*) FROM KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERCATEGORIES
WHERE SUPPLIERCATEGORYID IS NULL
OR SUPPLIERCATEGORYNAME IS NULL;
    
-- Per-column NULL counts for all columns
SELECT 
    COUNT(CASE WHEN SUPPLIERCATEGORYID IS NULL THEN 1 END) AS count_SUPPLIERCATEGORYID_NULL,
    COUNT(CASE WHEN SUPPLIERCATEGORYNAME IS NULL THEN 1 END) AS count_SUPPLIERCATEGORYNAME_NULL
FROM KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERCATEGORIES;


-- data type conversion (to number) SUPPLIERCATEGORYID 

ALTER TABLE KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERCATEGORIES
ADD COLUMN SUPPLIERCATEGORYID_NUM NUMBER(38,0);

UPDATE KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERCATEGORIES
SET SUPPLIERCATEGORYID_NUM = TO_NUMBER(SUPPLIERCATEGORYID);

SELECT SUPPLIERCATEGORYID, SUPPLIERCATEGORYID_NUM
FROM KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERCATEGORIES;

ALTER TABLE KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERCATEGORIES
DROP COLUMN SUPPLIERCATEGORYID;

ALTER TABLE KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERCATEGORIES
RENAME COLUMN SUPPLIERCATEGORYID_NUM TO SUPPLIERCATEGORYID;


-- primary key
ALTER TABLE KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERCATEGORIES
ADD CONSTRAINT PK_PURCHASING_SUPPLIERCATEGORIES_SUPPLIERCATEGORYID
PRIMARY KEY (SUPPLIERCATEGORYID);

-- unique key
ALTER TABLE KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERCATEGORIES
ADD CONSTRAINT UK_PURCHASING_SUPPLIERCATEGORIES_SUPPLIERCATEGORYNAME
UNIQUE (SUPPLIERCATEGORYNAME);


-- null error handling for primary key: previews NULL primary keys replaced by the previous record's key + 1 (this SELECT does not modify the table)
WITH CTE AS (
    SELECT
        SUPPLIERCATEGORYID,
        SUPPLIERCATEGORYNAME,
        LAG(SUPPLIERCATEGORYID) OVER (ORDER BY SUPPLIERCATEGORYID) AS prev_SUPPLIERCATEGORYID,
        ROW_NUMBER() OVER (ORDER BY SUPPLIERCATEGORYID) AS row_num
    FROM KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERCATEGORIES
)
SELECT 
    SUPPLIERCATEGORYNAME,
    CASE
        WHEN SUPPLIERCATEGORYID IS NULL THEN prev_SUPPLIERCATEGORYID + 1
        ELSE SUPPLIERCATEGORYID
    END AS SUPPLIERCATEGORYID
FROM CTE
ORDER BY row_num;

-- foreign key
ALTER TABLE KN_LOGISTICS.SNOWSQL.PURCHASING_Suppliers
ADD CONSTRAINT FK_Purchasing_Suppliers_SupplierCategoryID_Purchasing_SupplierCategories
FOREIGN KEY (SupplierCategoryID)
REFERENCES KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERCATEGORIES(SupplierCategoryID);

#### Cleaning PURCHASING_SUPPLIERS

In [None]:
-- Create a clean table with the same structure as the raw table
CREATE OR REPLACE TABLE KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERS AS
SELECT
  CAST(CASE WHEN SUPPLIERID = 'NULL' THEN NULL ELSE SUPPLIERID END AS VARCHAR(38)) AS SUPPLIERID,
  CAST(CASE WHEN SUPPLIERNAME = 'NULL' THEN NULL ELSE SUPPLIERNAME END AS VARCHAR(100)) AS SUPPLIERNAME,
  CAST(CASE WHEN SUPPLIERCATEGORYID = 'NULL' THEN NULL ELSE SUPPLIERCATEGORYID END AS VARCHAR(38)) AS SUPPLIERCATEGORYID,
  CAST(CASE WHEN PRIMARYCONTACTPERSONID = 'NULL' THEN NULL ELSE PRIMARYCONTACTPERSONID END AS VARCHAR(38)) AS PRIMARYCONTACTPERSONID,
  CAST(CASE WHEN ALTERNATECONTACTPERSONID = 'NULL' THEN NULL ELSE ALTERNATECONTACTPERSONID END AS VARCHAR(38)) AS ALTERNATECONTACTPERSONID,
  CAST(CASE WHEN DELIVERYMETHODID = 'NULL' THEN NULL ELSE DELIVERYMETHODID END AS VARCHAR(38)) AS DELIVERYMETHODID,
  CAST(CASE WHEN DELIVERYCITYID = 'NULL' THEN NULL ELSE DELIVERYCITYID END AS VARCHAR(38)) AS DELIVERYCITYID,
  CAST(CASE WHEN POSTALCITYID = 'NULL' THEN NULL ELSE POSTALCITYID END AS VARCHAR(38)) AS POSTALCITYID,
  CAST(CASE WHEN SUPPLIERREFERENCE = 'NULL' THEN NULL ELSE SUPPLIERREFERENCE END AS VARCHAR(20)) AS SUPPLIERREFERENCE,
  CAST(CASE WHEN PAYMENTDAYS = 'NULL' THEN NULL ELSE PAYMENTDAYS END AS VARCHAR(38)) AS PAYMENTDAYS,
  CAST(CASE WHEN PHONENUMBER = 'NULL' THEN NULL ELSE PHONENUMBER END AS VARCHAR(20)) AS PHONENUMBER,
  CAST(CASE WHEN WEBSITEURL = 'NULL' THEN NULL ELSE WEBSITEURL END AS VARCHAR(256)) AS WEBSITEURL,
  CAST(CASE WHEN DELIVERYADDRESSLINE = 'NULL' THEN NULL ELSE DELIVERYADDRESSLINE END AS VARCHAR(60)) AS DELIVERYADDRESSLINE,
  CAST(CASE WHEN DELIVERYLOCATIONLAT = 'NULL' THEN NULL ELSE DELIVERYLOCATIONLAT END AS VARCHAR(60)) AS DELIVERYLOCATIONLAT,
  CAST(CASE WHEN DELIVERYLOCATIONLONG = 'NULL' THEN NULL ELSE DELIVERYLOCATIONLONG END AS VARCHAR(60)) AS DELIVERYLOCATIONLONG
FROM KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERS_RAW;

-- DROPPING 5 COLUMNS - DELIVERYCITYID, POSTALCITYID, DELIVERYADDRESSLINE, DELIVERYLOCATIONLAT, DELIVERYLOCATIONLONG
ALTER TABLE KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERS
DROP COLUMN DELIVERYCITYID, POSTALCITYID, DELIVERYADDRESSLINE, DELIVERYLOCATIONLAT, DELIVERYLOCATIONLONG;

---------------------------------------------
-- data type conversion (to number)
---------------------------------------------
ALTER TABLE KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERS
ADD COLUMN SUPPLIERID_NUM NUMBER(38,0);

UPDATE KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERS
SET SUPPLIERID_NUM = TO_NUMBER(SUPPLIERID);

SELECT SUPPLIERID, SUPPLIERID_NUM
FROM KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERS;

ALTER TABLE KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERS
DROP COLUMN SUPPLIERID;

ALTER TABLE KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERS
RENAME COLUMN SUPPLIERID_NUM TO SUPPLIERID;


---------------------------------------------
-- data type conversion (to number)
---------------------------------------------
ALTER TABLE KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERS
ADD COLUMN DELIVERYMETHODID_NUM NUMBER(38,0);

UPDATE KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERS
SET DELIVERYMETHODID_NUM = TO_NUMBER(DELIVERYMETHODID);

SELECT DELIVERYMETHODID, DELIVERYMETHODID_NUM
FROM KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERS;

ALTER TABLE KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERS
DROP COLUMN DELIVERYMETHODID;

ALTER TABLE KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERS
RENAME COLUMN DELIVERYMETHODID_NUM TO DELIVERYMETHODID;

--------------------------------------------------
-- data type conversion (to number) SUPPLIERCATEGORYID 
--------------------------------------------------

ALTER TABLE KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERS
ADD COLUMN SUPPLIERCATEGORYID_NUM NUMBER(38,0);

UPDATE KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERS
SET SUPPLIERCATEGORYID_NUM = TO_NUMBER(SUPPLIERCATEGORYID);

SELECT SUPPLIERCATEGORYID, SUPPLIERCATEGORYID_NUM
FROM KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERS;

ALTER TABLE KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERS
DROP COLUMN SUPPLIERCATEGORYID;

ALTER TABLE KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERS
RENAME COLUMN SUPPLIERCATEGORYID_NUM TO SUPPLIERCATEGORYID;

--------------------------------------------------
-- data type conversion (to number) PAYMENTDAYS 
--------------------------------------------------

ALTER TABLE KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERS
ADD COLUMN PAYMENTDAYS_NUM NUMBER(38,0);

UPDATE KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERS
SET PAYMENTDAYS_NUM = TO_NUMBER(PAYMENTDAYS);

SELECT PAYMENTDAYS, PAYMENTDAYS_NUM
FROM KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERS;

ALTER TABLE KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERS
DROP COLUMN PAYMENTDAYS;

ALTER TABLE KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERS
RENAME COLUMN PAYMENTDAYS_NUM TO PAYMENTDAYS;

--------------------------------------------------
-- data type conversion (to number) PRIMARYCONTACTPERSONID
--
-- Replaces the existing PRIMARYCONTACTPERSONID column with a NUMBER(38,0)
-- column of the same name using an add / fill / drop / rename sequence. The
-- order of the statements matters: the old column is dropped before the rename.
-- NOTE(review): TO_NUMBER is expected to raise on text that is not numeric;
-- confirm the source values are clean before running.
--------------------------------------------------

-- Add the numeric staging column.
ALTER TABLE KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERS
ADD COLUMN PRIMARYCONTACTPERSONID_NUM NUMBER(38,0);

-- Fill the staging column with the converted value of every row.
UPDATE KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERS
SET PRIMARYCONTACTPERSONID_NUM = TO_NUMBER(PRIMARYCONTACTPERSONID);

-- Show old and new values side by side for a manual check.
SELECT PRIMARYCONTACTPERSONID, PRIMARYCONTACTPERSONID_NUM
FROM KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERS;

-- Remove the original column ...
ALTER TABLE KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERS
DROP COLUMN PRIMARYCONTACTPERSONID;

-- ... and give the numeric column the original name.
ALTER TABLE KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERS
RENAME COLUMN PRIMARYCONTACTPERSONID_NUM TO PRIMARYCONTACTPERSONID;


--------------------------------------------------
-- data type conversion (to number) ALTERNATECONTACTPERSONID
--
-- Replaces the existing ALTERNATECONTACTPERSONID column with a NUMBER(38,0)
-- column of the same name using an add / fill / drop / rename sequence. The
-- order of the statements matters: the old column is dropped before the rename.
-- NOTE(review): TO_NUMBER is expected to raise on text that is not numeric;
-- confirm the source values are clean before running.
--------------------------------------------------

-- Add the numeric staging column.
ALTER TABLE KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERS
ADD COLUMN ALTERNATECONTACTPERSONID_NUM NUMBER(38,0);

-- Fill the staging column with the converted value of every row.
UPDATE KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERS
SET ALTERNATECONTACTPERSONID_NUM = TO_NUMBER(ALTERNATECONTACTPERSONID);

-- Show old and new values side by side for a manual check.
SELECT ALTERNATECONTACTPERSONID, ALTERNATECONTACTPERSONID_NUM
FROM KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERS;

-- Remove the original column ...
ALTER TABLE KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERS
DROP COLUMN ALTERNATECONTACTPERSONID;

-- ... and give the numeric column the original name.
ALTER TABLE KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERS
RENAME COLUMN ALTERNATECONTACTPERSONID_NUM TO ALTERNATECONTACTPERSONID;

-- Check 10 rows of ALTERNATECONTACTPERSONID after updating the data type
SELECT ALTERNATECONTACTPERSONID FROM KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERS LIMIT 10;


-- primary keys
-- Declares SUPPLIERID as the primary key of PURCHASING_SUPPLIERS.
-- NOTE(review): Snowflake is understood to record PRIMARY KEY / UNIQUE
-- constraints on standard tables without enforcing them, so duplicates or
-- NULLs would not be rejected by this statement alone - confirm.
ALTER TABLE KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERS
ADD CONSTRAINT PK_PURCHASING_SUPPLIERS_SUPPLIERID
PRIMARY KEY (SUPPLIERID);

-- unique keys
-- Declares SUPPLIERNAME as unique within PURCHASING_SUPPLIERS.
ALTER TABLE KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERS
ADD CONSTRAINT UK_PURCHASING_SUPPLIERS_SUPPLIERNAME
UNIQUE (SUPPLIERNAME);


-- null error handling for primary key
-- Preview query only: it returns the rows with imputed ids but does not write
-- anything back to PURCHASING_SUPPLIERS.
--
-- Rows whose SUPPLIERID is NULL receive consecutive ids that continue after
-- the largest existing SUPPLIERID (max + 1, max + 2, ...). For a single NULL
-- row this equals "previous record + 1". A LAG-based lookup is not used
-- because NULLs sort last in ascending order: from the second NULL row on,
-- the previous row's SUPPLIERID is itself NULL and the imputed value would
-- stay NULL.
-- Assumes SUPPLIERID has already been converted to a numeric type.
WITH CTE AS (
    SELECT
        SUPPLIERID,
        SUPPLIERNAME,
        SUPPLIERREFERENCE,
        PHONENUMBER,
        WEBSITEURL,
        DELIVERYMETHODID,
        SUPPLIERCATEGORYID,
        PAYMENTDAYS,
        PRIMARYCONTACTPERSONID,
        ALTERNATECONTACTPERSONID,
        -- Largest existing id; 0 when no row has an id yet, so imputed ids start at 1.
        COALESCE(MAX(SUPPLIERID) OVER (), 0) AS max_SUPPLIERID,
        -- COUNT(column) ignores NULLs: number of rows that already have an id.
        COUNT(SUPPLIERID) OVER () AS non_null_count,
        -- NULLS LAST is spelled out so NULL-id rows are always numbered after real ids,
        -- independent of any session-level default for NULL ordering.
        ROW_NUMBER() OVER (ORDER BY SUPPLIERID ASC NULLS LAST) AS row_num
    FROM KN_LOGISTICS.SNOWSQL.PURCHASING_SUPPLIERS
)
SELECT
        SUPPLIERNAME,
        SUPPLIERREFERENCE,
        PHONENUMBER,
        WEBSITEURL,
        DELIVERYMETHODID,
        SUPPLIERCATEGORYID,
        PAYMENTDAYS,
        PRIMARYCONTACTPERSONID,
        ALTERNATECONTACTPERSONID,
    CASE
        -- row_num - non_null_count is 1 for the first NULL row, 2 for the second, ...
        WHEN SUPPLIERID IS NULL THEN max_SUPPLIERID + (row_num - non_null_count)
        ELSE SUPPLIERID
    END AS SUPPLIERID
FROM CTE
ORDER BY row_num;


-- Foreign Key Constraints for Purchasing.Suppliers
-- The first four statements declare outgoing references from
-- Purchasing_Suppliers; the last four declare references from other tables
-- to Purchasing_Suppliers(SupplierID).
-- NOTE(review): Application_People, Warehouse_StockItems,
-- Purchasing_SupplierTransactions, Purchasing_PurchaseOrders and
-- Warehouse_StockItemTransactions are not in this notebook's TABLE_DICT;
-- presumably they are created elsewhere - verify they exist (with the
-- referenced keys) before this cell runs.

-- Suppliers.DeliveryMethodID -> Application_DeliveryMethods.DeliveryMethodID
ALTER TABLE KN_LOGISTICS.SNOWSQL.Purchasing_Suppliers
ADD CONSTRAINT FK_Purchasing_Suppliers_DeliveryMethodID_Application_DeliveryMethods
FOREIGN KEY (DeliveryMethodID)
REFERENCES KN_LOGISTICS.SNOWSQL.Application_DeliveryMethods(DeliveryMethodID);

-- Suppliers.AlternateContactPersonID -> Application_People.PersonID
ALTER TABLE KN_LOGISTICS.SNOWSQL.Purchasing_Suppliers
ADD CONSTRAINT FK_Purchasing_Suppliers_AlternateContactPersonID_Application_People
FOREIGN KEY (AlternateContactPersonID)
REFERENCES KN_LOGISTICS.SNOWSQL.Application_People(PersonID);

-- Suppliers.PrimaryContactPersonID -> Application_People.PersonID
ALTER TABLE KN_LOGISTICS.SNOWSQL.Purchasing_Suppliers
ADD CONSTRAINT FK_Purchasing_Suppliers_PrimaryContactPersonID_Application_People
FOREIGN KEY (PrimaryContactPersonID)
REFERENCES KN_LOGISTICS.SNOWSQL.Application_People(PersonID);

-- Suppliers.SupplierCategoryID -> Purchasing_SupplierCategories.SupplierCategoryID
ALTER TABLE KN_LOGISTICS.SNOWSQL.Purchasing_Suppliers
ADD CONSTRAINT FK_Purchasing_Suppliers_SupplierCategoryID_Purchasing_SupplierCategories
FOREIGN KEY (SupplierCategoryID)
REFERENCES KN_LOGISTICS.SNOWSQL.Purchasing_SupplierCategories(SupplierCategoryID);

-- Warehouse_StockItems.SupplierID -> Suppliers.SupplierID
ALTER TABLE KN_LOGISTICS.SNOWSQL.Warehouse_StockItems
ADD CONSTRAINT FK_Warehouse_StockItems_SupplierID_Purchasing_Suppliers
FOREIGN KEY (SupplierID)
REFERENCES KN_LOGISTICS.SNOWSQL.Purchasing_Suppliers(SupplierID);

-- Purchasing_SupplierTransactions.SupplierID -> Suppliers.SupplierID
ALTER TABLE KN_LOGISTICS.SNOWSQL.Purchasing_SupplierTransactions
ADD CONSTRAINT FK_Purchasing_SupplierTransactions_SupplierID_Purchasing_Suppliers
FOREIGN KEY (SupplierID)
REFERENCES KN_LOGISTICS.SNOWSQL.Purchasing_Suppliers(SupplierID);

-- Purchasing_PurchaseOrders.SupplierID -> Suppliers.SupplierID
ALTER TABLE KN_LOGISTICS.SNOWSQL.Purchasing_PurchaseOrders
ADD CONSTRAINT FK_Purchasing_PurchaseOrders_SupplierID_Purchasing_Suppliers
FOREIGN KEY (SupplierID)
REFERENCES KN_LOGISTICS.SNOWSQL.Purchasing_Suppliers(SupplierID);

-- Warehouse_StockItemTransactions.SupplierID -> Suppliers.SupplierID
ALTER TABLE KN_LOGISTICS.SNOWSQL.Warehouse_StockItemTransactions
ADD CONSTRAINT FK_Warehouse_StockItemTransactions_SupplierID_Purchasing_Suppliers
FOREIGN KEY (SupplierID)
REFERENCES KN_LOGISTICS.SNOWSQL.Purchasing_Suppliers(SupplierID);

#### Cleaning SALES_BUYINGGROUPS

In [None]:
-- Build the cleaned SALES_BUYINGGROUPS table from SALES_BUYINGGROUPS_RAW.
-- The literal text 'NULL' in either raw column becomes a real SQL NULL; every
-- other value is carried over unchanged. Both columns are still VARCHAR here.
CREATE OR REPLACE TABLE KN_LOGISTICS.SNOWSQL.SALES_BUYINGGROUPS AS
SELECT
    NULLIF(BUYINGGROUPID, 'NULL')::VARCHAR(38)   AS BUYINGGROUPID,
    NULLIF(BUYINGGROUPNAME, 'NULL')::VARCHAR(50) AS BUYINGGROUPNAME
FROM KN_LOGISTICS.SNOWSQL.SALES_BUYINGGROUPS_RAW;


-- Inspect the freshly built table.
SELECT *
FROM KN_LOGISTICS.SNOWSQL.SALES_BUYINGGROUPS;

----------------------------------------
--Converting of datatype
--
-- Replaces the existing BUYINGGROUPID column with a NUMBER(38,0) column of the
-- same name. The steps are order-dependent: the old column is dropped (step 4)
-- before the new one takes its name (step 5).
----------------------------------------

---------------------BUYINGGROUPID------------------------
-- Step 1: Add a new staging column of type NUMBER(38,0)
ALTER TABLE KN_LOGISTICS.SNOWSQL.SALES_BUYINGGROUPS
ADD COLUMN BUYINGGROUPID_NUMBER NUMBER(38,0);

-- Step 2: Populate the new column with converted values
-- NOTE(review): CAST is expected to raise on text that is not numeric;
-- confirm the source values are clean before running.
UPDATE KN_LOGISTICS.SNOWSQL.SALES_BUYINGGROUPS
SET BUYINGGROUPID_NUMBER = CAST(BUYINGGROUPID AS NUMBER(38,0));

-- Step 3: Verify the conversion (old and new values side by side, 10 rows)
SELECT BUYINGGROUPID_NUMBER, BUYINGGROUPID
FROM KN_LOGISTICS.SNOWSQL.SALES_BUYINGGROUPS
LIMIT 10;

-- Step 4: Drop the old column
ALTER TABLE KN_LOGISTICS.SNOWSQL.SALES_BUYINGGROUPS
DROP COLUMN BUYINGGROUPID;

-- Step 5: Rename the new column to the original column name
ALTER TABLE KN_LOGISTICS.SNOWSQL.SALES_BUYINGGROUPS
RENAME COLUMN BUYINGGROUPID_NUMBER TO BUYINGGROUPID;

-------------------------------------
--Adding of primary key to table
-- Declares BUYINGGROUPID as the primary key of SALES_BUYINGGROUPS.
-- NOTE(review): Snowflake is understood to record PRIMARY KEY / UNIQUE
-- constraints on standard tables without enforcing them - confirm.
-------------------------------------
ALTER TABLE KN_LOGISTICS.SNOWSQL.SALES_BUYINGGROUPS
ADD CONSTRAINT PK_SALES_BUYINGGROUPS_BUYINGGROUPID
PRIMARY KEY (BUYINGGROUPID);


-------------------------------------
--Adding of unique key to table
-- Declares BUYINGGROUPNAME as unique within SALES_BUYINGGROUPS.
-------------------------------------
ALTER TABLE KN_LOGISTICS.SNOWSQL.SALES_BUYINGGROUPS
ADD CONSTRAINT UK_SALES_BUYINGGROUPS_BUYINGGROUPNAME
UNIQUE (BUYINGGROUPNAME);


-------------------------------------------------------
--ERROR HANDLING
-- Preview query only: it returns the rows with imputed ids but does not write
-- anything back to SALES_BUYINGGROUPS.
--
-- Rows whose BUYINGGROUPID is NULL receive consecutive ids that continue after
-- the largest existing BUYINGGROUPID (max + 1, max + 2, ...). For a single
-- NULL row this equals "previous record + 1". A LAG-based lookup is not used
-- because NULLs sort last in ascending order: from the second NULL row on,
-- the previous row's id is itself NULL and the imputed value would stay NULL.
-------------------------------------------------------
WITH CTE AS (
    SELECT
        BUYINGGROUPID,
        BUYINGGROUPNAME,
        -- Largest existing id; 0 when no row has an id yet, so imputed ids start at 1.
        COALESCE(MAX(BUYINGGROUPID) OVER (), 0) AS max_buyinggroupid,
        -- COUNT(column) ignores NULLs: number of rows that already have an id.
        COUNT(BUYINGGROUPID) OVER () AS non_null_count,
        -- NULLS LAST is spelled out so NULL-id rows are always numbered after real ids.
        ROW_NUMBER() OVER (ORDER BY BUYINGGROUPID ASC NULLS LAST) AS row_num
    FROM KN_LOGISTICS.SNOWSQL.SALES_BUYINGGROUPS
)
SELECT
    BUYINGGROUPNAME,
    CASE
        -- row_num - non_null_count is 1 for the first NULL row, 2 for the second, ...
        WHEN BUYINGGROUPID IS NULL THEN max_buyinggroupid + (row_num - non_null_count)
        ELSE BUYINGGROUPID
    END AS BUYINGGROUPID

FROM CTE
ORDER BY row_num;

-------------------------------------------------------
--Adding of foreign key to table
-------------------------------------------------------
-- THERE ARE NO FOREIGN KEYS IN THIS TABLE


#### Cleaning WAREHOUSE_COLORS

In [None]:
-- Build the cleaned WAREHOUSE_COLORS table from WAREHOUSE_COLORS_RAW.
-- The literal text 'NULL' in either raw column becomes a real SQL NULL; every
-- other value is carried over unchanged. Both columns are still VARCHAR here.
CREATE OR REPLACE TABLE KN_LOGISTICS.SNOWSQL.WAREHOUSE_COLORS AS
SELECT
    NULLIF(COLORID, 'NULL')::VARCHAR(38)   AS COLORID,
    NULLIF(COLORNAME, 'NULL')::VARCHAR(20) AS COLORNAME
FROM KN_LOGISTICS.SNOWSQL.WAREHOUSE_COLORS_RAW;

-- Inspect the freshly built table.
SELECT *
FROM KN_LOGISTICS.SNOWSQL.WAREHOUSE_COLORS;

-----------------------------------------
--Conversion of column datatype
--
-- Replaces the existing COLORID column with a NUMBER(38,0) column of the same
-- name using an add / fill / drop / rename sequence. The order of the
-- statements matters: the old column is dropped before the rename.
-- NOTE(review): CAST is expected to raise on text that is not numeric;
-- confirm the source values are clean before running.
-----------------------------------------
//COLORID
-- Add the numeric staging column.
ALTER TABLE KN_LOGISTICS.SNOWSQL.WAREHOUSE_COLORS
ADD COLUMN COLORID_NUM NUMBER(38,0);

-- Fill the staging column with the converted value of every row.
UPDATE KN_LOGISTICS.SNOWSQL.WAREHOUSE_COLORS
SET COLORID_NUM = CAST(COLORID AS NUMBER(38,0));

-- Show new and old values side by side for a manual check.
SELECT COLORID_NUM, COLORID
FROM KN_LOGISTICS.SNOWSQL.WAREHOUSE_COLORS;

-- Remove the original column ...
ALTER TABLE KN_LOGISTICS.SNOWSQL.WAREHOUSE_COLORS
DROP COLUMN COLORID;

-- ... and give the numeric column the original name.
ALTER TABLE KN_LOGISTICS.SNOWSQL.WAREHOUSE_COLORS
RENAME COLUMN COLORID_NUM TO COLORID;

-- Select the transformed data for verification (10 rows)
SELECT 
    COLORID, COLORNAME
FROM KN_LOGISTICS.SNOWSQL.WAREHOUSE_COLORS
LIMIT 10;

-----------------------------------
--Adding of primary key to table
-- Declares COLORID as the primary key of WAREHOUSE_COLORS.
-- NOTE(review): Snowflake is understood to record PRIMARY KEY / UNIQUE
-- constraints on standard tables without enforcing them - confirm.
-----------------------------------

ALTER TABLE KN_LOGISTICS.SNOWSQL.WAREHOUSE_COLORS
ADD CONSTRAINT PK_WAREHOUSE_COLORS_COLORID
PRIMARY KEY (COLORID);

----------------------------------------
--Adding of unique key to table
-- Declares COLORNAME as unique within WAREHOUSE_COLORS.
----------------------------------------
ALTER TABLE KN_LOGISTICS.SNOWSQL.WAREHOUSE_COLORS
ADD CONSTRAINT UK_WAREHOUSE_COLORS_COLORNAME
UNIQUE (COLORNAME);

-------------------------------------------------------
-- ERROR HANDLING
-- Preview query only: it returns the rows with imputed ids but does not write
-- anything back to WAREHOUSE_COLORS.
--
-- Rows whose COLORID is NULL receive consecutive ids that continue after the
-- largest existing COLORID (max + 1, max + 2, ...). For a single NULL row this
-- equals "previous record + 1". A LAG-based lookup is not used because NULLs
-- sort last in ascending order: from the second NULL row on, the previous
-- row's id is itself NULL and the imputed value would stay NULL.
-------------------------------------------------------
WITH CTE AS (
    SELECT
        COLORID,
        COLORNAME,
        -- Largest existing id; 0 when no row has an id yet, so imputed ids start at 1.
        COALESCE(MAX(COLORID) OVER (), 0) AS max_colorid,
        -- COUNT(column) ignores NULLs: number of rows that already have an id.
        COUNT(COLORID) OVER () AS non_null_count,
        -- NULLS LAST is spelled out so NULL-id rows are always numbered after real ids.
        ROW_NUMBER() OVER (ORDER BY COLORID ASC NULLS LAST) AS row_num
    FROM KN_LOGISTICS.SNOWSQL.WAREHOUSE_COLORS
)
SELECT
    COLORNAME,
    CASE
        -- row_num - non_null_count is 1 for the first NULL row, 2 for the second, ...
        WHEN COLORID IS NULL THEN max_colorid + (row_num - non_null_count)
        ELSE COLORID
    END AS COLORID
FROM CTE
ORDER BY row_num;


-------------------------------------------------------
--Adding of foreign key to table
-------------------------------------------------------
-- THERE ARE NO FOREIGN KEYS IN THIS TABLE




#### Cleaning WAREHOUSE_PACKAGETYPES

In [None]:
-- Build the cleaned WAREHOUSE_PACKAGETYPES table from WAREHOUSE_PACKAGETYPES_RAW.
-- The literal text 'NULL' in either raw column becomes a real SQL NULL; every
-- other value is carried over unchanged. Both columns are still VARCHAR here.
CREATE OR REPLACE TABLE KN_LOGISTICS.SNOWSQL.WAREHOUSE_PACKAGETYPES AS
SELECT
    NULLIF(PACKAGETYPEID, 'NULL')::VARCHAR(38)   AS PACKAGETYPEID,
    NULLIF(PACKAGETYPENAME, 'NULL')::VARCHAR(50) AS PACKAGETYPENAME
FROM KN_LOGISTICS.SNOWSQL.WAREHOUSE_PACKAGETYPES_RAW;

-- Inspect the freshly built table.
SELECT *
FROM KN_LOGISTICS.SNOWSQL.WAREHOUSE_PACKAGETYPES;

--------------------------------------
--Conversion of datatypes
--
-- Replaces the existing PACKAGETYPEID column with a NUMBER(38,0) column of the
-- same name using an add / fill / drop / rename sequence. The order of the
-- statements matters: the old column is dropped before the rename.
-- NOTE(review): CAST is expected to raise on text that is not numeric;
-- confirm the source values are clean before running.
--------------------------------------
-- Add the numeric staging column.
ALTER TABLE KN_LOGISTICS.SNOWSQL.WAREHOUSE_PACKAGETYPES
ADD COLUMN PACKAGETYPEID_NUM NUMBER(38,0);

-- Fill the staging column with the converted value of every row.
UPDATE KN_LOGISTICS.SNOWSQL.WAREHOUSE_PACKAGETYPES
SET PACKAGETYPEID_NUM = CAST(PACKAGETYPEID AS NUMBER(38,0));

-- Show new and old values side by side for a manual check.
SELECT PACKAGETYPEID_NUM, PACKAGETYPEID
FROM KN_LOGISTICS.SNOWSQL.WAREHOUSE_PACKAGETYPES;

-- Remove the original column ...
ALTER TABLE KN_LOGISTICS.SNOWSQL.WAREHOUSE_PACKAGETYPES
DROP COLUMN PACKAGETYPEID;

-- ... and give the numeric column the original name.
ALTER TABLE KN_LOGISTICS.SNOWSQL.WAREHOUSE_PACKAGETYPES
RENAME COLUMN PACKAGETYPEID_NUM TO PACKAGETYPEID;

-- Sample 10 rows of the converted table for verification.
SELECT 
    PACKAGETYPEID, PACKAGETYPENAME
FROM KN_LOGISTICS.SNOWSQL.WAREHOUSE_PACKAGETYPES
LIMIT 10;

---------------------------------------
--Addition of primary key to table
-- Declares PACKAGETYPEID as the primary key of WAREHOUSE_PACKAGETYPES.
-- NOTE(review): Snowflake is understood to record PRIMARY KEY / UNIQUE
-- constraints on standard tables without enforcing them - confirm.
---------------------------------------

ALTER TABLE KN_LOGISTICS.SNOWSQL.WAREHOUSE_PACKAGETYPES
ADD CONSTRAINT PK_WAREHOUSE_PACKAGETYPES_PACKAGETYPEID
PRIMARY KEY (PACKAGETYPEID);


---------------------------------------------
--Adding of unique key to table
-- Declares PACKAGETYPENAME as unique within WAREHOUSE_PACKAGETYPES.
---------------------------------------------
ALTER TABLE KN_LOGISTICS.SNOWSQL.WAREHOUSE_PACKAGETYPES
ADD CONSTRAINT UK_WAREHOUSE_PACKAGETYPES_PACKAGETYPENAME
UNIQUE (PACKAGETYPENAME);


-----------------------------------------------------
-- ERROR HANDLING
-- Preview query only: it returns the rows with imputed ids but does not write
-- anything back to WAREHOUSE_PACKAGETYPES.
--
-- Rows whose PACKAGETYPEID is NULL receive consecutive ids that continue after
-- the largest existing PACKAGETYPEID (max + 1, max + 2, ...). For a single
-- NULL row this equals "previous record + 1". A LAG-based lookup is not used
-- because NULLs sort last in ascending order: from the second NULL row on,
-- the previous row's id is itself NULL and the imputed value would stay NULL.
-----------------------------------------------------
WITH CTE AS (
    SELECT
        PACKAGETYPEID,
        PACKAGETYPENAME,
        -- Largest existing id; 0 when no row has an id yet, so imputed ids start at 1.
        COALESCE(MAX(PACKAGETYPEID) OVER (), 0) AS max_packagetypeid,
        -- COUNT(column) ignores NULLs: number of rows that already have an id.
        COUNT(PACKAGETYPEID) OVER () AS non_null_count,
        -- NULLS LAST is spelled out so NULL-id rows are always numbered after real ids.
        ROW_NUMBER() OVER (ORDER BY PACKAGETYPEID ASC NULLS LAST) AS row_num
    FROM KN_LOGISTICS.SNOWSQL.WAREHOUSE_PACKAGETYPES
)
SELECT
    PACKAGETYPENAME,
    CASE
        -- row_num - non_null_count is 1 for the first NULL row, 2 for the second, ...
        WHEN PACKAGETYPEID IS NULL THEN max_packagetypeid + (row_num - non_null_count)
        ELSE PACKAGETYPEID
    END AS PACKAGETYPEID
FROM CTE
ORDER BY row_num;

-------------------------------------------------------
--Adding of foreign key to table
-------------------------------------------------------
-- THERE ARE NO FOREIGN KEYS 

