# Optimizing With UPDATE

- Generally speaking, JOINs between very large tables are very expensive regarding performance
- Steps to take for query optimization
    - Define a filtered dataset as early as possible to JOIN on smaller core population tables
    - Avoid several JOINs in a single SELECT query when large tables are involved
    - Instead, use UPDATE statements to populate fields in a temp table, one source table at a time
    - Apply indexes to fields that will be used in JOINs (will review later)

In [1]:
USE AW2019;

-- Starter Code
SELECT TOP 10
    H.SalesOrderID,
    H.OrderDate,
    D.ProductID,
    D.LineTotal,
    P.Name AS ProductName,
    SC.Name AS ProductSubcategory,
    C.Name AS ProductCategory
FROM Sales.SalesOrderHeader AS H
    JOIN Sales.SalesOrderDetail AS D
        ON H.SalesOrderID = D.SalesOrderDetailID
    JOIN Production.Product AS P
        ON D.ProductID = P.ProductID
    JOIN Production.ProductSubcategory AS SC
        ON P.ProductSubcategoryID = SC.ProductSubcategoryID
    JOIN Production.ProductCategory AS C
        ON SC.ProductCategoryID = C.ProductCategoryID
WHERE YEAR (H.OrderDate) = 2012;

/*
This particular query is ok since the source tables are not large, 
but if we were working with very large tables we would be better off with the following approach.
*/

SalesOrderID,OrderDate,ProductID,LineTotal,ProductName,ProductSubcategory,ProductCategory
45266,2012-01-01 00:00:00.000,957,2384.07,"Touring-1000 Yellow, 60",Touring Bikes,Bikes
45267,2012-01-01 00:00:00.000,707,34.99,"Sport-100 Helmet, Red",Helmets,Accessories
45268,2012-01-01 00:00:00.000,796,2443.35,"Road-250 Black, 58",Road Bikes,Bikes
45269,2012-01-01 00:00:00.000,933,32.6,HL Road Tire,Tires and Tubes,Accessories
45270,2012-01-01 00:00:00.000,922,3.99,Road Tire Tube,Tires and Tubes,Accessories
45271,2012-01-01 00:00:00.000,707,34.99,"Sport-100 Helmet, Red",Helmets,Accessories
45272,2012-01-01 00:00:00.000,859,24.49,"Half-Finger Gloves, M",Gloves,Clothing
45273,2012-01-01 00:00:00.000,799,1120.49,"Road-550-W Yellow, 42",Road Bikes,Bikes
45274,2012-01-01 00:00:00.000,932,24.99,ML Road Tire,Tires and Tubes,Accessories
45275,2012-01-01 00:00:00.000,922,3.99,Road Tire Tube,Tires and Tubes,Accessories


In [2]:
USE AW2019;

-- Optimized Code

-- Create Filtered Temp Table for 2012 Sales Data
DROP TABLE IF EXISTS #Sales2012
CREATE TABLE #Sales2012 (
    SalesOrderID    INT,
    OrderDate       DATE
)

    INSERT INTO #Sales2012 (
        SalesOrderID,
        OrderDate
    )

    SELECT SalesOrderID, OrderDate
    FROM Sales.SalesOrderHeader
    WHERE YEAR (OrderDate) = 2012;

-- Create Master Temp Table and Insert #Sales2012
DROP TABLE IF EXISTS #ProductsSold2012
CREATE TABLE #ProductsSold2012 (
    SalesOrderID            INT,
    OrderDate               DATE,
    LineTotal               MONEY,
    ProductID               INT,
    ProductName             VARCHAR(64),
    ProductSubcategoryID    INT,
    ProductSubcategory      VARCHAR(64),
    ProductCategoryID       INT,
    ProductCategory         VARCHAR(64)
)

    INSERT INTO #ProductsSold2012 (
        SalesOrderID,
        OrderDate,
        LineTotal,
        ProductID
    )

    SELECT
        S.SalesOrderID,
        S.OrderDate,
        D.LineTotal,
        D.ProductID
    FROM #Sales2012 AS S -- less expensive join
        JOIN Sales.SalesOrderDetail AS D
            ON S.SalesOrderID = D.SalesOrderID;

-- Update Remaining NULL Values in #ProductsSold2012
UPDATE #ProductsSold2012
SET 
    ProductName = P.Name,
    ProductSubcategoryID = P.ProductSubcategoryID
FROM #ProductsSold2012 AS PS
    JOIN Production.Product AS P
        ON PS.ProductID = P.ProductID;

UPDATE #ProductsSold2012
SET 
    ProductSubcategory = SC.Name,
    ProductCategoryID = SC.ProductCategoryID
FROM #ProductsSold2012 AS PS
    JOIN Production.ProductSubcategory AS SC
        ON PS.ProductSubcategoryID = SC.ProductSubcategoryID;

UPDATE #ProductsSold2012
SET ProductCategory = C.Name
FROM #ProductsSold2012 AS PS
    JOIN Production.ProductCategory AS C
        ON PS.ProductCategoryID = C.ProductCategoryID;

-- Query Data
SELECT TOP 10 * 
FROM #ProductsSold2012;

SalesOrderID,OrderDate,LineTotal,ProductID,ProductName,ProductSubcategoryID,ProductSubcategory,ProductCategoryID,ProductCategory
47362,2012-07-31,259.1222,863,"Full-Finger Gloves, L",20,Gloves,3,Clothing
47362,2012-07-31,22.794,861,"Full-Finger Gloves, S",20,Gloves,3,Clothing
47362,2012-07-31,4971.4072,780,"Mountain-200 Silver, 42",1,Mountain Bikes,1,Bikes
47362,2012-07-31,109.341,815,LL Mountain Front Wheel,17,Wheels,2,Components
47362,2012-07-31,209.256,832,"ML Mountain Frame - Black, 48",12,Mountain Frames,2,Components
47362,2012-07-31,1229.4589,782,"Mountain-200 Black, 38",1,Mountain Bikes,1,Bikes
47362,2012-07-31,196.329,825,HL Mountain Rear Wheel,17,Wheels,2,Components
47362,2012-07-31,157.941,823,LL Mountain Rear Wheel,17,Wheels,2,Components
47363,2012-07-31,214.236,828,HL Road Rear Wheel,17,Wheels,2,Components
47364,2012-07-31,183.9382,738,"LL Road Frame - Black, 52",14,Road Frames,2,Components


# Improved EXISTS and NOT EXISTS w/ UPDATE
- EXISTS allows you to check for matching records from the many side of a relationship, without resulting in duplicated data from the one side
    - This works fine uinless you need additional information about the match
    - If you need to see data points pertaining to the match, UPDATE is a superior alternative
- Choosing techniques;
    - If you need to see all matches from the many side of the relationship, use JOIN
    - If you don't want to see all matches from the many side, AND don't want to see information about matches, use EXISTS
    - If you don't want to see all matches from the many side, but would like to see information about returned matches, use UPDATE


In [3]:
USE AW2019;

--Select All Orders w/ At Least One Item Over 10K, Using EXISTS
SELECT TOP 10
    H.SalesOrderID,
    H.OrderDate,
    H.TotalDue
FROM Sales.SalesOrderHeader H
WHERE EXISTS (
	SELECT H.SalesOrderID
	FROM Sales.SalesOrderDetail D
	WHERE H.SalesOrderID = D.SalesOrderID
		AND D.LineTotal > 10000
)
ORDER BY H.SalesOrderID


-- Create a Table w/ Sales Data, Including a Field for Line Total
DROP TABLE IF EXISTS #Sales
CREATE TABLE #Sales (
    SalesOrderID    INT,
    OrderDate       DATE,
    TotalDue        MONEY,
    LineTotal       MONEY
)


--Insert Sales Data into Temp Table
    INSERT INTO #Sales (
        SalesOrderID,
        OrderDate,
        TotalDue
    )

    SELECT
        SalesOrderID,
        OrderDate,
        TotalDue
    FROM Sales.SalesOrderHeader


--Update Temp Table w/ > 10K Line Totals
UPDATE #Sales
SET LineTotal = D.LineTotal
FROM #Sales A
	JOIN Sales.SalesOrderDetail D
		ON A.SalesOrderID = D.SalesOrderID
WHERE D.LineTotal > 10000


--Recreate EXISTS, Returning Records With Existing Line Total
SELECT TOP 10 * 
FROM #Sales 
WHERE LineTotal IS NOT NULL;


--Recreate NOT EXISTS, Returning Records Without Existing Line Total
SELECT TOP 10 * 
FROM #Sales 
WHERE LineTotal IS NULL;

SalesOrderID,OrderDate,TotalDue
43683,2011-05-31 00:00:00.000,48204.0662
43695,2011-05-31 00:00:00.000,44344.8265
43843,2011-07-01 00:00:00.000,37106.2915
43864,2011-07-01 00:00:00.000,43335.7219
43869,2011-07-01 00:00:00.000,55408.1581
43875,2011-07-01 00:00:00.000,137343.2877
43881,2011-07-01 00:00:00.000,43706.8175
43884,2011-07-01 00:00:00.000,130416.4829
43890,2011-07-01 00:00:00.000,84686.9878
43894,2011-07-01 00:00:00.000,36585.904


SalesOrderID,OrderDate,TotalDue,LineTotal
57012,2013-09-30,89661.1827,14906.1593
57023,2013-09-30,38837.7761,15109.0437
57034,2013-09-30,86193.0846,10262.07
57036,2013-09-30,42680.2308,11015.952
57037,2013-09-30,45566.5466,12527.946
57041,2013-09-30,71945.7548,14304.42
57045,2013-09-30,55385.6976,14304.42
57046,2013-09-30,82333.7853,13769.94
57051,2013-09-30,84763.4917,14220.2764
57054,2013-09-30,105120.5963,10013.094


SalesOrderID,OrderDate,TotalDue,LineTotal
55803,2013-09-06,2673.0613,
55804,2013-09-06,2644.3313,
55805,2013-09-06,650.8008,
55806,2013-09-06,1238.1415,
55807,2013-09-06,1308.8283,
55808,2013-09-06,1238.1415,
55809,2013-09-06,1286.7394,
55810,2013-09-06,695.0119,
55811,2013-09-06,2755.3617,
55812,2013-09-06,901.4701,


# Indexes
- Main advantage of temp tavles over CTE's is that temp tables can be indexed for better performance
- Indexes are database objects that can make queries against your tables faster
    - They sort the data in the fileds they apply to (either in the tbale itself, or in a seperate data structure)
    - Sorting allowes the data base engine to locate records within a table without having to search through the table row-by-row
- Two types of indexes: Clustered and Non-Clustered
- General approach to applying indexes;
    - How tables are used in joins should drive the use of indexes
    - Generally add a clustered index first, and then layer non-clustered indexes to cover additional fields that will be used to join other tables
    - Indexes take up memeory in the database, so only add them when they are really needed
    - Indexes slow down data inserts, so you should generally add indexes after data has been loaded

# Clustered Indexes
- Rows are physically sorted base on the field(s) the index is applied to
- Tables with a primary key are given a clustered index by default
- Most tables should have at least have a clustered index for query speed improvement
- A table may only have one clustered index
- Implementation strategies;
    - Apply a clustered index to field(s) most likely to be used in joins
    - These should also be the ones that most uniquely define a record in a table
    - Whichever field is a good canditate for a primary key is likely also a good candidate for a clustered index

# Non-Clustered Indexes
- Advantage over clustered indexes is that many non-clustered indexes can be applied to a single table
- Non-clustered indexes do not physcially sort the data in a table (which is why many can be applied to a single table)
    - Sorted order is stored in an external data structure
- Implementation strategies;
    - Apply to fields that you'll use to join tables on outside of the clustered index
    - Fields covered by non-clustered indexes should still have a high level of uniqueness


In [7]:
USE AW2019;

-- Optimized Previous Code w/ Indexes (Use Join Fields to Drive Decisions, Create Indexes AFTER Data is Loaded)

-- Create Filtered Temp Table for 2012 Sales Data
DROP TABLE IF EXISTS #Sales2012
CREATE TABLE #Sales2012 (
    SalesOrderID    INT,
    OrderDate       DATE
)

    INSERT INTO #Sales2012 (
        SalesOrderID,
        OrderDate
    )

    SELECT SalesOrderID, OrderDate
    FROM Sales.SalesOrderHeader
    WHERE YEAR (OrderDate) = 2012;

    -- Add Clustered Index to #Sales2012
    CREATE CLUSTERED INDEX Sales2012_idx ON #Sales2012 (SalesOrderID)

-- Create Master Temp Table and Insert #Sales2012
DROP TABLE IF EXISTS #ProductsSold2012
CREATE TABLE #ProductsSold2012 (
    SalesOrderID            INT,
    -- SalesOrderDetailID      INT,
    OrderDate               DATE,
    LineTotal               MONEY,
    ProductID               INT,
    ProductName             VARCHAR(64),
    ProductSubcategoryID    INT,
    ProductSubcategory      VARCHAR(64),
    ProductCategoryID       INT,
    ProductCategory         VARCHAR(64)
)

    INSERT INTO #ProductsSold2012 (
        SalesOrderID,
        -- SalesOrderDetailID,
        OrderDate,
        LineTotal,
        ProductID
    )

    SELECT
        S.SalesOrderID,
        -- D.SalesOrderDetailID,
        S.OrderDate,
        D.LineTotal,
        D.ProductID
    FROM #Sales2012 AS S -- less expensive join
        JOIN Sales.SalesOrderDetail AS D
            ON S.SalesOrderID = D.SalesOrderID;

    -- Add Clustered Index to #ProductsSold2012
    CREATE CLUSTERED INDEX ProductsSold2012_idx ON #ProductsSold2012 (SalesOrderID)

    -- Add Non-Clustered Index to #ProductsSold2012
    CREATE NONCLUSTERED INDEX ProductIDSold2012_idx ON #ProductsSold2012 (ProductID)

-- Update Remaining NULL Values in #ProductsSold2012
UPDATE #ProductsSold2012
SET 
    ProductName = P.Name,
    ProductSubcategoryID = P.ProductSubcategoryID
FROM #ProductsSold2012 AS PS
    JOIN Production.Product AS P
        ON PS.ProductID = P.ProductID;

-- Add Non-Clustered Index to #ProductsSold2012 Based on Previous Update
CREATE NONCLUSTERED INDEX ProductSubcategoryIDSold2012_idx ON #ProductsSold2012 (ProductSubcategoryID)

UPDATE #ProductsSold2012
SET 
    ProductSubcategory = SC.Name,
    ProductCategoryID = SC.ProductCategoryID
FROM #ProductsSold2012 AS PS
    JOIN Production.ProductSubcategory AS SC
        ON PS.ProductSubcategoryID = SC.ProductSubcategoryID;

-- Add Non-Clustered Index to #ProductsSold2012 Based on Previous Update
CREATE NONCLUSTERED INDEX ProductCategoryIDSold2012_idx ON #ProductsSold2012 (ProductCategoryID)

UPDATE #ProductsSold2012
SET ProductCategory = C.Name
FROM #ProductsSold2012 AS PS
    JOIN Production.ProductCategory AS C
        ON PS.ProductCategoryID = C.ProductCategoryID;

-- Query Data
SELECT TOP 10 * 
FROM #ProductsSold2012;

SalesOrderID,OrderDate,LineTotal,ProductID,ProductName,ProductSubcategoryID,ProductSubcategory,ProductCategoryID,ProductCategory
46959,2012-06-30,447.093,819,ML Road Front Wheel,17,Wheels,2,Components
46959,2012-06-30,809.328,725,"LL Road Frame - Red, 44",14,Road Frames,2,Components
46959,2012-06-30,100.9325,708,"Sport-100 Helmet, Black",31,Helmets,4,Accessories
46959,2012-06-30,89.988,854,"Women's Tights, L",24,Tights,3,Clothing
46959,2012-06-30,7853.625,794,"Road-250 Black, 48",2,Road Bikes,1,Bikes
46959,2012-06-30,115.3616,715,"Long-Sleeve Logo Jersey, L",21,Jerseys,3,Clothing
46959,2012-06-30,1409.382,767,"Road-650 Black, 62",2,Road Bikes,1,Bikes
46959,2012-06-30,405.234,826,LL Road Rear Wheel,17,Wheels,2,Components
46959,2012-06-30,224.97,852,"Women's Tights, S",24,Tights,3,Clothing
46959,2012-06-30,60.5595,707,"Sport-100 Helmet, Red",31,Helmets,4,Accessories


# Creating Permanent Objects for Optimization
- DDL (Data Definition Language): Pertain to the structure and definition of a table. Include CREATE, DROP, and TRUNCATE.
- DML (Data Manipulation Language): Commands that manipulate data within tables. Include INSERT, UPDATE, and DELETE. 
- Benefits of lookup tables;
    - Eliminate duplicated effort by locating frequently used attributes in one place
    - Promote data integrity by consolidating a "single version of truth" in a central location
    - Examples: Calendar, Store Locations, etc.

In [5]:
USE AW2019;

-- Create Permanent Lookup Calendar Table
DROP TABLE IF EXISTS AW2019.dbo.Calendar
CREATE TABLE AW2019.dbo.Calendar (
    DateValue           DATE,
    DayOfWeekNumber     INT,
    DayOfWeekName       VARCHAR(32),
    DayOfMonthNumber    INT,
    MonthNumber         INT,
    YearNumber          INT,
    WeekendFlag         TINYINT
)

    -- Manually Inserting Values (Not Practical)
    INSERT INTO AW2019.dbo.Calendar (
        DateValue,
        DayOfWeekNumber,
        DayOfWeekName,
        DayOfMonthNumber,
        MonthNumber,
        YearNumber,
        WeekendFlag
    )

    VALUES (
        CAST ('2011-01-01' AS DATE), 7, 'Saturday', 1, 1, 2011, 1
    )

    -- Truncate Table to Remove Inserted Values
    TRUNCATE TABLE dbo.Calendar;

-- Using a Recursive CTE to Generate a List of Dates and Insert
WITH Dates AS (
    SELECT CAST ('2011-01-01' AS DATE) AS DateValue

    UNION ALL

    SELECT DATEADD (DAY, 1, DateValue)
    FROM Dates
    WHERE DateValue < CAST ('2030-12-31' AS DATE)
)

INSERT INTO dbo.Calendar (DateValue)
SELECT DateValue
FROM Dates
OPTION (MAXRECURSION 10000)

-- Update Fields w/ Date Functions
UPDATE dbo.Calendar
SET
    DayOfWeekNumber     = DATEPART (WEEKDAY, DateValue),
    DayOfWeekName       = FORMAT (DateValue, 'dddd'),
    DayOfMonthNumber    = DAY (DateValue),
    MonthNumber         = MONTH (DateValue),
    YearNumber          = YEAR (DateValue)


UPDATE dbo.Calendar
SET WeekendFlag =   CASE
                        WHEN DayOfWeekName IN ('Saturday', 'Sunday') THEN 1
                        ELSE 0
                    END

SELECT TOP 10 * 
FROM dbo.Calendar;

DateValue,DayOfWeekNumber,DayOfWeekName,DayOfMonthNumber,MonthNumber,YearNumber,WeekendFlag
2011-01-01,7,Saturday,1,1,2011,1
2011-01-02,1,Sunday,2,1,2011,1
2011-01-03,2,Monday,3,1,2011,0
2011-01-04,3,Tuesday,4,1,2011,0
2011-01-05,4,Wednesday,5,1,2011,0
2011-01-06,5,Thursday,6,1,2011,0
2011-01-07,6,Friday,7,1,2011,0
2011-01-08,7,Saturday,8,1,2011,1
2011-01-09,1,Sunday,9,1,2011,1
2011-01-10,2,Monday,10,1,2011,0


In [6]:
USE AW2019;

-- Use Calendar Table to Identify Weekend Sales
SELECT TOP 10 H.*
FROM Sales.SalesOrderHeader AS H
    JOIN dbo.Calendar AS C
        ON H.OrderDate = C.DateValue
WHERE C.WeekendFlag = 1 AND H.SalesOrderNumber IS NOT NULL;

SalesOrderID,RevisionNumber,OrderDate,DueDate,ShipDate,Status,OnlineOrderFlag,SalesOrderNumber,PurchaseOrderNumber,AccountNumber,CustomerID,SalesPersonID,TerritoryID,BillToAddressID,ShipToAddressID,ShipMethodID,CreditCardID,CreditCardApprovalCode,CurrencyRateID,SubTotal,TaxAmt,Freight,TotalDue,Comment,rowguid,ModifiedDate
43713,8,2011-06-04 00:00:00.000,2011-06-16 00:00:00.000,2011-06-11 00:00:00.000,5,1,SO43713,,10-4030-027601,27601,,4,11855,11855,1,16570,830288Vi85808,,3578.27,286.2616,89.4568,3953.9884,,9de30294-9066-4988-a3ad-09a0713348e5,2011-06-11 00:00:00.000
43714,8,2011-06-04 00:00:00.000,2011-06-16 00:00:00.000,2011-06-11 00:00:00.000,5,1,SO43714,,10-4030-013591,13591,,10,15169,15169,1,15963,1031124Vi82522,61.0,3578.27,286.2616,89.4568,3953.9884,,40659773-9ce3-4214-9886-c9de75e40274,2011-06-11 00:00:00.000
43715,8,2011-06-04 00:00:00.000,2011-06-16 00:00:00.000,2011-06-11 00:00:00.000,5,1,SO43715,,10-4030-016483,16483,,9,28213,28213,1,7985,1134605Vi41375,54.0,3578.27,286.2616,89.4568,3953.9884,,36c19607-cc19-4cc0-8ea7-c16a6cc747dd,2011-06-11 00:00:00.000
43716,8,2011-06-04 00:00:00.000,2011-06-16 00:00:00.000,2011-06-11 00:00:00.000,5,1,SO43716,,10-4030-016529,16529,,9,23557,23557,1,5873,534626Vi30635,54.0,3578.27,286.2616,89.4568,3953.9884,,92d7b8bf-071b-478e-a330-dec813c635f9,2011-06-11 00:00:00.000
43717,8,2011-06-04 00:00:00.000,2011-06-16 00:00:00.000,2011-06-11 00:00:00.000,5,1,SO43717,,10-4030-025249,25249,,9,11809,11809,1,5700,1235425Vi29858,54.0,699.0982,55.9279,17.4775,772.5036,,8cf612ef-31b8-4a57-8e57-5c62d48a0944,2011-06-11 00:00:00.000
43718,8,2011-06-05 00:00:00.000,2011-06-17 00:00:00.000,2011-06-12 00:00:00.000,5,1,SO43718,,10-4030-027668,27668,,1,25346,25346,1,3049,530203Vi16052,,3578.27,286.2616,89.4568,3953.9884,,877141f0-4ae7-4fb3-b9e0-3ca2cc1a20c9,2011-06-12 00:00:00.000
43719,8,2011-06-05 00:00:00.000,2011-06-17 00:00:00.000,2011-06-12 00:00:00.000,5,1,SO43719,,10-4030-027612,27612,,4,13854,13854,1,5998,1130295Vi31203,,3578.27,286.2616,89.4568,3953.9884,,bf5155eb-c5be-4245-8fc4-f801db5b052d,2011-06-12 00:00:00.000
43720,8,2011-06-05 00:00:00.000,2011-06-17 00:00:00.000,2011-06-12 00:00:00.000,5,1,SO43720,,10-4030-013264,13264,,8,18584,18584,1,2765,831105Vi14399,71.0,3578.27,286.2616,89.4568,3953.9884,,8832376e-105b-4696-8bb4-e6ec1aa20219,2011-06-12 00:00:00.000
43721,8,2011-06-05 00:00:00.000,2011-06-17 00:00:00.000,2011-06-12 00:00:00.000,5,1,SO43721,,10-4030-013590,13590,,10,22146,22146,1,11950,331111Vi61776,74.0,3578.27,286.2616,89.4568,3953.9884,,3a8a408f-c18f-4ae3-b0c4-f3b82f5f3404,2011-06-12 00:00:00.000
43741,8,2011-06-11 00:00:00.000,2011-06-23 00:00:00.000,2011-06-18 00:00:00.000,5,1,SO43741,,10-4030-027671,27671,,1,13036,13036,1,4679,530206Vi24524,,3578.27,286.2616,89.4568,3953.9884,,c674c190-0cda-46f5-aac1-a65d9f902572,2011-06-18 00:00:00.000
