# Chapter 1

- Coalesce : `SELECT COALESCE(column_1, column_2) FROM table_name;`
- Casting : `SELECT CAST (3.7 AS integer);`

# Chapter 2

### Summary stats

```
-- Variance (in PostgreSQL, variance() is an alias of var_samp())
SELECT variance(col_name)
FROM table_name;

-- Population variance
SELECT var_pop(col_name)
FROM table_name;

-- Sample variance
SELECT var_samp(col_name)
FROM table_name;

-- Standard deviation (in PostgreSQL, stddev() is an alias of stddev_samp())
SELECT stddev(col_name)
FROM table_name;

-- Population standard deviation
SELECT stddev_pop(col_name)
FROM table_name;

-- Sample standard deviation
SELECT stddev_samp(col_name)
FROM table_name;

-- Truncate to a given number of decimal places (no rounding)
SELECT trunc(42.1256, 2); -- (42.12)

-- Generate a series: start, stop, step
SELECT generate_series(1, 10, 2); -- (1, 3, 5, 7, 9)

-- Correlation coefficient between two columns
SELECT corr(assets, equity)
FROM fortune500;

-- Continuous percentile (interpolates between values); 0.5 is the median
SELECT percentile_cont(0.5) WITHIN GROUP (ORDER BY col_name)
FROM table_name;

-- Discrete percentile (always returns a value that exists in the data)
SELECT percentile_disc(0.5) WITHIN GROUP (ORDER BY col_name)
FROM table_name;

```

### Binning

### Example 1

```
-- Step 1: bin boundaries [30, 35), [35, 40), ..., [60, 65)
WITH bins AS (
    SELECT
        generate_series(30, 60, 5) AS lower,
        generate_series(35, 65, 5) AS upper
),

-- Step 2: keep only the rows for the tag of interest
ebs AS (
    SELECT unanswered_count
    FROM stackoverflow
    WHERE tag = 'amazon-ebs'
)

-- Step 3: count the values that fall into each bin
SELECT
    bins.lower,
    bins.upper,
    count(ebs.unanswered_count)
FROM bins
-- LEFT JOIN so that bins without any matching value are still listed
LEFT JOIN ebs
    ON ebs.unanswered_count >= bins.lower
    AND ebs.unanswered_count < bins.upper
-- One output row per bin
GROUP BY
    bins.lower,
    bins.upper
ORDER BY bins.lower;
``` 

### Example 2

```
-- Step 1: half-year bins (lower bound inclusive, upper bound exclusive)
WITH bins AS (
    SELECT
        generate_series('2016-01-01', '2018-01-01', INTERVAL '6 months') AS lower,
        generate_series('2016-07-01', '2018-07-01', INTERVAL '6 months') AS upper
),

-- Step 2: number of requests created on each calendar day;
-- the generated day series guarantees a row even for days with no requests
daily_counts AS (
    SELECT
        daily_series.day,
        count(evanston311.date_created) AS count
    FROM (
        SELECT
            generate_series('2016-01-01', '2018-06-30', INTERVAL '1 day')::date AS day
    ) AS daily_series
    LEFT JOIN evanston311
        ON daily_series.day = evanston311.date_created::date
    GROUP BY daily_series.day
)

-- Step 3: median of the daily counts within each bin
SELECT
    bins.lower,
    bins.upper,
    percentile_disc(0.5) WITHIN GROUP (ORDER BY daily_counts.count) AS median
FROM bins
-- Attach every day that falls between the bin bounds
LEFT JOIN daily_counts
    ON daily_counts.day >= bins.lower
    AND daily_counts.day < bins.upper
-- One output row per bin
GROUP BY
    bins.lower,
    bins.upper
ORDER BY bins.lower;
```

# Temporary table

```
-- Option A: create an empty temp table with an explicit definition
CREATE TEMP TABLE temp_table (
    id INT,
    name VARCHAR(30)
);

-- Option B: create the temp table on the fly from a query result.
-- Use this INSTEAD of option A: creating temp_table a second time in the
-- same session fails because the table already exists.
CREATE TEMP TABLE temp_table AS
SELECT column1, column2
FROM table_name;

-- Insert values in temporary table from another table
-- (the statement must be terminated before the next one starts)
INSERT INTO temp_table
SELECT column1, column2
FROM another_table;

-- Delete temporary table
DROP TABLE IF EXISTS temp_table;
```

# Chapter 3

- lowercase : `SELECT lower('ABC');` (abc)
- uppercase : `SELECT upper('abc');` (ABC)
- pattern matching case sensitive : `SELECT * FROM table_name  WHERE col_name LIKE '%apple%';`
- pattern matching case insensitive : `SELECT * FROM table_name  WHERE col_name ILIKE '%apple%';`
- trimming : `SELECT trim('Wow!', '!wW');` (o). Also `rtrim`, `ltrim`
- substring : `SELECT substr('abcdef', 2, 3);` (bcd)
- splitting: `SELECT split_part('a,bc,d', ',', 2);` (bc)
- Concatenating : `SELECT concat('a', 2, 'cc');` (a2cc) ALSO `concat_ws()`
- Concatenating : `SELECT 'a' || 2 || 'cc';` (a2cc) (NOTE: any NULL operand makes the whole result NULL)


### Split

```
-- SQL Server (T-SQL) syntax: split an aggregated string back into rows.
-- STRING_SPLIT only accepts a single-character separator, so the values are
-- joined and split on ',' (a multi-character separator such as ', ' is rejected).
-- PostgreSQL equivalent: SELECT unnest(string_to_array(col, ','))
WITH cte AS (
    SELECT STRING_AGG(Country, ',') AS ConcatenatedCountries
    FROM Country_Medals
)
SELECT value AS Country
FROM cte
CROSS APPLY STRING_SPLIT(cte.ConcatenatedCountries, ','); -- Use STRING_SPLIT with CROSS APPLY

-- PostgreSQL: keep the part of category that precedes the first delimiter
-- found (': ', then ' - ', otherwise ' | ')
SELECT 
    CASE 
        WHEN category LIKE '%: %' THEN SPLIT_PART(category, ': ', 1)
        WHEN category LIKE '% - %' THEN SPLIT_PART(category, ' - ', 1)
        ELSE SPLIT_PART(category, ' | ', 1)
        END AS major_category, -- alias the result
sum(businesses) -- also select number of businesses
FROM naics
-- PostgreSQL allows grouping by the output column alias
GROUP BY major_category;
```

### SQL column operations

```
-- Change column type (PostgreSQL requires the TYPE keyword)
ALTER TABLE table_name
ALTER COLUMN some_col TYPE VARCHAR(250);

-- Change column name
ALTER TABLE table_name
RENAME COLUMN old_column_name TO new_column_name;

-- Change column values (no WHERE clause: intentionally updates every row)
UPDATE table_name
SET col_name = LOWER(col_name);

-- Add column
ALTER TABLE table_name
ADD COLUMN new_col INTEGER;

-- Delete column
ALTER TABLE table_name
DROP COLUMN col_name;
```

# Chapter 4

### Date and Time

```
-- DATE operations (advice: prefer TIMESTAMP over DATE when sub-day precision matters)
SELECT date '2005-09-11' - date '2005-09-10'; -- Result will be an integer as no of days (1)
SELECT date '2005-09-11' + integer '3'; -- Result will be a date (2005-09-14)
SELECT '2018-12-10'::DATE + '1 year 2 days 3 minutes'::INTERVAL; -- date + interval gives a timestamp (2019-12-12 00:03:00)

-- TIMESTAMP operations
-- Both operands must be timestamps: with date literals the time part is
-- dropped and the subtraction returns an integer number of days instead.
SELECT timestamp '2005-09-11 00:00:00' - timestamp '2005-09-09 12:00:00'; -- Result will be an interval (1 day 12:00:00)
SELECT AGE(timestamp '2005-09-11 00:00:00', timestamp '2005-09-09 12:00:00'); -- Result will be an interval (1 day 12:00:00)
SELECT timestamp '2019-05-01' + 21 * INTERVAL '1 day';  -- Result will be a new timestamp (2019-05-22 00:00:00)
SELECT rental_date + INTERVAL '3 days' AS expected_return_date -- adding timestamp with interval = new timestamp
FROM rental;

-- Current Timestamp
SELECT NOW()::timestamp;    -- casting will cut the timezone information
SELECT CAST(NOW() as timestamp);
SELECT CURRENT_TIMESTAMP(2); -- control precision (2 fractional-second digits)
SELECT CURRENT_DATE; -- current date
SELECT CURRENT_TIME; -- current time with timezone information

-- Extract date and time information
SELECT EXTRACT (month FROM timestamp '2005-01-24 05:12:00') AS month; -- Result will be month (1)
SELECT DATE_PART('month', timestamp '2005-01-24 05:12:00') AS month; -- Result will be month (1)
SELECT DATE_TRUNC('year', TIMESTAMP '2005-05-21 15:30:30'); -- Result will truncate to specified precision (2005-01-01 00:00:00)

-- Time Series generation (first day of each month minus one day = last day of the previous month)
SELECT GENERATE_SERIES('2018-01-01', '2019-01-01', '1 month'::INTERVAL) - '1 day'::INTERVAL;

-- Lead and lag operations (value from the next / previous row in date order)
SELECT date_col, lead(date_col) OVER (ORDER BY date_col) FROM table_name;
SELECT date_col, lag(date_col) OVER (ORDER BY date_col) FROM table_name;

-- String formatting of date
SELECT TO_CHAR(my_date_column, 'Month DD, YYYY') AS custom_date_format FROM my_table;
-- Reverse this process
SELECT TO_DATE('February 14, 2024', 'Month DD, YYYY') AS original_date FROM converted_table;
```

### Custom Formatting

```
-- String formatting of date
SELECT TO_CHAR(my_date_column, 'Month DD, YYYY') AS custom_date_format FROM my_table;
-- Reverse this process
SELECT TO_DATE('February 14, 2024', 'Month DD, YYYY') AS original_date FROM converted_table;
-- Custom number formatting
SELECT TO_CHAR(my_numeric_column, '$999,999.99') AS formatted_number FROM my_table;
-- Reverse this process: strip the currency sign, then parse with a template
-- that mirrors the formatting template (including the ',' group separator)
SELECT TO_NUMBER(REPLACE(formatted_number, '$', ''), '999,999.99') AS original_numeric_value FROM converted_table;
```