In [None]:
-- Count the distinct values of one column among rows that pass several filters.
-- NOTE(review): `field` and `table` look like placeholder names (and `table` is a
-- reserved word) - substitute real identifiers before running.
SELECT COUNT(DISTINCT field)
FROM table
WHERE title = 'Metro'
    AND year = 2015
    AND birthdate IS NOT NULL  -- opposite test: IS NULL
    AND name LIKE 'B%'         -- opposite test: NOT LIKE
-- LIMIT belongs to the same statement, so no semicolon may precede it
LIMIT 10;

In [None]:
-- BETWEEN <low> AND <high>: range filter (both bounds are included)
SELECT
    title
FROM films
WHERE release_year BETWEEN 1994 AND 2000;

In [None]:
-- WHERE <column> IN (<list>): keep rows whose value matches any list entry
SELECT
    name
FROM kids
WHERE age IN (2, 4, 6, 8, 10);

In [None]:
-- Aggregate functions: each one collapses the whole budget column to a single value
SELECT
    AVG(budget),
    MAX(budget),
    SUM(budget),
    MIN(budget)
FROM films;

In [None]:
-- Arithmetic directly in SELECT
SELECT 4 * 3;
-- result: 12

-- Dividing two integers returns an integer, so the fraction is lost
SELECT 4 / 3;
-- result: 1

-- Use decimal operands to keep the fractional part
SELECT 4.0 / 3.0 AS result;
-- result: 1.333

In [None]:
-- comment syntax: "--" starts a single-line comment, "/* ... */" delimits a block comment
-- some text
/* some lines of text */

In [None]:
-- ORDER BY with DESC: the direction keyword applies per sort key, so only
-- title is descending here; release_year keeps the default ascending order
SELECT
    title
FROM films
ORDER BY
    release_year ASC,
    title DESC;

In [None]:
-- GROUP BY: one output row per distinct sex, with the number of rows in each group
-- NOTE(review): ORDER BY count relies on the engine naming the unaliased COUNT(*)
-- column "count" (PostgreSQL does) - confirm on the target engine
SELECT
    sex,
    COUNT(*)
FROM employees
GROUP BY sex
ORDER BY count DESC;

In [None]:
-- HAVING: filters on an aggregate, which a WHERE clause cannot do
-- Returns only the years in which more than 10 films were released
SELECT
    release_year
FROM films
GROUP BY release_year
HAVING COUNT(title) > 10;

In [None]:
-- Example combining WHERE, GROUP BY, HAVING and ORDER BY:
-- per-year average budget and gross for years after 1990, keeping only
-- years whose average budget exceeds 60,000,000, highest average gross first
SELECT
    release_year,
    AVG(budget) AS avg_budget,
    AVG(gross) AS avg_gross
FROM films
WHERE release_year > 1990
GROUP BY release_year
HAVING AVG(budget) > 60000000
ORDER BY avg_gross DESC;

In [None]:
-- INNER JOIN with a two-part ON condition: both equalities must hold for a match
SELECT
    name
FROM hit_tracks AS t
INNER JOIN features AS f
    ON f.song_id = t.id
    AND f.dance_level = t.dance
ORDER BY name
LIMIT 5;

-- JOIN via USING (<column>): shorthand when both tables share the join column name
SELECT
    c.name AS country,
    c.continent,
    l.name AS language,
    l.official
FROM countries AS c
INNER JOIN languages AS l
    -- match rows on the code column present in both tables
    USING (code);

In [None]:
-- SELECT ... INTO: store the query result in a new table (pop_plus)
SELECT
    country_code,
    size,
    -- bucket each population size into a named group
    CASE
        WHEN size > 50000000 THEN 'large'
        WHEN size > 1000000 THEN 'medium'
        ELSE 'small'
    END AS popsize_group
INTO pop_plus
FROM populations
WHERE year = 2015;

-- Display the contents of the table created above (all columns)
SELECT *
FROM pop_plus;

In [None]:
-- CASE WHEN ... THEN ... ELSE ... END AS <alias>: build a categorical column
SELECT
    CASE
        WHEN hometeam_id = 10189 THEN 'FC Schalke 04'
        WHEN hometeam_id = 9823 THEN 'FC Bayern Munich'
        ELSE 'Other'
    END AS home_team,
    COUNT(id) AS total_matches
FROM matches_germany
-- grouping by the alias groups by the CASE result
GROUP BY home_team;

In [None]:
-- CASE WHEN comparing two columns of the same row
SELECT
    m.date,
    t.team_long_name AS opponent,
    -- outcome from the home side's point of view
    CASE
        WHEN m.home_goal > m.away_goal THEN 'Barcelona win!'
        WHEN m.home_goal < m.away_goal THEN 'Barcelona loss :('
        ELSE 'Tie'
    END AS outcome
FROM matches_spain AS m
LEFT JOIN teams_spain AS t
    ON t.team_api_id = m.awayteam_id
-- keep only matches where Barcelona is the home team
WHERE m.hometeam_id = 8634;


-- Label matches won by team 8455 (Chelsea, per the labels below) and return only those rows.
SELECT
    date,
    hometeam_id,
    awayteam_id,
    CASE WHEN hometeam_id = 8455 AND home_goal > away_goal
            THEN 'Chelsea home win!'
         WHEN awayteam_id = 8455 AND home_goal < away_goal
            THEN 'Chelsea away win!'
         -- rows reaching this branch are removed by the WHERE filter below
         ELSE 'Loss or tie :(' END AS outcome
FROM match
-- Filter with CASE ... END IS NOT NULL. The CASE here must have no ELSE branch:
-- rows matching no WHEN then evaluate to NULL and are dropped. With an ELSE the
-- expression is never NULL and the filter would keep every row.
WHERE CASE WHEN hometeam_id = 8455 AND home_goal > away_goal
            THEN 'Chelsea home win!'
         WHEN awayteam_id = 8455 AND home_goal < away_goal
            THEN 'Chelsea away win!'
         END IS NOT NULL;

In [None]:
-- CASE WHEN inside an aggregate function
-- With no ELSE branch, non-matching rows yield NULL
SELECT
    season,
    SUM(
        CASE WHEN hometeam_id = 8650 THEN home_goal END
    ) AS home_goals,
    SUM(
        CASE WHEN awayteam_id = 8650 THEN away_goal END
    ) AS away_goals
FROM match
GROUP BY season;

-- Example: count rows meeting a condition by summing a 1/0 flag per row
SELECT
    c.name AS country,
    -- one column per season: number of matches the home team won
    SUM(
        CASE WHEN m.season = '2012/2013' AND m.home_goal > m.away_goal
            THEN 1 ELSE 0 END
    ) AS matches_2012_2013,
    SUM(
        CASE WHEN m.season = '2013/2014' AND m.home_goal > m.away_goal
            THEN 1 ELSE 0 END
    ) AS matches_2013_2014,
    SUM(
        CASE WHEN m.season = '2014/2015' AND m.home_goal > m.away_goal
            THEN 1 ELSE 0 END
    ) AS matches_2014_2015
FROM country AS c
LEFT JOIN match AS m
    ON m.country_id = c.id
-- group by the country name alias
GROUP BY country;

In [None]:
-- ROUND(<value>, 2) AS <alias>: round an aggregate to two decimal places
SELECT
    season,
    ROUND(
        AVG(CASE WHEN hometeam_id = 8650 THEN home_goal END),
        2
    ) AS home_goals,
    ROUND(
        AVG(CASE WHEN awayteam_id = 8650 THEN away_goal END),
        2
    ) AS away_goals
FROM match
GROUP BY season;

In [None]:
-- Calculate win fractions with CASE and AVG over 1/0 flags.
-- Each CASE yields 1 for a win and 0 for a loss of team 8455; rows matching
-- neither WHEN (ties, other teams) yield NULL, which AVG ignores.
SELECT
    season,
    AVG(CASE WHEN hometeam_id = 8455 AND home_goal > away_goal THEN 1
             WHEN hometeam_id = 8455 AND home_goal < away_goal THEN 0
             END) AS pct_homewins,
    -- last select item: no trailing comma before FROM
    AVG(CASE WHEN awayteam_id = 8455 AND away_goal > home_goal THEN 1
             WHEN awayteam_id = 8455 AND away_goal < home_goal THEN 0
             END) AS pct_awaywins
FROM match
GROUP BY season;

In [None]:
-- Subqueries
-- *** Remember to apply matching WHERE filters in the subquery and the outer query
-- Subquery in WHERE: keep teams whose id appears as a home team for country 15722
SELECT
    team_long_name,
    team_short_name AS abbr
FROM team
WHERE team_api_id IN (
    SELECT hometeam_id
    FROM match
    WHERE country_id = 15722
);
    
-- Subquery in FROM: transform the data first, then query the result
-- Top 3 teams by average home goals in season 2011/2012
SELECT
    team,
    home_avg
FROM (
    SELECT
        t.team_long_name AS team,
        AVG(m.home_goal) AS home_avg
    FROM match AS m
    LEFT JOIN team AS t
        ON t.team_api_id = m.hometeam_id
    WHERE season = '2011/2012'
    GROUP BY team
) AS subquery
ORDER BY home_avg DESC
LIMIT 3;

-- Subquery in SELECT: must return a single value (here, one aggregate)
-- diff = each match's total goals minus the 2011/2012 average total
SELECT
    date,
    (home_goal + away_goal) AS goals,
    (home_goal + away_goal) - (
        SELECT AVG(home_goal + away_goal)
        FROM match
        WHERE season = '2011/2012'
    ) AS diff
FROM match
WHERE season = '2011/2012';

-- DON'T FORGET: use the same WHERE filter in every subquery and the outer query
-- Subqueries in both FROM and WHERE
SELECT
    -- stage and rounded average goals taken from the FROM subquery
    s.stage,
    ROUND(s.avg_goals, 2) AS avg_goals
FROM (
    -- average total goals per stage in 2012/2013
    SELECT
        stage,
        AVG(home_goal + away_goal) AS avg_goals
    FROM match
    WHERE season = '2012/2013'
    GROUP BY stage
) AS s
-- keep stages whose average exceeds the 2012/2013 overall average
WHERE s.avg_goals > (
    SELECT AVG(home_goal + away_goal)
    FROM match
    WHERE season = '2012/2013'
);

In [None]:
-- Correlated subquery: uses values from the outer query to generate its result.
-- Returns each country name with the average total goals of its matches.
-- NOTE(review): the subquery reads c.id, which is not in GROUP BY; some engines
-- may reject that - confirm on the target engine.
SELECT
    c.name AS country,
    (SELECT
        AVG(home_goal + away_goal)
     FROM match AS m
     -- correlated part: ties the subquery to the current outer row
     WHERE m.country_id = c.id)
        AS avg_goals
FROM country AS c
GROUP BY country;

-- Matches whose total goals equal the maximum total for the same
-- country and season (the maximum comes from a correlated subquery)
SELECT
    main.country_id,
    main.date,
    main.home_goal,
    main.away_goal
FROM match AS main
WHERE (home_goal + away_goal) = (
    SELECT MAX(sub.home_goal + sub.away_goal)
    FROM match AS sub
    -- correlation: restrict to the outer row's country and season
    WHERE sub.country_id = main.country_id
        AND sub.season = main.season
);

In [None]:
-- Nested subquery: a subquery inside another subquery (each level may be
-- correlated or uncorrelated).
-- Per calendar month: total goals, and how far that total lies from the
-- average of all monthly totals.
SELECT
    EXTRACT(MONTH FROM date) AS month,
    SUM(m.home_goal + m.away_goal) AS total_goals,
    SUM(m.home_goal + m.away_goal) -
    (SELECT AVG(goals)
     FROM (SELECT
              EXTRACT(MONTH FROM date) AS month,
              -- monthly total; this is the goals column averaged one level up
              SUM(home_goal + away_goal) AS goals
           FROM match
           GROUP BY month) AS s) AS diff
FROM match AS m
GROUP BY month;

In [None]:
-- CTE = Common Table Expressions
/*
Template:

WITH cte_name AS (
    SELECT ...
    FROM ...
    WHERE ...
),
-- New subquery: further CTEs follow, separated by commas
cte_name2 AS (
    SELECT ...
)

-- Main QUERY...
SELECT ...
FROM ...
INNER JOIN cte_name
    ON ...
INNER JOIN cte_name2
    ON ...
GROUP BY ...
*/

-- CTE match_list: every match with its league name and total goals
WITH match_list AS (
    SELECT
        l.name AS league,
        m.date,
        m.home_goal,
        m.away_goal,
        (m.home_goal + m.away_goal) AS total_goals
    FROM match AS m
    LEFT JOIN league AS l
        ON l.id = m.country_id
)
-- Main query: league, date and goals for matches with at least 10 total goals
SELECT
    league,
    date,
    home_goal,
    away_goal
FROM match_list
WHERE total_goals >= 10;

In [None]:
-- Window functions: the OVER clause (think of it as adding an AVG column)
SELECT
    m.id,
    c.name AS country,
    m.season,
    m.home_goal,
    m.away_goal,
    -- empty OVER(): the average is taken over all result rows and repeated on each
    AVG(m.home_goal + m.away_goal) OVER () AS overall_avg
FROM match AS m
LEFT JOIN country AS c
    ON c.id = m.country_id;

In [None]:
-- RANK(): ranks rows by the window's ORDER BY, which sorts ascending
-- (smallest to largest) by default. DESC is used here so the highest
-- goal total receives rank 1.
SELECT
    date,
    (home_goal + away_goal) AS goals,
    RANK() OVER(ORDER BY home_goal + away_goal DESC) AS goals_rank
FROM match
WHERE season = '2011/2012';

In [None]:
-- Window partitions: compute separate values per category
-- (e.g. average goals by season and by country)
SELECT
    date,
    season,
    home_goal,
    away_goal,
    CASE
        WHEN hometeam_id = 8673 THEN 'home'
        ELSE 'away'
    END AS warsaw_location,
    -- averages are calculated separately within each season
    AVG(home_goal) OVER (PARTITION BY season) AS season_homeavg,
    AVG(away_goal) OVER (PARTITION BY season) AS season_awayavg
FROM match
-- Legia Warszawa matches only, whether at home or away
WHERE hometeam_id = 8673
    OR awayteam_id = 8673
ORDER BY (home_goal + away_goal) DESC;

# partition by multiple columns
# also EXTRACT(MONTH FROM date)

In [None]:
-- sliding windows - think running totals
/*
Syntax
- some calculation like
    - `SUM(data) OVER(ORDER BY ... ROWS ...) AS alias`
- `ROWS BETWEEN <start> AND <finish>` -- slice of rows
- keywords for the start and finish parameters
    - `PRECEDING` -- rows before the current row
        - e.g. `1 PRECEDING` -- 1 row before
    - `FOLLOWING` -- rows after the current row
    - `UNBOUNDED PRECEDING` -- every row since the beginning
    - `UNBOUNDED FOLLOWING` -- every row through the end
    - `CURRENT ROW` -- stop at the current row
*/
/*
Complete the window function by:
- Assessing the running total of home goals scored by FC Utrecht.
- Assessing the running average of home goals scored.
- Ordering both the running average and running total by date.
*/
-- Running total and running average of home goals, ordered by date,
-- for home team 9908 in season 2011/2012
SELECT
    date,
    home_goal,
    away_goal,
    -- frame: from the first row up to and including the current row
    SUM(home_goal) OVER (
        ORDER BY date
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS running_total,
    AVG(home_goal) OVER (
        ORDER BY date
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS running_avg
FROM match
WHERE hometeam_id = 9908
    AND season = '2011/2012';