## Window Functions

- about window functions and how to pass aggregate functions along a dataset. 
- how to calculate running totals and partitioned averages.

### The match is OVER
- The `OVER()` clause allows to pass an aggregate function down a data set, similar to subqueries in `SELECT`. 
- The `OVER()` clause offers significant benefits over subqueries in select -- namely, queries will run faster, and the `OVER()` clause has a wide range of additional functions and clauses you can include with it 

In [1]:
import pandas as pd
import sqlite3
%reload_ext sql
%sql sqlite:///database.sqlite

con=sqlite3.connect("database.sqlite")
mycur = con.cursor()
mycur.execute("SELECT name FROM sqlite_master WHERE type='table' ORDER BY name;")
available_table=(mycur.fetchall())
con.close() 
available_table


[('Country',),
 ('League',),
 ('Match',),
 ('Player',),
 ('Player_Attributes',),
 ('Team',),
 ('Team_Attributes',),
 ('sqlite_sequence',)]

In [2]:
%%sql
SELECT 
    -- Select the id, country name, season, home, and away goals
    m.id, 
       c.name AS country, 
       m.season,
    m.home_goal,
    m.away_goal,
       -- Use a window to include the aggregate average in each row
    AVG(m.home_goal + m.away_goal) OVER() AS overall_avg
FROM match AS m
LEFT JOIN country AS c ON m.country_id = c.id
LIMIT 10;

 * sqlite:///database.sqlite
Done.


id,country,season,home_goal,away_goal,overall_avg
1,Belgium,2008/2009,1,1,2.705531390738673
2,Belgium,2008/2009,0,0,2.705531390738673
3,Belgium,2008/2009,0,3,2.705531390738673
4,Belgium,2008/2009,5,0,2.705531390738673
5,Belgium,2008/2009,1,3,2.705531390738673
6,Belgium,2008/2009,1,1,2.705531390738673
7,Belgium,2008/2009,2,2,2.705531390738673
8,Belgium,2008/2009,1,2,2.705531390738673
9,Belgium,2008/2009,1,0,2.705531390738673
10,Belgium,2008/2009,4,1,2.705531390738673


### `RANK()`

- Window functions allow you to create a `RANK` of information according to any variable you want to use to sort your data. 

    - need to specify what column/calculation you want to use to calculate your rank.
    - including an `ORDER BY` clause inside the `OVER()` clause. 

In [3]:
%%sql
SELECT 
    -- Select the league name and average goals scored
    l.name AS league,
    AVG(m.home_goal + m.away_goal) AS avg_goals,
    -- Rank each league according to the average goals
    RANK() OVER(ORDER BY AVG(m.home_goal + m.away_goal)) AS league_rank
FROM league AS l
LEFT JOIN match AS m 
ON l.id = m.country_id
WHERE m.season = '2011/2012'
GROUP BY l.name
-- Order the query by the rank you created
ORDER BY league_rank;

 * sqlite:///database.sqlite
Done.


league,avg_goals,league_rank
Poland Ekstraklasa,2.1958333333333333,1
France Ligue 1,2.5157894736842104,2
Italy Serie A,2.583798882681564,3
Switzerland Super League,2.623456790123457,4
Scotland Premier League,2.6359649122807016,5
Portugal Liga ZON Sagres,2.6416666666666666,6
Spain LIGA BBVA,2.763157894736842,7
England Premier League,2.805263157894737,8
Germany 1. Bundesliga,2.8594771241830066,9
Belgium Jupiler League,2.879166666666667,10


In [4]:
%%sql
SELECT 
    -- Select the league name and average goals scored
    l.name AS league,
    AVG(m.home_goal+ m.away_goal) AS avg_goals,
    -- Rank leagues in descending order by average goals
    RANK() OVER(ORDER BY avg(m.home_goal + m.away_goal) DESC) AS league_rank
FROM league AS l
LEFT JOIN match AS m 
ON l.id = m.country_id
WHERE m.season = '2011/2012'
GROUP BY l.name
-- Order the query by the rank you created
ORDER BY league_rank;

 * sqlite:///database.sqlite
Done.


league,avg_goals,league_rank
Netherlands Eredivisie,3.258169934640523,1
Belgium Jupiler League,2.879166666666667,2
Germany 1. Bundesliga,2.8594771241830066,3
England Premier League,2.805263157894737,4
Spain LIGA BBVA,2.763157894736842,5
Portugal Liga ZON Sagres,2.6416666666666666,6
Scotland Premier League,2.6359649122807016,7
Switzerland Super League,2.623456790123457,8
Italy Serie A,2.583798882681564,9
France Ligue 1,2.5157894736842104,10


### OVER with a PARTITION
#### PARTITION with a column


In [5]:
%%sql
SELECT
	date,
	season,
	home_goal,
	away_goal,
	CASE WHEN hometeam_id = 8673 THEN 'home' 
		 ELSE 'away' END AS warsaw_location,
    -- Calculate the average goals scored partitioned by season
    AVG(home_goal) OVER(PARTITION BY season) AS season_homeavg,
    AVG(away_goal) OVER(PARTITION BY season) AS season_awayavg
FROM match
-- Filter the data set for Legia Warszawa matches only
WHERE 
	hometeam_id = 8673 
    OR awayteam_id = 8673
ORDER BY (home_goal + away_goal) DESC
LIMIT 10;

 * sqlite:///database.sqlite
Done.


date,season,home_goal,away_goal,warsaw_location,season_homeavg,season_awayavg
2013-09-14 00:00:00,2013/2014,3,5,away,1.7666666666666666,1.2333333333333334
2009-10-24 00:00:00,2009/2010,5,2,home,1.2333333333333334,0.7
2011-05-25 00:00:00,2010/2011,2,5,away,1.6333333333333333,1.1333333333333333
2014-09-13 00:00:00,2014/2015,4,3,home,1.5666666666666669,1.3333333333333333
2011-02-25 00:00:00,2010/2011,3,3,away,1.6333333333333333,1.1333333333333333
2013-07-20 00:00:00,2013/2014,5,1,home,1.7666666666666666,1.2333333333333334
2008-11-28 00:00:00,2008/2009,2,3,away,1.6,0.7
2009-05-30 00:00:00,2008/2009,4,1,home,1.6,0.7
2010-03-26 00:00:00,2009/2010,2,3,away,1.2333333333333334,0.7
2010-10-22 00:00:00,2010/2011,1,4,away,1.6333333333333333,1.1333333333333333


#### PARTITION with multi columns

In [6]:
%%sql

SELECT 
    date,
    season,
    home_goal,
    away_goal,
    CASE WHEN hometeam_id = 8673 THEN 'home' 
         ELSE 'away' END AS warsaw_location,
    -- Calculate average goals partitioned by season and month
    avg(home_goal) OVER(PARTITION BY season, 
            STRFTIME('%month', date)) AS season_mo_home,
    avg(away_goal) OVER(PARTITION BY season,  
            STRFTIME('%month', date)) AS season_mo_away
FROM Match
WHERE 
    hometeam_id = 8673 
    OR awayteam_id = 8673
ORDER BY (home_goal + away_goal) DESC
LIMIT 10;

 * sqlite:///database.sqlite
Done.


date,season,home_goal,away_goal,warsaw_location,season_mo_home,season_mo_away
2013-09-14 00:00:00,2013/2014,3,5,away,2.25,2.5
2009-10-24 00:00:00,2009/2010,5,2,home,2.5,0.75
2011-05-25 00:00:00,2010/2011,2,5,away,2.0,1.1666666666666667
2014-09-13 00:00:00,2014/2015,4,3,home,2.0,2.6666666666666665
2011-02-25 00:00:00,2010/2011,3,3,away,3.0,3.0
2013-07-20 00:00:00,2013/2014,5,1,home,2.5,2.0
2009-05-30 00:00:00,2008/2009,4,1,home,2.0,0.6
2008-11-28 00:00:00,2008/2009,2,3,away,1.8333333333333333,0.6666666666666666
2010-03-26 00:00:00,2009/2010,2,3,away,1.0,1.25
2011-04-02 00:00:00,2010/2011,2,3,home,1.6,1.2


### Sliding Windows
#### Slide to the left

In [7]:
%%sql
SELECT 
    date,
    home_goal,
    away_goal,
    -- Create a running total and running average of home goals
    sum(home_goal) OVER(ORDER BY date 
         ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS running_total,
    avg(home_goal) OVER(ORDER BY date 
         ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS running_avg
FROM match
WHERE 
    hometeam_id = 9908 
    AND season = '2011/2012';

 * sqlite:///database.sqlite
Done.


date,home_goal,away_goal,running_total,running_avg
2011-08-14 00:00:00,2,2,2,2.0
2011-08-27 00:00:00,3,1,5,2.5
2011-09-18 00:00:00,2,2,7,2.333333333333333
2011-10-01 00:00:00,3,0,10,2.5
2011-10-22 00:00:00,1,4,11,2.2
2011-11-06 00:00:00,6,4,17,2.833333333333333
2011-12-04 00:00:00,2,6,19,2.7142857142857144
2011-12-11 00:00:00,2,2,21,2.625
2012-01-22 00:00:00,1,1,22,2.4444444444444446
2012-02-12 00:00:00,1,1,23,2.3


#### Slide to the right

In [8]:
%%sql
SELECT 
    -- Select the date, home goal, and away goals
    date,
    home_goal,
    away_goal,
    -- Create a running total and running average of home goals
    sum(home_goal) OVER(ORDER BY date DESC
         ROWS BETWEEN  CURRENT ROW AND UNBOUNDED FOLLOWING) AS running_total,
    avg(home_goal) OVER(ORDER BY date DESC
         ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS running_avg
FROM match
WHERE 
    awayteam_id = 9908 
    AND season = '2011/2012';

 * sqlite:///database.sqlite
Done.


date,home_goal,away_goal,running_total,running_avg
2012-05-06 00:00:00,1,3,25,1.4705882352941178
2012-04-21 00:00:00,0,2,24,1.5
2012-04-12 00:00:00,3,0,24,1.6
2012-03-25 00:00:00,3,1,21,1.5
2012-03-11 00:00:00,1,1,18,1.3846153846153846
2012-02-26 00:00:00,1,0,17,1.4166666666666667
2012-02-05 00:00:00,0,2,16,1.4545454545454546
2012-01-28 00:00:00,2,0,16,1.6
2011-12-17 00:00:00,1,0,14,1.5555555555555556
2011-11-25 00:00:00,2,0,13,1.625


#### Combine all to meet the needs 
- use CASE statements, 
- subqueries, 
- common table expressions, 
- and window functions in queries 
    > **to structure a data set that best meets needs.**

In [9]:
%%sql

SELECT 
    m.id, 
       t.team_long_name,
       -- Identify matches as home/away wins or ties
    CASE WHEN m.home_goal > m.away_goal THEN 'MU Win'
         WHEN m.home_goal < m.away_goal THEN 'MU Loss'
         ELSE 'Tie' END AS outcome
FROM match AS m
-- Left join team on the home team ID and team API id
LEFT JOIN team AS t 
ON m.hometeam_id = t.team_api_id
WHERE 
    -- Filter for 2014/2015 and Manchester United as the home team
    season = '2014/2015'
    AND t.team_long_name = 'Manchester United';

 * sqlite:///database.sqlite
Done.


id,team_long_name,outcome
4013,Manchester United,MU Loss
4031,Manchester United,MU Win
4051,Manchester United,MU Win
4062,Manchester United,MU Win
4085,Manchester United,MU Win
4105,Manchester United,MU Win
4145,Manchester United,MU Loss
4164,Manchester United,MU Win
4181,Manchester United,MU Win
4203,Manchester United,MU Win


In [10]:
%%sql
-- Set up the home team CTE
with home as (
  SELECT m.id, t.team_long_name,
      CASE WHEN m.home_goal > m.away_goal THEN 'MU Win'
           WHEN m.home_goal < m.away_goal THEN 'MU Loss' 
           ELSE 'Tie' END AS outcome
  FROM match AS m
  LEFT JOIN team AS t ON m.hometeam_id = t.team_api_id),
-- Set up the away team CTE
away as (
  SELECT m.id, t.team_long_name,
      CASE WHEN m.home_goal > m.away_goal THEN 'MU Win'
           WHEN m.home_goal < m.away_goal THEN 'MU Loss' 
           ELSE 'Tie' END AS outcome
  FROM match AS m
  LEFT JOIN team AS t ON m.awayteam_id = t.team_api_id)
-- Select team names, the date and goals
SELECT DISTINCT
    m.date,
    home.team_long_name AS home_team,
    away.team_long_name AS away_team,
    m.home_goal,
    m.away_goal
-- Join the CTEs onto the match table
FROM match AS m
LEFT JOIN home ON m.id = home.id
LEFT JOIN away ON m.id = away.id
WHERE m.season = '2014/2015'
      AND (home.team_long_name = 'Manchester United' 
           OR away.team_long_name = 'Manchester United');

 * sqlite:///database.sqlite
Done.


date,home_team,away_team,home_goal,away_goal
2014-08-16 00:00:00,Manchester United,Swansea City,1,2
2014-11-02 00:00:00,Manchester City,Manchester United,1,0
2014-11-08 00:00:00,Manchester United,Crystal Palace,1,0
2014-11-22 00:00:00,Arsenal,Manchester United,1,2
2014-11-29 00:00:00,Manchester United,Hull City,3,0
2014-12-02 00:00:00,Manchester United,Stoke City,2,1
2014-12-08 00:00:00,Southampton,Manchester United,1,2
2014-12-14 00:00:00,Manchester United,Liverpool,3,0
2014-12-20 00:00:00,Aston Villa,Manchester United,1,1
2014-12-26 00:00:00,Manchester United,Newcastle United,3,1


In [11]:
%%sql
-- Set up the home team CTE
with home as (
  SELECT m.id, t.team_long_name,
      CASE WHEN m.home_goal > m.away_goal THEN 'MU Win'
           WHEN m.home_goal < m.away_goal THEN 'MU Loss' 
           ELSE 'Tie' END AS outcome
  FROM match AS m
  LEFT JOIN team AS t ON m.hometeam_id = t.team_api_id),
-- Set up the away team CTE
away as (
  SELECT m.id, t.team_long_name,
      CASE WHEN m.home_goal > m.away_goal THEN 'MU Loss'
           WHEN m.home_goal < m.away_goal THEN 'MU Win' 
           ELSE 'Tie' END AS outcome
  FROM match AS m
  LEFT JOIN team AS t ON m.awayteam_id = t.team_api_id)
-- Select columns and and rank the matches by date
SELECT DISTINCT
    m.date,
    home.team_long_name AS home_team,
    away.team_long_name AS away_team,
    m.home_goal, m.away_goal,
    RANK() OVER(ORDER BY ABS(home_goal - away_goal) DESC) as match_rank
-- Join the CTEs onto the match table
FROM match AS m
LEFT JOIN away ON m.id = away.id
LEFT JOIN home ON m.id = home.id
WHERE m.season = '2014/2015'
      AND ((home.team_long_name = 'Manchester United' AND home.outcome = 'MU Loss')
      OR (away.team_long_name = 'Manchester United' AND away.outcome = 'MU Loss'));

 * sqlite:///database.sqlite
Done.


date,home_team,away_team,home_goal,away_goal,match_rank
2015-04-26 00:00:00,Everton,Manchester United,3,0,1
2014-09-21 00:00:00,Leicester City,Manchester United,5,3,2
2014-08-16 00:00:00,Manchester United,Swansea City,1,2,3
2014-11-02 00:00:00,Manchester City,Manchester United,1,0,3
2015-01-11 00:00:00,Manchester United,Southampton,0,1,3
2015-02-21 00:00:00,Swansea City,Manchester United,2,1,3
2015-04-18 00:00:00,Chelsea,Manchester United,1,0,3
2015-05-02 00:00:00,Manchester United,West Bromwich Albion,0,1,3
