In [1]:
import pandas as pd
import sqlalchemy as sa
import psycopg2 as ps
from sqlalchemy import create_engine

In [2]:
%load_ext sql
%sql postgresql://postgres:lingga28@localhost:2828/datacamp
conn = create_engine('postgresql://postgres:lingga28@localhost/datacamp')

# 1. Running totals of athlete medals
### Exercises
The running total (or cumulative sum) of a column helps you determine what each row's contribution is to the total sum.

### Instruction
Return the athletes, the number of medals they earned, and the medals running total, ordered by the athletes' names in alphabetical order.

In [5]:
%%sql

WITH Athlete_Medals AS (
  -- Gold medals per athlete for Country = 'USA' with Year >= 2000
  SELECT
    Athlete, COUNT(*) AS Medals
  FROM Summer_Medals
  WHERE
    Country = 'USA' AND Medal = 'Gold'
    AND Year >= 2000
  GROUP BY Athlete)

SELECT
  athlete,
  medals,
  -- Running total of medals, accumulated in alphabetical order of athlete.
  -- The window is ordered by the Athlete column, not by the CTE name
  -- (which PostgreSQL would interpret as a whole-row value).
  -- The Max_Medals alias is kept so the output column name stays the same.
  SUM(medals) OVER (ORDER BY Athlete ASC) AS Max_Medals
FROM Athlete_Medals
ORDER BY Athlete ASC
LIMIT 10; -- show only the first 10 rows to keep the output short

 * postgresql://postgres:***@localhost:2828/datacamp
10 rows affected.


athlete,medals,max_medals
ABDUR-RAHIM Shareef,1,1
ABERNATHY Brent,1,2
ADRIAN Nathan,3,5
AHRENS Chris,1,6
AINSWORTH Kurt,1,7
ALLEN Ray,1,8
ALLEN Wyatt,1,9
AMBROSI Christie,1,10
AMICO Leah,1,11
ANAE Tumua,1,12


# 2. Maximum country medals by year
### Exercises
Getting the maximum of a country's earned medals so far helps you determine whether a country has broken its medals record by comparing the current year's earned medals and the maximum so far.

### Instruction
Return the year, country, medals, and the maximum medals earned so far for each country, ordered by year in ascending order.

In [6]:
%%sql

WITH gold_by_country_year AS (
  -- Gold-medal counts per year and country for the three listed countries
  SELECT year, country, COUNT(*) AS medals
  FROM summer_medals
  WHERE medal = 'Gold'
    AND year >= 2000
    AND country IN ('CHN', 'KOR', 'JPN')
  GROUP BY year, country
)

SELECT
  year,
  country,
  medals,
  -- Highest medal count seen so far within each country
  MAX(medals) OVER per_country_by_year AS max_medals
FROM gold_by_country_year
WINDOW per_country_by_year AS (PARTITION BY country ORDER BY year)
ORDER BY country, year;

 * postgresql://postgres:***@localhost:2828/datacamp
12 rows affected.


year,country,medals,max_medals
2000,CHN,39,39
2004,CHN,52,52
2008,CHN,74,74
2012,CHN,56,74
2000,JPN,5,5
2004,JPN,21,21
2008,JPN,23,23
2012,JPN,7,23
2000,KOR,12,12
2004,KOR,14,14


# 3. Minimum country medals by year
So far, you've seen MAX and SUM, aggregate functions normally used with GROUP BY, being used as window functions. You can also use the other aggregate functions, like MIN, as window functions.

### Instruction
Return the year, medals earned, and minimum medals earned so far.

In [7]:
%%sql

WITH fra_gold AS (
  -- Gold-medal counts per year for Country = 'FRA' with Year >= 2000
  SELECT year, COUNT(*) AS medals
  FROM summer_medals
  WHERE country = 'FRA'
    AND medal = 'Gold'
    AND year >= 2000
  GROUP BY year
)

SELECT
  year,
  medals,
  -- Smallest yearly count from the first year up to the current one
  MIN(medals) OVER (ORDER BY year) AS min_medals
FROM fra_gold
ORDER BY year;

 * postgresql://postgres:***@localhost:2828/datacamp
4 rows affected.


year,medals,min_medals
2000,22,22
2004,21,21
2008,25,21
2012,30,21


# 4. Number of rows in a frame
How many rows does the following frame span?

ROWS BETWEEN 3 PRECEDING AND 2 FOLLOWING

### Possible Answers:
- A. 5
- B. 6
- C. 4

Answer: B

# 5. Moving maximum of Scandinavian athletes' medals
### Exercises
Frames allow you to restrict the rows passed as input to your window function to a sliding window for you to define the start and finish.

Adding a frame to your window function allows you to calculate "moving" metrics, inputs of which slide from row to row.

### Instruction
Return the year, medals earned, and the maximum medals earned, comparing only the current year and the next year.

In [9]:
%%sql

WITH nordic_gold AS (
  -- Gold-medal counts per year, pooled over the five listed country codes
  SELECT year, COUNT(*) AS medals
  FROM summer_medals
  WHERE medal = 'Gold'
    AND country IN ('DEN', 'NOR', 'FIN', 'SWE', 'ISL')
  GROUP BY year
)

SELECT
  year,
  medals,
  -- Larger of the current row and the row that follows it
  MAX(medals) OVER this_and_next AS max_medals
FROM nordic_gold
WINDOW this_and_next AS (
  ORDER BY year
  ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING
)
ORDER BY year;

 * postgresql://postgres:***@localhost:2828/datacamp
26 rows affected.


year,medals,max_medals
1896,1,1
1900,1,77
1908,77,141
1912,141,159
1920,159,159
1924,48,48
1928,24,24
1932,17,17
1936,15,54
1948,54,54


# 6. Moving maximum of Chinese athletes' medals
### Exercises
Frames allow you to "peek" forward or backward without first using the relative fetching functions, LAG and LEAD, to fetch previous rows' values into the current row.

### Instruction
Return the athletes, medals earned, and the maximum medals earned, comparing only the last two and current athletes, ordering by athletes' names in alphabetical order.

In [10]:
%%sql

WITH chn_gold AS (
  -- Gold medals per athlete for Country = 'CHN' with Year >= 2000
  SELECT athlete, COUNT(*) AS medals
  FROM summer_medals
  WHERE country = 'CHN'
    AND medal = 'Gold'
    AND year >= 2000
  GROUP BY athlete
)

SELECT
  athlete,
  medals,
  -- Largest count among the current athlete and the two listed before
  MAX(medals) OVER last_three AS max_medals
FROM chn_gold
WINDOW last_three AS (
  ORDER BY athlete
  ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
)
ORDER BY athlete
LIMIT 10; -- show only the first 10 rows to keep the output short

 * postgresql://postgres:***@localhost:2828/datacamp
10 rows affected.


athlete,medals,max_medals
CAI Yalin,1,1
CAI Yun,1,1
CAO Lei,1,1
CAO Yuan,1,1
CHEN Ding,1,1
CHEN Jing,1,1
CHEN Qi,1,1
CHEN Ruolin,4,4
CHEN Xiaomin,1,4
CHEN Xiexia,1,4


# 7. Moving average's frame
### Exercises
If you want your moving average to cover the last 3 and current Olympic games, how would you define its frame?

### Answer the question
### Possible Answers:
- A. ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING
- B. ROWS BETWEEN 3 PRECEDING AND 1 PRECEDING
- C. ROWS BETWEEN 3 PRECEDING AND CURRENT ROW

Answer: C

# 8. Moving average of Russian medals
### Exercises
Using frames with aggregate window functions allows you to calculate many common metrics, including moving averages and totals. These metrics track the change in performance over time.

### Instruction
Calculate the 3-year moving average of medals earned (the current year's row and the two preceding rows).

In [11]:
%%sql

WITH rus_gold AS (
  -- Gold-medal counts per year for Country = 'RUS' with Year >= 1980
  SELECT year, COUNT(*) AS medals
  FROM summer_medals
  WHERE country = 'RUS'
    AND medal = 'Gold'
    AND year >= 1980
  GROUP BY year
)

SELECT
  year,
  medals,
  -- Mean of the current row and the two rows before it
  AVG(medals) OVER (
    ORDER BY year
    ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
  ) AS medals_ma
FROM rus_gold
ORDER BY year;

 * postgresql://postgres:***@localhost:2828/datacamp
5 rows affected.


year,medals,medals_ma
1996,36,36.0
2000,66,51.0
2004,47,49.666666666666664
2008,43,52.0
2012,47,45.666666666666664


# 9. Moving total of countries' medals
### Exercises
What if your data is split into multiple groups spread over one or more columns in the table? Even with a defined frame, if you can't somehow separate the groups' data, one group's values will affect the average of another group's values.

### Instruction
Calculate the 3-year moving sum of medals earned per country (the current year's row and the two preceding rows for that country).

In [12]:
%%sql

WITH medals_by_country_year AS (
  -- Medal counts (all medal types) per year and country
  SELECT year, country, COUNT(*) AS medals
  FROM summer_medals
  GROUP BY year, country
)

SELECT
  year,
  country,
  medals,
  -- Sum of the current row and the two rows before it, restarted per country
  SUM(medals) OVER country_last_three AS medals_ma
FROM medals_by_country_year
WINDOW country_last_three AS (
  PARTITION BY country
  ORDER BY year
  ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
)
ORDER BY country, year
LIMIT 10; -- show only the first 10 rows to keep the output short

 * postgresql://postgres:***@localhost:2828/datacamp
10 rows affected.


year,country,medals,medals_ma
2008,AFG,1,1
2012,AFG,1,2
1988,AHO,1,1
1984,ALG,2,2
1992,ALG,2,4
1996,ALG,3,7
2000,ALG,5,10
2008,ALG,2,10
2012,ALG,1,8
1908,ANZ,19,19
