## Using CASE 
Learn how to use the `CASE WHEN` statement to:
- create categorical variables,
- aggregate data into a single column with multiple filtering conditions,
- and calculate counts and percentages.

In [1]:
import pandas as pd
import sqlite3
%reload_ext sql
%sql sqlite:///database.sqlite

# List the tables stored in the SQLite file so the queries below know what exists.
con = sqlite3.connect("database.sqlite")
try:
    mycur = con.cursor()
    # sqlite_master is SQLite's schema catalog; type='table' keeps table entries only.
    mycur.execute("SELECT name FROM sqlite_master WHERE type='table' ORDER BY name;")
    available_table = mycur.fetchall()
finally:
    # Always release the connection, even when the catalog query raises.
    con.close()
available_table


[('Country',),
 ('League',),
 ('Match',),
 ('Player',),
 ('Player_Attributes',),
 ('Team',),
 ('Team_Attributes',),
 ('sqlite_sequence',)]

In [2]:
%%sql
-- Show every row and column of the Country lookup table.
SELECT *
FROM Country;

 * sqlite:///database.sqlite
Done.


id,name
1,Belgium
1729,England
4769,France
7809,Germany
10257,Italy
13274,Netherlands
15722,Poland
17642,Portugal
19694,Scotland
21518,Spain


In [3]:
%%sql
-- Number of different long team names present in the Team table.
SELECT
    COUNT(DISTINCT team_long_name)
FROM Team;


 * sqlite:///database.sqlite
Done.


COUNT(DISTINCT team_long_name)
296


### CASE statements comparing column values

In [10]:
%%sql
-- Matches played in Germany, bucketed by which club was the home team.
SELECT
    -- Label the two clubs of interest by id; every other home team becomes 'Other'.
    CASE hometeam_id
        WHEN 9823  THEN 'FC Bayern Munich'
        WHEN 10189 THEN 'FC Schalke 04'
        ELSE 'Other'
    END AS home_team,
    COUNT(id) AS total_matches
FROM Match
WHERE country_id = 7809  -- Germany
GROUP BY home_team;

 * sqlite:///database.sqlite
Done.


home_team,total_matches
FC Bayern Munich,136
FC Schalke 04,136
Other,2176


In [40]:
%%sql
-- Barcelona (id 8634) home fixtures with the opponent's name and the result.
-- No ORDER BY is given, so LIMIT keeps whichever ten rows SQLite yields first.
SELECT
    m.date,
    t.team_long_name AS opponent,
    -- Barcelona is the home side here, so the home side outscoring the
    -- away side is a Barcelona win.
    CASE
        WHEN m.away_goal < m.home_goal THEN 'Barcelona win!'
        WHEN m.away_goal > m.home_goal THEN 'Barcelona loss :('
        ELSE 'Tie'
    END AS outcome
FROM Match AS m
-- Join on the away team so the looked-up name is the opponent's.
LEFT JOIN Team AS t
    ON m.awayteam_id = t.team_api_id
WHERE m.hometeam_id = 8634
LIMIT 10;

 * sqlite:///database.sqlite
Done.


date,opponent,outcome
2008-11-08 00:00:00,Real Valladolid,Barcelona win!
2008-11-23 00:00:00,Getafe CF,Tie
2008-12-06 00:00:00,Valencia CF,Barcelona win!
2008-12-13 00:00:00,Real Madrid CF,Barcelona win!
2009-01-03 00:00:00,RCD Mallorca,Barcelona win!
2009-01-17 00:00:00,RC Deportivo de La Coruña,Barcelona win!
2008-09-13 00:00:00,Racing Santander,Tie
2009-01-24 00:00:00,CD Numancia,Barcelona win!
2009-02-08 00:00:00,Real Sporting de Gijón,Barcelona win!
2009-02-21 00:00:00,RCD Espanyol,Barcelona loss :(


In [18]:
%%sql
-- Barcelona (id 8634) away fixtures with the opponent's name and the result.
-- No ORDER BY is given, so LIMIT keeps whichever ten rows SQLite yields first.
SELECT
    m.date,
    t.team_long_name AS opponent,
    -- Barcelona is the visitor here, so the away side outscoring the
    -- home side is a Barcelona win.
    CASE
        WHEN m.away_goal > m.home_goal THEN 'Barcelona win!'
        WHEN m.away_goal < m.home_goal THEN 'Barcelona loss :('
        ELSE 'Tie'
    END AS outcome
FROM Match AS m
-- Join on the home team so the looked-up name is the opponent's.
LEFT JOIN Team AS t
    ON m.hometeam_id = t.team_api_id
WHERE m.awayteam_id = 8634
LIMIT 10;

 * sqlite:///database.sqlite
Done.


date,opponent,outcome
2008-08-31 00:00:00,CD Numancia,Barcelona loss :(
2008-11-16 00:00:00,RC Recreativo,Barcelona win!
2008-11-29 00:00:00,Sevilla FC,Barcelona win!
2008-12-21 00:00:00,Villarreal CF,Barcelona win!
2009-01-11 00:00:00,CA Osasuna,Barcelona win!
2009-02-01 00:00:00,Racing Santander,Barcelona win!
2009-02-14 00:00:00,Real Betis Balompié,Tie
2009-03-01 00:00:00,Atlético Madrid,Barcelona loss :(
2009-03-15 00:00:00,UD Almería,Barcelona win!
2009-04-04 00:00:00,Real Valladolid,Barcelona win!


In [20]:
%%sql
-- Every match in which team ids 8634 and 8633 faced each other.
SELECT
    date,
    -- The WHERE clause leaves only these two ids, so any side that is not
    -- 8634 (FC Barcelona) is labelled Real Madrid CF.
    CASE hometeam_id
        WHEN 8634 THEN 'FC Barcelona'
        ELSE 'Real Madrid CF'
    END AS home,
    CASE awayteam_id
        WHEN 8634 THEN 'FC Barcelona'
        ELSE 'Real Madrid CF'
    END AS away
FROM Match
-- Both ids must appear in the fixture, on either side.
WHERE (awayteam_id = 8634 OR hometeam_id = 8634)
      AND (awayteam_id = 8633 OR hometeam_id = 8633);

 * sqlite:///database.sqlite
Done.


date,home,away
2008-12-13 00:00:00,FC Barcelona,Real Madrid CF
2009-05-02 00:00:00,Real Madrid CF,FC Barcelona
2009-11-29 00:00:00,FC Barcelona,Real Madrid CF
2010-04-10 00:00:00,Real Madrid CF,FC Barcelona
2010-11-29 00:00:00,FC Barcelona,Real Madrid CF
2011-04-16 00:00:00,Real Madrid CF,FC Barcelona
2011-12-10 00:00:00,Real Madrid CF,FC Barcelona
2012-04-21 00:00:00,FC Barcelona,Real Madrid CF
2013-03-02 00:00:00,Real Madrid CF,FC Barcelona
2012-10-07 00:00:00,FC Barcelona,Real Madrid CF


### Filtering CASE statement

In [23]:
%%sql
-- Look up the team_api_id that belongs to Bologna.
SELECT team_long_name, team_api_id
FROM Team
WHERE team_long_name = 'Bologna';

 * sqlite:///database.sqlite
Done.


team_long_name,team_api_id
Bologna,9857


In [26]:
%%sql
-- Season, date and score of matches won by Bologna (team id 9857).
-- No ORDER BY is given, so LIMIT keeps whichever ten rows SQLite yields first.
SELECT season, date, home_goal, away_goal
FROM Match
WHERE
    -- The CASE has no ELSE branch, so it evaluates to NULL for any match
    -- Bologna did not win; IS NOT NULL therefore keeps only Bologna wins.
    CASE
        WHEN hometeam_id = 9857 AND home_goal > away_goal THEN 'Bologna Win'
        WHEN awayteam_id = 9857 AND away_goal > home_goal THEN 'Bologna Win'
    END IS NOT NULL
LIMIT 10;

 * sqlite:///database.sqlite
Done.


season,date,home_goal,away_goal
2008/2009,2008-08-31 00:00:00,1,2
2008/2009,2008-12-13 00:00:00,5,2
2008/2009,2009-01-18 00:00:00,1,2
2008/2009,2009-01-28 00:00:00,0,1
2008/2009,2009-03-08 00:00:00,3,0
2008/2009,2009-04-26 00:00:00,2,0
2008/2009,2009-05-17 00:00:00,2,1
2008/2009,2009-05-31 00:00:00,3,1
2008/2009,2008-10-19 00:00:00,3,1
2009/2010,2009-10-28 00:00:00,2,1


### COUNT using CASE WHEN

In [27]:
%%sql
-- Number of matches per country in the 2012/2013 season.
SELECT
    c.name AS country,
    -- The CASE yields the match id only for 2012/2013 rows and NULL otherwise;
    -- COUNT ignores NULLs, so only that season is tallied.
    COUNT(CASE m.season WHEN '2012/2013' THEN m.id END) AS matches_2012_2013
FROM country AS c
LEFT JOIN match AS m
    ON c.id = m.country_id
-- Group on the country alias defined in the SELECT list.
GROUP BY country;

 * sqlite:///database.sqlite
Done.


country,matches_2012_2013
Belgium,240
England,380
France,380
Germany,306
Italy,380
Netherlands,306
Poland,240
Portugal,240
Scotland,228
Spain,380


In [41]:
%%sql
-- Home-team wins per country for the 2012/2013, 2013/2014 and 2014/2015 seasons.
SELECT
    c.name AS country,
    -- Each row contributes 1 when it belongs to the season and the home side
    -- scored more goals, otherwise 0, so every SUM is a count of home wins.
    SUM(CASE
            WHEN m.season = '2012/2013' AND m.away_goal < m.home_goal THEN 1
            ELSE 0
        END) AS matches_2012_2013,
    SUM(CASE
            WHEN m.season = '2013/2014' AND m.away_goal < m.home_goal THEN 1
            ELSE 0
        END) AS matches_2013_2014,
    SUM(CASE
            WHEN m.season = '2014/2015' AND m.away_goal < m.home_goal THEN 1
            ELSE 0
        END) AS matches_2014_2015
FROM country AS c
LEFT JOIN match AS m
    ON c.id = m.country_id
-- One output row per country name.
GROUP BY c.name;

 * sqlite:///database.sqlite
Done.


country,matches_2012_2013,matches_2013_2014,matches_2014_2015
Belgium,102,6,106
England,166,179,172
France,170,168,181
Germany,130,145,145
Italy,177,181,152
Netherlands,137,144,138
Poland,97,110,114
Portugal,103,108,137
Scotland,89,102,102
Spain,189,179,171


In [34]:
%%sql
-- Home wins, away wins and ties per country, with no season filter applied.
SELECT
    c.name AS country,
    -- Each CASE returns the match id only for its outcome (NULL otherwise),
    -- and COUNT skips NULLs, so the three columns split the matches by result.
    COUNT(CASE WHEN m.away_goal < m.home_goal THEN m.id END) AS home_wins,
    COUNT(CASE WHEN m.away_goal > m.home_goal THEN m.id END) AS away_wins,
    COUNT(CASE WHEN m.away_goal = m.home_goal THEN m.id END) AS ties
FROM country AS c
LEFT JOIN Match AS m
    ON c.id = m.country_id
GROUP BY country;

 * sqlite:///database.sqlite
Done.


country,home_wins,away_wins,ties
Belgium,810,493,425
England,1390,867,783
France,1359,822,859
Germany,1107,744,597
Italy,1407,814,796
Netherlands,1171,696,581
Poland,870,525,525
Portugal,908,611,533
Scotland,760,617,447
Spain,1485,851,704
