# Part 2.4: Combining Tables in SQL Course

Importing pandas and sqlite3 to run SQL queries from Python:

In [1]:
import pandas as pd
import sqlite3

Connecting to the database and creating a cursor object:

In [29]:
# Open the SQLite database file 'chinook.db' (relative path, resolved against the current working directory).
connection = sqlite3.connect('chinook.db')
# Cursor for executing statements directly on the connection; the query cells visible in this
# notebook pass `connection` to pandas instead.
cursor = connection.cursor()

## Introduction to Joins

Joining Tables:

In [3]:
# Inner-join each customer row with that customer's invoices and preview five rows.
# Without ORDER BY the row order of a SELECT is undefined, so LIMIT could return
# different rows between runs; sorting by invoice_id makes the preview reproducible.
query = """

SELECT
    -- All columns
    *
FROM
    -- customer table
    customer
INNER JOIN
    -- join the columns of the invoice table
    invoice
    -- matching rows based on customer_id in the first table and customer_id in the second table
    ON customer.customer_id = invoice.customer_id
ORDER BY
    -- fix the row order so that LIMIT always returns the same rows
    invoice.invoice_id
LIMIT
    -- Display top 5 rows
    5;

"""

pd.read_sql_query(sql=query, con=connection)

Unnamed: 0,customer_id,first_name,last_name,company,address,city,state,country,postal_code,phone,...,support_rep_id,invoice_id,customer_id.1,invoice_date,billing_address,billing_city,billing_state,billing_country,billing_postal_code,total
0,18,Michelle,Brooks,,627 Broadway,New York,NY,USA,10012-2612,+1 (212) 221-3546,...,3,1,18,2017-01-03 00:00:00,627 Broadway,New York,NY,USA,10012-2612,15.84
1,30,Edward,Francis,,230 Elgin Street,Ottawa,ON,Canada,K2P 1L7,+1 (613) 234-3322,...,3,2,30,2017-01-03 00:00:00,230 Elgin Street,Ottawa,ON,Canada,K2P 1L7,9.9
2,40,Dominique,Lefebvre,,"8, Rue Hanovre",Paris,,France,75002,+33 01 47 42 71 71,...,4,3,40,2017-01-05 00:00:00,"8, Rue Hanovre",Paris,,France,75002,1.98
3,18,Michelle,Brooks,,627 Broadway,New York,NY,USA,10012-2612,+1 (212) 221-3546,...,3,4,18,2017-01-06 00:00:00,627 Broadway,New York,NY,USA,10012-2612,7.92
4,27,Patrick,Gray,,1033 N Park Ave,Tucson,AZ,USA,85719,+1 (520) 622-4200,...,4,5,27,2017-01-07 00:00:00,1033 N Park Ave,Tucson,AZ,USA,85719,16.83


Selecting Columns:

In [4]:
# Same customer/invoice inner join, restricted to a handful of columns.
# customer_id exists in both tables, so it is the only column that needs a table qualifier.
query = """

SELECT 
    customer.customer_id,first_name,last_name,email,invoice_id,invoice_date,total
FROM 
    customer
INNER JOIN 
    invoice
    ON customer.customer_id = invoice.customer_id;

"""

pd.read_sql_query(sql=query, con=connection)

Unnamed: 0,customer_id,first_name,last_name,email,invoice_id,invoice_date,total
0,18,Michelle,Brooks,michelleb@aol.com,1,2017-01-03 00:00:00,15.84
1,30,Edward,Francis,edfrancis@yachoo.ca,2,2017-01-03 00:00:00,9.90
2,40,Dominique,Lefebvre,dominiquelefebvre@gmail.com,3,2017-01-05 00:00:00,1.98
3,18,Michelle,Brooks,michelleb@aol.com,4,2017-01-06 00:00:00,7.92
4,27,Patrick,Gray,patrick.gray@aol.com,5,2017-01-07 00:00:00,16.83
...,...,...,...,...,...,...,...
609,55,Mark,Taylor,mark.taylor@yahoo.au,610,2020-12-21 00:00:00,6.93
610,52,Emma,Jones,emma_jones@hotmail.com,611,2020-12-27 00:00:00,1.98
611,33,Ellie,Sullivan,ellie.sullivan@shaw.ca,612,2020-12-27 00:00:00,11.88
612,20,Dan,Miller,dmiller@comcast.com,613,2020-12-29 00:00:00,8.91


Aliasing in Joins:

In [8]:
# Table aliases (t, g) shorten the qualified column names; column aliases
# (track_name, genre) disambiguate the two `name` columns in the result.
# ORDER BY is added so that LIMIT returns the same five tracks on every run
# (row order is otherwise undefined).
# NOTE(review): t.name is selected twice (once aliased as track_name); it is kept
# here so the result columns stay the same, but the second copy looks redundant.
query = """

SELECT 
    t.track_id, t.name AS track_name, t.composer, t.name, g.name as genre
FROM 
    track AS t
JOIN 
    genre AS g
    ON t.genre_id = g.genre_id
ORDER BY
    t.track_id
LIMIT
    5;
"""

pd.read_sql_query(sql=query, con=connection)

Unnamed: 0,track_id,track_name,composer,name,genre
0,1,For Those About To Rock (We Salute You),"Angus Young, Malcolm Young, Brian Johnson",For Those About To Rock (We Salute You),Rock
1,2,Balls to the Wall,,Balls to the Wall,Rock
2,3,Fast As a Shark,"F. Baltes, S. Kaufman, U. Dirkscneider & W. Ho...",Fast As a Shark,Rock
3,4,Restless and Wild,"F. Baltes, R.A. Smith-Diesel, S. Kaufman, U. D...",Restless and Wild,Rock
4,5,Princess of the Dawn,Deaffy & R.A. Smith-Diesel,Princess of the Dawn,Rock


## Joins and Other Clauses

Joining and GROUP BY:

In [9]:
# Count tracks per genre: join genre to track on genre_id, then aggregate by genre name.
# The inner join means a genre without any track produces no row at all.
query = """

SELECT 
    genre.name AS genre, 
    COUNT(*) AS num_of_tracks
FROM 
    genre
JOIN 
    track
    ON genre.genre_id = track.genre_id
GROUP BY
    genre.name

"""

pd.read_sql_query(sql=query, con=connection)

Unnamed: 0,genre,num_of_tracks
0,Alternative,40
1,Alternative & Punk,332
2,Blues,81
3,Bossa Nova,15
4,Classical,74
5,Comedy,17
6,Drama,64
7,Easy Listening,24
8,Electronica/Dance,30
9,Heavy Metal,28


Joining multiple tables:

In [10]:
# Chain two joins: invoice -> customer (via customer_id) -> employee (via the customer's
# support_rep_id), returning every invoice column plus the support rep's first name.
query = """

SELECT 
    i.*, 
    e.first_name
FROM 
    invoice as i
JOIN 
    customer AS c
    ON 
    i.customer_id = c.customer_id
JOIN 
    employee AS e
    ON 
    c.support_rep_id = e.employee_id;

"""

pd.read_sql_query(sql=query, con=connection)

Unnamed: 0,invoice_id,customer_id,invoice_date,billing_address,billing_city,billing_state,billing_country,billing_postal_code,total,first_name
0,1,18,2017-01-03 00:00:00,627 Broadway,New York,NY,USA,10012-2612,15.84,Jane
1,2,30,2017-01-03 00:00:00,230 Elgin Street,Ottawa,ON,Canada,K2P 1L7,9.90,Jane
2,3,40,2017-01-05 00:00:00,"8, Rue Hanovre",Paris,,France,75002,1.98,Margaret
3,4,18,2017-01-06 00:00:00,627 Broadway,New York,NY,USA,10012-2612,7.92,Jane
4,5,27,2017-01-07 00:00:00,1033 N Park Ave,Tucson,AZ,USA,85719,16.83,Margaret
...,...,...,...,...,...,...,...,...,...,...
609,610,55,2020-12-21 00:00:00,421 Bourke Street,Sidney,NSW,Australia,2010,6.93,Margaret
610,611,52,2020-12-27 00:00:00,202 Hoxton Street,London,,United Kingdom,N1 5LH,1.98,Jane
611,612,33,2020-12-27 00:00:00,5112 48 Street,Yellowknife,NT,Canada,X1A 1N6,11.88,Jane
612,613,20,2020-12-29 00:00:00,541 Del Medio Avenue,Mountain View,CA,USA,94040-111,8.91,Margaret


## Less Common Joins

Self-joins:

In [12]:
# Self-join: pair every employee (e1) with the employee they report to (e2).
# LEFT JOIN keeps employees whose reports_to matches nobody; their manager columns are NULL.
# The separator is written as ' ' (single quotes): in SQL, double quotes denote identifiers,
# and SQLite only treats "..." as a string as a legacy fallback that can be disabled.
query = """

   SELECT e1.employee_id,
          e2.employee_id AS manager_id,
          e1.first_name || ' ' || e1.last_name AS report,
          e2.first_name || ' ' || e2.last_name AS manager
     FROM employee AS e1
LEFT JOIN employee AS e2
       ON e1.reports_to = e2.employee_id;

"""

pd.read_sql_query(sql=query, con=connection)

Unnamed: 0,employee_id,manager_id,report,manager
0,1,,Andrew Adams,
1,2,1.0,Nancy Edwards,Andrew Adams
2,3,2.0,Jane Peacock,Nancy Edwards
3,4,2.0,Margaret Park,Nancy Edwards
4,5,2.0,Steve Johnson,Nancy Edwards
5,6,1.0,Michael Mitchell,Andrew Adams
6,7,6.0,Robert King,Michael Mitchell
7,8,6.0,Laura Callahan,Michael Mitchell


CROSS JOIN:

In [14]:
# CROSS JOIN pairs every customer with every other customer; the WHERE clause drops
# the pairs where a customer would be matched with themselves.
# ORDER BY is added so that LIMIT returns the same five pairs on every run
# (row order is otherwise undefined).
query = """

SELECT 
    c1.first_name, 
    c1.last_name, 
    c1.email, 
    c2.first_name, 
    c2.last_name, 
    c2.email
FROM 
    customer AS c1
CROSS JOIN 
    customer AS c2
WHERE 
    c1.customer_id != c2.customer_id
ORDER BY
    c1.customer_id, c2.customer_id
LIMIT
    5;
    
"""

pd.read_sql_query(sql=query, con=connection)

Unnamed: 0,first_name,last_name,email,first_name.1,last_name.1,email.1
0,Luís,Gonçalves,luisg@embraer.com.br,Leonie,Köhler,leonekohler@surfeu.de
1,Luís,Gonçalves,luisg@embraer.com.br,François,Tremblay,ftremblay@gmail.com
2,Luís,Gonçalves,luisg@embraer.com.br,Bjørn,Hansen,bjorn.hansen@yahoo.no
3,Luís,Gonçalves,luisg@embraer.com.br,František,Wichterlová,frantisekw@jetbrains.com
4,Luís,Gonçalves,luisg@embraer.com.br,Helena,Holý,hholy@gmail.com


Joining on Conditions:

In [16]:
# Number of 2020 purchases per track.
# The year filter sits in the ON clause of the second LEFT JOIN rather than in WHERE, so
# tracks with no 2020 invoice keep a row with NULL invoice columns and COUNT(i.invoice_id)
# reports 0 for them.
# SUBSTR is used instead of SUBSTRING because SUBSTRING is only available as an alias in
# newer SQLite versions. ORDER BY makes the LIMIT preview deterministic.
query = """

SELECT 
    t.track_id, 
    t.name, 
    COUNT(i.invoice_id) AS no_of_purchases
FROM 
    track AS t
LEFT JOIN 
    invoice_line AS il
    ON 
    t.track_id = il.track_id
LEFT JOIN 
    invoice AS i
    ON il.invoice_id = i.invoice_id AND SUBSTR(i.invoice_date,1,4) = '2020'
GROUP BY 
    t.track_id, t.name
ORDER BY
    t.track_id
LIMIT
10;
"""

pd.read_sql_query(sql=query, con=connection)

Unnamed: 0,track_id,name,no_of_purchases
0,1,For Those About To Rock (We Salute You),1
1,2,Balls to the Wall,0
2,3,Fast As a Shark,1
3,4,Restless and Wild,2
4,5,Princess of the Dawn,0
5,6,Put The Finger On You,3
6,7,Let's Get It Up,0
7,8,Inject The Venom,1
8,9,Snowballed,2
9,10,Evil Walks,2


In [19]:
# Running total of invoice amounts via a non-equi self-join:
# each invoice i1 is paired with every invoice i2 whose invoice_id and invoice_date are both
# less than or equal to its own, and SUM(i2.total) over those pairs is the cumulative total.
# ORDER BY is added so that LIMIT returns the first ten invoices on every run
# (row order is otherwise undefined).
query = """

SELECT
i1.invoice_id, i1.invoice_date, i1.total,
ROUND(SUM(i2.total),2) AS running_total
FROM
invoice as i1
JOIN invoice AS i2
ON i1.invoice_id >= i2.invoice_id AND i1.invoice_date >= i2.invoice_date
GROUP BY
i1.invoice_id, i1.invoice_date, i1.total
ORDER BY
i1.invoice_id
LIMIT
10;

"""

pd.read_sql_query(sql=query, con=connection)

Unnamed: 0,invoice_id,invoice_date,total,running_total
0,1,2017-01-03 00:00:00,15.84,15.84
1,2,2017-01-03 00:00:00,9.9,25.74
2,3,2017-01-05 00:00:00,1.98,27.72
3,4,2017-01-06 00:00:00,7.92,35.64
4,5,2017-01-07 00:00:00,16.83,52.47
5,6,2017-01-10 00:00:00,1.98,54.45
6,7,2017-01-12 00:00:00,10.89,65.34
7,8,2017-01-13 00:00:00,9.9,75.24
8,9,2017-01-18 00:00:00,8.91,84.15
9,10,2017-01-18 00:00:00,1.98,86.13


## Set Operators

UNION:

In [23]:
# UNION stacks the rows of both invoice tables and removes duplicate rows,
# so a row present in both tables appears only once in the result.
query = """

SELECT *
FROM invoice_2017q1_2018q2

UNION

SELECT *
FROM invoice_2018q1_2018q4

"""

pd.read_sql_query(sql=query, con=connection)

Unnamed: 0,invoice_id,customer_id,invoice_date,billing_address,billing_city,billing_state,billing_country,billing_postal_code,total
0,1,18,2017-01-03 00:00:00,627 Broadway,New York,NY,USA,10012-2612,15.84
1,2,30,2017-01-03 00:00:00,230 Elgin Street,Ottawa,ON,Canada,K2P 1L7,9.90
2,3,40,2017-01-05 00:00:00,"8, Rue Hanovre",Paris,,France,75002,1.98
3,4,18,2017-01-06 00:00:00,627 Broadway,New York,NY,USA,10012-2612,7.92
4,5,27,2017-01-07 00:00:00,1033 N Park Ave,Tucson,AZ,USA,85719,16.83
...,...,...,...,...,...,...,...,...,...
300,301,34,2018-12-15 00:00:00,Rua da Assunção 53,Lisbon,,Portugal,,7.92
301,302,40,2018-12-18 00:00:00,"8, Rue Hanovre",Paris,,France,75002,3.96
302,303,28,2018-12-21 00:00:00,302 S 700 E,Salt Lake City,UT,USA,84102,9.90
303,304,56,2018-12-23 00:00:00,307 Macacha Güemes,Buenos Aires,,Argentina,1106,6.93


UNION ALL:

In [25]:
# UNION ALL stacks the rows of both invoice tables and keeps duplicates,
# so a row present in both tables appears twice in the result.
query = """

SELECT invoice_id, customer_id, invoice_date, total
FROM invoice_2017q1_2018q2

UNION ALL

SELECT invoice_id, customer_id, invoice_date, total
FROM invoice_2018q1_2018q4;

"""

pd.read_sql_query(sql=query, con=connection)

Unnamed: 0,invoice_id,customer_id,invoice_date,total
0,1,18,2017-01-03 00:00:00,15.84
1,2,30,2017-01-03 00:00:00,9.90
2,3,40,2017-01-05 00:00:00,1.98
3,4,18,2017-01-06 00:00:00,7.92
4,5,27,2017-01-07 00:00:00,16.83
...,...,...,...,...
391,301,34,2018-12-15 00:00:00,7.92
392,302,40,2018-12-18 00:00:00,3.96
393,303,28,2018-12-21 00:00:00,9.90
394,304,56,2018-12-23 00:00:00,6.93


INTERSECT:

In [27]:
# INTERSECT keeps only the rows that appear in both invoice tables.
query = """

SELECT *
     FROM invoice_2017q1_2018q2

INTERSECT

   SELECT *
     FROM invoice_2018q1_2018q4
     
"""

pd.read_sql_query(sql=query, con=connection)

Unnamed: 0,invoice_id,customer_id,invoice_date,billing_address,billing_city,billing_state,billing_country,billing_postal_code,total
0,155,59,2018-01-02 00:00:00,"3,Raj Bhavan Road",Bangalore,,India,560001,8.91
1,156,10,2018-01-03 00:00:00,"Rua Dr. Falcão Filho, 155",São Paulo,SP,Brazil,01007-010,10.89
2,157,57,2018-01-04 00:00:00,"Calle Lira, 198",Santiago,,Chile,,0.99
3,158,57,2018-01-07 00:00:00,"Calle Lira, 198",Santiago,,Chile,,5.94
4,159,20,2018-01-08 00:00:00,541 Del Medio Avenue,Mountain View,CA,USA,94040-111,5.94
...,...,...,...,...,...,...,...,...,...
86,241,45,2018-06-18 00:00:00,Erzsébet krt. 58.,Budapest,,Hungary,H-1073,1.98
87,242,36,2018-06-18 00:00:00,Tauentzienstraße 8,Berlin,,Germany,10789,9.90
88,243,6,2018-06-23 00:00:00,Rilská 3174/6,Prague,,Czech Republic,14300,10.89
89,244,53,2018-06-23 00:00:00,113 Lupus St,London,,United Kingdom,SW1V 3EN,4.95


EXCEPT:

In [26]:
# EXCEPT keeps the rows of the first invoice table that do not appear in the second one.
# The trailing ORDER BY applies to the combined result, listing the newest invoices first.
query = """

SELECT *
  FROM invoice_2017q1_2018q2

EXCEPT

SELECT *
FROM invoice_2018q1_2018q4

ORDER BY invoice_date DESC;

"""

pd.read_sql_query(sql=query, con=connection)

Unnamed: 0,invoice_id,customer_id,invoice_date,billing_address,billing_city,billing_state,billing_country,billing_postal_code,total
0,154,39,2017-12-17 00:00:00,"4, Rue Milton",Paris,,France,75009,2.97
1,153,1,2017-12-14 00:00:00,"Av. Brigadeiro Faria Lima, 2170",São José dos Campos,SP,Brazil,12227-000,13.86
2,152,12,2017-12-12 00:00:00,"Praça Pio X, 119",Rio de Janeiro,RJ,Brazil,20040-020,5.94
3,151,35,2017-12-06 00:00:00,"Rua dos Campeões Europeus de Viena, 4350",Porto,,Portugal,,2.97
4,150,22,2017-12-01 00:00:00,120 S Orange Ave,Orlando,FL,USA,32801,2.97
...,...,...,...,...,...,...,...,...,...
149,5,27,2017-01-07 00:00:00,1033 N Park Ave,Tucson,AZ,USA,85719,16.83
150,4,18,2017-01-06 00:00:00,627 Broadway,New York,NY,USA,10012-2612,7.92
151,3,40,2017-01-05 00:00:00,"8, Rue Hanovre",Paris,,France,75002,1.98
152,1,18,2017-01-03 00:00:00,627 Broadway,New York,NY,USA,10012-2612,15.84
