# Intermediate Joins in SQL


These are my notes on intermediate joins in SQL. I use Python's sqlite3 module together with pandas to run the queries.
The queries use CASE expressions, subqueries, different types of joins, aliases and aggregations.

(c) Miradiz Rakhmatov

In [1]:
import pandas as pd
import sqlite3

In [2]:
## open a connection to the Chinook SQLite database file (path is relative to the notebook's working directory)
con = sqlite3.connect(database='data/chinook.db')

## helper that runs an SQL query on the notebook's connection and returns the result as a pandas DataFrame
def run(query, params=None):
    """Execute an SQL query on the module-level connection ``con``.

    Parameters
    ----------
    query : str
        SQL statement to execute.
    params : sequence or mapping, optional
        Values bound to the placeholders in ``query`` (``?`` or ``:name``
        for sqlite3). Defaults to ``None``, i.e. the query is run as
        written, which is the original behaviour.

    Returns
    -------
    pandas.DataFrame
        One row per result row, one column per selected column.
    """
    # Binding values through ``params`` avoids formatting them into the SQL text.
    return pd.read_sql(query, con, params=params)

# Schema diagram:
![](data/chinook-schema.svg)

In [3]:
## database tables: sqlite_master is SQLite's schema catalog, one row per table, index, view or trigger

db_content = '''
SELECT
    name,
    type
FROM sqlite_master
WHERE type = 'table';
'''
## 'table' is single-quoted on purpose: in SQL, double quotes denote identifiers,
## and SQLite only treats "table" as a string through a legacy fallback.

run(db_content)

Unnamed: 0,name,type
0,album,table
1,artist,table
2,customer,table
3,employee,table
4,genre,table
5,invoice,table
6,invoice_line,table
7,media_type,table
8,playlist,table
9,playlist_track,table


## 1) Query that gathers data about the invoice with invoice ID 4
#### Columns to include: name of the track, name of media type, price paid for the track, quantity purchased


In [4]:
## Purchased lines of invoice 4.
## Starts from track, joins media_type to get the name of the media type,
## and joins invoice_line to get the unit price and quantity of each purchased line.
q1='''
SELECT 
    t.track_id id, 
    t.name track_name,
    m.name track_type,
    il.unit_price,
    il.quantity
FROM track t
JOIN media_type m ON t.media_type_id = m.media_type_id
JOIN invoice_line il ON t.track_id = il.track_id
WHERE il.invoice_id = 4    
'''

## a plain JOIN is an inner join, so only tracks that have a line on invoice 4 are returned
run(q1)

Unnamed: 0,id,track_name,track_type,unit_price,quantity
0,3448,"Lamentations of Jeremiah, First Set \ Incipit ...",Protected AAC audio file,0.99,1
1,2560,Violent Pornography,MPEG audio file,0.99,1
2,3336,War Pigs,Purchased AAC audio file,0.99,1
3,829,Let's Get Rocked,MPEG audio file,0.99,1
4,1872,Attitude,MPEG audio file,0.99,1
5,748,Dealer,MPEG audio file,0.99,1
6,1778,You're What's Happening (In The World Today),MPEG audio file,0.99,1
7,2514,Spoonman,MPEG audio file,0.99,1


## 2) The same query as above, with an added column for the artist name

In [5]:
## Same selection as q1, extended with the artist name.
## track has no artist column of its own here: the artist is reached through
## album (track.album_id -> album.album_id) and then artist (album.artist_id -> artist.artist_id).
q2='''
SELECT 
    t.track_id id, 
    t.name track_name,
    ar.name artist,
    m.name track_type,
    il.unit_price,
    il.quantity
FROM track t
JOIN media_type m ON t.media_type_id = m.media_type_id
JOIN invoice_line il ON t.track_id = il.track_id
JOIN album al ON t.album_id = al.album_id
JOIN artist ar ON al.artist_id = ar.artist_id
WHERE il.invoice_id = 4    
'''

## still restricted to the lines of invoice 4; the two extra joins only add the artist column
run(q2)

Unnamed: 0,id,track_name,artist,track_type,unit_price,quantity
0,3448,"Lamentations of Jeremiah, First Set \ Incipit ...",The King's Singers,Protected AAC audio file,0.99,1
1,2560,Violent Pornography,System Of A Down,MPEG audio file,0.99,1
2,3336,War Pigs,Cake,Purchased AAC audio file,0.99,1
3,829,Let's Get Rocked,Def Leppard,MPEG audio file,0.99,1
4,1872,Attitude,Metallica,MPEG audio file,0.99,1
5,748,Dealer,Deep Purple,MPEG audio file,0.99,1
6,1778,You're What's Happening (In The World Today),Marvin Gaye,MPEG audio file,0.99,1
7,2514,Spoonman,Soundgarden,MPEG audio file,0.99,1


## 3) Query to return the top 5 albums, as calculated by the number of times a track from that album has been purchased.
#### Columns to include: album name, artist name, number of times a track from that album has been purchased.



In [6]:
## Version 1: count the purchases per album in a subquery, then join the artist name onto the result.
## Grouping is done on album_id rather than the title, so two different albums that
## share a title are not merged and artist_id is well defined for every group.
q3 = '''
SELECT
    sub.album,
    ar.name artist,
    sub.tracks_purchased
FROM artist ar
JOIN (SELECT
        al.album_id,
        al.title album,
        al.artist_id,
        COUNT(il.track_id) tracks_purchased
    FROM invoice_line il
    JOIN track t ON t.track_id = il.track_id
    JOIN album al ON t.album_id = al.album_id
    GROUP BY al.album_id) sub ON sub.artist_id = ar.artist_id
ORDER BY sub.tracks_purchased DESC
LIMIT 5
'''

## Version 2: build the (album, artist, track) lookup in a subquery first,
## then join it to invoice_line and aggregate in the outer query.
q3v2 = '''
SELECT
    sub.album,
    sub.artist,
    COUNT(il.track_id) tracks_purchased
FROM invoice_line il
JOIN (SELECT
        al.album_id,
        ar.name artist,
        al.title album,
        t.track_id
    FROM album al
    JOIN artist ar ON al.artist_id = ar.artist_id
    JOIN track t ON al.album_id = t.album_id) sub ON il.track_id = sub.track_id
GROUP BY sub.album_id
ORDER BY tracks_purchased DESC
LIMIT 5
'''

## both versions return the same three columns; only the second one is displayed
run(q3v2)

Unnamed: 0,album,artist,tracks_purchased
0,Are You Experienced?,Jimi Hendrix,187
1,Faceless,Godsmack,96
2,Mezmerize,System Of A Down,93
3,Get Born,JET,90
4,The Doors,The Doors,83


## 4) Query to return information about each employee and their supervisor.
### Self join (the employee table is joined to itself):

In [7]:
## Self join: the employee table appears twice under two aliases.
## e1 is the employee; e2 is the employee row whose employee_id equals e1.reports_to, i.e. the supervisor.
## LEFT JOIN keeps employees whose reports_to matches no row; their supervisor columns come back NULL.
## String literals use single quotes: in SQL, double quotes denote identifiers.
q4 = '''
SELECT
    e1.first_name || ' ' || e1.last_name employee_name,
    e1.title employee_title,
    e2.first_name || ' ' || e2.last_name supervisor_name,
    e2.title supervisor_title
FROM employee e1
LEFT JOIN employee e2 ON e1.reports_to = e2.employee_id
ORDER BY employee_name ASC
'''

run(q4)

Unnamed: 0,employee_name,employee_title,supervisor_name,supervisor_title
0,Andrew Adams,General Manager,,
1,Jane Peacock,Sales Support Agent,Nancy Edwards,Sales Manager
2,Laura Callahan,IT Staff,Michael Mitchell,IT Manager
3,Margaret Park,Sales Support Agent,Nancy Edwards,Sales Manager
4,Michael Mitchell,IT Manager,Andrew Adams,General Manager
5,Nancy Edwards,Sales Manager,Andrew Adams,General Manager
6,Robert King,IT Staff,Michael Mitchell,IT Manager
7,Steve Johnson,Sales Support Agent,Nancy Edwards,Sales Manager


## 5) Query that summarizes the purchases of each customer.
#### Columns to include: full name of each customer, number of purchases, total amount spent and spender category



In [12]:
## One row per customer with the number of invoices, the total spent and a spender category:
##   total < 40          -> small spender
##   40 <= total <= 100  -> regular
##   total > 100         -> big spender
## Grouping is done on customer_id, not on the concatenated name, so two customers
## who share a full name are reported separately instead of being merged.
q5 = '''
SELECT
    c.first_name || ' ' || c.last_name customer_name,
    COUNT(i.total) number_of_purchases,
    SUM(i.total) total_spent,
    CASE
        WHEN SUM(i.total) < 40 THEN 'small spender'
        WHEN SUM(i.total) BETWEEN 40 AND 100 THEN 'regular'
        WHEN SUM(i.total) > 100 THEN 'big spender'
    END customer_category
FROM customer c
JOIN invoice i ON c.customer_id = i.customer_id
GROUP BY c.customer_id
ORDER BY customer_name
'''

## Using .head(15) to show only 15 rows
run(q5).head(15)

Unnamed: 0,customer_name,number_of_purchases,total_spent,customer_category
0,Aaron Mitchell,8,70.29,regular
1,Alexandre Rocha,10,69.3,regular
2,Astrid Gruber,9,69.3,regular
3,Bjørn Hansen,9,72.27,regular
4,Camille Bernard,9,79.2,regular
5,Daan Peeters,7,60.39,regular
6,Dan Miller,12,95.04,regular
7,Diego Gutiérrez,5,39.6,small spender
8,Dominique Lefebvre,9,72.27,regular
9,Eduardo Martins,12,60.39,regular


# THE END 