#  Answering Business Questions using SQL

In this guided project, we're going to practice using our SQL skills to answer business questions. We'll be using the Chinook database provided as a SQLite database file called *chinook.db*.


In [1]:
import sqlite3
import pandas as pd

In [2]:
def run_query(q):
    """Run a read-only SQL query against chinook.db and return the result.

    Parameters
    ----------
    q : str
        SQL text to execute.

    Returns
    -------
    pandas.DataFrame
        The rows produced by the query, one DataFrame column per result column.
    """
    conn = sqlite3.connect('chinook.db')
    try:
        return pd.read_sql(q, conn)
    finally:
        # sqlite3's own context manager only commits/rolls back; it does not
        # close the connection, so close it explicitly on every path.
        conn.close()
    
def run_command(comm):
    """Execute a single SQL statement against chinook.db and commit it.

    Parameters
    ----------
    comm : str
        SQL statement to execute (e.g. CREATE / INSERT / UPDATE / DELETE).

    Returns
    -------
    None
    """
    conn = sqlite3.connect('chinook.db')
    try:
        conn.execute(comm)
        conn.commit()
    finally:
        # Always release the connection, even when the statement fails.
        conn.close()
    
def show_tables(q=None):
    """Return the tables and views in the database as a DataFrame.

    Parameters
    ----------
    q : str, optional
        SQL query to run. When omitted, a query listing the name and type of
        every table and view in ``sqlite_master`` is used.

    Returns
    -------
    Whatever ``run_query`` returns for the query.
    """
    if q is None:
        q = """
            SELECT name, type
            FROM sqlite_master
            WHERE type IN ('table', 'view');
        """
    return run_query(q)
    

In [5]:
# List the name and type of every table and view in the database schema.
# String literals use single quotes: double quotes denote identifiers in SQL
# and only work as strings through a legacy SQLite fallback.
show_tables(
    """
    SELECT
        name,
        type
    FROM sqlite_master
    WHERE type IN ('table', 'view');
    """)

Unnamed: 0,name,type
0,album,table
1,artist,table
2,customer,table
3,employee,table
4,genre,table
5,invoice,table
6,invoice_line,table
7,media_type,table
8,playlist,table
9,playlist_track,table


## Top genres in USA

In [16]:
# Top 10 genres by number of tracks sold to customers located in the USA.
run_query(
    """
    WITH usa_tracks_sold AS (
        -- Every invoice line that belongs to an invoice of a USA customer.
        SELECT il.*
        FROM invoice_line il
        INNER JOIN invoice i ON i.invoice_id = il.invoice_id
        INNER JOIN customer c ON c.customer_id = i.customer_id
        WHERE c.country = 'USA'
    )
    SELECT
        g.name AS genre,
        COUNT(uts.invoice_line_id) AS tracks_sold,
        -- Fraction (0-1) of all USA invoice lines, not a 0-100 percentage.
        CAST(COUNT(uts.invoice_line_id) AS FLOAT) / (
            SELECT COUNT(*) FROM usa_tracks_sold
        ) AS percentage_sold
    FROM usa_tracks_sold uts
    INNER JOIN track t ON t.track_id = uts.track_id
    INNER JOIN genre g ON g.genre_id = t.genre_id
    GROUP BY g.name
    -- Break ties on the genre name so the LIMIT cut-off is deterministic.
    ORDER BY tracks_sold DESC, genre ASC
    LIMIT 10;
    """)

Unnamed: 0,genre,tracks_sold,percentage_sold
0,Rock,561,0.533777
1,Alternative & Punk,130,0.123692
2,Metal,124,0.117983
3,R&B/Soul,53,0.050428
4,Blues,36,0.034253
5,Alternative,35,0.033302
6,Latin,22,0.020932
7,Pop,22,0.020932
8,Hip Hop/Rap,20,0.019029
9,Jazz,14,0.013321


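Holy moly, rock lives! Rock accounts for about 53% of all tracks sold in the USA (561 tracks), far ahead of Alternative & Punk (130 tracks, about 12%) and Metal (124 tracks, about 12%).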

## Sales agent performance

In [21]:
# Total sales attributed to each employee who is a customer's support rep.
run_query(
    """
    WITH customer_support_rep_sales AS (
        -- Total invoiced amount per customer, tagged with that customer's rep.
        SELECT
            i.customer_id AS customer_id,
            c.support_rep_id AS support_rep_id,
            SUM(i.total) AS total
        FROM invoice i
        INNER JOIN customer c ON c.customer_id = i.customer_id
        GROUP BY i.customer_id, c.support_rep_id
    )
    SELECT
        e.first_name || ' ' || e.last_name AS employee_name,
        e.title AS title,
        e.hire_date AS hire_date,
        SUM(csrs.total) AS total_sales
    FROM customer_support_rep_sales csrs
    INNER JOIN employee e ON e.employee_id = csrs.support_rep_id
    -- Group on the employee key (not the display name) so two employees who
    -- share a name are never merged; the other columns depend on that key.
    GROUP BY
        e.employee_id,
        e.first_name,
        e.last_name,
        e.title,
        e.hire_date
    ORDER BY total_sales DESC;
    """)

Unnamed: 0,employee_name,title,hire_date,total_sales
0,Jane Peacock,Sales Support Agent,2017-04-01 00:00:00,1731.51
1,Margaret Park,Sales Support Agent,2017-05-03 00:00:00,1584.0
2,Steve Johnson,Sales Support Agent,2017-10-17 00:00:00,1393.92


## Sales by country

In particular, you have been directed to calculate data, for each country, on the:

- total number of customers
- total value of sales
- average value of sales per customer
- average order value

Countries with only one customer are grouped together under 'Other'.

In [27]:
# Per-country customer count, total sales, average sales per customer and
# average order value. Countries with a single customer are pooled as 'Other',
# which is always listed last.
run_query(
    """
    WITH country_or_other AS (
        -- One row per invoice line, labelled with the reporting country.
        SELECT
            CASE
                WHEN (
                    SELECT COUNT(*)
                    FROM customer c2
                    WHERE c2.country = c.country
                ) = 1 THEN 'Other'
                ELSE c.country
            END AS country,
            c.customer_id AS customer_id,
            il.invoice_id AS invoice_id,
            il.unit_price AS unit_price
        FROM customer c
        INNER JOIN invoice i ON i.customer_id = c.customer_id
        INNER JOIN invoice_line il ON il.invoice_id = i.invoice_id
    )
    SELECT
        country,
        COUNT(DISTINCT customer_id) AS n_customers,
        SUM(unit_price) AS total_sales,
        SUM(unit_price) / COUNT(DISTINCT customer_id) AS average_sale,
        SUM(unit_price) / COUNT(DISTINCT invoice_id) AS average_order
    FROM country_or_other
    GROUP BY country
    -- Sort in the outermost query: an ORDER BY inside a subquery does not
    -- guarantee the order of the final result.
    ORDER BY
        CASE WHEN country = 'Other' THEN 1 ELSE 0 END ASC,
        total_sales DESC;
    """
)

Unnamed: 0,country,n_customers,total_sales,average_sale,average_order
0,USA,13,1040.49,80.037692,7.942672
1,Canada,8,535.59,66.94875,7.047237
2,Brazil,5,427.68,85.536,7.011148
3,France,5,389.07,77.814,7.7814
4,Germany,4,334.62,83.655,8.161463
5,Czech Republic,2,273.24,136.62,9.108
6,United Kingdom,3,245.52,81.84,8.768571
7,Portugal,2,185.13,92.565,6.383793
8,India,2,183.15,91.575,8.721429
9,Other,15,1094.94,72.996,7.448571


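The USA is the largest single market by total sales (1,040.49 from 13 customers), while the Czech Republic has both the highest average sales per customer (136.62) and the highest average order value (9.11).

## Albums vs. individual tracks

The next query classifies each invoice by whether its tracks exactly match one complete album, and reports how many invoices fall into each group.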

In [28]:
# Count invoices whose set of tracks is exactly the track list of one album
# ('yes') versus all other invoices ('no'), with each group's share of all
# invoices.
run_query(
    """
    WITH invoice_first_track AS (
        -- One representative track per invoice (the lowest track_id); its
        -- album is the candidate album for that invoice.
        SELECT
            il.invoice_id AS invoice_id,
            MIN(il.track_id) AS first_track_id
        FROM invoice_line il
        GROUP BY il.invoice_id
    )
    SELECT
        album_purchase,
        COUNT(invoice_id) AS number_of_invoices,
        CAST(COUNT(invoice_id) AS FLOAT) / (
            SELECT COUNT(*) FROM invoice
        ) AS percent
    FROM (
        SELECT
            ifs.*,
            CASE
                WHEN
                    -- Album tracks minus invoice tracks is empty: the scalar
                    -- subquery yields NULL when it returns no rows.
                    (
                        SELECT t.track_id FROM track t
                        WHERE t.album_id = (
                            SELECT t2.album_id FROM track t2
                            WHERE t2.track_id = ifs.first_track_id
                        )

                        EXCEPT

                        SELECT il2.track_id FROM invoice_line il2
                        WHERE il2.invoice_id = ifs.invoice_id
                    ) IS NULL
                    AND
                    -- Invoice tracks minus album tracks is empty as well.
                    (
                        SELECT il2.track_id FROM invoice_line il2
                        WHERE il2.invoice_id = ifs.invoice_id

                        EXCEPT

                        SELECT t.track_id FROM track t
                        WHERE t.album_id = (
                            SELECT t2.album_id FROM track t2
                            WHERE t2.track_id = ifs.first_track_id
                        )
                    ) IS NULL
                THEN 'yes'
                ELSE 'no'
            END AS album_purchase
        FROM invoice_first_track ifs
    )
    GROUP BY album_purchase
    ORDER BY album_purchase;
    """)

Unnamed: 0,album_purchase,number_of_invoices,percent
0,no,500,0.814332
1,yes,114,0.185668
