In [None]:
import pandas as pd

In [None]:
from sqlalchemy import create_engine, inspect

# create_engine builds the SQLAlchemy engine object that every query cell
# below hands to pandas. The 'sqlite:///' URL form (three slashes) is a
# relative path, so 'Chinook_Sqlite.sqlite' is looked up in the notebook's
# working directory.
# NOTE(review): if the file is missing, SQLite may create an empty database
# instead of failing, which shows up later as "no such table" — confirm the
# file is present before running the queries.
engine = create_engine('sqlite:///Chinook_Sqlite.sqlite')

In [None]:
# List the tables available in the database.
# Engine.table_names() was deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the inspector API returns the same list of names on old and new versions.
inspect(engine).get_table_names()

In [None]:
# Peek at the first five Album rows to see which columns the table offers.
command = '''
SELECT * 
FROM Album
LIMIT 5
'''
pd.read_sql_query(sql=command, con=engine)

In [None]:
# Peek at the first five Artist rows.
command = '''
SELECT * 
FROM Artist
LIMIT 5
'''
pd.read_sql_query(sql=command, con=engine)

In [None]:
# Peek at the first five Invoice rows.
command = '''
SELECT * 
FROM Invoice
LIMIT 5
'''
pd.read_sql_query(sql=command, con=engine)

In [None]:
# Peek at the first seven InvoiceLine rows.
command = '''
SELECT * 
FROM InvoiceLine
LIMIT 7
'''
pd.read_sql_query(sql=command, con=engine)

In [None]:
# Peek at the first six Track rows.
command = '''
SELECT * 
FROM Track
LIMIT 6
'''
pd.read_sql_query(sql=command, con=engine)

In [None]:
# Peek at the first five Customer rows.
command = '''
SELECT * 
FROM Customer
LIMIT 5
'''
pd.read_sql_query(sql=command, con=engine)

Provide a query showing the Invoices of customers who are from Brazil. The resultant table should show the customer's full name, Invoice ID, Date of the invoice and billing country.

In [None]:
# Invoices for every customer whose country is Brazil.
# There is deliberately no LIMIT: the task asks for all matching invoices, and
# a LIMIT without an ORDER BY would silently drop an arbitrary subset of them.
command = '''
SELECT c.FirstName, c.lastname, i.invoiceid, i.invoicedate, i.billingcountry    -- selects the desired columns
FROM customer AS c                     -- provide an alias to the table, so we don't have to type the full name out
    JOIN invoice AS i
    ON c.customerid = i.customerid     -- this is how the tables are linked
WHERE c.country = 'Brazil'
ORDER BY c.lastname, c.firstname, i.invoicedate   -- deterministic row order
'''
pd.read_sql_query(command, engine)

Provide a query showing a unique list of billing countries from the Invoice table.

In [None]:
# DISTINCT collapses repeated billing countries into one row each.
command = '''
SELECT DISTINCT billingcountry 
FROM invoice
'''
pd.read_sql_query(sql=command, con=engine)

Provide a query that shows the invoices associated with each sales agent. The resultant table should include the Sales Agent's full name.

Each invoice has a customer.

Each customer has a support rep (an employee), so an invoice reaches its sales agent through the customer.

In [None]:
# Walk invoice -> customer -> employee so each invoice row carries the name
# of the support rep assigned to its customer; rows are grouped per agent by
# the ORDER BY.
command = '''
SELECT e.firstname, e.lastname,   -- employee first and last name
       i.*   -- all columns from invoice table 

FROM invoice AS i
    JOIN customer AS c
    ON c.customerid = i.customerid

    JOIN employee AS e
    ON e.employeeid = c.supportrepid
    
ORDER BY e.employeeid;
'''
pd.read_sql_query(sql=command, con=engine)

Provide a query that shows the Invoice Total, Customer name, Country and Sales Agent name for all invoices and customers.

In [None]:
# One row per invoice: the agent's name, the customer's name and country,
# and the invoice total. The employee and customer name columns are aliased
# because both tables use the same column names.
command = '''
SELECT e.firstname AS 'employee first', 
        e.lastname AS 'employee last', 
        c.firstname AS 'customer first', 
        c.lastname AS 'customer last', 
        c.country, i.total
FROM employee AS e
        JOIN customer AS c 
        ON e.employeeid = c.supportrepid
        JOIN invoice AS i 
        ON c.customerid = i.customerid
'''
pd.read_sql_query(sql=command, con=engine)

How many Invoices were there in 2011? What are the total sales for that year?

In [None]:
# List every invoice dated in 2011.
# A half-open range [2011-01-01, 2012-01-01) replaces
# BETWEEN ... AND datetime('2011-12-31'): that upper bound evaluates to
# '2011-12-31 00:00:00', so an invoice stamped later on 31 December would
# have been excluded.
# Assumes InvoiceDate holds ISO-8601 text ('YYYY-MM-DD HH:MM:SS'), which the
# original datetime() comparison relied on as well — TODO confirm.
command = '''
SELECT invoiceId, InvoiceDate, total
FROM invoice as i
WHERE i.invoicedate >= '2011-01-01'
  AND i.invoicedate <  '2012-01-01';
'''
pd.read_sql_query(command, engine)  # result has 83 rows

In [None]:
# Number of invoices and total sales for 2011.
# Uses a half-open date range so invoices stamped at any time on 31 December
# are counted; BETWEEN ... AND datetime('2011-12-31') stopped at midnight at
# the start of that day.
# Assumes InvoiceDate holds ISO-8601 text — TODO confirm.
command = '''
SELECT count(i.invoiceId) as 'count',
    sum(i.total) as 'sum'
FROM invoice as i
WHERE i.invoicedate >= '2011-01-01'
  AND i.invoicedate <  '2012-01-01'
'''
pd.read_sql_query(command, engine)

Count how many orders (invoices) were made on each day of 2011.

In [None]:
# Invoices per distinct InvoiceDate value within 2011.
# Uses a half-open date range so the last day of the year is fully included;
# BETWEEN ... AND datetime('2011-12-31') stopped at midnight at the start of
# that day.
# Assumes InvoiceDate holds ISO-8601 text — TODO confirm.
command = '''
SELECT i.InvoiceDate, count(i.invoiceId) as 'count'
FROM invoice as i
WHERE i.invoicedate >= '2011-01-01'
  AND i.invoicedate <  '2012-01-01'
GROUP BY i.invoiceDate
'''
pd.read_sql_query(command, engine)

Looking at the InvoiceLine table, provide a query that COUNTs the number of line items for each Invoice.

In [None]:
# Refresh our memory of the invoiceline columns before aggregating them.
command = '''
SELECT *
FROM invoiceline
LIMIT 10
'''
pd.read_sql_query(sql=command, con=engine)

In [None]:
# Count the line items on each invoice, largest invoices first.
command = '''
SELECT invoiceid, count(invoicelineid) AS 'Count'
FROM invoiceline
GROUP BY invoiceid
ORDER BY Count DESC
'''
pd.read_sql_query(sql=command, con=engine)

Find the invoice with the maximum number of InvoiceLine IDs, using the most elegant query you can.

A CTE (Common Table Expression) lets you name an intermediate result set and then query it as if it were a table within the same statement.

In [None]:
# Largest and smallest number of line items found on any single invoice.
# The CTE no longer sorts its rows: MAX/MIN ignore row order, so the inner
# ORDER BY only added a wasted sort.
command = '''
WITH InvoiceCounts (id, count) 
AS 
(  -- an intermediate table that aggregates the invoicelineIDs 
   -- pretty much the exact same table we generated in previous step
    SELECT invoiceid, count(invoicelineid) AS 'Count'
    FROM invoiceline
    GROUP BY invoiceid
)

SELECT MAX(count) as Max, MIN(count) as Min
FROM InvoiceCounts
'''
pd.read_sql_query(command, engine)

In [None]:
# Distribution of invoice sizes: for each line-item count, how many invoices
# have exactly that many lines.
# The sort moved from inside the CTE (where the outer GROUP BY discarded it)
# to the final SELECT, so the displayed order is explicit rather than left to
# the query planner.
command = '''
WITH InvoiceCounts (id, count) 
AS 
(  -- an intermediate table that aggregates the invoicelineIDs 
   -- pretty much the exact same table we generated in previous step
    SELECT invoiceid, count(invoicelineid) AS 'Count'
    FROM invoiceline
    GROUP BY invoiceid
)

SELECT count, COUNT(id) as "HowMany"
FROM InvoiceCounts
GROUP BY count
ORDER BY count
'''
pd.read_sql_query(command, engine)

Provide a query that includes the purchased track name AND artist name with each invoice line item.


In [None]:
# Enrich each invoice line with the purchased track's name and its artist.
# The artist is reached through the album: invoiceline -> track -> album -> artist.
command = '''
SELECT i.*, 
    t.name AS 'track', 
    ar.name AS 'artist'
FROM invoiceline AS i
        JOIN track AS t 
            ON i.trackid = t.trackid     -- i links to t
        JOIN album AS al 
            ON t.albumid = al.albumid    -- t links to al
        JOIN artist AS ar 
            ON al.artistid = ar.artistid  -- al links to ar
'''
pd.read_sql_query(sql=command, con=engine)

In [None]:
# Look up
# the differences between LEFT and RIGHT JOINs:
# https://blog.codinghorror.com/a-visual-explanation-of-sql-joins/

One table is of products
Another table is of sales
assume productid is the link

How do we find all the products that do not exist in the sales table?

SELECT p.*
FROM product_table AS p LEFT OUTER JOIN sales_table AS s
 ON p.productid = s.productid
 WHERE s.productid IS NULL