In [1]:
import sqlite3
import pandas as pd

# Connect to Chinook database
conn = sqlite3.connect('chinook.db')

print("Connected to Chinook database")

Connected to Chinook database


In [2]:
# Configure pandas display settings
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', 30)

In [5]:
SCHEMA REFERENCE
Key Tables andColumns
tracks

TrackId, Name, AlbumId, MediaTypeId, GenreId, Composer
Milliseconds, Bytes, UnitPrice

albums

AlbumId, Title, ArtistId

artists

ArtistId, Name

customers

CustomerId, FirstName, LastName, Company, Address, City, State, Country, PostalCode, Phone, Fax, Email, SupportRepId

invoices

InvoiceId, CustomerId, InvoiceDate, BillingAddress, BillingCity, BillingState, BillingCountry, BillingPostalCode, Total

invoice_items

InvoiceLineId, InvoiceId, TrackId, UnitPrice, Quantity

genres

GenreId, Name

media_types

MediaTypeId, Name

employees

EmployeeId, LastName, FirstName, Title, ReportsTo, BirthDate, HireDate, Address, City, State, Country, PostalCode, Phone, Fax, Email

SyntaxError: invalid syntax (1543158634.py, line 1)

In [4]:
# Problem 1: Tracks longer than average track length
query = """
        SELECT track.Name, Track.Milliseconds
        FROM Track
        WHERE Milliseconds > (
        SELECT AVG(Milliseconds)
        FROM Track
        );
        
"""

result = pd.read_sql_query(query, conn)
print(result)


                              Name  Milliseconds
0      You Oughta Know (Alternate)        491885
1                Master Of Puppets        436453
2        Snoopy's search-Red baron        456071
3                          Stratus        582086
4                    No More Tears        555075
..                             ...           ...
489         Rehab (Hot Chip Remix)        418293
490    You Sent Me Flying / Cherry        409906
491            Amy Amy Amy (Outro)        663426
492  Symphony No. 3 Op. 36 for ...        567494
493  Concerto for Violin, Strin...        493573

[494 rows x 2 columns]


In [4]:
# Problem 2:Customers who spent more than the average customer
query = """
        SELECT Customer.FirstName, Customer.LastName, SUM(Invoice.Total)
        FROM Customer
        JOIN Invoice 
        ON Customer.CustomerId = Invoice.CustomerId
        GROUP BY Customer.CustomerId, Customer.FirstName, Customer.LastName
        HAVING SUM(Invoice.Total) > (
            SELECT AVG(Customer_Total)
            FROM 
                (SELECT SUM(Total) as Customer_total
                FROM Invoice
                GROUP BY CustomerId
                )
        );
        
        """
# The slippery slope here is that you want to make sure that you dont count the customers twice so there is an inner query within the query
result = pd.read_sql_query(query, conn)
print(result)

    FirstName      LastName  SUM(Invoice.Total)
0        Luís     Gonçalves               39.62
1    François      Tremblay               39.62
2       Bjørn        Hansen               39.62
3   František   Wichterlová               40.62
4      Helena          Holý               49.62
5      Astrid        Gruber               42.62
6        Jack         Smith               39.62
7         Dan        Miller               39.62
8     Heather       Leacock               39.62
9       Frank       Ralston               43.62
10     Victor       Stevens               42.62
11    Richard    Cunningham               47.62
12      Julia       Barnett               43.62
13       João     Fernandes               39.62
14       Fynn    Zimmermann               43.62
15      Wyatt        Girard               39.62
16   Isabelle       Mercier               40.62
17      Terhi    Hämäläinen               41.62
18   Ladislav        Kovács               45.62
19       Hugh      O'Reilly             

In [9]:
# Problem 3: Artists who have more tracks than average artist
query = """
        SELECT Artist.Name, COUNT(Track.TrackId)
        FROM Artist
            JOIN Album ON Artist.ArtistId = Album.ArtistId
            JOIN Track ON Album.AlbumId = Track.AlbumId
        GROUP BY Artist.ArtistId, Artist.Name
        HAVING COUNT(Track.TrackId) > (
        SELECT AVG(track_count)
            FROM
            (
            SELECT COUNT(Track.TrackId) as track_count
            FROM Artist
                JOIN Album ON Artist.ArtistId = Album.AlbumId
                JOIN Track ON Album.AlbumId = Track.AlbumId
            GROUP BY Artist.ArtistId
            
            )
        );
        
"""

result = pd.read_sql_query(query, conn)
print(result)

                             Name  COUNT(Track.TrackId)
0                           AC/DC                    18
1                       Aerosmith                    15
2               Alanis Morissette                    13
3            Antônio Carlos Jobim                    31
4                      Audioslave                    40
..                            ...                   ...
92                     The Office                    53
93  Battlestar Galactica (Clas...                    24
94                  House Of Pain                    19
95                  Chris Cornell                    14
96                  Amy Winehouse                    23

[97 rows x 2 columns]


In [15]:
# Problem 4: Invoices whose total value is above the
# overall average invoice value
query = """

    SELECT Invoice.InvoiceId, Invoice.Total
    FROM Invoice
    WHERE Invoice.Total > (
        SELECT AVG(Invoice.Total)
        FROM Invoice    
    );
        
        
"""

result = pd.read_sql_query(query, conn)
print(result)

     InvoiceId  Total
0            3   5.94
1            4   8.91
2            5  13.86
3           10   5.94
4           11   8.91
..         ...    ...
174        403   8.91
175        404  25.86
176        409   5.94
177        410   8.91
178        411  13.86

[179 rows x 2 columns]


In [18]:
# Problem 4: Customers who never made a purchase
query = """
        SELECT Customer.FirstName, Customer.LastName
        FROM Customer
        WHERE Customer.CustomerId NOT IN (
            SELECT Invoice.CustomerId
            FROM Invoice
        );
        
        
"""

result = pd.read_sql_query(query, conn)
print(result)

Empty DataFrame
Columns: [FirstName, LastName]
Index: []
