# Database / SQL Final Project

This project will involve three related CSV files.
  * [play_list_music.csv](./play_list_music.csv)
  * [play_list_track_customers.csv](./play_list_track_customers.csv)
  * [play_list_track_buy.csv](./play_list_track_buy.csv)


Your task for this project is to build a SQLite database from these files, then perform some analytics.
This project should be broken down into the following tasks:
  1. Download and inspect the files.
  1. Design a database that is **properly normalized**.
  1. Implement your database design.
  1. Load data from files into database.
  1. Write some basic queries.

All your code should be implemented in this notebook.
Below the notebook is partitioned into markdown and code execution cells.

In the cells below, connect to your database.
Remember to update the SSO to your pawprint.

In [1]:
import getpass
# This collects a masked password from the user
mypasswd = getpass.getpass()

········


In [2]:
import psycopg2
from psycopg2.extensions import adapt, register_adapter, AsIs
connection = psycopg2.connect(database = 'dsa_student', 
                              user = 'may96c', 
                              host = 'pgsql.dsa.lan',
                              password = mypasswd)

cursor = connection.cursor()

In [3]:
import pandas as pd 
import numpy as np

In [51]:
play_list_music = pd.read_csv('play_list_music.csv')
play_list_music.head(n=10)
play_list_music['genre'].unique()
play_list_music['genre_id'] = pd.factorize(play_list_music['genre'])[0]

play_list_music['media_type_id'] = pd.factorize(play_list_music['media_type'])[0]

print(play_list_music['media_type'].unique())
print(play_list_music['media_type_id'].unique())

play_list_music = play_list_music.rename(columns={'id':'track_id'})


play_list_music['album_id'] = pd.factorize(play_list_music['album'])[0]


play_list_music['artist_id'] = pd.factorize(play_list_music['artist'])[0]

play_list_music['playlist_id'] = pd.factorize(play_list_music['playlist'])[0]

play_list_music.head(30)




['MPEG audio file' 'Protected AAC audio file'
 'Protected MPEG-4 video file' 'Purchased AAC audio file' 'AAC audio file']
[0 1 2 3 4]


Unnamed: 0,track_id,artist,album,song,playlist,media_type,genre,Bytes,genre_id,media_type_id,album_id,artist_id,playlist_id
0,1,AC/DC,For Those About To Rock We Salute You,For Those About To Rock (We Salute You),Music,MPEG audio file,Rock,11170334,0,0,0,0,0
1,1,AC/DC,For Those About To Rock We Salute You,For Those About To Rock (We Salute You),Heavy Metal Classic,MPEG audio file,Rock,11170334,0,0,0,0,1
2,6,AC/DC,For Those About To Rock We Salute You,Put The Finger On You,Music,MPEG audio file,Rock,6713451,0,0,0,0,0
3,7,AC/DC,For Those About To Rock We Salute You,Let's Get It Up,Music,MPEG audio file,Rock,7636561,0,0,0,0,0
4,8,AC/DC,For Those About To Rock We Salute You,Inject The Venom,Music,MPEG audio file,Rock,6852860,0,0,0,0,0
5,9,AC/DC,For Those About To Rock We Salute You,Snowballed,Music,MPEG audio file,Rock,6599424,0,0,0,0,0
6,10,AC/DC,For Those About To Rock We Salute You,Evil Walks,Music,MPEG audio file,Rock,8611245,0,0,0,0,0
7,11,AC/DC,For Those About To Rock We Salute You,C.O.D.,Music,MPEG audio file,Rock,6566314,0,0,0,0,0
8,12,AC/DC,For Those About To Rock We Salute You,Breaking The Rules,Music,MPEG audio file,Rock,8596840,0,0,0,0,0
9,13,AC/DC,For Those About To Rock We Salute You,Night Of The Long Knives,Music,MPEG audio file,Rock,6706347,0,0,0,0,0


In [97]:
genre1 = play_list_music[['genre_id','genre']]
genre = genre1.drop_duplicates()
genre

Unnamed: 0,genre_id,genre
0,0,Rock
100,1,Jazz
114,2,Metal
144,3,Alternative & Punk
156,4,Rock And Roll
281,5,Blues
292,6,Latin
429,7,Reggae
484,8,Pop
535,9,Soundtrack


In [95]:
media_type1 = play_list_music[['media_type_id', 'media_type']]
media_type = media_type1.drop_duplicates()
media_type

Unnamed: 0,media_type_id,media_type
0,0,MPEG audio file
4487,1,Protected AAC audio file
4963,2,Protected MPEG-4 video file
5178,3,Purchased AAC audio file
5198,4,AAC audio file


In [111]:
tracka = play_list_music[['track_id','song','album_id','media_type_id','Bytes','genre_id']]
track1 = tracka.merge(cut, on = 'track_id', how = 'left')
track1.head(10) 
track = track1.drop_duplicates()
track

Unnamed: 0,track_id,song,album_id,media_type_id,Bytes,genre_id,UnitPrice
0,1,For Those About To Rock (We Salute You),0,0,11170334,0,0.99
2,6,Put The Finger On You,0,0,6713451,0,0.99
3,7,Let's Get It Up,0,0,7636561,0,
4,8,Inject The Venom,0,0,6852860,0,0.99
6,9,Snowballed,0,0,6599424,0,0.99
...,...,...,...,...,...,...,...
5587,3355,Love Comes,343,4,3240609,0,0.99
5588,3356,Muita Bobeira,344,4,2775071,6,0.99
5589,3357,OAM's Blues,345,4,4292028,1,
5590,3358,One Step Beyond,342,4,6034098,14,


In [150]:
playlistwithtracks1 = play_list_music[['playlist_id','track_id']]
playlistwithtracks = playlistwithtracks1.drop_duplicates()
playlistwithtracks

Unnamed: 0,playlist_id,track_id
0,0,1
1,1,1
2,0,6
3,0,7
4,0,8
...,...,...
5207,0,3356
5208,0,3357
5209,0,3358
5210,0,3359


In [101]:
playlist1 = play_list_music[['playlist_id','playlist']]
playlist = playlist1.drop_duplicates()
playlist

Unnamed: 0,playlist_id,playlist
0,0,Music
1,1,Heavy Metal Classic
20,2,90’s Music
79,3,Grunge
314,4,Brazilian Music
895,5,On-The-Go 1
4694,6,Classical
4695,7,Classical 101 - The Basics
4781,8,Classical 101 - Next Steps
4894,9,Classical 101 - Deep Cuts


In [107]:
album1 = play_list_music[['album_id','album','artist_id']]
album = album1.drop_duplicates()
album

Unnamed: 0,album_id,album,artist_id
0,0,For Those About To Rock We Salute You,0
11,1,Let There Be Rock,0
19,2,Big Ones,1
49,3,Jagged Little Pill,2
75,4,Facelift,3
...,...,...,...
5202,342,Realize,199
5203,343,Every Kind of Light,200
5207,344,Duos II,201
5208,345,Worlds,202


In [104]:
artist1 = play_list_music[['artist_id','artist']]
artist = artist1.drop_duplicates()
artist

Unnamed: 0,artist_id,artist
0,0,AC/DC
19,1,Aerosmith
49,2,Alanis Morissette
75,3,Alice In Chains
100,4,Antônio Carlos Jobim
...,...,...
5202,199,Karsh Kale
5203,200,The Posies
5207,201,Luciana Souza/Romero Lubambo
5208,202,Aaron Goldberg


In [55]:
customer = pd.read_csv('play_list_track_customers.csv')
customer.head(n=10)

Unnamed: 0,CustomerId,FirstName,LastName,Company,Address,City,State,Country,PostalCode,Phone,Fax,Email
0,1,Luís,Gonçalves,Embraer - Empresa Brasileira de Aeronáutica S.A.,"Av. Brigadeiro Faria Lima, 2170",São José dos Campos,SP,Brazil,12227-000,+55 (12) 3923-5555,+55 (12) 3923-5566,luisg@embraer.com.br
1,2,Leonie,Köhler,,Theodor-Heuss-Straße 34,Stuttgart,,Germany,70174,+49 0711 2842222,,leonekohler@surfeu.de
2,3,François,Tremblay,,1498 rue Bélanger,Montréal,QC,Canada,H2G 1A7,+1 (514) 721-4711,,ftremblay@gmail.com
3,4,Bjørn,Hansen,,Ullevålsveien 14,Oslo,,Norway,171,+47 22 44 22 22,,bjorn.hansen@yahoo.no
4,5,František,Wichterlová,JetBrains s.r.o.,Klanova 9/506,Prague,,Czech Republic,14700,+420 2 4172 5555,+420 2 4172 5555,frantisekw@jetbrains.com
5,6,Helena,Holý,,Rilská 3174/6,Prague,,Czech Republic,14300,+420 2 4177 0449,,hholy@gmail.com
6,7,Astrid,Gruber,,"Rotenturmstraße 4, 1010 Innere Stadt",Vienne,,Austria,1010,+43 01 5134505,,astrid.gruber@apple.at
7,8,Daan,Peeters,,Grétrystraat 63,Brussels,,Belgium,1000,+32 02 219 03 03,,daan_peeters@apple.be
8,9,Kara,Nielsen,,Sønder Boulevard 51,Copenhagen,,Denmark,1720,+453 3331 9991,,kara.nielsen@jubii.dk
9,10,Eduardo,Martins,Woodstock Discos,"Rua Dr. Falcão Filho, 155",São Paulo,SP,Brazil,01007-010,+55 (11) 3033-5446,+55 (11) 3033-4564,eduardo@woodstock.com.br


In [143]:
play_list_track_buy = pd.read_csv('play_list_track_buy.csv')
play_list_track_buy.head(n=10)
play_list_track_buy = play_list_track_buy.rename(columns={'trackid':'track_id'})

cut = play_list_track_buy[['track_id','UnitPrice']]
invoice1 = play_list_track_buy


invoice1['invoice_row_id'] = invoice1.index + 1 

invoice_row = invoice1[['invoice_row_id','InvoiceId','track_id','UnitPrice']]
invoice2 = invoice1[['InvoiceId','CustomerId','BillingAddress','BillingCity']]
invoice = invoice2.drop_duplicates()
invoice

Unnamed: 0,InvoiceId,CustomerId,BillingAddress,BillingCity
0,1,2,Theodor-Heuss-Straße 34,Stuttgart
2,2,4,Ullevålsveien 14,Oslo
6,3,8,Grétrystraat 63,Brussels
12,4,14,8210 111 ST NW,Edmonton
21,5,23,69 Salem Street,Boston
...,...,...,...,...
2206,408,25,319 N. Frances Street,Madison
2210,409,29,796 Dundas Street West,Toronto
2216,410,35,"Rua dos Campeões Europeus de Viena, 4350",Porto
2225,411,44,Porthaninkatu 9,Helsinki


In [1]:
# Then connects to the DB
from sqlalchemy.engine.url import URL
from sqlalchemy import create_engine

# -------------- Add Content Below

# SQLAlchemy Connection Parameters



# SQLAlchemy Engine



# -------------- Add Content Above
del mypasswd

NameError: name 'mypasswd' is not defined

# Design a database that is _properly normalized_.

Note: You can expect up approximately ten (10) tables to be derived from three CSV files.

There is no implementation cell, the output should be an ERD or sketch.

Visit the course Canvas Site for Normalization videos. 

# Implement your database design.

Use the cells below to add your `CREATE TABLE` statements.
Add extra cells as necessary

In [62]:
sqlCreateTable = """ CREATE TABLE may96c.genre (
    genre_id INT PRIMARY KEY,
    genre VARCHAR); """

cursor.execute(sqlCreateTable)

connection.commit()

In [63]:
sqlCreateTable = """ CREATE TABLE may96c.media_type  (
    media_type_id INT PRIMARY KEY,
    media_type VARCHAR) ; """

cursor.execute(sqlCreateTable)

connection.commit()

In [64]:
sqlCreateTable = """CREATE TABLE  may96c.playlist (
    playlist_id INT PRIMARY KEY,
    playlist VARCHAR);"""
cursor.execute(sqlCreateTable)

connection.commit()

In [72]:
sqlCreateTable = """CREATE TABLE may96c.artists (
    artist_id INT PRIMARY KEY,
    artist VARCHAR) ; """

cursor.execute(sqlCreateTable)

connection.commit()

In [76]:
sqlCreateTable = """CREATE TABLE may96c.albums ( 
    album_id INT PRIMARY KEY,
    album VARCHAR,
    artist_id INT,
    CONSTRAINT fk_album
        FOREIGN KEY(artist_id)
            REFERENCES may96c.artists(artist_id)
); """

cursor.execute(sqlCreateTable)

connection.commit()

In [77]:
sqlCreateTable = """CREATE TABLE may96c.track ( 
    track_id INT PRIMARY KEY,
    song VARCHAR,
    album_id INT,
    media_type_id INT,
    Bytes INT,
    genre_id INT, 
    UnitPrice INT,
    CONSTRAINT fk_track_a
        FOREIGN KEY(album_id)
            REFERENCES may96c.albums(album_id),
    CONSTRAINT fk_track_b
        FOREIGN KEY(media_type_id)
            REFERENCES may96c.media_type(media_type_id),
    CONSTRAINT fk_track_c
        FOREIGN KEY(genre_id)
            REFERENCES may96c.genre(genre_id)
); """ 

cursor.execute(sqlCreateTable)

connection.commit()

In [81]:
sqlCreateTable = """CREATE TABLE may96c.customer (
    CustomerId INT PRIMARY KEY, 
    FirstName VARCHAR,
    LastName VARCHAR,
    Company VARCHAR,
    Address VARCHAR,
    City VARCHAR,
    State VARCHAR,
    Country VARCHAR, 
    PostalCode VARCHAR,
    Phone VARCHAR,
    Fax VARCHAR ,
    Email VARCHAR ); """

cursor.execute(sqlCreateTable)

connection.commit()

In [136]:
sqlCreateTable = """CREATE TABLE may96c.invoice (
    InvoiceId INT primary key,
    CustomerId INT, 
    BillingAddress VARCHAR, 
    BillingCity VARCHAR,
    CONSTRAINT fk_invoice_b 
        FOREIGN KEY (CustomerId)
        REFERENCES may96c.customer(CustomerId) ); """

cursor.execute(sqlCreateTable)

connection.commit()

In [141]:
sqlCreateTable = """CREATE TABLE may96c.invoice_row (
    invoice_row_id INT primary key,
    InvoiceId INT, 
    track_id INT, 
    UnitPrice INT, 
    CONSTRAINT fk_invoice_rowa
        FOREIGN KEY (InvoiceId)
        REFERENCES may96c.invoice(InvoiceId),
    CONSTRAINT fk_invoice_rowb
        FOREIGN KEY (track_id)
        REFERENCES may96c.track(track_id)); """

cursor.execute(sqlCreateTable)

connection.commit()


In [135]:
sqlCreateTable = """CREATE TABLE may96c.playlistwithtracks (
    playlist_id INT,
    track_id INT,
    CONSTRAINT fk_pwta
    FOREIGN KEY (playlist_id)
    REFERENCES may96c.playlist(playlist_id),
    CONSTRAINT fk_pwtb
    FOREIGN KEY (track_id)
    REFERENCES may96c.track(track_id),
    PRIMARY KEY (playlist_id,track_id)); """ 

cursor.execute(sqlCreateTable)

connection.commit()


# Load data from files into database.

### Use Excel or Pandas to carve the provided CSV files above into the **set of appropriate files** you need to load into your database.
   1. Example: Save File As *new_csv_name.csv*
   1. Remove unneeded columns
   1. Remove duplicate rows
   1. Save File, Navigate in JupyterHub folder view (your first JupyterHub tab)
   1. Upload file


   1. Load the CSV into your database using Python.
     




In [93]:
print(list(genre))
s = ''
for i in list(genre):
    s += '%s,'
print(s)

['genre', 'genre_id']
%s,%s,


In [98]:
register_adapter(np.int64,AsIs)
register_adapter(np.float64,AsIs)
genre = genre.where(pd.notnull(genre), None)
INSERT_SQL = 'INSERT INTO may96c.genre '
INSERT_SQL += ' (genre_id, genre) VALUES '
INSERT_SQL += '(%s,%s)'

with connection, connection.cursor() as cursor:
    
    for row in genre.itertuples(index=False, name=None):  # pull each row as a tuple
        
        cursor.execute(INSERT_SQL,row)

In [96]:
print(list(media_type))
s = ''
for i in list(media_type):
    s += '%s,'
print(s)

['media_type_id', 'media_type']
%s,%s,


In [99]:
register_adapter(np.int64,AsIs)
register_adapter(np.float64,AsIs)
media_type = media_type.where(pd.notnull(media_type), None)
INSERT_SQL = 'INSERT INTO may96c.media_type '
INSERT_SQL += ' (media_type_id, media_type) VALUES '
INSERT_SQL += '(%s,%s)'

with connection, connection.cursor() as cursor:
    
    for row in media_type.itertuples(index=False, name=None):  # pull each row as a tuple
        
        cursor.execute(INSERT_SQL,row)

In [102]:
print(list(playlist))
s = ''
for i in list(playlist):
    s += '%s,'
print(s)

['playlist_id', 'playlist']
%s,%s,


In [103]:
register_adapter(np.int64,AsIs)
register_adapter(np.float64,AsIs)
playlist = playlist.where(pd.notnull(playlist), None)
INSERT_SQL = 'INSERT INTO may96c.playlist '
INSERT_SQL += ' (playlist_id, playlist) VALUES '
INSERT_SQL += '(%s,%s)'

with connection, connection.cursor() as cursor:
    
    for row in playlist.itertuples(index=False, name=None):  # pull each row as a tuple
        
        cursor.execute(INSERT_SQL,row)

In [105]:
print(list(artist))
s = ''
for i in list(artist):
    s += '%s,'
print(s)

['artist_id', 'artist']
%s,%s,


In [106]:
register_adapter(np.int64,AsIs)
register_adapter(np.float64,AsIs)
artist = artist.where(pd.notnull(artist), None)
INSERT_SQL = 'INSERT INTO may96c.artists '
INSERT_SQL += ' (artist_id, artist) VALUES '
INSERT_SQL += '(%s,%s)'

with connection, connection.cursor() as cursor:
    
    for row in artist.itertuples(index=False, name=None):  # pull each row as a tuple
        
        cursor.execute(INSERT_SQL,row)

In [108]:
print(list(album))
s = ''
for i in list(album):
    s += '%s,'
print(s)

['album_id', 'album', 'artist_id']
%s,%s,%s,


In [109]:
register_adapter(np.int64,AsIs)
register_adapter(np.float64,AsIs)
album = album.where(pd.notnull(album), None)
INSERT_SQL = 'INSERT INTO may96c.albums '
INSERT_SQL += ' (album_id, album, artist_id) VALUES '
INSERT_SQL += '(%s,%s,%s)'

with connection, connection.cursor() as cursor:
    
    for row in album.itertuples(index=False, name=None):  # pull each row as a tuple
        
        cursor.execute(INSERT_SQL,row)

In [112]:
print(list(track))
s = ''
for i in list(track):
    s += '%s,'
print(s)

['track_id', 'song', 'album_id', 'media_type_id', 'Bytes', 'genre_id', 'UnitPrice']
%s,%s,%s,%s,%s,%s,%s,


In [113]:
register_adapter(np.int64,AsIs)
register_adapter(np.float64,AsIs)
track = track.where(pd.notnull(track), None)
INSERT_SQL = 'INSERT INTO may96c.track '
INSERT_SQL += ' (track_id, song, album_id, media_type_id, Bytes, genre_id, UnitPrice) VALUES '
INSERT_SQL += '(%s,%s,%s,%s,%s,%s,%s)'

with connection, connection.cursor() as cursor:
    
    for row in track.itertuples(index=False, name=None):  # pull each row as a tuple
        
        cursor.execute(INSERT_SQL,row)

In [114]:
print(list(customer))
s = ''
for i in list(customer):
    s += '%s,'
print(s)

['CustomerId', 'FirstName', 'LastName', 'Company', 'Address', 'City', 'State', 'Country', 'PostalCode', 'Phone', 'Fax', 'Email']
%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,


In [115]:
register_adapter(np.int64,AsIs)
register_adapter(np.float64,AsIs)
customer = customer.where(pd.notnull(customer), None)
INSERT_SQL = 'INSERT INTO may96c.customer '
INSERT_SQL += ' (CustomerId, FirstName, LastName, Company, Address, City, State, Country, PostalCode, Phone, Fax, Email) VALUES '
INSERT_SQL += '(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'

with connection, connection.cursor() as cursor:
    
    for row in customer.itertuples(index=False, name=None):  # pull each row as a tuple
        
        cursor.execute(INSERT_SQL,row)

In [146]:
print(list(invoice))
s = ''
for i in list(invoice):
    s += '%s,'
print(s)

['InvoiceId', 'CustomerId', 'BillingAddress', 'BillingCity']
%s,%s,%s,%s,


In [147]:
register_adapter(np.int64,AsIs)
register_adapter(np.float64,AsIs)
invoice = invoice.where(pd.notnull(invoice), None)
INSERT_SQL = 'INSERT INTO may96c.invoice '
INSERT_SQL += ' (InvoiceId, CustomerId, BillingAddress, BillingCity) VALUES '
INSERT_SQL += '(%s,%s,%s,%s)'

with connection, connection.cursor() as cursor:
    
    for row in invoice.itertuples(index=False, name=None):  # pull each row as a tuple
        
        cursor.execute(INSERT_SQL,row)

In [148]:
print(list(invoice_row))
s = ''
for i in list(invoice_row):
    s += '%s,'
print(s)

['invoice_row_id', 'InvoiceId', 'track_id', 'UnitPrice']
%s,%s,%s,%s,


In [149]:
register_adapter(np.int64,AsIs)
register_adapter(np.float64,AsIs)
invoice_row = invoice_row.where(pd.notnull(invoice_row), None)
INSERT_SQL = 'INSERT INTO may96c.invoice_row '
INSERT_SQL += ' (invoice_row_id, InvoiceId, track_id, UnitPrice) VALUES '
INSERT_SQL += '(%s,%s,%s,%s)'

with connection, connection.cursor() as cursor:
    
    for row in invoice_row.itertuples(index=False, name=None):  # pull each row as a tuple
        
        cursor.execute(INSERT_SQL,row)

In [152]:
print(list(playlistwithtracks))
s = ''
for i in list(playlistwithtracks):
    s += '%s,'
print(s)

['playlist_id', 'track_id']
%s,%s,


In [153]:
register_adapter(np.int64,AsIs)
register_adapter(np.float64,AsIs)
playlistwithtracks = playlistwithtracks.where(pd.notnull(playlistwithtracks), None)
INSERT_SQL = 'INSERT INTO may96c.playlistwithtracks'
INSERT_SQL += ' (playlist_id, track_id) VALUES '
INSERT_SQL += '(%s,%s)'

with connection, connection.cursor() as cursor:
    
    for row in playlistwithtracks.itertuples(index=False, name=None):  # pull each row as a tuple
        
        cursor.execute(INSERT_SQL,row)

## Once Loaded
  * Write SQL to show the `COUNT(*)` from each table loaded.

In [10]:
leng = 'SELECT count(*) FROM may96c.genre'
cursor.execute(leng)
cursor.fetchall()

[(25,)]

In [12]:
lenmediat = 'SELECT count(*) FROM may96c.media_type'
cursor.execute(lenmediat)
cursor.fetchall()

[(5,)]

In [13]:
lplay = 'SELECT count(*) FROM may96c.playlist'
cursor.execute(lplay)
cursor.fetchall()

[(12,)]

In [14]:
lart = 'SELECT count(*) FROM may96c.artists'
cursor.execute(lart)
cursor.fetchall()

[(204,)]

In [15]:
lal = 'SELECT count(*) FROM may96c.albums'
cursor.execute(lal)
cursor.fetchall()

[(347,)]

In [19]:
ltr = 'SELECT count(*) FROM may96c.track'
cursor.execute(ltr)
cursor.fetchall()

[(3503,)]

In [20]:
lcus = 'SELECT count(*) FROM may96c.customer'
cursor.execute(lcus)
cursor.fetchall()

[(59,)]

In [21]:
linv = 'SELECT count(*) FROM may96c.invoice'
cursor.execute(linv)
cursor.fetchall()

[(412,)]

In [22]:
linvr = 'SELECT count(*) FROM may96c.invoice_row'
cursor.execute(linvr)
cursor.fetchall()

[(2240,)]

In [23]:
lpwt = 'SELECT count(*) FROM may96c.playlistwithtracks'
cursor.execute(lpwt)
cursor.fetchall()

[(5212,)]

#  Write some basic queries.


## List each artist and the average bytes per song.

In [12]:
avb = 'SELECT AVG(Bytes), artist FROM may96c.artists JOIN may96c.albums on may96c.albums.artist_id = may96c.artists.artist_id JOIN may96c.track ON may96c.albums.album_id = may96c.track.album_id GROUP BY artist'
cursor.execute(avb)
cursor.fetchall()

[(Decimal('4244149.000000000000'),
  'Choir Of Westminster Abbey & Simon Preston'),
 (Decimal('426654344.65217391'), 'Lost'),
 (Decimal('5064553.000000000000'), 'Orchestra of The Age of Enlightenment'),
 (Decimal('8066677.059701492537'), 'Pearl Jam'),
 (Decimal('7335806.571428571429'), 'João Suplicy'),
 (Decimal('8175678.357142857143'), 'Passengers'),
 (Decimal('5680780.500000000000'), 'Karsh Kale'),
 (Decimal('11497862.964912280702'), 'Led Zeppelin'),
 (Decimal('11449106.858695652174'), 'Deep Purple'),
 (Decimal('8975021.375000000000'), 'Def Leppard'),
 (Decimal('7505847.050000000000'), 'The Who'),
 (Decimal('8359081.083333333333'), 'Jota Quest'),
 (Decimal('3347810.000000000000'),
  'Anne-Sophie Mutter, Herbert Von Karajan & Wiener Philharmoniker'),
 (Decimal('9618496.866666666667'), 'Aerosmith'),
 (Decimal('6747161.552631578947'), 'Titãs'),
 (Decimal('6818198.647058823529'), 'Green Day'),
 (Decimal('8806079.888888888889'), 'AC/DC'),
 (Decimal('8943953.333333333333'), 'Frank Zappa & 

## List average number of tracks per album for each artist.

In [3]:
avgtperart = 'SELECT AVG(cnt), artist_id FROM (SELECT COUNT(track_id) as cnt, may96c.track.album_id, artist_id FROM may96c.track JOIN may96c.albums ON may96c.track.album_id = may96c.albums.album_id GROUP BY artist_id, may96c.track.album_id ) as cool GROUP BY artist_id ;'
cursor.execute(avgtperart)
cursor.fetchall()

[(Decimal('1.00000000000000000000'), 183),
 (Decimal('11.0000000000000000'), 100),
 (Decimal('1.00000000000000000000'), 136),
 (Decimal('15.0000000000000000'), 38),
 (Decimal('1.00000000000000000000'), 150),
 (Decimal('1.00000000000000000000'), 139),
 (Decimal('1.00000000000000000000'), 140),
 (Decimal('1.00000000000000000000'), 193),
 (Decimal('11.0000000000000000'), 12),
 (Decimal('11.5000000000000000'), 164),
 (Decimal('1.00000000000000000000'), 137),
 (Decimal('12.0000000000000000'), 78),
 (Decimal('15.0000000000000000'), 24),
 (Decimal('1.00000000000000000000'), 191),
 (Decimal('1.00000000000000000000'), 159),
 (Decimal('17.5000000000000000'), 25),
 (Decimal('1.00000000000000000000'), 122),
 (Decimal('12.0000000000000000'), 94),
 (Decimal('1.00000000000000000000'), 141),
 (Decimal('23.0000000000000000'), 186),
 (Decimal('1.00000000000000000000'), 154),
 (Decimal('10.6666666666666667'), 49),
 (Decimal('24.0000000000000000'), 47),
 (Decimal('1.00000000000000000000'), 177),
 (Decimal

## List the top five customers in terms of tracks purchased.

In [4]:
top5 = 'SELECT count(track_id) as purchasecnt, may96c.invoice.CustomerId, FirstName, LastName FROM may96c.invoice_row JOIN may96c.invoice ON may96c.invoice_row.InvoiceId = may96c.invoice.InvoiceId JOIN customer on may96c.invoice.CustomerId = may96c.Invoice.CustomerId GROUP BY may96c.invoice.CustomerId, FirstName,LastName ORDER BY purchasecnt DESC LIMIT 5 ;'
cursor.execute(top5)
cursor.fetchall()

[(38, 18, 'Victor', 'Stevens'),
 (38, 51, 'Dominique', 'Lefebvre'),
 (38, 46, 'Aaron', 'Mitchell'),
 (38, 2, 'Johannes', 'Van der Berg'),
 (38, 52, 'Julia', 'Barnett')]

## List the top genre preference per customer.

In [5]:
topgenre = 'SELECT * FROM (SELECT RANK() OVER (PARTITION BY CustomerId ORDER BY totaled DESC) as COUNTRANK, genre_id, CustomerId FROM (SELECT Count(*) as totaled, genre_id, CustomerId FROM may96c.track JOIN may96c.invoice_row ON may96c.track.track_id = may96c.invoice_row.track_id JOIN may96c.invoice ON may96c.invoice_row.InvoiceId =may96c.invoice.InvoiceId GROUP BY CustomerId, genre_id) s ORDER BY CustomerId ) as q WHERE COUNTRANK = 1' 
cursor.execute(topgenre)
cursor.fetchall()

[(1, 0, 1),
 (1, 0, 2),
 (1, 2, 3),
 (1, 0, 4),
 (1, 0, 5),
 (1, 0, 6),
 (1, 0, 7),
 (1, 0, 8),
 (1, 0, 9),
 (1, 0, 10),
 (1, 6, 11),
 (1, 6, 12),
 (1, 0, 12),
 (1, 0, 13),
 (1, 0, 14),
 (1, 0, 15),
 (1, 2, 16),
 (1, 0, 17),
 (1, 0, 18),
 (1, 0, 19),
 (1, 6, 20),
 (1, 0, 21),
 (1, 2, 22),
 (1, 6, 23),
 (1, 0, 24),
 (1, 0, 25),
 (1, 0, 26),
 (1, 0, 27),
 (1, 0, 28),
 (1, 0, 29),
 (1, 0, 30),
 (1, 6, 31),
 (1, 6, 32),
 (1, 0, 33),
 (1, 0, 34),
 (1, 0, 35),
 (1, 2, 36),
 (1, 0, 37),
 (1, 0, 38),
 (1, 0, 39),
 (1, 0, 40),
 (1, 0, 41),
 (1, 2, 42),
 (1, 0, 43),
 (1, 0, 44),
 (1, 0, 45),
 (1, 0, 46),
 (1, 0, 47),
 (1, 0, 48),
 (1, 0, 49),
 (1, 0, 50),
 (1, 6, 51),
 (1, 6, 52),
 (1, 0, 53),
 (1, 0, 54),
 (1, 0, 55),
 (1, 3, 56),
 (1, 0, 56),
 (1, 0, 57),
 (1, 0, 58),
 (1, 0, 59)]

# Save your notebook, then `File > Close and Halt`