In [15]:
# Run this in a Jupyter notebook in VS Code
import urllib.request
import os

# Download the Chinook database if it doesn't exist.
# The file is fetched under a temporary name and only renamed to 'chinook.db'
# after the transfer has finished. That way an interrupted download cannot
# leave a truncated 'chinook.db' behind that the existence check below would
# accept as a valid database on the next run.
if not os.path.exists('chinook.db'):
    print("Downloading Chinook database...")
    url = "https://github.com/lerocha/chinook-database/raw/master/ChinookDatabase/DataSources/Chinook_Sqlite.sqlite"
    partial_path = "chinook.db.part"
    try:
        urllib.request.urlretrieve(url, partial_path)
        # Move the finished file into place in a single step
        os.replace(partial_path, "chinook.db")
    finally:
        # Only still present when the download or the rename failed
        if os.path.exists(partial_path):
            os.remove(partial_path)
    print("Download complete!")
else:
    print("Chinook database already exists")

Chinook database already exists


In [16]:
# Connecting to the database
import sqlalchemy as sa
from sqlalchemy import create_engine
import pandas as pd

# Build the SQLAlchemy engine that points at the local SQLite file
engine = create_engine('sqlite:///chinook.db')

# Smoke test: open a connection (the with-block closes it again) and
# report the outcome with a message rather than letting an error propagate
try:
    with engine.connect() as connection:
        print("Successfully connected to the Chinook database!")
except Exception as err:
    print(f"Error connecting to the database: {err}")

Successfully connected to the Chinook database!


In [17]:
# Ask SQLite's schema catalog (sqlite_master) for the name of every table
query = "SELECT name FROM sqlite_master WHERE type='table'"
tables = pd.read_sql(sql=query, con=engine)
# Heading and DataFrame on separate lines, emitted by a single print call
print("Tables in the Chinook database:", tables, sep="\n")

Tables in the Chinook database:
             name
0           Album
1          Artist
2        Customer
3        Employee
4           Genre
5         Invoice
6     InvoiceLine
7       MediaType
8        Playlist
9   PlaylistTrack
10          Track


In [18]:
# Loop through each table and display its structure (column name and type)
for table_name in tables['name']:
    # Double-quote the identifier, doubling any embedded quote, so that a
    # table name containing spaces, punctuation or a reserved word still
    # produces a valid PRAGMA statement
    quoted_name = '"' + table_name.replace('"', '""') + '"'
    query = f"PRAGMA table_info({quoted_name})"
    columns = pd.read_sql(query, engine)
    print(f"\nColumn in {table_name}:")
    # table_info returns several fields per column; keep only name and type
    print(columns[['name', 'type']])



Column in Album:
       name           type
0   AlbumId        INTEGER
1     Title  NVARCHAR(160)
2  ArtistId        INTEGER

Column in Artist:
       name           type
0  ArtistId        INTEGER
1      Name  NVARCHAR(120)

Column in Customer:
            name          type
0     CustomerId       INTEGER
1      FirstName  NVARCHAR(40)
2       LastName  NVARCHAR(20)
3        Company  NVARCHAR(80)
4        Address  NVARCHAR(70)
5           City  NVARCHAR(40)
6          State  NVARCHAR(40)
7        Country  NVARCHAR(40)
8     PostalCode  NVARCHAR(10)
9          Phone  NVARCHAR(24)
10           Fax  NVARCHAR(24)
11         Email  NVARCHAR(60)
12  SupportRepId       INTEGER

Column in Employee:
          name          type
0   EmployeeId       INTEGER
1     LastName  NVARCHAR(20)
2    FirstName  NVARCHAR(20)
3        Title  NVARCHAR(30)
4    ReportsTo       INTEGER
5    BirthDate      DATETIME
6     HireDate      DATETIME
7      Address  NVARCHAR(70)
8         City  NVARCHAR(40)
9       

## Step 3: Basic Queries - Simple Data Extraction
Now that we understand the database structure, let's learn how to extract data with basic SQL queries. We'll go line by line through each query and explain what's happening.
## Query 1: Getting All Columns from the Album Table

In [25]:
# Query 1: SELECT * returns every column of the table;
# LIMIT 5 caps the result at five rows

query = "SELECT * FROM album LIMIT 5"
result = pd.read_sql(sql=query, con=engine)
print("albums:", result, sep="\n")

albums:
   AlbumId                                  Title  ArtistId
0        1  For Those About To Rock We Salute You         1
1        2                      Balls to the Wall         2
2        3                      Restless and Wild         2
3        4                      Let There Be Rock         1
4        5                               Big Ones         3


In [26]:
# Query 2: name the wanted columns explicitly instead of using *

query = "SELECT AlbumId, Title FROM album LIMIT 5"
result = pd.read_sql(sql=query, con=engine)
print("Albums with just ID and Title:", result, sep="\n")

Albums with just ID and Title:
   AlbumId                                  Title
0        1  For Those About To Rock We Salute You
1        2                      Balls to the Wall
2        3                      Restless and Wild
3        4                      Let There Be Rock
4        5                               Big Ones


In [27]:
# Query 3: "<column> as <alias>" renames a column in the result set

query = "SELECT AlbumId as ID, Title as AlbumName FROM album LIMIT 5"
result = pd.read_sql(sql=query, con=engine)
print("Albums with renamed columns:", result, sep="\n")

Albums with renamed columns:
   ID                              AlbumName
0   1  For Those About To Rock We Salute You
1   2                      Balls to the Wall
2   3                      Restless and Wild
3   4                      Let There Be Rock
4   5                               Big Ones


In [None]:
# Filtering rows with a WHERE clause
# Query 4: keep only the rows whose AlbumId is below 10
query = "SELECT AlbumId, Title FROM album WHERE AlbumId < 10"
result = pd.read_sql(sql=query, con=engine)
print("Albums with AlbumId less than 10:", result, sep="\n")

Albums with AlbumId less than 10:
   AlbumId                                  Title
0        1  For Those About To Rock We Salute You
1        2                      Balls to the Wall
2        3                      Restless and Wild
3        4                      Let There Be Rock
4        5                               Big Ones
5        6                     Jagged Little Pill
6        7                               Facelift
7        8                         Warner 25 Anos
8        9         Plays Metallica By Four Cellos


In [29]:
# Comparing against text in a WHERE clause
# Query 5: string literals inside the SQL are written in single quotes
query = "SELECT AlbumId, Title FROM album WHERE Title = 'Big Ones' "
result = pd.read_sql(sql=query, con=engine)
print("Albums with Title 'Big Ones':", result, sep="\n")

Albums with Title 'Big Ones':
   AlbumId     Title
0        5  Big Ones


In [30]:
# Query 6: LIKE with a % wildcard on each side matches titles that
# contain 'Rock' anywhere in the text
query = "SELECT AlbumId, Title FROM album WHERE Title LIKE '%Rock%'"
result = pd.read_sql(sql=query, con=engine)
print("Albums with 'Rock' in the title:", result, sep="\n")

Albums with 'Rock' in the title:
   AlbumId                                              Title
0        1              For Those About To Rock We Salute You
1        4                                  Let There Be Rock
2       59                                Deep Purple In Rock
3      108                                  Rock In Rio [CD1]
4      109                                  Rock In Rio [CD2]
5      213  Pure Cult: The Best Of The Cult (For Rockers, ...
6      216                      Hot Rocks, 1964-1971 (Disc 1)


In [31]:
# Sorting rows with ORDER BY
# Query 7: sort by Title, then keep the first ten rows of the sorted result
query = "SELECT AlbumId, Title FROM Album ORDER BY Title LIMIT 10"
result = pd.read_sql(sql=query, con=engine)
print("Albums ordered alphabetically by title:", result, sep="\n")

Albums ordered alphabetically by title:
   AlbumId                                              Title
0      156                             ...And Justice For All
1      257  20th Century Masters - The Millennium Collecti...
2      296                      A Copland Celebration, Vol. I
3       94                         A Matter of Life and Death
4       95                                    A Real Dead One
5       96                                    A Real Live One
6      285                                 A Soprano Inspired
7      139         A TempestadeTempestade Ou O Livro Dos Dias
8      203                                            A-Sides
9      160                                      Ace Of Spades


In [33]:
# Query 8: DESC reverses the sort order (ORDER BY sorts ascending unless told otherwise)
query = "SELECT AlbumId, Title FROM album ORDER BY AlbumId DESC LIMIT 10"
result = pd.read_sql(sql=query, con=engine)
print("Albums ordered in Descending order on AlbumID:", result, sep="\n")

Albums ordered in Descending order on AlbumID:
   AlbumId                                              Title
0      347  Koyaanisqatsi (Soundtrack from the Motion Pict...
1      346                              Mozart: Chamber Music
2      345                                Monteverdi: L'Orfeo
3      344  Schubert: The Late String Quartets & String Qu...
4      343                             Respighi:Pines of Rome
5      342  Locatelli: Concertos for Violin, Strings and C...
6      341  Great Recordings of the Century - Shubert: Sch...
7      340        Liszt - 12 Études D'Execution Transcendante
8      339  Great Recordings of the Century: Paganini's 24...
9      338                        Nielsen: The Six Symphonies


In [36]:
# WHERE and ORDER BY in one statement: filter on AlbumId first,
# sort what remains by Title, then cap the result at 20 rows
query = """
SELECT AlbumId, Title
FROM Album
WHERE AlbumId > 100
ORDER BY Title
LIMIT 20
"""

result = pd.read_sql(sql=query, con=engine)
print("Filtered and ordered albums:", result, sep="\n")

Filtered and ordered albums:
    AlbumId                                              Title
0       156                             ...And Justice For All
1       257  20th Century Masters - The Millennium Collecti...
2       296                      A Copland Celebration, Vol. I
3       285                                 A Soprano Inspired
4       139         A TempestadeTempestade Ou O Livro Dos Dias
5       203                                            A-Sides
6       160                                      Ace Of Spades
7       232                                       Achtung Baby
8       224                                           Acústico
9       167                                       Acústico MTV
10      307                   Adams, John: The Chairman Dances
11      272  Adorate Deum: Gregorian Chant from the Proper ...
12      233                    All That You Can't Leave Behind
13      273                                  Allegri: Miserere
14      248               