In [39]:
import requests
import json
import sqlite3
import os
from dotenv import load_dotenv

## Project: looking at my top 100 songs
I am going to use Spotify's API to access 4 playlists: my top 100 songs from 2022, 2023, 2024, and 2025 (migrated from Apple Music). Then, I am going to create different SQL tables with to explore relationships between all of my favorite songs from the past 4 years.

In [40]:
# # Request a bearer acess token given my Spotify client id / client secret

# Load environment variables from .env file
load_dotenv()

# Retrieve the credentials
client_id = os.getenv("SPOTIFY_CLIENT_ID")
client_secret = os.getenv("SPOTIFY_CLIENT_SECRET")

# Request a bearer access token using the Spotify API
url = "https://accounts.spotify.com/api/token"
data = {
    "grant_type": "client_credentials",
    "client_id": client_id,
    "client_secret": client_secret,
}

response = requests.post(url, data=data)
response_data = response.json()

authorization = f"Bearer {response_data['access_token']}"

# Using that authorization, access each playlist
# Turn it into text, then turn it into a dictorionary using json.loads

# For 2022
playlist_2022 = "https://api.spotify.com/v1/playlists/71zUjPqRk9OxTqpXYEmzlA/tracks"
playlist_2022_info = requests.get(url=playlist_2022, headers={"Authorization": authorization})
res_2022 = json.loads(playlist_2022_info.text)

# For 2023
playlist_2023 = "https://api.spotify.com/v1/playlists/28b3xd3RpAvarZB5PKr6Nx/tracks"
playlist_2023_info = requests.get(
    url=playlist_2023, headers={"Authorization": authorization}
)
res_2023 = json.loads(playlist_2023_info.text)

# For 2024
playlist_2024 = "https://api.spotify.com/v1/playlists/5P6ptyrOrEwoxn7A3qhC7F/tracks"
playlist_2024_info = requests.get(
    url=playlist_2024, headers={"Authorization": authorization}
)
res_2024 = json.loads(playlist_2024_info.text)

# For 2025 (so far)
playlist_2025 = "https://api.spotify.com/v1/playlists/0VLFZelqY8r0g544QAy8zv/tracks"
playlist_2025_info = requests.get(
    url=playlist_2025, headers={"Authorization": authorization}
)
res_2025 = json.loads(playlist_2025_info.text)

### My SQL Plan:
- Create table PLAYLISTS to hold information about my four playlists (top 100 songs for 2022, 2023, 2024, 2025)
- Create table SONGS to hold information about the unique songs in all of these playlists
- Create table SONG ON PLAYLIST to relate the two tables above (song ID and playlist ID will relate this)

In [41]:
conn = sqlite3.connect("favorite_songs.db")
cursor = conn.cursor()

In [42]:
cursor.execute("""CREATE TABLE IF NOT EXISTS playlists (
               id text,
               name text
               )""")

cursor.execute("INSERT OR IGNORE INTO playlists VALUES ('71zUjPqRk9OxTqpXYEmzlA', 'Replay 2022')")
cursor.execute("INSERT OR IGNORE INTO playlists VALUES ('28b3xd3RpAvarZB5PKr6Nx', 'Replay 2023')")
cursor.execute("INSERT OR IGNORE INTO playlists VALUES ('5P6ptyrOrEwoxn7A3qhC7F', 'Replay 2024')")
cursor.execute("INSERT OR IGNORE INTO playlists VALUES ('0VLFZelqY8r0g544QAy8zv', 'Replay 2025')")

conn.commit()

In [43]:
def get_song_info(playlist_res):
    """Given the dictionary created above, access all of this information
    about each song (should be 100 songs) on the playlist.
    Return the result as a list of tuples for each song"""

    # Holds all of the tuples, one tuple for each song
    song_info = []

    # Iterate through each song
    for i in range(0, len(playlist_res["items"])):
        song_name = playlist_res["items"][i]["track"]["name"]
        song_id = playlist_res["items"][i]["track"]["id"]
        song_artist = playlist_res["items"][i]["track"]["artists"][0]["name"]
        song_artist_id = playlist_res["items"][i]["track"]["artists"][0]["id"]
        song_runtime = playlist_res["items"][i]["track"]["duration_ms"]
        song_popularity = playlist_res["items"][i]["track"]["popularity"]
        song_album = playlist_res["items"][i]["track"]["album"]["name"]
        
        # Tuple holding song's info gets appended
        full_info = (song_id, song_name, song_artist, song_artist_id, song_runtime, song_popularity, song_album)
        song_info.append(full_info)

    return song_info

In [44]:
# cursor.execute(
#     """DROP TABLE Songs"""
# )

cursor.execute(
    """CREATE TABLE IF NOT EXISTS Songs (
    id text,
    name text,
    artist text,
    artist_id text,
    runtime real,
    popularity integer,
    album text
    )"""
)

conn.commit()


def insert_song_info(playlist_res):
    conn = sqlite3.connect("favorite_songs.db")
    cursor = conn.cursor()

    for song in get_song_info(playlist_res):
        id, name, artist, artist_id, runtime, popularity, album = song

        try:
            cursor.execute(
                """
                INSERT INTO Songs (id, name, artist, artist_id, runtime, popularity, album)
                SELECT ?, ?, ?, ?, ?, ?, ?
                WHERE NOT EXISTS (
                    SELECT 1 FROM Songs WHERE id = ?
                )
                """,
                (id, name, artist, artist_id, runtime, popularity, album, id),
            )
            conn.commit()
        except sqlite3.Error as e:
            print("SQLite error:", e)

    cursor.close()
    conn.close()


# Insert songs for multiple years
insert_song_info(res_2022)
insert_song_info(res_2023)
insert_song_info(res_2024)
insert_song_info(res_2025)

In [45]:
cursor.execute(
  """CREATE TABLE IF NOT EXISTS Songs_On_Playlist (
  Song_ID text,
  Playlist_ID text)"""
)

def insert_id_info(playlist_res, playlist_id):
    """Insert song - playlist ids to establish relationship"""
    conn = sqlite3.connect("favorite_songs.db")
    cursor = conn.cursor()

    for i in range(len(playlist_res["items"])):
        song_id = get_song_info(playlist_res)[i][0]

        # Insert only if the song-playlist pair does not already exist
        try:
            cursor.execute(
            """
            INSERT INTO Songs_On_Playlist (song_id, playlist_id)
            SELECT ?, ?
            WHERE NOT EXISTS (
                SELECT 1 FROM Songs_On_Playlist WHERE song_id = ? AND playlist_id = ?
            )
            """,
            (song_id, playlist_id, song_id, playlist_id),
        )

            conn.commit()
        except sqlite3.Error as e:
            print("SQLite error:", e)
        
    cursor.close()
    conn.close()


# Insert song-playlist relationships
insert_id_info(res_2022, "71zUjPqRk9OxTqpXYEmzlA")
insert_id_info(res_2023, "28b3xd3RpAvarZB5PKr6Nx")
insert_id_info(res_2024, "5P6ptyrOrEwoxn7A3qhC7F")
insert_id_info(res_2025, "0VLFZelqY8r0g544QAy8zv")

### Note about genres
Unfortunately, Spotify's API doesn't give a genre for a specific song or album, but it does sometimes provide an array of genres given an artist.

In [49]:
def get_artist_info(artist_id):
    artist_url = f"https://api.spotify.com/v1/artists/{artist_id}"
    artist_info = requests.get(
      url=artist_url, headers={"Authorization": authorization}
  )
    artist = json.loads(artist_info.text)["name"]
    genre = json.loads(artist_info.text)["genres"]

    if artist == "Taylor Swift":
        genre = 'Pop'
    elif genre == []:
        genre = "None"
    else:
        genre = genre[0]

    popularity = json.loads(artist_info.text)["popularity"]
    return artist, genre, popularity

# Taylor Swift apprently has no genres
print(get_artist_info("06HL4z0CvFAxyc27GXpf02"))

# Drake has rap and hip hop
print(get_artist_info("3TVXtAsR1Inumwj472S9r4"))

('Taylor Swift', 'Pop', 97)
('Drake', 'rap', 97)


In [50]:
cursor.execute(
  """DROP TABLE artists"""
)

cursor.execute(
  """CREATE TABLE IF NOT EXISTS artists (
  artist text,
  artist_id text,
  genre text,
  popularity integer)"""
)


def insert_artist_info(artist_id):
    artist, genre, popularity = get_artist_info(artist_id)

    cursor.execute(
        """
        INSERT INTO artists (artist, artist_id, genre, popularity)
        SELECT ?, ?, ?, ?
        WHERE NOT EXISTS (
            SELECT 1 FROM artists WHERE artist_id = ?
        )
        """,
        (artist, artist_id, genre, popularity, artist_id),  # Now includes 4 values
    )
    conn.commit()


# Get the distinct artists
artists = cursor.execute(
  """SELECT DISTINCT(Songs.artist_id)
  FROM Songs
  """
)
for artist_id in artists.fetchall():
  insert_artist_info(str(artist_id[0]))

## Exploration
Now that I have these three tables, I would like to do some exploration.

In [None]:
# Get the average popularity of
popularity = cursor.execute(
  """SELECT Playlists.name AS playlist_name, AVG(Songs.popularity) AS avg_popularity
  FROM Songs
  JOIN Songs_On_Playlist ON Songs.id = Songs_On_Playlist.Song_ID
  JOIN Playlists ON Songs_On_Playlist.Playlist_ID = Playlists.id
  GROUP BY Playlists.id
  ORDER BY avg_popularity DESC"""
)
popularity.fetchall()

[('Replay 2022', 60.18),
 ('Replay 2023', 54.24),
 ('Replay 2024', 49.61616161616162),
 ('Replay 2025', 45.43)]

This result suggests that when I was first getting into music in 2022, I listened to more mainstream songs that are still popular today. Then as the years went on, I started refining my music taste.

In [None]:
# What songs have been on my replay since 2022?
# Basically, I want the songs which have a repeat entry in Songs_On_Playlist (same SongID, different playlistID)
consistent_songs = cursor.execute(
    """SELECT Songs.name, Songs.artist, Songs.popularity, COUNT(*) as Freq
  FROM Songs
  JOIN Songs_On_Playlist ON Songs.id = Songs_On_Playlist.Song_ID
  GROUP BY Songs_On_Playlist.Song_ID
  ORDER BY Freq DESC"""
)
consistent_songs.fetchmany(15)

[('First Time', 'Hozier', 54, 3),
 ('Autumn Leaves - Album Version - (Take 1)', 'Bill Evans Trio', 35, 3),
 ('Jackie And Wilson', 'Hozier', 64, 3),
 ('august', 'Taylor Swift', 35, 3),
 ('cowboy like me', 'Taylor Swift', 62, 3),
 ('Part Of The Band', 'The 1975', 52, 3),
 ('Wintering', 'The 1975', 52, 3),
 ('Happiness', 'The 1975', 61, 3),
 ('Drops of Jupiter (Tell Me)', 'Train', 81, 3),
 ("September In The Rain - Live At Mister Kelly's, Chicago / 1957",
  'Sarah Vaughan',
  22,
  3),
 ('Daylight', 'Taylor Swift', 78, 3),
 ('About You', 'The 1975', 84, 3),
 ('Lover', 'Taylor Swift', 84, 3),
 ('All I Need To Hear', 'The 1975', 58, 3),
 ('Cruel Summer', 'Taylor Swift', 88, 3)]

In [None]:
# What is the average length of the songs I like?
# Ms to S = divide Ms by 60,000
song_lengths = cursor.execute(
  """SELECT AVG(Songs.runtime)/60000
  FROM Songs"""
)

song_lengths.fetchone()

(4.197860273224044,)

In [None]:
# How many distinct albums did I listen to?
# How many times did I have a favorite from a certain album?
# Note to self: HAVING is for use after aggregation (WHERE is not for aggregation)
num_albums = cursor.execute(
    """SELECT Songs.album, COUNT(*) as album_freq
       FROM Songs
       GROUP BY Songs.album
       HAVING album_freq > 2
       ORDER BY album_freq DESC
    """
)
num_albums.fetchall()

[('Midnights (3am Edition)', 12),
 ('Lover', 10),
 ('Being Funny In A Foreign Language', 10),
 ('folklore (deluxe version)', 9),
 ('reputation', 8),
 ('eternal sunshine (slightly deluxe)', 7),
 ('Unreal Unearth', 7),
 ('The 1975 (Deluxe)', 7),
 ("Red (Taylor's Version)", 6),
 ('I like it when you sleep, for you are so beautiful yet so unaware of it',
  6),
 ("Harry's House", 6),
 ('Loveseat', 5),
 ('the record', 4),
 ('A Brief Inquiry Into Online Relationships', 4),
 ('1989 (Deluxe)', 4),
 ('1989', 4),
 ('SABLE,', 3),
 ("Fearless (Taylor's Version)", 3),
 ('Come Away With Me (Remastered 2022)', 3),
 ('Certified Lover Boy', 3)]

In [None]:
# I want to look at how my listening of Taylor Swift has changed over the years
taylor_swift = cursor.execute(
    """SELECT playlists.name, Songs.artist, COUNT(*)
       FROM Songs
       JOIN Songs_On_Playlist sop ON Songs.id = sop.Song_ID
       JOIN playlists ON playlists.id = sop.Playlist_ID
       WHERE Songs.artist = 'Taylor Swift'
       GROUP BY playlists.name
    """
)

taylor_swift.fetchall()

[('Replay 2022', 'Taylor Swift', 480),
 ('Replay 2023', 'Taylor Swift', 192),
 ('Replay 2024', 'Taylor Swift', 48),
 ('Replay 2025', 'Taylor Swift', 16)]

In [None]:
betty_ts = cursor.execute(
    """SELECT playlists.name, Song.artist, Song.name
    FROM Songs
    JOIN Songs_On_Playlist sop ON Songs.id = sop.Song_ID
    WHERE Songs.name = 'Betty'
"""
)
betty_ts.fetchall()

OperationalError: no such column: playlists.name

In [None]:
conn.close()