In [1]:
import os
import spotipy
import pandas as pd
from spotipy.oauth2 import SpotifyClientCredentials

##### Connect to the Spotify API

In [2]:
# Authenticate with the client-credentials flow.  The ID and secret are read
# from environment variables so they never appear in the notebook itself.
client_credentials_manager = SpotifyClientCredentials(
    client_id=os.getenv("spotify_client_id"),
    client_secret=os.getenv("spotify_client_secret"),
)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [3]:
# Top Songs - Global
playlist_link = "https://open.spotify.com/playlist/37i9dQZEVXbNG2KDcFcKOF"


def extract_playlist_id(link):
    """Return the playlist ID, i.e. the last path segment of a playlist link.

    Links copied from the Spotify app often carry a query string
    (``?si=...``) or a trailing slash; both are stripped so that only the
    bare ID is returned.

    Parameters
    ----------
    link : str
        A playlist URL such as ``https://open.spotify.com/playlist/<id>``.

    Returns
    -------
    str
        The ``<id>`` part of the link.
    """
    # Drop any query string / fragment before looking at the path.
    path = link.split('?', 1)[0].split('#', 1)[0]
    return path.rstrip('/').split('/')[-1]


playlist_URI = extract_playlist_id(playlist_link)

In [4]:
# Fetch the playlist's tracks.  The result is a paged object, so keep following
# its `next` link and fold every extra page into the first page's `items`.
# A playlist that fits on one page still costs a single request.
data = sp.playlist_tracks(playlist_URI)
_page = data
while _page and _page.get('next'):
    _page = sp.next(_page)
    if _page:
        data['items'].extend(_page['items'])

##### Album DF

In [5]:
# Build one record per playlist entry describing the album its track belongs to.
album_list = []
for row in data['items']:
    track = row.get('track')
    if not track:
        # An entry without track details would raise TypeError on the
        # lookups below; skip it rather than abort the whole extraction.
        continue
    album = track['album']
    album_list.append({
        'album_id': album['id'],
        'album_name': album['name'],
        'release_date': album['release_date'],
        'total_tracks': album['total_tracks'],
        'url': album['external_urls']['spotify'],
    })

`album_list` is a _list of dictionaries_, one dictionary per track: `[{...}, {...}, ..., {...}]`

In [6]:
# Preview only the first few records; dumping the whole list floods the output.
album_list[:5]

[{'album_id': '1bBez9PNvkJPW08bU7NYta',
  'album_name': 'Scarlet',
  'release_date': '2023-09-20',
  'total_tracks': 15,
  'url': 'https://open.spotify.com/album/1bBez9PNvkJPW08bU7NYta'},
 {'album_id': '53985D8g3JcGBoULSOYYKX',
  'album_name': 'Seven (feat. Latto)',
  'release_date': '2023-07-14',
  'total_tracks': 3,
  'url': 'https://open.spotify.com/album/53985D8g3JcGBoULSOYYKX'},
 {'album_id': '0fJ0beklzFwoBv01U8792s',
  'album_name': '3D : The Remixes',
  'release_date': '2023-10-02',
  'total_tracks': 7,
  'url': 'https://open.spotify.com/album/0fJ0beklzFwoBv01U8792s'},
 {'album_id': '3UOV8XvCwMKaATRNXrYCjN',
  'album_name': 'greedy',
  'release_date': '2023-09-15',
  'total_tracks': 1,
  'url': 'https://open.spotify.com/album/3UOV8XvCwMKaATRNXrYCjN'},
 {'album_id': '1NAmidJlEaVgA3MpcPFYGq',
  'album_name': 'Lover',
  'release_date': '2019-08-23',
  'total_tracks': 18,
  'url': 'https://open.spotify.com/album/1NAmidJlEaVgA3MpcPFYGq'},
 {'album_id': '18ogtNq9F7DmMkNYO6Xb4k',
  'al

In [7]:
# Tabulate the album records: each dict becomes a row, each key a column.
album_df = pd.DataFrame(album_list)
album_df.head()

Unnamed: 0,album_id,album_name,release_date,total_tracks,url
0,1bBez9PNvkJPW08bU7NYta,Scarlet,2023-09-20,15,https://open.spotify.com/album/1bBez9PNvkJPW08...
1,53985D8g3JcGBoULSOYYKX,Seven (feat. Latto),2023-07-14,3,https://open.spotify.com/album/53985D8g3JcGBoU...
2,0fJ0beklzFwoBv01U8792s,3D : The Remixes,2023-10-02,7,https://open.spotify.com/album/0fJ0beklzFwoBv0...
3,3UOV8XvCwMKaATRNXrYCjN,greedy,2023-09-15,1,https://open.spotify.com/album/3UOV8XvCwMKaATR...
4,1NAmidJlEaVgA3MpcPFYGq,Lover,2019-08-23,18,https://open.spotify.com/album/1NAmidJlEaVgA3M...


In [8]:
# (rows, columns) of the album table, checked before duplicates are dropped.
album_df.shape

(50, 5)

In [9]:
# Column dtypes and non-null counts of the album table.
album_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   album_id      50 non-null     object
 1   album_name    50 non-null     object
 2   release_date  50 non-null     object
 3   total_tracks  50 non-null     int64 
 4   url           50 non-null     object
dtypes: int64(1), object(4)
memory usage: 2.1+ KB


`album_id` is the primary key (PK) of the album table, so it must be unique.

`release_date` is stored as `object` (string) and should be converted to `datetime`.

In [10]:
# Several tracks can share an album; keep the first row seen for each album_id.
album_df.drop_duplicates(subset='album_id', keep='first', inplace=True)

##### Artist DF

List of key-value tuples

`dict_var.items() --> [(key1, value1), (key2, value2), ...]`

In [11]:
# Build one record per artist credited on each track's album.
# NOTE: these are the *album* artists (track['album']['artists']), not the
# track-level artist list.
artist_list = []
for row in data['items']:
    # Look the track up directly instead of scanning every key of the row.
    track = row.get('track')
    if not track:
        # No track details for this entry, so there is nothing to extract.
        continue
    for artist in track['album']['artists']:
        artist_list.append({
            'artist_id': artist['id'],
            'artist_name': artist['name'],
            'artist_url': artist['href'],
        })

In [12]:
# Tabulate the artist records and keep the first row seen for each artist_id.
# The original row labels are kept, so the index may have gaps.
artist_df = pd.DataFrame(artist_list).drop_duplicates(subset='artist_id', keep='first')
artist_df.shape

(52, 3)

In [13]:
# Preview the de-duplicated artist table.
artist_df.head()

Unnamed: 0,artist_id,artist_name,artist_url
0,5cj0lLjcoR7YOSnhnX0Po5,Doja Cat,https://api.spotify.com/v1/artists/5cj0lLjcoR7...
1,6HaGTQPmzraVmaVxvz6EUc,Jung Kook,https://api.spotify.com/v1/artists/6HaGTQPmzra...
2,3MdXrJWsbVzdn6fe5JYkSQ,Latto,https://api.spotify.com/v1/artists/3MdXrJWsbVz...
4,2LIk90788K0zvyj2JJVwkJ,Jack Harlow,https://api.spotify.com/v1/artists/2LIk90788K0...
5,45dkTj5sMRSjrmBSBeiHym,Tate McRae,https://api.spotify.com/v1/artists/45dkTj5sMRS...


##### Song DF

In [14]:
# Build one record per playlist entry describing the track itself, plus the
# album_id / artist_id references to the album and artist records.
song_list = []
for row in data['items']:
    track = row.get('track')
    if not track:
        # An entry without track details would raise TypeError on the
        # lookups below; skip it rather than abort the whole extraction.
        continue
    song_list.append({
        'song_id': track['id'],
        'song_name': track['name'],
        'song_duration': track['duration_ms'],
        'song_url': track['external_urls']['spotify'],
        'song_popularity': track['popularity'],
        # `added_at` lives on the playlist entry, not on the track.
        'song_added': row['added_at'],
        'album_id': track['album']['id'],
        # Only the first listed track artist is recorded.
        'artist_id': track['artists'][0]['id'],
    })

In [15]:
# Preview only the first few records; dumping the whole list floods the output.
song_list[:5]

[{'song_id': '56y1jOTK0XSvJzVv9vHQBK',
  'song_name': 'Paint The Town Red',
  'song_duration': 230480,
  'song_url': 'https://open.spotify.com/track/56y1jOTK0XSvJzVv9vHQBK',
  'song_popularity': 85,
  'song_added': '2023-10-06T10:09:15Z',
  'album_id': '1bBez9PNvkJPW08bU7NYta',
  'artist_id': '5cj0lLjcoR7YOSnhnX0Po5'},
 {'song_id': '7x9aauaA9cu6tyfpHnqDLo',
  'song_name': 'Seven (feat. Latto) (Explicit Ver.)',
  'song_duration': 184400,
  'song_url': 'https://open.spotify.com/track/7x9aauaA9cu6tyfpHnqDLo',
  'song_popularity': 97,
  'song_added': '2023-10-06T10:09:15Z',
  'album_id': '53985D8g3JcGBoULSOYYKX',
  'artist_id': '6HaGTQPmzraVmaVxvz6EUc'},
 {'song_id': '01qFKNWq73UfEslI0GvumE',
  'song_name': '3D (feat. Jack Harlow)',
  'song_duration': 201812,
  'song_url': 'https://open.spotify.com/track/01qFKNWq73UfEslI0GvumE',
  'song_popularity': 84,
  'song_added': '2023-10-06T10:09:15Z',
  'album_id': '0fJ0beklzFwoBv01U8792s',
  'artist_id': '6HaGTQPmzraVmaVxvz6EUc'},
 {'song_id': '3r

In [16]:
# Tabulate the song records: each dict becomes a row, each key a column.
song_df = pd.DataFrame(song_list)
song_df.head()

Unnamed: 0,song_id,song_name,song_duration,song_url,song_popularity,song_added,album_id,artist_id
0,56y1jOTK0XSvJzVv9vHQBK,Paint The Town Red,230480,https://open.spotify.com/track/56y1jOTK0XSvJzV...,85,2023-10-06T10:09:15Z,1bBez9PNvkJPW08bU7NYta,5cj0lLjcoR7YOSnhnX0Po5
1,7x9aauaA9cu6tyfpHnqDLo,Seven (feat. Latto) (Explicit Ver.),184400,https://open.spotify.com/track/7x9aauaA9cu6tyf...,97,2023-10-06T10:09:15Z,53985D8g3JcGBoULSOYYKX,6HaGTQPmzraVmaVxvz6EUc
2,01qFKNWq73UfEslI0GvumE,3D (feat. Jack Harlow),201812,https://open.spotify.com/track/01qFKNWq73UfEsl...,84,2023-10-06T10:09:15Z,0fJ0beklzFwoBv01U8792s,6HaGTQPmzraVmaVxvz6EUc
3,3rUGC1vUpkDG9CZFHMur1t,greedy,131872,https://open.spotify.com/track/3rUGC1vUpkDG9CZ...,96,2023-10-06T10:09:15Z,3UOV8XvCwMKaATRNXrYCjN,45dkTj5sMRSjrmBSBeiHym
4,1BxfuPKGuaTgP7aM0Bbdwr,Cruel Summer,178426,https://open.spotify.com/track/1BxfuPKGuaTgP7a...,99,2023-10-06T10:09:15Z,1NAmidJlEaVgA3MpcPFYGq,06HL4z0CvFAxyc27GXpf02


In [17]:
# Keep the first row seen for each song_id.
song_df.drop_duplicates(subset='song_id', keep='first', inplace=True)

Convert columns to type _datetime_

In [18]:
def pad_release_date(value):
    """Expand a year-only or year-month date string to a full 'YYYY-MM-DD'.

    Spotify reports album release dates at day, month or year precision
    ('2019-08-23', '2019-08' or '2019').  Padding the shorter forms gives
    every value the same format, so the conversion below does not fail on a
    column that mixes precisions.  Anything that is not a 4- or 7-character
    string (full dates, missing values, already-converted timestamps) is
    returned unchanged, which keeps this cell safe to re-run.
    """
    if isinstance(value, str):
        if len(value) == 4:       # 'YYYY'
            return value + '-01-01'
        if len(value) == 7:       # 'YYYY-MM'
            return value + '-01'
    return value


album_df['release_date'] = pd.to_datetime(album_df['release_date'].map(pad_release_date))

In [19]:
# Parse the `song_added` timestamp strings into datetime values.
song_df['song_added'] = song_df['song_added'].pipe(pd.to_datetime)