## Import Libraries

In [159]:
#!pip install spotipy

In [1]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd

In [2]:
#add client_id and client_secret key
client_credentials_manager = SpotifyClientCredentials(client_id="XX", client_secret="XX")

In [3]:
sp = spotipy.Spotify(client_credentials_manager = client_credentials_manager)

## Discover Weekly - Updates Every Monday

In [2]:
playlist_link = "https://open.spotify.com/playlist/XX"

In [3]:
playlist_URI = playlist_link.split("/")[-1].split('?')[0]

In [93]:
#Get full details of the tracks of a playlist:  playlist_tracks(playlist_id, fields=None, limit=100, offset=0, market=None, additional_types=('track', ))
data = sp.playlist_tracks(playlist_URI) 

In [94]:
print(type(data))

<class 'dict'>


In [96]:
#No. of song in the list
print(len(data['items']))

30


In [97]:
#Each element in the list contain a dictionary
print(data['items'][0].keys())

dict_keys(['added_at', 'added_by', 'is_local', 'primary_color', 'track', 'video_thumbnail'])


In [98]:
print(data['items'][0]['track'].keys())

dict_keys(['album', 'artists', 'available_markets', 'disc_number', 'duration_ms', 'episode', 'explicit', 'external_ids', 'external_urls', 'href', 'id', 'is_local', 'name', 'popularity', 'preview_url', 'track', 'track_number', 'type', 'uri'])


In [99]:
print(data['items'][0]['added_by'].keys())

dict_keys(['external_urls', 'href', 'id', 'type', 'uri'])


## Artist Table

In [100]:
print(data['items'][0]['track']['artists'])

[{'external_urls': {'spotify': 'https://open.spotify.com/artist/3WGpXCj9YhhfX11TToZcXP'}, 'href': 'https://api.spotify.com/v1/artists/3WGpXCj9YhhfX11TToZcXP', 'id': '3WGpXCj9YhhfX11TToZcXP', 'name': 'Troye Sivan', 'type': 'artist', 'uri': 'spotify:artist:3WGpXCj9YhhfX11TToZcXP'}]


In [101]:
print(data['items'][0]['track']['artists'][0]['name'])
print(data['items'][0]['track']['artists'][0]['id'])
print(data['items'][0]['track']['artists'][0]['external_urls']['spotify'])

Troye Sivan
3WGpXCj9YhhfX11TToZcXP
https://open.spotify.com/artist/3WGpXCj9YhhfX11TToZcXP


In [102]:
artist_list = []
for row in data['items']: #data['items'] is a list
    for key, value in row.items(): #Each element in the list is a dict
        if key == "track":         #Track is one of the keys
            for artist in value['artists']:
                artist_dict = {'artist_id':artist['id'], 'artist_name':artist['name'], 'spotify_url': artist['external_urls']['spotify']}
                artist_list.append(artist_dict)

In [156]:
artist_df = pd.DataFrame(artist_list)
artist_df = artist_df.drop_duplicates(subset=['artist_id'])
display(artist_df.head())
display(artist_df.info())
display(artist_df.dtypes)

Unnamed: 0,artist_id,artist_name,spotify_url
0,3WGpXCj9YhhfX11TToZcXP,Troye Sivan,https://open.spotify.com/artist/3WGpXCj9YhhfX1...
1,0jUQSUOcM7lxVn5eVGTkzQ,Justine Skye,https://open.spotify.com/artist/0jUQSUOcM7lxVn...
2,5LHRHt1k9lMyONurDHEdrp,Tyga,https://open.spotify.com/artist/5LHRHt1k9lMyON...
3,46xBNx0j6cwY6sD9LgMTm1,Rosa Linn,https://open.spotify.com/artist/46xBNx0j6cwY6s...
4,3sXwEUqxSzb11VpuFa5cvJ,Stacey Ryan,https://open.spotify.com/artist/3sXwEUqxSzb11V...


<class 'pandas.core.frame.DataFrame'>
Int64Index: 39 entries, 0 to 38
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   artist_id    39 non-null     object
 1   artist_name  39 non-null     object
 2   spotify_url  39 non-null     object
dtypes: object(3)
memory usage: 1.2+ KB


None

artist_id      object
artist_name    object
spotify_url    object
dtype: object

## Album Table

In [158]:
data['items'][0]['track']['album'].keys()

dict_keys(['album_group', 'album_type', 'artists', 'available_markets', 'external_urls', 'href', 'id', 'images', 'name', 'release_date', 'release_date_precision', 'total_tracks', 'type', 'uri'])

In [113]:
print(data['items'][0]['track']['album']['name'])
print(data['items'][0]['track']['album']['release_date_precision'])
print(data['items'][0]['track']['album']['release_date'])
print(data['items'][0]['track']['album']['total_tracks'])
print(data['items'][1]['track']['album']['external_urls']['spotify'])

Angel Baby
day
2021-09-09
1
https://open.spotify.com/album/0FvZIwN0AHWYQVyuS3KR7H


In [147]:
album_list = []
for row in data['items']:
    album_id = row['track']['album']['id']
    album_name = row['track']['album']['name']
    album_release_date = row['track']['album']['release_date']
    album_total_tracks = row['track']['album']['total_tracks']
    album_url = row['track']['album']['external_urls']['spotify']
    album_element = {'album_id':album_id,'name':album_name,'release_date':album_release_date,
                        'total_tracks':album_total_tracks,'url':album_url}
    album_list.append(album_element)

In [154]:
album_df = pd.DataFrame(album_list)

#Convert release_date object to date object
album_df['release_date'] = pd.to_datetime(album_df['release_date'])
album_df = album_df.drop_duplicates(subset=['album_id'])

In [155]:
display(album_df.head())
display(album_df.info())
display(album_df.dtypes)

Unnamed: 0,album_id,name,release_date,total_tracks,url
0,44CdsgXhU5R2esprq0tf43,Angel Baby,2021-09-09,1,https://open.spotify.com/album/44CdsgXhU5R2esp...
1,0FvZIwN0AHWYQVyuS3KR7H,Dark Side,2023-01-31,10,https://open.spotify.com/album/0FvZIwN0AHWYQVy...
2,3CVEB0FPk25Ds64ALgxjH7,SNAP PACK,2022-09-30,7,https://open.spotify.com/album/3CVEB0FPk25Ds64...
3,0ASnNAycI0eu91gMm9Jfe4,Fall In Love Alone,2022-05-13,1,https://open.spotify.com/album/0ASnNAycI0eu91g...
4,1HJ34zQqSqNvZeO2W6dE01,Pano,2021-12-06,1,https://open.spotify.com/album/1HJ34zQqSqNvZeO...


<class 'pandas.core.frame.DataFrame'>
Int64Index: 30 entries, 0 to 29
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   album_id      30 non-null     object        
 1   name          30 non-null     object        
 2   release_date  30 non-null     datetime64[ns]
 3   total_tracks  30 non-null     int64         
 4   url           30 non-null     object        
dtypes: datetime64[ns](1), int64(1), object(3)
memory usage: 1.4+ KB


None

album_id                object
name                    object
release_date    datetime64[ns]
total_tracks             int64
url                     object
dtype: object

## Song Table

In [115]:
data['items'][0]['track'].keys()

dict_keys(['album', 'artists', 'available_markets', 'disc_number', 'duration_ms', 'episode', 'explicit', 'external_ids', 'external_urls', 'href', 'id', 'is_local', 'name', 'popularity', 'preview_url', 'track', 'track_number', 'type', 'uri'])

In [151]:
song_list = []
for row in data['items']:
    song_id = row['track']['id']
    song_name = row['track']['name']
    song_duration_mins = round((row['track']['duration_ms']/(60*1000)),2)
    song_url = row['track']['external_urls']['spotify']
    song_popularity = row['track']['popularity']
    song_added = row['added_at']
    album_id = row['track']['album']['id']
    artist_id = row['track']['album']['artists'][0]['id']
    
    song_element = {'song_id':song_id,'song_name':song_name,'duration_mins':song_duration_mins,'url':song_url,
                    'popularity':song_popularity,'song_added':song_added,'album_id':album_id,
                    'artist_id':artist_id
                   }
    song_list.append(song_element)

In [152]:
song_df = pd.DataFrame(song_list)
song_df['song_added'] = pd.to_datetime(song_df['song_added'])

In [153]:
display(song_df.head())
display(song_df.info())
display(song_df.dtypes)

Unnamed: 0,song_id,song_name,duration_mins,url,popularity,song_added,album_id,artist_id
0,2m6Ko3CY1qXNNja8AlugNc,Angel Baby,3.68,https://open.spotify.com/track/2m6Ko3CY1qXNNja...,78,2023-05-07 16:00:00+00:00,44CdsgXhU5R2esprq0tf43,3WGpXCj9YhhfX11TToZcXP
1,2p6Fh5ruk8YUaO43huubHW,Collide (Sped Up Remix),3.69,https://open.spotify.com/track/2p6Fh5ruk8YUaO4...,76,2023-05-07 16:00:00+00:00,0FvZIwN0AHWYQVyuS3KR7H,0jUQSUOcM7lxVn5eVGTkzQ
2,46cdw28EXOhDPnD1emDC6T,SNAP - High and Fast,2.27,https://open.spotify.com/track/46cdw28EXOhDPnD...,76,2023-05-07 16:00:00+00:00,3CVEB0FPk25Ds64ALgxjH7,46xBNx0j6cwY6sD9LgMTm1
3,5xwBIieMMFUmLDgvG4DjFe,Fall In Love Alone,3.42,https://open.spotify.com/track/5xwBIieMMFUmLDg...,84,2023-05-07 16:00:00+00:00,0ASnNAycI0eu91gMm9Jfe4,3sXwEUqxSzb11VpuFa5cvJ
4,08MFgEQeVLF37EyZ7jcwLc,Pano,4.24,https://open.spotify.com/track/08MFgEQeVLF37Ey...,80,2023-05-07 16:00:00+00:00,1HJ34zQqSqNvZeO2W6dE01,67IN4cLJ7798gUapyZlmac


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30 entries, 0 to 29
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype              
---  ------         --------------  -----              
 0   song_id        30 non-null     object             
 1   song_name      30 non-null     object             
 2   duration_mins  30 non-null     float64            
 3   url            30 non-null     object             
 4   popularity     30 non-null     int64              
 5   song_added     30 non-null     datetime64[ns, UTC]
 6   album_id       30 non-null     object             
 7   artist_id      30 non-null     object             
dtypes: datetime64[ns, UTC](1), float64(1), int64(1), object(5)
memory usage: 2.0+ KB


None

song_id                       object
song_name                     object
duration_mins                float64
url                           object
popularity                     int64
song_added       datetime64[ns, UTC]
album_id                      object
artist_id                     object
dtype: object

In [170]:
song_df

Unnamed: 0,song_id,song_name,duration_mins,url,popularity,song_added,album_id,artist_id
0,2m6Ko3CY1qXNNja8AlugNc,Angel Baby,3.68,https://open.spotify.com/track/2m6Ko3CY1qXNNja...,78,2023-05-07 16:00:00+00:00,44CdsgXhU5R2esprq0tf43,3WGpXCj9YhhfX11TToZcXP
1,2p6Fh5ruk8YUaO43huubHW,Collide (Sped Up Remix),3.69,https://open.spotify.com/track/2p6Fh5ruk8YUaO4...,76,2023-05-07 16:00:00+00:00,0FvZIwN0AHWYQVyuS3KR7H,0jUQSUOcM7lxVn5eVGTkzQ
2,46cdw28EXOhDPnD1emDC6T,SNAP - High and Fast,2.27,https://open.spotify.com/track/46cdw28EXOhDPnD...,76,2023-05-07 16:00:00+00:00,3CVEB0FPk25Ds64ALgxjH7,46xBNx0j6cwY6sD9LgMTm1
3,5xwBIieMMFUmLDgvG4DjFe,Fall In Love Alone,3.42,https://open.spotify.com/track/5xwBIieMMFUmLDg...,84,2023-05-07 16:00:00+00:00,0ASnNAycI0eu91gMm9Jfe4,3sXwEUqxSzb11VpuFa5cvJ
4,08MFgEQeVLF37EyZ7jcwLc,Pano,4.24,https://open.spotify.com/track/08MFgEQeVLF37Ey...,80,2023-05-07 16:00:00+00:00,1HJ34zQqSqNvZeO2W6dE01,67IN4cLJ7798gUapyZlmac
5,3W4U7TEgILGpq0EmquurtH,Old Love,4.16,https://open.spotify.com/track/3W4U7TEgILGpq0E...,83,2023-05-07 16:00:00+00:00,6mKwqGY2IS3qSs3mgs30A5,5kjFzBMHeoAx9xksFSwfUW
6,4Wui4miiL1JR3QqWcbcKwD,Love Me Like You Do,4.23,https://open.spotify.com/track/4Wui4miiL1JR3Qq...,66,2023-05-07 16:00:00+00:00,6xQjNm1pNxmXNhDh0GmAOC,0wzdbYD0TtDPvbjQ5QT7nY
7,3GWaFFi6TlPcZLYmCCHZ9O,Teenage Dream,2.97,https://open.spotify.com/track/3GWaFFi6TlPcZLY...,78,2023-05-07 16:00:00+00:00,1UH6aVsmnWRjsB5Tq0qUhF,3jTU1IOqkO7Mz4zdbXPose
8,5KeI897tZgjezHKvxlaOaR,Nonsense - Sped Up Version,2.29,https://open.spotify.com/track/5KeI897tZgjezHK...,70,2023-05-07 16:00:00+00:00,23eBqMqUmcb1wcxuXfYw6k,74KM79TiuVKeVCqs8QtB0B
9,5bvVA6idKl1R38C5G4tuxC,Know me,3.58,https://open.spotify.com/track/5bvVA6idKl1R38C...,63,2023-05-07 16:00:00+00:00,4iV5wZtnTcuKrdhxb8wLVN,67IWlRdLy3UcfY3q968euj
