Note: This notebook was last re-run on Dec 2, 2020, so the results shown in it reflect the information available on that date.

### Setup

In [1]:
from my_credentials import my_client_id, my_client_secret

In [2]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

import pandas as pd

import sys
import os
import re

import requests
from bs4 import BeautifulSoup

In [3]:
# Build the Spotify API client from the app credentials imported above.
SPOTIPY_CLIENT_ID = my_client_id
SPOTIPY_CLIENT_SECRET = my_client_secret

credentials_manager = SpotifyClientCredentials(
    client_id=SPOTIPY_CLIENT_ID,
    client_secret=SPOTIPY_CLIENT_SECRET,
)
sp = spotipy.Spotify(client_credentials_manager=credentials_manager)

### Get List of Playlists

In [4]:
# url = "https://open.spotify.com/genre/charts-regional"

In [5]:
# Read the locally saved copy of the charts overview page into a string.
# The encoding is passed explicitly so the text is decoded the same way on
# every platform instead of depending on the locale default.
# NOTE(review): assumes the page was saved as UTF-8 -- confirm against the file.
file = 'Playlists.htm'
with open(file, 'r', encoding='utf-8') as html_file:
    html = html_file.read()

In [6]:
# Parse the saved page with Python's built-in 'html.parser' backend.
soup = BeautifulSoup(html,'html.parser')

In [7]:
# Collect the link target of every anchor on the page.
# `href=True` limits the search to anchors that actually carry an href
# attribute, so an anchor without one is skipped instead of raising KeyError.
href_list = [tag['href'] for tag in soup.find_all("a", href=True)]

### Get Information about the playlists

In [8]:
# Collect one [name, id, country] entry per playlist link.
playlists_infos = []

for href in href_list:
    # A single request per playlist, restricted to the two fields used here
    # (without `fields` the complete playlist is returned on every call).
    playlist_meta = sp.playlist(href, fields='name,id')
    name = playlist_meta['name']

    # The country is the playlist name with " Top 50" removed.
    country = re.sub(" Top 50", "", name)

    playlists_infos.append([name, playlist_meta['id'], country])

In [9]:
# Preview the first five [name, playlist id, country] entries.
playlists_infos[:5]

[['Global Top 50', '37i9dQZEVXbMDoHDwVN2tF', 'Global'],
 ['Argentina Top 50', '37i9dQZEVXbMMy2roB9myp', 'Argentina'],
 ['Australia Top 50', '37i9dQZEVXbJPcfkRz0wJ0', 'Australia'],
 ['Belgium Top 50', '37i9dQZEVXbJNSeeHswcKB', 'Belgium'],
 ['Bolivia Top 50', '37i9dQZEVXbJqfMFK4d691', 'Bolivia']]

In [10]:
# Tabulate the collected [name, id, country] entries, one row per playlist,
# and display the resulting frame.
playlists_df = pd.DataFrame(
    playlists_infos,
    columns=['Name', 'Playlist_Id', 'Country'],
)
playlists_df

Unnamed: 0,Name,Playlist_Id,Country
0,Global Top 50,37i9dQZEVXbMDoHDwVN2tF,Global
1,Argentina Top 50,37i9dQZEVXbMMy2roB9myp,Argentina
2,Australia Top 50,37i9dQZEVXbJPcfkRz0wJ0,Australia
3,Belgium Top 50,37i9dQZEVXbJNSeeHswcKB,Belgium
4,Bolivia Top 50,37i9dQZEVXbJqfMFK4d691,Bolivia
...,...,...,...
57,Hungary Top 50,37i9dQZEVXbNHwMxAkvmF8,Hungary
58,Uruguay Top 50,37i9dQZEVXbMJJi3wgRbAy,Uruguay
59,United Kingdom Top 50,37i9dQZEVXbLnolsZ8PSNw,United Kingdom
60,United States Top 50,37i9dQZEVXbLRQDuF5jeBp,United States


### Get the Tracklists per Playlist

In [11]:
# For every playlist, fetch its track items and store them together with the
# playlist ID as a two-element list: [items, playlist_id].
tracklists_extended = []

for playlist in playlists_df['Playlist_Id']:
    # The bare playlist ID is passed directly; no '/playlist/' prefix is needed.
    # NOTE(review): the prefixed form appears to rely on lenient ID parsing in
    # older spotipy releases -- verify against the installed version.
    response = sp.playlist_tracks(
        playlist,
        fields='items(track(id,href,name,duration_ms))',
    )
    tracklists_extended.append([response['items'], playlist])

In [12]:
#tracklists_extended[61]

In [13]:
# Flatten the per-playlist track lists into one list of chart entries.
# Each entry is [raw item, 1-based position within its playlist, playlist id].
all_tracks_with_playlistid = [
    [item, position, entry[1]]
    for entry in tracklists_extended
    for position, item in enumerate(entry[0], start=1)
]

In [14]:
# Append the track id and track name, taken from the raw item stored at
# index 0, to the end of every chart entry.
for track in all_tracks_with_playlistid:
    track_info = track[0]['track']
    track.extend([track_info['id'], track_info['name']])

In [15]:
# Remove the raw item (index 0) from every chart entry, leaving
# [position, playlist id, track id, track name].
# Note: this mutates the entries in place, so the cell must run exactly once.
for track in all_tracks_with_playlistid:
    track.pop(0)

In [16]:
# One row per chart entry: position, playlist, track id and track name.
tracks_df = pd.DataFrame(
    all_tracks_with_playlistid,
    columns=['Chartposition', 'Playlist_Id', 'Track_Id', 'Track_Name'],
)

In [17]:
# Preview the first chart entries.
tracks_df.head()

Unnamed: 0,Chartposition,Playlist_Id,Track_Id,Track_Name
0,1,37i9dQZEVXbMDoHDwVN2tF,4MzXwWMhyBbmu6hOcLVD49,DÁKITI
1,2,37i9dQZEVXbMDoHDwVN2tF,0bYg9bo50gSsH3LtXe2SQn,All I Want for Christmas Is You
2,3,37i9dQZEVXbMDoHDwVN2tF,35mvY5S1H3J2QZyna3TFe0,positions
3,4,37i9dQZEVXbMDoHDwVN2tF,2XIc1pqjXV3Cr2BQUGNBck,LA NOCHE DE ANOCHE
4,5,37i9dQZEVXbMDoHDwVN2tF,2FRnf9qhLbvw8fu4IBXx78,Last Christmas


### Get Audio Feature Information

In [18]:
#sp.audio_features(track_id_list)

In [19]:
# Request the audio features of the first track only and keep the field names
# of the returned record.
key_list = [
    sp.audio_features(trackid)[0].keys()
    for trackid in tracks_df['Track_Id'][:1]
]

In [21]:
# Fetch the audio features for every chart entry.
#
# The same track appears in many charts, so each distinct track id is requested
# only once, and ids are sent in batches rather than one request per row.
AUDIO_FEATURES_BATCH_SIZE = 100  # maximum number of ids per audio-features request

unique_track_ids = tracks_df['Track_Id'].dropna().unique().tolist()

features_by_track_id = {}
for start in range(0, len(unique_track_ids), AUDIO_FEATURES_BATCH_SIZE):
    batch = unique_track_ids[start:start + AUDIO_FEATURES_BATCH_SIZE]
    # Results are paired with the requested ids by position; an entry is None
    # when no features are available for that track.
    features_by_track_id.update(zip(batch, sp.audio_features(batch)))

# Build one row per chart entry: the track id followed by the feature values
# in the order the API returned them (the column names are assigned later and
# must follow that same order).
trackfeatureslist = []
for trackid in tracks_df['Track_Id']:
    audiofeatures = features_by_track_id.get(trackid)
    if audiofeatures is None:
        # No features for this track: omit the row instead of crashing; the
        # later left merge on Track_Id then leaves its feature columns empty.
        continue
    trackfeatureslist.append([trackid, *audiofeatures.values()])

In [23]:
# Column names for the audio-feature rows: the leading track id followed by
# the feature fields, in the order the values were appended to each row.
columnslist = [
    'Track_Id',
    'danceability',
    'energy',
    'key',
    'loudness',
    'mode',
    'speechiness',
    'acousticness',
    'instrumentalness',
    'liveness',
    'valence',
    'tempo',
    'type',
    'id',
    'uri',
    'track_href',
    'analysis_url',
    'duration_ms',
    'time_signature',
]

In [24]:
# Tabulate the collected feature rows under the column names defined above.
trackfeatures_df = pd.DataFrame(trackfeatureslist, columns = columnslist)

In [25]:
# Display the audio-feature table.
trackfeatures_df

Unnamed: 0,Track_Id,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,4MzXwWMhyBbmu6hOcLVD49,0.731,0.573,4,-10.059,0,0.0544,0.40100,0.000052,0.1130,0.145,109.928,audio_features,4MzXwWMhyBbmu6hOcLVD49,spotify:track:4MzXwWMhyBbmu6hOcLVD49,https://api.spotify.com/v1/tracks/4MzXwWMhyBbm...,https://api.spotify.com/v1/audio-analysis/4MzX...,205090,4
1,0bYg9bo50gSsH3LtXe2SQn,0.336,0.627,7,-7.463,1,0.0384,0.16400,0.000000,0.0708,0.350,150.273,audio_features,0bYg9bo50gSsH3LtXe2SQn,spotify:track:0bYg9bo50gSsH3LtXe2SQn,https://api.spotify.com/v1/tracks/0bYg9bo50gSs...,https://api.spotify.com/v1/audio-analysis/0bYg...,241107,4
2,35mvY5S1H3J2QZyna3TFe0,0.737,0.802,0,-4.771,1,0.0878,0.46800,0.000000,0.0931,0.682,144.015,audio_features,35mvY5S1H3J2QZyna3TFe0,spotify:track:35mvY5S1H3J2QZyna3TFe0,https://api.spotify.com/v1/tracks/35mvY5S1H3J2...,https://api.spotify.com/v1/audio-analysis/35mv...,172325,4
3,2XIc1pqjXV3Cr2BQUGNBck,0.856,0.618,7,-4.892,1,0.2860,0.03030,0.000000,0.0866,0.391,81.993,audio_features,2XIc1pqjXV3Cr2BQUGNBck,spotify:track:2XIc1pqjXV3Cr2BQUGNBck,https://api.spotify.com/v1/tracks/2XIc1pqjXV3C...,https://api.spotify.com/v1/audio-analysis/2XIc...,203201,4
4,2FRnf9qhLbvw8fu4IBXx78,0.735,0.478,2,-12.472,1,0.0293,0.18900,0.000002,0.3550,0.947,107.682,audio_features,2FRnf9qhLbvw8fu4IBXx78,spotify:track:2FRnf9qhLbvw8fu4IBXx78,https://api.spotify.com/v1/tracks/2FRnf9qhLbvw...,https://api.spotify.com/v1/audio-analysis/2FRn...,262960,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3095,79ME0P8ch9estARqe674HK,0.515,0.754,11,-3.773,1,0.2940,0.28200,0.000000,0.1250,0.365,140.062,audio_features,79ME0P8ch9estARqe674HK,spotify:track:79ME0P8ch9estARqe674HK,https://api.spotify.com/v1/tracks/79ME0P8ch9es...,https://api.spotify.com/v1/audio-analysis/79ME...,248571,4
3096,5Bti0azlFhMattVY76qFr9,0.617,0.212,0,-9.856,1,0.0334,0.95800,0.001570,0.1310,0.219,126.579,audio_features,5Bti0azlFhMattVY76qFr9,spotify:track:5Bti0azlFhMattVY76qFr9,https://api.spotify.com/v1/tracks/5Bti0azlFhMa...,https://api.spotify.com/v1/audio-analysis/5Bti...,261616,4
3097,2h1KRcol4TvqCl1Lf8RWio,0.777,0.729,1,-4.577,0,0.0495,0.37500,0.000000,0.3470,0.239,112.001,audio_features,2h1KRcol4TvqCl1Lf8RWio,spotify:track:2h1KRcol4TvqCl1Lf8RWio,https://api.spotify.com/v1/tracks/2h1KRcol4Tvq...,https://api.spotify.com/v1/audio-analysis/2h1K...,192857,4
3098,0J2otOvMdg62gVMMjwOvSK,0.534,0.499,9,-10.601,0,0.0416,0.04160,0.000041,0.2870,0.227,91.954,audio_features,0J2otOvMdg62gVMMjwOvSK,spotify:track:0J2otOvMdg62gVMMjwOvSK,https://api.spotify.com/v1/tracks/0J2otOvMdg62...,https://api.spotify.com/v1/audio-analysis/0J2o...,229589,4


## Merge all Dataframes

In [26]:
# Summarise row count, columns and dtypes of the chart entries before merging.
tracks_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3100 entries, 0 to 3099
Data columns (total 4 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Chartposition  3100 non-null   int64 
 1   Playlist_Id    3100 non-null   object
 2   Track_Id       3100 non-null   object
 3   Track_Name     3100 non-null   object
dtypes: int64(1), object(3)
memory usage: 97.0+ KB


In [27]:
# Keep a single feature row per track so the table can serve as a lookup for
# the merge on Track_Id. De-duplicating on the key (rather than on whole rows)
# guarantees key uniqueness, and reassigning instead of mutating in place keeps
# the cell safe to re-run.
trackfeatures_df = trackfeatures_df.drop_duplicates(subset='Track_Id')

In [28]:
# Summarise the de-duplicated audio-feature table.
trackfeatures_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1075 entries, 0 to 3099
Data columns (total 19 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Track_Id          1075 non-null   object 
 1   danceability      1075 non-null   float64
 2   energy            1075 non-null   float64
 3   key               1075 non-null   int64  
 4   loudness          1075 non-null   float64
 5   mode              1075 non-null   int64  
 6   speechiness       1075 non-null   float64
 7   acousticness      1075 non-null   float64
 8   instrumentalness  1075 non-null   float64
 9   liveness          1075 non-null   float64
 10  valence           1075 non-null   float64
 11  tempo             1075 non-null   float64
 12  type              1075 non-null   object 
 13  id                1075 non-null   object 
 14  uri               1075 non-null   object 
 15  track_href        1075 non-null   object 
 16  analysis_url      1075 non-null   object 


In [29]:
# Attach the audio features to every chart entry.
# The left join keeps all chart rows. `validate` makes pandas raise a
# MergeError if a Track_Id occurs more than once in trackfeatures_df, which
# would otherwise silently duplicate chart rows.
tracks_all_info = tracks_df.merge(
    trackfeatures_df,
    how='left',
    on='Track_Id',
    validate='many_to_one',
)

In [30]:
# Display the chart entries together with their audio features.
tracks_all_info

Unnamed: 0,Chartposition,Playlist_Id,Track_Id,Track_Name,danceability,energy,key,loudness,mode,speechiness,...,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,1,37i9dQZEVXbMDoHDwVN2tF,4MzXwWMhyBbmu6hOcLVD49,DÁKITI,0.731,0.573,4,-10.059,0,0.0544,...,0.1130,0.145,109.928,audio_features,4MzXwWMhyBbmu6hOcLVD49,spotify:track:4MzXwWMhyBbmu6hOcLVD49,https://api.spotify.com/v1/tracks/4MzXwWMhyBbm...,https://api.spotify.com/v1/audio-analysis/4MzX...,205090,4
1,2,37i9dQZEVXbMDoHDwVN2tF,0bYg9bo50gSsH3LtXe2SQn,All I Want for Christmas Is You,0.336,0.627,7,-7.463,1,0.0384,...,0.0708,0.350,150.273,audio_features,0bYg9bo50gSsH3LtXe2SQn,spotify:track:0bYg9bo50gSsH3LtXe2SQn,https://api.spotify.com/v1/tracks/0bYg9bo50gSs...,https://api.spotify.com/v1/audio-analysis/0bYg...,241107,4
2,3,37i9dQZEVXbMDoHDwVN2tF,35mvY5S1H3J2QZyna3TFe0,positions,0.737,0.802,0,-4.771,1,0.0878,...,0.0931,0.682,144.015,audio_features,35mvY5S1H3J2QZyna3TFe0,spotify:track:35mvY5S1H3J2QZyna3TFe0,https://api.spotify.com/v1/tracks/35mvY5S1H3J2...,https://api.spotify.com/v1/audio-analysis/35mv...,172325,4
3,4,37i9dQZEVXbMDoHDwVN2tF,2XIc1pqjXV3Cr2BQUGNBck,LA NOCHE DE ANOCHE,0.856,0.618,7,-4.892,1,0.2860,...,0.0866,0.391,81.993,audio_features,2XIc1pqjXV3Cr2BQUGNBck,spotify:track:2XIc1pqjXV3Cr2BQUGNBck,https://api.spotify.com/v1/tracks/2XIc1pqjXV3C...,https://api.spotify.com/v1/audio-analysis/2XIc...,203201,4
4,5,37i9dQZEVXbMDoHDwVN2tF,2FRnf9qhLbvw8fu4IBXx78,Last Christmas,0.735,0.478,2,-12.472,1,0.0293,...,0.3550,0.947,107.682,audio_features,2FRnf9qhLbvw8fu4IBXx78,spotify:track:2FRnf9qhLbvw8fu4IBXx78,https://api.spotify.com/v1/tracks/2FRnf9qhLbvw...,https://api.spotify.com/v1/audio-analysis/2FRn...,262960,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3095,46,37i9dQZEVXbLdGSmz6xilI,79ME0P8ch9estARqe674HK,Tiền Nhiều Để Làm Gì (feat. Gducky),0.515,0.754,11,-3.773,1,0.2940,...,0.1250,0.365,140.062,audio_features,79ME0P8ch9estARqe674HK,spotify:track:79ME0P8ch9estARqe674HK,https://api.spotify.com/v1/tracks/79ME0P8ch9es...,https://api.spotify.com/v1/audio-analysis/79ME...,248571,4
3096,47,37i9dQZEVXbLdGSmz6xilI,5Bti0azlFhMattVY76qFr9,Lạ Lùng,0.617,0.212,0,-9.856,1,0.0334,...,0.1310,0.219,126.579,audio_features,5Bti0azlFhMattVY76qFr9,spotify:track:5Bti0azlFhMattVY76qFr9,https://api.spotify.com/v1/tracks/5Bti0azlFhMa...,https://api.spotify.com/v1/audio-analysis/5Bti...,261616,4
3097,48,37i9dQZEVXbLdGSmz6xilI,2h1KRcol4TvqCl1Lf8RWio,The Right Journey (feat. Gducky),0.777,0.729,1,-4.577,0,0.0495,...,0.3470,0.239,112.001,audio_features,2h1KRcol4TvqCl1Lf8RWio,spotify:track:2h1KRcol4TvqCl1Lf8RWio,https://api.spotify.com/v1/tracks/2h1KRcol4Tvq...,https://api.spotify.com/v1/audio-analysis/2h1K...,192857,4
3098,49,37i9dQZEVXbLdGSmz6xilI,0J2otOvMdg62gVMMjwOvSK,Hoa Hải Đường,0.534,0.499,9,-10.601,0,0.0416,...,0.2870,0.227,91.954,audio_features,0J2otOvMdg62gVMMjwOvSK,spotify:track:0J2otOvMdg62gVMMjwOvSK,https://api.spotify.com/v1/tracks/0J2otOvMdg62...,https://api.spotify.com/v1/audio-analysis/0J2o...,229589,4


In [31]:
# Preview the playlist metadata that is merged in next.
playlists_df.head()

Unnamed: 0,Name,Playlist_Id,Country
0,Global Top 50,37i9dQZEVXbMDoHDwVN2tF,Global
1,Argentina Top 50,37i9dQZEVXbMMy2roB9myp,Argentina
2,Australia Top 50,37i9dQZEVXbJPcfkRz0wJ0,Australia
3,Belgium Top 50,37i9dQZEVXbJNSeeHswcKB,Belgium
4,Bolivia Top 50,37i9dQZEVXbJqfMFK4d691,Bolivia


In [32]:
# Add the playlist name and country to every chart entry.
# The left join keeps all chart rows. `validate` makes pandas raise a
# MergeError if a Playlist_Id occurs more than once in playlists_df, which
# would otherwise silently duplicate chart rows.
chart_infos = tracks_all_info.merge(
    playlists_df,
    how='left',
    on='Playlist_Id',
    validate='many_to_one',
)

In [33]:
# Display the fully merged table (chart entry + audio features + playlist info).
chart_infos

Unnamed: 0,Chartposition,Playlist_Id,Track_Id,Track_Name,danceability,energy,key,loudness,mode,speechiness,...,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature,Name,Country
0,1,37i9dQZEVXbMDoHDwVN2tF,4MzXwWMhyBbmu6hOcLVD49,DÁKITI,0.731,0.573,4,-10.059,0,0.0544,...,109.928,audio_features,4MzXwWMhyBbmu6hOcLVD49,spotify:track:4MzXwWMhyBbmu6hOcLVD49,https://api.spotify.com/v1/tracks/4MzXwWMhyBbm...,https://api.spotify.com/v1/audio-analysis/4MzX...,205090,4,Global Top 50,Global
1,2,37i9dQZEVXbMDoHDwVN2tF,0bYg9bo50gSsH3LtXe2SQn,All I Want for Christmas Is You,0.336,0.627,7,-7.463,1,0.0384,...,150.273,audio_features,0bYg9bo50gSsH3LtXe2SQn,spotify:track:0bYg9bo50gSsH3LtXe2SQn,https://api.spotify.com/v1/tracks/0bYg9bo50gSs...,https://api.spotify.com/v1/audio-analysis/0bYg...,241107,4,Global Top 50,Global
2,3,37i9dQZEVXbMDoHDwVN2tF,35mvY5S1H3J2QZyna3TFe0,positions,0.737,0.802,0,-4.771,1,0.0878,...,144.015,audio_features,35mvY5S1H3J2QZyna3TFe0,spotify:track:35mvY5S1H3J2QZyna3TFe0,https://api.spotify.com/v1/tracks/35mvY5S1H3J2...,https://api.spotify.com/v1/audio-analysis/35mv...,172325,4,Global Top 50,Global
3,4,37i9dQZEVXbMDoHDwVN2tF,2XIc1pqjXV3Cr2BQUGNBck,LA NOCHE DE ANOCHE,0.856,0.618,7,-4.892,1,0.2860,...,81.993,audio_features,2XIc1pqjXV3Cr2BQUGNBck,spotify:track:2XIc1pqjXV3Cr2BQUGNBck,https://api.spotify.com/v1/tracks/2XIc1pqjXV3C...,https://api.spotify.com/v1/audio-analysis/2XIc...,203201,4,Global Top 50,Global
4,5,37i9dQZEVXbMDoHDwVN2tF,2FRnf9qhLbvw8fu4IBXx78,Last Christmas,0.735,0.478,2,-12.472,1,0.0293,...,107.682,audio_features,2FRnf9qhLbvw8fu4IBXx78,spotify:track:2FRnf9qhLbvw8fu4IBXx78,https://api.spotify.com/v1/tracks/2FRnf9qhLbvw...,https://api.spotify.com/v1/audio-analysis/2FRn...,262960,4,Global Top 50,Global
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3095,46,37i9dQZEVXbLdGSmz6xilI,79ME0P8ch9estARqe674HK,Tiền Nhiều Để Làm Gì (feat. Gducky),0.515,0.754,11,-3.773,1,0.2940,...,140.062,audio_features,79ME0P8ch9estARqe674HK,spotify:track:79ME0P8ch9estARqe674HK,https://api.spotify.com/v1/tracks/79ME0P8ch9es...,https://api.spotify.com/v1/audio-analysis/79ME...,248571,4,Vietnam Top 50,Vietnam
3096,47,37i9dQZEVXbLdGSmz6xilI,5Bti0azlFhMattVY76qFr9,Lạ Lùng,0.617,0.212,0,-9.856,1,0.0334,...,126.579,audio_features,5Bti0azlFhMattVY76qFr9,spotify:track:5Bti0azlFhMattVY76qFr9,https://api.spotify.com/v1/tracks/5Bti0azlFhMa...,https://api.spotify.com/v1/audio-analysis/5Bti...,261616,4,Vietnam Top 50,Vietnam
3097,48,37i9dQZEVXbLdGSmz6xilI,2h1KRcol4TvqCl1Lf8RWio,The Right Journey (feat. Gducky),0.777,0.729,1,-4.577,0,0.0495,...,112.001,audio_features,2h1KRcol4TvqCl1Lf8RWio,spotify:track:2h1KRcol4TvqCl1Lf8RWio,https://api.spotify.com/v1/tracks/2h1KRcol4Tvq...,https://api.spotify.com/v1/audio-analysis/2h1K...,192857,4,Vietnam Top 50,Vietnam
3098,49,37i9dQZEVXbLdGSmz6xilI,0J2otOvMdg62gVMMjwOvSK,Hoa Hải Đường,0.534,0.499,9,-10.601,0,0.0416,...,91.954,audio_features,0J2otOvMdg62gVMMjwOvSK,spotify:track:0J2otOvMdg62gVMMjwOvSK,https://api.spotify.com/v1/tracks/0J2otOvMdg62...,https://api.spotify.com/v1/audio-analysis/0J2o...,229589,4,Vietnam Top 50,Vietnam


## Dataframe to JSON Format as needed

In [34]:
# Export the chart entries with audio features as a JSON array of records.
tracks_all_info.to_json(path_or_buf=r'tracks_all.json', orient='records')

In [35]:
# Export the playlist metadata as a JSON array of records.
playlists_df.to_json(path_or_buf=r'playlists_meta.json', orient='records')

**File to use for visualisation that has all the information needed:**

In [36]:
# Export the fully merged table as a JSON array of records.
chart_infos.to_json(path_or_buf=r'chart_infos.json', orient='records')