# Running songs from Spotify playlists Analysis
Exploring Spotify playlists with the word "running" in the title.


## Get libraries


In [112]:
# pip install spotify
# pip install base64
# pip install lyricsgenius

import spotify.sync as spotify
import base64
import requests
import pandas as pd
from urllib.parse import urlencode
import math

## Set up API access

References: __[link](https://developer.spotify.com/documentation/general/guides/authorization-guide/#client-credentials-flow)__

In [113]:
# Setting up spotify credetnials
spotify_client_id = 'f004c13efcfe46fbafedccbfbf106f67'
spotify_client_secret = '2c6a03b87d574d029ff175e302c67937'
spotify_client_creds = f'{spotify_client_id}:{spotify_client_secret}'
spotify_client_creds_b64 = base64.b64encode(spotify_client_creds.encode())
    
# Using client ID and secret to get a token to be used for each requests
spotify_token_url = 'https://accounts.spotify.com/api/token' 
spotify_token_data = {
    'grant_type': 'client_credentials'
}

spotify_token_headers = {
    'Authorization': f"Basic {spotify_client_creds_b64.decode()}" #<base64 encoded client_id:client_secret>
}
# Get the token with a POST request
r = requests.post(spotify_token_url, data=spotify_token_data, headers=spotify_token_headers)
valid_request = r.status_code in range(200, 299)
if valid_request:
    spotify_access_token = r.json()['access_token']
spotify_headers = {
    "Authorization": f"Bearer {spotify_access_token}"
}

## Get playlists with Running in the title

References: __[link](https://developer.spotify.com/documentation/web-api/reference/#category-search)__

In [114]:
# Spotify API call (set limit to 20, but it can be increased)
search_item = "Running"
type_to_search = "playlist"

endpoint = 'https://api.spotify.com/v1/search'
data = urlencode({"q": search_item, "type": type_to_search, "limit":20, "offset":0})

lookup_url = f"{endpoint}?{data}"

spotify_playlists = requests.get(lookup_url, headers=spotify_headers)

spotify_playlists.json()['playlists']

{'href': 'https://api.spotify.com/v1/search?query=Running&type=playlist&offset=0&limit=20',
 'items': [{'collaborative': False,
   'description': 'Run with running music hits in your workout. Want chill? See our <a href="https://open.spotify.com/playlist/4SAp2pqkCTYnu3midNetQ7?si=uyD7Y6jDSoaSFAVDP_o10Q">Chill House Hits</a> playlist!',
   'external_urls': {'spotify': 'https://open.spotify.com/playlist/0JTaSx9jkW1saMOc6t0vIk'},
   'href': 'https://api.spotify.com/v1/playlists/0JTaSx9jkW1saMOc6t0vIk',
   'id': '0JTaSx9jkW1saMOc6t0vIk',
   'images': [{'height': None,
     'url': 'https://i.scdn.co/image/ab67706c0000bebb006f1a4f06c6c4f34a9d4608',
     'width': None}],
   'name': 'RUNNING Music Hits 2021 🏃\u200d♂️',
   'owner': {'display_name': 'OneSevenMusic',
    'external_urls': {'spotify': 'https://open.spotify.com/user/hbhz741mqsyfv0zafwby1nbq0'},
    'href': 'https://api.spotify.com/v1/users/hbhz741mqsyfv0zafwby1nbq0',
    'id': 'hbhz741mqsyfv0zafwby1nbq0',
    'type': 'user',
    'ur

In [115]:
# Define dictionary and database for playlist
playlists_dict = {'playlist_id': None, 'playlist_name': None, 'url': None, 'total_track': None}

columns_playlists = ['playlist_id', 'playlist_name', 'url', 'total_track']

df_playlists = pd.DataFrame(columns=columns_playlists)

# Store the results in a dataframe
for playlist in spotify_playlists.json()['playlists']['items']:
    playlists_dict['playlist_id'] = playlist['id']
    playlists_dict['playlist_name'] = playlist['name']
    playlists_dict['url'] = playlist['external_urls']['spotify']
    playlists_dict['total_track'] = playlist['tracks']['total']
    
    # Store in the pandas dataset
    df_playlists = df_playlists.append(playlists_dict, ignore_index=True)

df_playlists

Unnamed: 0,playlist_id,playlist_name,url,total_track
0,0JTaSx9jkW1saMOc6t0vIk,RUNNING Music Hits 2021 🏃‍♂️,https://open.spotify.com/playlist/0JTaSx9jkW1s...,124
1,37i9dQZF1DWZUTt0fNaCPB,Running to Rock 170-190 BPM,https://open.spotify.com/playlist/37i9dQZF1DWZ...,75
2,0di4SOghem4bqf7KemD1KW,RUNNING MOTIVATION 🏃🏻‍♀️ HITT WORKOUT,https://open.spotify.com/playlist/0di4SOghem4b...,627
3,6TNsAzHaUJvyyZv82Di1uA,running playlist to fuel my main character god...,https://open.spotify.com/playlist/6TNsAzHaUJvy...,106
4,4UdVTRkzvgohO9FL3CPG5v,RUNNING RAP MUSIC 2021 🏃🏾 🎶,https://open.spotify.com/playlist/4UdVTRkzvgoh...,73
5,28aoRaVHiDxC4OVjwHuLLZ,Running 80´s - 90´s,https://open.spotify.com/playlist/28aoRaVHiDxC...,101
6,7gO9WmJaPmIviOlvK1m95P,Running Hits - Running Music 2021,https://open.spotify.com/playlist/7gO9WmJaPmIv...,70
7,4cgeOaRCHDkVDQPaDrRQFR,RUNNING Music Hits 2021🏃‍♂️Workout Music - Gym...,https://open.spotify.com/playlist/4cgeOaRCHDkV...,121
8,5JL4W9Unco1bqhW4fs2iEw,RUNNING bpm 180 workout remix,https://open.spotify.com/playlist/5JL4W9Unco1b...,256
9,30BhfiXadaDXbOS3qEy3sL,Running Hip Hop,https://open.spotify.com/playlist/30BhfiXadaDX...,181


## Get songs in the selected Running playlists

In [116]:
# Define a dict to keep information about songs
songs_dict = {'spotify_id': None, 'artist': None, 'feat_artists': None, 'spotify_url': None, 'title': None, 'popularity': None,
              'lyrics': None, 'duration_ms': None, 'song_art_image_url': None,
             'danceability': None, 'energy': None, 'key': None, 'loudness': None, 'mode': None, 'speechiness': None, 
              'acousticness': None, 'instrumentalness': None, 'liveness': None, 'valence': None, 'tempo': None
}

# Define empti pandas dataframe to store song info
songs_col = ['spotify_id', 'title', 'artist', 'feat_artists', 'spotify_url', 'popularity', 
                 'lyrics', 'duration_ms', 'song_art_image_url', 'danceability', 'energy', 
                 'key', 'loudness', 'mode', 'speechiness','acousticness', 'instrumentalness', 'liveness', 
                 'valence', 'tempo'
]

audio_features_col = ['popularity', 
                 'duration_ms', 'danceability', 'energy', 
                 'key', 'loudness', 'mode', 'speechiness','acousticness', 'instrumentalness', 'liveness', 
                 'valence', 'tempo']

metadata_col = ['spotify_id', 'title', 'artist', 'feat_artists', 'spotify_url', 'song_art_image_url', 'lyrics']

df_songs = pd.DataFrame(columns=songs_col)

In [117]:
# Function definitions
def remove_brackets(mystring):
    ''' Remove brackets from the title, otherwise can not find lyrics from genius
    '''
    start = mystring.find("(")
    end = mystring.find(")")
    result = mystring
    
    if start != -1 and end != -1:
        result = mystring[0:start]
    return result

In [118]:
# Store data of songs in a dataframe
for spotify_id in df_playlists.playlist_id:
    # API call for each different playlists
    endpoint = f'https://api.spotify.com/v1/playlists/{spotify_id}/tracks'
    spotify_tracks_playlist = requests.get(endpoint, headers=spotify_headers)
    # Loop inside each playlist to get data
    for iteration, song in enumerate(spotify_tracks_playlist.json()['items']):

        # Get information fom the playlist api call
        songs_dict['spotify_id'] = song['track']['id']
        songs_dict['title'] = remove_brackets(song['track']['name'])
        songs_dict['popularity'] = song['track']['popularity']
        songs_dict['spotify_url'] = song['track']['external_urls']['spotify']
        songs_dict['duration_ms'] = song['track']['duration_ms']

        # Look if there are multiple artists
        list_feat_artists = []

        for i, artist in enumerate(song['track']['album']['artists']):
            if i == 0:
                songs_dict['artist'] = artist['name']
            else:
                list_feat_artists.append(artist['name'])

        songs_dict['feat_artists'] = list_feat_artists  

        df_songs = df_songs.append(songs_dict, ignore_index=True)

df_songs.sort_values(by='popularity', ascending=False).head(10)

Unnamed: 0,spotify_id,title,artist,feat_artists,spotify_url,popularity,lyrics,duration_ms,song_art_image_url,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
626,3Wrjm47oTz2sjIgck11l5e,Beggin',Måneskin,[],https://open.spotify.com/track/3Wrjm47oTz2sjIg...,97,,211560,,,,,,,,,,,,
0,4cG7HUWYHBV6R6tHn1gxrl,Friday,Riton,"[Nightcrawlers, Mufasa & Hypeman]",https://open.spotify.com/track/4cG7HUWYHBV6R6t...,94,,169153,,,,,,,,,,,,
713,0VjIjW4GlUZAMYd2vXMi3b,Blinding Lights,The Weeknd,[],https://open.spotify.com/track/0VjIjW4GlUZAMYd...,94,,200040,,,,,,,,,,,,
618,4cG7HUWYHBV6R6tHn1gxrl,Friday,Riton,"[Nightcrawlers, Mufasa & Hypeman]",https://open.spotify.com/track/4cG7HUWYHBV6R6t...,94,,169153,,,,,,,,,,,,
1522,4cG7HUWYHBV6R6tHn1gxrl,Friday,Riton,"[Nightcrawlers, Mufasa & Hypeman]",https://open.spotify.com/track/4cG7HUWYHBV6R6t...,94,,169153,,,,,,,,,,,,
350,0VjIjW4GlUZAMYd2vXMi3b,Blinding Lights,The Weeknd,[],https://open.spotify.com/track/0VjIjW4GlUZAMYd...,94,,200040,,,,,,,,,,,,
1484,0VjIjW4GlUZAMYd2vXMi3b,Blinding Lights,The Weeknd,[],https://open.spotify.com/track/0VjIjW4GlUZAMYd...,94,,200040,,,,,,,,,,,,
954,4cG7HUWYHBV6R6tHn1gxrl,Friday,Riton,"[Nightcrawlers, Mufasa & Hypeman]",https://open.spotify.com/track/4cG7HUWYHBV6R6t...,94,,169153,,,,,,,,,,,,
549,4cG7HUWYHBV6R6tHn1gxrl,Friday,Riton,"[Nightcrawlers, Mufasa & Hypeman]",https://open.spotify.com/track/4cG7HUWYHBV6R6t...,94,,169153,,,,,,,,,,,,
372,6PERP62TejQjgHu81OHxgM,good 4 u,Olivia Rodrigo,[],https://open.spotify.com/track/6PERP62TejQjgHu...,93,,178147,,,,,,,,,,,,


In [168]:
# Dealing with audio_features
n_time = 0
limit = 100
audio_feature_columns = ['danceability', 
                         'energy', 
                         'key', 
                         'loudness', 
                         'mode', 
                         'spechiness', 
                         'acousticness', 
                         'instrumentalness', 
                         'liveness', 
                         'valence',
                         'tempo', 
                         'type', 
                         'id', 
                         'uri', 
                         'track_href', 
                         'analysis_url', 
                         'duration_ms', 
                         'time_signature']
audio_features_df = pd.DataFrame(columns=audio_feature_columns)
while n_time < math.ceil(len(df_songs)/100):
    offset = 100*n_time
    tracks=list(df_songs.spotify_id.iloc[offset: offset+limit])
    endpoint = 'https://api.spotify.com/v1/audio-features'
    data = urlencode({"ids": ','.join(tracks)})
    lookup_url = f"{endpoint}?{data}"
    audio_features = requests.get(lookup_url, headers=spotify_headers)
    audio_features_df = audio_features_df.append(audio_features.json()['audio_features'], ignore_index=True)
    n_time+=1
    # audio_features.json()['audio_features']

AttributeError: 'NoneType' object has no attribute 'keys'

In [204]:
# # Loop inside each playlist to get data
# for iteration, song in enumerate(spotify_tracks_playlist.json()['items']):
# # Get audio features information
# # more info here https://developer.spotify.com/documentation/web-api/reference/#object-audiofeaturesobject
#     spotify_song_id = song['track']['id']
#     endpoint = f"https://api.spotify.com/v1/audio-features/{spotify_song_id}"
#     audio_features = requests.get(endpoint, headers=spotify_headers)
#     songs_dict['danceability'] = audio_features.json()['danceability']
#     songs_dict['energy'] = audio_features.json()['energy']
#     songs_dict['key'] = audio_features.json()['key']
#     songs_dict['loudness'] = audio_features.json()['loudness']
#     songs_dict['mode'] = audio_features.json()['mode']
#     songs_dict['spechiness'] = audio_features.json()['speechiness']
#     songs_dict['acousticness'] = audio_features.json()['acousticness']
#     songs_dict['instrumentalness'] = audio_features.json()['instrumentalness']
#     songs_dict['liveness'] = audio_features.json()['liveness']
#     songs_dict['valence'] = audio_features.json()['valence']
#     songs_dict['tempo'] = audio_features.json()['tempo']

In [205]:
# spotify_playlist_id = spotify_single_playlist.json()["playlists"]["items"][0]["id"]
# endpoint = f'https://api.spotify.com/v1/playlists/{spotify_playlist_id}/tracks'
# data = urlencode({"market": "IT", "limit":10, "offset":0, "fields": "items(track(name, album(artists)))"})
# lookup_url = f"{endpoint}?{data}"

# spotify_tracks_playlist = requests.get(lookup_url, headers=spotify_headers)
# # print(spotify_tracks_playlist.status_code)
# # print(spotify_tracks_playlist.json())

# # # storing songs titles and artists within playlist
# songs = [{','.join([song['track']['album']['artists'][index]['name'] for index in range(len(song['track']['album']['artists']))]): song['track']['name']} for song in spotify_tracks_playlist.json()['items']]
# songs

In [206]:
# # with only one artist name when multiple
# songs = [
#     {song['track']['album']['artists'][0]['name']: song['track']['name']} 
#     for song in spotify_tracks_playlist.json()['items']
# ]

In [207]:
# songs

In [208]:
# # only one
# (artist_name_spotify, song_title_spotify), = songs[0].items()
# print(artist_name_spotify)
# print(song_title_spotify)

In [156]:
import lyricsgenius
genius_token = 'gIPW9PqAO8nW3MO8kGqJouKyx4iWRJkLcuM2LThtRtlj8z557KKj21jwIymggQdr'
genius = lyricsgenius.Genius(genius_token) # before it was api
song = genius.search_songs(song_title_spotify, per_page = 50, page=1) # possible limitations on the number of objects retrieved
artist_name_genius = [song['hits'][song_num]['result']['primary_artist']['name'] 
                      for song_num in range(len(song['hits']))]
song_title_genius = [song['hits'][song_num]['result']['title']
                      for song_num in range(len(song['hits']))]

print(f'Artists: {artist_name_genius}')
print(f'\nSongs titles: {song_title_genius}')

Artists: ['Spotify', 'Spotify', 'Spotify', 'Spotify', 'Spotify', 'Spotify', 'Riton & Nightcrawlers']

Songs titles: ['Today’s Top Hits 6/4/21 (feat. Billie Eilish)', 'Today’s Top Hits 3/5/21 (feat. Selena Gomez)', 'Today’s Top Hits 3/28/21 (feat. Cardi B)', 'Today’s Top Hits 5/14/21 (feat. The Weeknd & Ariana Grande)', 'Today’s Top Hits 5/21/21 (feat. BTS)', 'Today’s Top Hits 5/28/21 (feat. Olivia Rodrigo)', 'Friday (Dopamine Re-Edit)']


In [None]:
# import os
# print(os.getcwd())
# artist.save_lyrics() 

In [None]:
# import pandas as pd
# import json
# # Artist=pd.read_json(f"Lyrics_{artist_name}.json", orient='values')
# with open(f"Lyrics_{artist_name}.json") as json_data:
#     Artist_dict = json.load(json_data)
# for i in Artist_dict['songs'][0].keys():
#     print(i)

In [118]:
# #Create an empty dictionary to store your songs and related data
# artist_dict = {}
# def collectSongData(adic):
#     dps = list()
#     title = adic['title'] #song title
#     url = adic['url'] #genius url
#     artist = adic['artist'] #artist name(s)
#     song_id = adic['id'] #genius id
#     lyrics = adic['lyrics'] #song lyrics
#     year = adic['release_date'] #release date
#     upload_date = adic['description_annotation']['annotatable']['client_timestamps']['lyrics_updated_at'] #lyrics upload date
#     annotations = adic['annotation_count'] #total no. of annotations
#     descr = adic['description'] #song descriptions
    
#     dps.append((title,url,artist,song_id,lyrics,year,upload_date,annotations,descr)) #append all to one tuple list
#     artist_dict[title] = dps #assign list to song dictionary entry named after song title
    
# collectSongData(Artist_dict['songs'][0]) #check function works
# artist_dict

In [106]:
# Searching a song with combination of song_title and artist_name from spotify
# song= genius.search_song(song_title, artist_name)
# print(song.title)