# Analysis of running songs from Spotify playlists
Exploring Spotify playlists with the word "running" in the title.


## Get libraries


In [None]:
import spotify.sync as spotify
import base64
import requests
import pandas as pd
from urllib.parse import urlencode
import math

## Set up API access

References: __[link](https://developer.spotify.com/documentation/general/guides/authorization-guide/#client-credentials-flow)__

In [None]:
# Setting up Spotify credentials
spotify_client_id = '...' # your credentials here
spotify_client_secret = '...' # your credentials here
spotify_client_creds = f'{spotify_client_id}:{spotify_client_secret}'
spotify_client_creds_b64 = base64.b64encode(spotify_client_creds.encode())

# Client-credentials flow: the client ID and secret are exchanged for a token
# that is then sent with every API request
spotify_token_url = 'https://accounts.spotify.com/api/token'
spotify_token_data = {
    'grant_type': 'client_credentials'
}

spotify_token_headers = {
    # Basic <base64 encoded client_id:client_secret>
    'Authorization': f"Basic {spotify_client_creds_b64.decode()}"
}

# Get the token with a POST request
r = requests.post(spotify_token_url, data=spotify_token_data, headers=spotify_token_headers)

# Any 2xx status counts as success (the upper bound 300 is exclusive, so 299 is included)
valid_request = 200 <= r.status_code < 300
if not valid_request:
    # Stop here with the real cause instead of failing below with a NameError
    # on an undefined spotify_access_token
    raise RuntimeError(
        f"Spotify token request failed with status {r.status_code}: {r.text}"
    )

spotify_access_token = r.json()['access_token']

# Header sent with every subsequent Spotify Web API call
spotify_headers = {
    "Authorization": f"Bearer {spotify_access_token}"
}

## Get playlists with Running in the title

References: __[link](https://developer.spotify.com/documentation/web-api/reference/#category-search)__

In [None]:
# Spotify API call (set limit to 20 to be consistent with Nike's playlists)
search_item = "Running"
type_to_search = "playlist"

endpoint = 'https://api.spotify.com/v1/search'
# NOTE(review): offset=20 asks for results 21-40, i.e. it skips the first page
# of matches - confirm this is intended
data = urlencode({"q": search_item, "type": type_to_search, "limit": 20, "offset": 20})

lookup_url = f"{endpoint}?{data}"

spotify_playlists = requests.get(lookup_url, headers=spotify_headers)
# Surface HTTP errors (expired token, rate limit, ...) here rather than as a
# KeyError on the missing 'playlists' key further down
spotify_playlists.raise_for_status()

# Notebook display of the raw search result
spotify_playlists.json()['playlists']

In [None]:
# Columns of the playlists dataframe
columns_playlists = ['playlist_id', 'playlist_name', 'url', 'total_track']

# Collect one record per playlist returned by the search. The rows are gathered
# in a list and the dataframe is built once, because DataFrame.append was
# removed in pandas 2.0 and copied the whole frame on every call.
playlist_rows = [
    {
        'playlist_id': playlist['id'],
        'playlist_name': playlist['name'],
        'url': playlist['external_urls']['spotify'],
        'total_track': playlist['tracks']['total'],
    }
    for playlist in spotify_playlists.json()['playlists']['items']
]

# Store the results in a dataframe
df_playlists = pd.DataFrame(playlist_rows, columns=columns_playlists)

df_playlists

## Get songs in the selected Running playlists

In [None]:
# Template record holding the information kept for one song; every field starts as None
songs_dict = dict.fromkeys([
    'spotify_id', 'artist', 'feat_artists', 'spotify_url', 'title', 'popularity',
    'lyrics', 'duration_ms', 'song_art_image_url',
    'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
    'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
])

# Column order of the songs dataframe
songs_col = [
    'spotify_id',
    'title',
    'artist',
    'feat_artists',
    'spotify_url',
    'popularity',
    'lyrics',
    'duration_ms',
    'song_art_image_url',
    'danceability',
    'energy',
    'key',
    'loudness',
    'mode',
    'speechiness',
    'acousticness',
    'instrumentalness',
    'liveness',
    'valence',
    'tempo',
]

# Subset of columns holding numeric track attributes
audio_features_col = [
    'popularity',
    'duration_ms',
    'danceability',
    'energy',
    'key',
    'loudness',
    'mode',
    'speechiness',
    'acousticness',
    'instrumentalness',
    'liveness',
    'valence',
    'tempo',
]

# Subset of columns holding identifiers, names, links and lyrics
metadata_col = [
    'spotify_id',
    'title',
    'artist',
    'feat_artists',
    'spotify_url',
    'song_art_image_url',
    'lyrics',
]

# Empty pandas dataframe that will receive one row per song
df_songs = pd.DataFrame(columns=songs_col)

In [None]:
# Function definitions
def remove_brackets(mystring):
    ''' Cut a parenthesised suffix off a song title.

    Everything from the first "(" onwards is dropped, together with the
    whitespace in front of it, so that e.g. "Song (Remix)" becomes "Song".
    Such suffixes otherwise prevent the lyrics lookup on Genius from matching.

    The title is returned unchanged when:
    - it contains no "(" or no ")" after that "(" (nothing to remove), or
    - cutting would leave an empty string, e.g. "(Intro) Song", because an
      empty title cannot be searched for.

    mystring: the original track title.
    Returns the shortened title as a string.
    '''
    start = mystring.find("(")

    # Only treat it as a bracketed part if a ")" follows the "("
    if start == -1 or mystring.find(")", start) == -1:
        return mystring

    # rstrip removes the space that normally separates title and bracket
    result = mystring[:start].rstrip()

    # Fall back to the full title rather than returning an empty one
    return result if result else mystring

Reference for the audio features fields used below: __[here](https://developer.spotify.com/documentation/web-api/reference/#object-audiofeaturesobject)__.

In [None]:
# Store data of songs in a dataframe

# Fields copied verbatim from the audio-features response into each song row
audio_feature_keys = (
    'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
    'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
)

# Rows are gathered in a list and added to the dataframe once at the end:
# DataFrame.append was removed in pandas 2.0 and copied the frame on every call
song_rows = []

for playlist_id in df_playlists.playlist_id:
    # API call for each playlist (10 tracks per playlist to be consistent with Nike's playlists)
    endpoint = f'https://api.spotify.com/v1/playlists/{playlist_id}/tracks?limit=10'
    spotify_tracks_playlist = requests.get(endpoint, headers=spotify_headers)
    # Fail with the HTTP error itself rather than a KeyError on 'items'
    spotify_tracks_playlist.raise_for_status()

    # Loop inside each playlist to get data
    for song in spotify_tracks_playlist.json()['items']:
        track = song['track']

        # Entries without a track object or track ID cannot be looked up in
        # the audio-features endpoint, so skip them
        if track is None or track.get('id') is None:
            continue

        # Fresh record per song so that no value leaks over from the previous one
        song_row = dict.fromkeys(songs_col)

        # Get information from the playlist API call
        song_row['spotify_id'] = track['id']
        song_row['title'] = remove_brackets(track['name'])
        song_row['popularity'] = track['popularity']
        song_row['spotify_url'] = track['external_urls']['spotify']
        song_row['duration_ms'] = track['duration_ms']

        # Get audio features information; the response body is parsed once
        endpoint = f"https://api.spotify.com/v1/audio-features/{track['id']}"
        audio_features = requests.get(endpoint, headers=spotify_headers)
        audio_features.raise_for_status()
        features = audio_features.json()
        for feature_key in audio_feature_keys:
            song_row[feature_key] = features[feature_key]

        # First listed artist is the main artist, any others are stored as featured artists.
        # NOTE(review): this reads the *album* artists; the track's own
        # 'artists' list may be the better source for featured artists - confirm
        artist_names = [artist['name'] for artist in track['album']['artists']]
        song_row['artist'] = artist_names[0] if artist_names else None
        song_row['feat_artists'] = artist_names[1:]

        song_rows.append(song_row)

# Add the collected rows to whatever df_songs already holds
df_songs = pd.concat(
    [df_songs, pd.DataFrame(song_rows, columns=songs_col)],
    ignore_index=True,
)

# Notebook display only: the sorted view is not assigned back to df_songs
df_songs.sort_values(by='popularity', ascending=False)

In [None]:
# Keep only the first occurrence of each track. ignore_index=True renumbers the
# remaining rows 0..n-1; without it the dropped rows leave gaps in the index and
# integer label lookups such as df_songs['title'][i] raise KeyError for them.
df_songs.drop_duplicates(subset=['spotify_id'], keep='first', inplace=True, ignore_index=True)

## Get lyrics

In [None]:
# df_songs = pd.read_csv(r'/Users/MVadi/Desktop/df_running_songs.csv')

In [None]:
# Notebook display of the current songs dataframe
df_songs

In [None]:
import lyricsgenius

# Access token for the Genius API
genius_token = '...' # your credentials here

# Options passed to the Genius client (see the lyricsgenius documentation for
# the exact effect of each option)
genius_options = {
    'skip_non_songs': True,
    'excluded_terms': ["(Remix)", "(Live)"],
    'remove_section_headers': True,
}

# Client used for the lyrics lookups
genius = lyricsgenius.Genius(genius_token, **genius_options)

In [None]:
# Look up every song on Genius and store its lyrics and cover art URL.

# Upper bound on lookups per song. Retrying without a limit never terminates
# when the failure is permanent (e.g. an invalid token).
max_attempts = 3

# Iterate over the actual index labels: after rows have been dropped the labels
# are not guaranteed to be 0..len-1, so range(len(df_songs)) could hit missing
# labels and skip the last rows.
for ind in df_songs.index:
    song = None
    for attempt in range(1, max_attempts + 1):
        try:
            song = genius.search_song(df_songs.loc[ind, 'title'], df_songs.loc[ind, 'artist'])
            break
        except requests.exceptions.RequestException as err:
            # Only network/HTTP failures are retried; anything else (including
            # KeyboardInterrupt) propagates instead of being silently swallowed
            print(f"Genius lookup failed for row {ind} (attempt {attempt}/{max_attempts}): {err}")

    if song is not None:
        # Add information from genius to the songs database.
        # .loc writes into df_songs itself; chained df[col][row] = value may
        # write to a temporary copy instead.
        df_songs.loc[ind, 'song_art_image_url'] = song.song_art_image_url
        df_songs.loc[ind, 'lyrics'] = song.lyrics

In [None]:
# Report how many of the songs have lyrics attached
tot_songs = df_songs.shape[0]
no_lyrics = df_songs['lyrics'].isna().sum()
with_lyrics = tot_songs - no_lyrics

# Share of songs with lyrics, as a percentage rounded to two decimals
lyrics_ratio = with_lyrics / tot_songs
perc_songs = (lyrics_ratio * 100).round(2)

print(f"Lyrics retrieved for {with_lyrics} out of {tot_songs} songs ({perc_songs}%)")

In [None]:
# Spot check: show the lyrics stored under index label 0
df_songs['lyrics'][0]

In [None]:
# Write the songs table to disk without the index column
output_csv_path = r'/Users/MVadi/Desktop/running_songs.csv'
df_songs.to_csv(output_csv_path, index=False)

In [None]:
# df= pd.read_csv(r'/Users/MVadi/Desktop/running_songs.csv')
# df