# Code to get Spotify Data and Lyrics for a Spotify Playlist

In [None]:
import pandas as pd
import spotipy
import re
from spotipy.oauth2 import SpotifyClientCredentials
from bs4 import BeautifulSoup
import requests
import math
from lyricsgenius import Genius
from tqdm import tqdm_notebook

### Enter your Spotify Client ID and Client Secret below (obtainable from https://developer.spotify.com/), along with your Genius API secret (obtainable from https://docs.genius.com/)

In [None]:
# Input Spotify API credentials below
# Both values are placeholders: replace them with the Client ID and Client
# Secret of your own Spotify developer application before running the cells
# that follow.
cid = 'Enter Spotify CID Here'
secret ='Enter Spotify Secret Here'

# Input Genius API credentials below
# Placeholder as well; this value is handed to the Genius client constructor.
genius_secret = 'Enter Genius Secret Here'

In [None]:
# Registering APIs
# Build the Spotify client from the Client ID / Client Secret entered above.
# `sp` is a module-level global that the playlist and audio-feature functions
# below call directly.
client_credentials_manager = SpotifyClientCredentials(client_id=cid, client_secret=secret)
sp = spotipy.Spotify(client_credentials_manager = client_credentials_manager)

# Build the Genius client used for the lyric searches further down. It is
# configured with skip_non_songs=True, remove_section_headers=True,
# verbose=False and timeout=50 (presumably seconds -- confirm against the
# lyricsgenius documentation).
genius = Genius(genius_secret, skip_non_songs=True, remove_section_headers=True, verbose=False, timeout=50)

### Function which obtains the track info for one page (up to 100 songs) of a Spotify playlist

In [None]:
# Insert the URI as a string into the function
def get_playlist_tracks(uri_info, offset):
    """Fetch one page of a playlist's tracks as a DataFrame.

    Requests up to 100 playlist items starting at ``offset`` through the
    module-level ``sp`` client and keeps a fixed set of fields per track.

    Args:
        uri_info: Playlist identifier, passed unchanged to
            ``sp.playlist_tracks``.
        offset: Index of the first playlist item to request.

    Returns:
        A DataFrame with one row per usable track and the columns
        ``track_uri``, ``track_name``, ``artists`` (list of artist names),
        ``album``, ``duration_ms``, ``explicit`` and ``popularity``. The
        frame is empty (but keeps these columns) when the page holds no
        usable items.
    """
    columns = [
        'track_uri',
        'track_name',
        'artists',
        'album',
        'duration_ms',
        'explicit',
        'popularity',
    ]
    page = sp.playlist_tracks(uri_info, limit=100, offset=offset)

    rows = []
    for item in page['items']:
        track = item.get('track')
        # A playlist item can carry a null track (e.g. an entry that is no
        # longer available); skip it instead of failing on None['uri'].
        if track is None:
            continue
        rows.append({
            'track_uri': track['uri'],
            'track_name': track['name'],
            'artists': [artist['name'] for artist in track['artists']],
            'album': track['album']['name'],
            'duration_ms': track['duration_ms'],
            'explicit': track['explicit'],
            'popularity': track['popularity'],
        })

    # Passing `columns` keeps the schema stable even when `rows` is empty.
    return pd.DataFrame(rows, columns=columns)

### Paginating playlist songs and adding playlist info

In [None]:
# Paginate playlist tracks and add playlist name + info
def get_full_playlist(uri_info):
    """Collect every page of a playlist into a single DataFrame.

    Looks the playlist up through the module-level ``sp`` client, fetches its
    tracks 100 at a time with ``get_playlist_tracks`` and tags every row with
    the playlist's name and URI.

    Args:
        uri_info: Playlist identifier, passed unchanged to ``sp.playlist``
            and ``get_playlist_tracks``.

    Returns:
        The concatenated per-page DataFrames with two extra columns,
        ``playlist_name`` and ``playlist_uri``. The index is not reset, so
        labels repeat across pages.
    """
    playlist = sp.playlist(uri_info)
    total_tracks = playlist['tracks']['total']

    # get_playlist_tracks requests 100 items per call. Always fetch at least
    # one page so that an empty playlist produces an empty frame instead of
    # pd.concat raising on an empty list.
    page_count = max(1, math.ceil(total_tracks / 100))
    pages = [
        get_playlist_tracks(uri_info, page_number * 100)
        for page_number in range(page_count)
    ]

    df = pd.concat(pages)
    df['playlist_name'] = playlist['name']
    df['playlist_uri'] = playlist['uri']
    return df

### Function which combines songs from multiple playlists

In [None]:
# Combine songs from multiple playlists
def get_list_of_playlists(list_of_playlist_uris):
    """Fetch several playlists and stack their tracks into one DataFrame.

    Args:
        list_of_playlist_uris: Iterable of playlist identifiers, each passed
            to ``get_full_playlist``. A single identifier given as a plain
            string is also accepted and treated as a one-element list.

    Returns:
        One DataFrame holding the rows of every playlist, with a fresh
        0..n-1 index.
    """
    # A bare string is itself iterable, so without this guard each character
    # would be looked up as if it were a playlist.
    if isinstance(list_of_playlist_uris, str):
        playlist_uris = [list_of_playlist_uris]
    else:
        playlist_uris = list_of_playlist_uris

    print('Getting songs from playlists')
    frames = [
        get_full_playlist(uri_info)
        for uri_info in tqdm_notebook(playlist_uris)
    ]
    return pd.concat(frames).reset_index(drop=True)

### Adding track metadata from Spotify API

In [None]:
# Adding track info metadata
def get_playlist_info(list_of_playlist_uris):
    """Build the track table for the playlists and add audio features.

    Tracks are fetched with ``get_list_of_playlists``. For each distinct
    track URI the module-level ``sp`` client is asked for its audio
    features, one request per track. Rows are dropped when the URI already
    appeared in an earlier row or when no audio features come back.

    Args:
        list_of_playlist_uris: Playlist identifiers, passed unchanged to
            ``get_list_of_playlists``.

    Returns:
        The de-duplicated track DataFrame (index reset to 0..n-1) with the
        added columns ``danceability``, ``energy``, ``key``, ``loudness``,
        ``speechiness``, ``acousticness``, ``instrumentalness``,
        ``liveness``, ``valence`` and ``tempo``.
    """
    feature_names = (
        'danceability',
        'energy',
        'key',
        'loudness',
        'speechiness',
        'acousticness',
        'instrumentalness',
        'liveness',
        'valence',
        'tempo',
    )

    df = get_list_of_playlists(list_of_playlist_uris)
    features = {name: [] for name in feature_names}
    keep_row = []      # one bool per row of df, in row order
    seen_uris = set()  # O(1) duplicate check instead of re-scanning earlier rows

    print('Getting individual song metadata \n')
    for uri in tqdm_notebook(df['track_uri'].tolist()):
        if uri in seen_uris:
            keep_row.append(False)
            continue
        # Record the URI before the lookup: a later duplicate is dropped even
        # when this first occurrence turns out to have no features.
        seen_uris.add(uri)

        response = sp.audio_features(tracks=[uri])
        track_features = response[0] if response else None
        if track_features is None:
            keep_row.append(False)
            continue

        keep_row.append(True)
        for name in feature_names:
            # .get() for every feature so one missing key yields None rather
            # than aborting the whole run.
            features[name].append(track_features.get(name))

    # Positional mask, so the filtering does not depend on df's index labels.
    mask = pd.Series(keep_row, index=df.index, dtype=bool)
    df = df[mask].reset_index(drop=True)

    for name in feature_names:
        df[name] = features[name]

    return df

### Checks if the lyrics actually come from a song (using regular expressions)

In [None]:
# Check whether we actually have a song (basic string check on the first line of the lyrics)
def is_song(song):
    """Return ``song`` if its lyrics text looks like a real song, else None.

    The check looks only at the first line of ``song.lyrics``: the last
    whitespace-separated word on that line must contain ``'Lyrics'``.

    Args:
        song: An object with a string ``lyrics`` attribute, or None.

    Returns:
        ``song`` unchanged when the check passes. None when ``song`` is
        None, when the first line is empty or whitespace-only, or when its
        last word does not contain ``'Lyrics'``.
    """
    if song is None:
        return None

    first_line = song.lyrics.split('\n')[0]
    words = first_line.split()
    # An empty first line has no last word; treat it as "not a song" instead
    # of raising IndexError.
    if not words or 'Lyrics' not in words[-1]:
        return None
    return song

### Uses Genius API to get lyrics

In [None]:
# Function which gets lyrics of songs using Genius API
def get_lyrics(df):
    """Look up lyrics for every row of ``df`` with the Genius client.

    Each track is first searched with all of its artist names joined by
    ``" & "``. If that search yields nothing that passes ``is_song`` and the
    track has more than one artist, the search is retried with the first
    artist only.

    Args:
        df: DataFrame with a ``track_name`` column and an ``artists`` column
            whose values are lists of artist names.

    Returns:
        A list with one entry per row, in row order: the lyrics text, or
        None when no acceptable match was found.
    """
    lyrics = []
    print('Getting Lyrics for each song')

    # Iterate the two columns positionally so the lookup does not rely on
    # df having a 0..n-1 index.
    rows = zip(df['track_name'], df['artists'])
    for track_name, artists in tqdm_notebook(rows, total=len(df)):
        song = is_song(genius.search_song(track_name, " & ".join(artists)))
        if song is None and len(artists) > 1:
            # Fallback: search with the first listed artist only.
            song = is_song(genius.search_song(track_name, artists[0]))
        lyrics.append(None if song is None else song.lyrics)

    return lyrics

### Function which combines Spotify data and Genius lyrics into one dataframe

In [None]:

# Combining Spotify metadata and Genius lyrics into one dataframe
def get_playlist_songs(list_of_playlist_uris):
    """Return the playlist track table with a ``lyrics`` column attached.

    Args:
        list_of_playlist_uris: Playlist identifiers, passed unchanged to
            ``get_playlist_info``.

    Returns:
        The DataFrame produced by ``get_playlist_info`` plus a ``lyrics``
        column filled from ``get_lyrics``.
    """
    songs = get_playlist_info(list_of_playlist_uris)
    songs['lyrics'] = get_lyrics(songs)
    return songs

In [None]:
# Getting tracks
# The string below is a placeholder. The argument is meant to be a list of
# Spotify playlist URLs/URIs, one element per playlist; replace the
# placeholder with such a list before running this cell.
df = get_playlist_songs('Enter list of Spotify playlist URLs')