# Spotify API

In [3]:
import os
import pandas as pd
import numpy as np
import json
import matplotlib.pyplot as plt
import seaborn as sns
%config InlineBackend.figure_format ='retina'
import spotipy
import spotipy.util as util
from spotipy.oauth2 import SpotifyClientCredentials
from spotipy import oauth2
import random
from functools import reduce
import requests
from spotify.spotify_creds import SPOTIFY_CLIENT_ID, SPOTIFY_CLIENT_SECRET, SPOTIFY_USER

Spotify Developer API documentation
https://developer.spotify.com/documentation/web-api/

## Authentification

In [4]:
# secrets
cid = SPOTIFY_CLIENT_ID
secret = SPOTIFY_CLIENT_SECRET
username = SPOTIFY_USER

In [5]:
# First approach
client_credentials_manager = SpotifyClientCredentials(client_id=cid, client_secret=secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [6]:
# Second approach
def spotify_authentification():
    """
    Api authentification using requests
    """
    AUTH_URL = 'https://accounts.spotify.com/api/token'

    # POST
    auth_response = requests.post(AUTH_URL, {
        'grant_type': 'client_credentials',
        'client_id': cid,
        'client_secret': secret,
    })

    # convert the response to JSON
    auth_response_data = auth_response.json()

    # save the access token
    access_token = auth_response_data['access_token']

    headers = {
        'Authorization': 'Bearer {token}'.format(token=access_token)
    }
    return headers

headers = spotify_authentification()

In [7]:
sp.playlist(playlist_id = '2Zbn1h9DY5rJagR2NjeRxR?si=5329ce97a256482c')

{'collaborative': False,
 'description': '',
 'external_urls': {'spotify': 'https://open.spotify.com/playlist/2Zbn1h9DY5rJagR2NjeRxR'},
 'followers': {'href': None, 'total': 2},
 'href': 'https://api.spotify.com/v1/playlists/2Zbn1h9DY5rJagR2NjeRxR?additional_types=track',
 'id': '2Zbn1h9DY5rJagR2NjeRxR',
 'images': [{'height': 640,
   'url': 'https://mosaic.scdn.co/640/ab67616d0000b2730c2c6e30fedd6dc367484d79ab67616d0000b2732a896bf89ee1e87f1a774cb6ab67616d0000b27363b382b8786ce29eb28783c5ab67616d0000b273fee3bed41ab4a10414396504',
   'width': 640},
  {'height': 300,
   'url': 'https://mosaic.scdn.co/300/ab67616d0000b2730c2c6e30fedd6dc367484d79ab67616d0000b2732a896bf89ee1e87f1a774cb6ab67616d0000b27363b382b8786ce29eb28783c5ab67616d0000b273fee3bed41ab4a10414396504',
   'width': 300},
  {'height': 60,
   'url': 'https://mosaic.scdn.co/60/ab67616d0000b2730c2c6e30fedd6dc367484d79ab67616d0000b2732a896bf89ee1e87f1a774cb6ab67616d0000b27363b382b8786ce29eb28783c5ab67616d0000b273fee3bed41ab4a1041439

## Artist track features

In [15]:
def artist_track_features(artist = 'Monolink'):
    """
    This function will provide us with a dataframe with all sounds of an artist 
    and their underlying features.
    
    Inputs :
    artist = <Name of the artist>
    
    Outputs :
    > Dataframe containing the sounds of an artist
    """
    # Fetch artist tracks
    d = {}
    track_results = sp.search(q=artist, type='track',limit=50)
    for i, t in enumerate(track_results['tracks']['items']):
        track_id = t['id']
        if track_id not in d:
            d[track_id] = {'artist_feature' : t['artists'][0]['name'],
                           'query' : artist,
                           'track_name' : t['name'],
                           'popularity': t['popularity']}    
            d[track_id].update(sp.audio_features(d.keys())[0])
            
            # add genres
            headers = spotify_authentification()
            BASE_URL = 'https://api.spotify.com/v1/'
            re = requests.get(BASE_URL + 'tracks/' + track_id, headers=headers).json()
            if re != None :
                layer = requests.get(re['artists'][0]['href'], headers=headers).json()
                if 'genres' in layer.keys():
                    d[track_id].update({'genres':layer['genres']})
                else :
                    d[track_id].update({'genres':'None'})
    return pd.DataFrame.from_dict(d, orient='index')




In [20]:
track_dataframe = artist_track_features('Bicep')
track_dataframe.head()

Unnamed: 0,artist_feature,query,track_name,popularity,danceability,energy,key,loudness,mode,speechiness,...,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature,genres
0LiXd8DgPozcJVQq3QceEh,TR/ST,Bicep,Bicep,47,0.634,0.869,1,-7.34,1,0.0383,...,0.266,108.002,audio_features,0LiXd8DgPozcJVQq3QceEh,spotify:track:0LiXd8DgPozcJVQq3QceEh,https://api.spotify.com/v1/tracks/0LiXd8DgPozc...,https://api.spotify.com/v1/audio-analysis/0LiX...,277133,4,"[alternative dance, art pop, canadian electrop..."
2aJDlirz6v2a4HREki98cP,Bicep,Bicep,Glue,70,0.634,0.869,1,-7.34,1,0.0383,...,0.266,108.002,audio_features,0LiXd8DgPozcJVQq3QceEh,spotify:track:0LiXd8DgPozcJVQq3QceEh,https://api.spotify.com/v1/tracks/0LiXd8DgPozc...,https://api.spotify.com/v1/audio-analysis/0LiX...,277133,4,"[ambient house, electronica]"
0WfOuXw05LJq4ik1lVvTzi,Bicep,Bicep,Apricots,63,0.634,0.869,1,-7.34,1,0.0383,...,0.266,108.002,audio_features,0LiXd8DgPozcJVQq3QceEh,spotify:track:0LiXd8DgPozcJVQq3QceEh,https://api.spotify.com/v1/tracks/0LiXd8DgPozc...,https://api.spotify.com/v1/audio-analysis/0LiX...,277133,4,"[ambient house, electronica]"
73X9X7kDgsm4YeHpc8prf6,Bicep,Bicep,Apricots,65,0.634,0.869,1,-7.34,1,0.0383,...,0.266,108.002,audio_features,0LiXd8DgPozcJVQq3QceEh,spotify:track:0LiXd8DgPozcJVQq3QceEh,https://api.spotify.com/v1/tracks/0LiXd8DgPozc...,https://api.spotify.com/v1/audio-analysis/0LiX...,277133,4,"[ambient house, electronica]"
0HZtVLVL6oLU9WobKQxqGu,Bicep,Bicep,Lido,56,0.634,0.869,1,-7.34,1,0.0383,...,0.266,108.002,audio_features,0LiXd8DgPozcJVQq3QceEh,spotify:track:0LiXd8DgPozcJVQq3QceEh,https://api.spotify.com/v1/tracks/0LiXd8DgPozc...,https://api.spotify.com/v1/audio-analysis/0LiX...,277133,4,"[ambient house, electronica]"


In [None]:
track_dataframe.columns

## Get playlist features

📌 Change non-regular artists KeyError

In [17]:
def playlist_track_features(playlist_id = '501WYBYmwFxO4sZfY2ZimM'):
    '''
    Function that retrieving songs and the underlying features from a spotify playlist
    input : playlist_id
    output : Dataframe with features of a song [Dancabiliy, etc..]
    '''
    # base URL of all Spotify API endpoints
    BASE_URL = 'https://api.spotify.com/v1/'

    # actual GET request with proper header
    headers = spotify_authentification()
    r = requests.get(BASE_URL + 'playlists/' + playlist_id + '/tracks', headers=headers).json()
    d = {}
    for track in range(len(r['items'])):
        content = r['items'][track]['track']
        track_id = content['id']
        if track_id != None:
            if track_id not in d:
                d[track_id] = {'artist_feature' : content['artists'][0]['name'],
                               'track_name' : content['name'],
                               'popularity': content['popularity']} 
                d[track_id].update(sp.audio_features(track_id)[0])

                # add genres
                headers = spotify_authentification()
                re = requests.get(BASE_URL + 'tracks/' + track_id, headers=headers).json()
                layer = requests.get(re['artists'][0]['href'], headers=headers).json()
                if 'genres' in layer.keys():
                    d[track_id].update({'genres':layer['genres']})
                else :
                    d[track_id].update({'genres':'None'})

    df = pd.DataFrame.from_dict(d, orient='index')
    return df


In [18]:
df = playlist_track_features(playlist_id = '501WYBYmwFxO4sZfY2ZimM')
df.head()

KeyError: 'artists'

In [None]:
test = requests.get('https://api.spotify.com/v1/audio-analysis/7qHFYjFaYfk5HEV5rpZKDT', headers=headers).json()
test

In [None]:
test.keys()

## Playlist by names

In [None]:
from urllib.request import urlopen
from bs4 import BeautifulSoup

In [None]:
def playlist_names(genre='alternative-rock'):
    base_url = "https://www.indieshuffle.com/playlists/genre/"
    url = f"{base_url}/{genre}"
  
    html = urlopen(url)
    soup = BeautifulSoup(html, 'lxml')
    playlist_titles = soup.find_all('h5', attrs={"class":"mrg-bottom-10"})
    
    names = []
    for playlist in range(len(playlist_titles)):
        title = playlist_titles[playlist].text
        title = title.replace('\n', '')
        names.append(title)
        print(title)
    return names


## Extra features

In [None]:
# base URL of all Spotify API endpoints
BASE_URL = 'https://api.spotify.com/v1/'

# Track ID from the URI
track_id = '6y0igZArWVi6Iz0rj35c1Y'

# actual GET request with proper header
r = requests.get(BASE_URL + 'audio-analysis/' + track_id, headers=headers)
r.json()

In [None]:
# base URL of all Spotify API endpoints
BASE_URL = 'https://api.spotify.com/v1/'

# Track ID from the URI
track_id = '6y0igZArWVi6Iz0rj35c1Y'

# actual GET request with proper header
r = requests.get(BASE_URL + 'tracks/' + track_id, headers=headers).json()
requests.get(r['artists'][0]['href'], headers=headers).json()['genres']

In [None]:
requests.get(r['artists'][0]['href'], headers=headers).json()['genres']

In [None]:
r['artists'][0]['href']

In [None]:
r

# Playground

In [None]:
content.columns

In [None]:
features = ['artist_feature','track_name','track_name', 'popularity', 'danceability', 'energy',
       'key', 'loudness', 'mode', 'speechiness', 'acousticness',
       'instrumentalness', 'liveness', 'valence', 'tempo','type','duration_ms', 'time_signature',
       'genres']

In [None]:
df = content[features]

In [None]:
df = df[['danceability', 'energy','speechiness', 'acousticness',
       'instrumentalness', 'liveness']]

In [None]:
df.describe()

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from math import pi

categories = list(df)
values = df.mean().values.flatten().tolist()
values += values[:1] # repeat the first value to close the circular graph
angles = [n / float(len(categories)) * 2 * pi for n in range(len(categories))]
angles += angles[:1]

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(8, 8),
                       subplot_kw=dict(polar=True))

plt.xticks(angles[:-1], categories, color='grey', size=12)
plt.yticks(size=12)
plt.ylim(0, 1)
ax.set_rlabel_position(10)
ax.plot(angles, values, linewidth=1, linestyle='solid')
ax.fill(angles, values, 'skyblue', alpha=0.8)

plt.show()

# User to user recommendation

In [None]:
def get_user_tracks(user_name = 'cedricrenel'):
    '''
    Retrieves a users tracks based on his publicly available playlist
    input : spotify user_name
    output : dataframe containing a users songs and the songs features
    '''
    # Authentification
    headers = spotify_authentification()
    re = requests.get(f'https://api.spotify.com/v1/users/{user_name}/playlists', headers=headers).json()
    
    # Playlist id's
    playlist_ids = []
    names = []
    for i in range(len(re['items'])):
        playlist_ids.append(re['items'][i]['id'])
        names.append(re['items'][i]['name'])
    
    # Get song features
    for i, playlist in enumerate(playlist_ids):
        if i == 0:
            df = playlist_track_features(playlist)
            print(f"[{round(i/len(names) *100)} %] Playlist : {names[i]} ✔")
        else :
            new_df = playlist_track_features(playlist)
            df = pd.concat([df, new_df])
            print(f"[{round(i/len(names) *100)} %] Playlist : {names[i]} ✔")
    return df

In [None]:
toff = get_user_tracks(user_name = 'toff-93')

In [None]:
toff['genres'][3]

In [None]:
from sklearn.feature_extraction.text import CountVectorizer
vectorizer = CountVectorizer()

In [None]:
[x.replace(' ', '') for x in toff['genres'][3]]

In [None]:
[x.strip(' ') for x in toff['genres'][3]]

In [None]:
def clean_genres(liste):
    liste = [x.replace(' ', '') for x in liste]
    liste = ", ".join(liste)
    return liste

In [None]:
toff['clean_genres'] = toff['genres'].apply(clean_genres)

In [None]:
toff.shape

In [None]:
", ".join(toff['genres'][3])

In [None]:
ce = get_user_tracks(user_name = 'cedricrenel')

In [None]:
ce