Alright! I'm gonna try to get some info about the songs saved in my Spotify library.

In [29]:
%matplotlib inline

import numpy as np
import pandas as pd
import ujson
import spotipy
import spotipy.util

Please note that I had to configure my Spotify Dev account credentials (https://spotipy.readthedocs.io/en/latest/#authorization-code-flow) in order to make some of the following requests.

First I set up the scope (https://developer.spotify.com/web-api/using-scopes/) and the username, then request the songs in my library, keeping only some fields (I'm only gonna work with the following info: song name, artists, song duration, the date I added the song to my library, and its popularity).

In [66]:
# Scope string passed to the token prompt; 'user-library-read' is the scope
# used here for reading the user's saved tracks.
scope = 'user-library-read'
username = 'giuseppe.vincenzo'

# Ask spotipy for an access token for this user and scope.
token = spotipy.util.prompt_for_user_token(username, scope)

if not token:
    # Stop here with a clear message. Merely printing it would let execution
    # continue and fail below with an unrelated NameError, because
    # `saved_tracks_resp` would never have been assigned.
    raise RuntimeError('Couldn\'t get token for that username')

# Authenticated client, reused later to walk through the remaining pages.
spotipy_obj = spotipy.Spotify(auth=token)
# First page of the saved tracks, requested with a page size of 50.
saved_tracks_resp = spotipy_obj.current_user_saved_tracks(limit=50)

# 'total' is read from the response and reported as the number of tracks.
number_of_tracks = saved_tracks_resp['total']
print('%d tracks' % number_of_tracks)

def save_only_some_fields(track_response):
    """Reduce one saved-track item to the handful of fields used in this notebook.

    Args:
        track_response: mapping with an 'added_at' entry and a nested 'track'
            mapping holding 'name', 'artists' (an iterable of mappings that
            each have a 'name'), 'duration_ms' and 'popularity'.

    Returns:
        A new dict with the keys 'name' (converted with ``str``), 'artists'
        (list of artist names), 'duration_ms', 'popularity' and 'added_at'.

    Raises:
        KeyError: if any of the entries listed above is missing.
    """
    track = track_response['track']
    artist_names = []
    for artist in track['artists']:
        artist_names.append(artist['name'])

    summary = dict(name=str(track['name']), artists=artist_names)
    summary['duration_ms'] = track['duration_ms']
    summary['popularity'] = track['popularity']
    # The date lives on the outer item, not on the nested track.
    summary['added_at'] = track_response['added_at']
    return summary

# Collect the trimmed-down tracks page by page: handle the page already in
# hand, then keep following the 'next' link until the response has none.
tracks = []
while True:
    tracks.extend(save_only_some_fields(item) for item in saved_tracks_resp['items'])
    if not saved_tracks_resp['next']:
        break
    # Rebinds the response name to the following page, as the loop advances.
    saved_tracks_resp = spotipy_obj.next(saved_tracks_resp)

# Dump everything that was collected as indented JSON.
print(ujson.dumps(tracks, indent=4))



375 tracks
[
    {
        "popularity":61,
        "added_at":"2017-03-10T23:43:24Z",
        "name":"Yesterday - Remastered 2009",
        "artists":[
            "The Beatles"
        ],
        "duration_ms":125666
    },
    {
        "popularity":64,
        "added_at":"2017-03-10T23:43:19Z",
        "name":"Hey Jude - Remastered 2015",
        "artists":[
            "The Beatles"
        ],
        "duration_ms":425653
    },
    {
        "popularity":64,
        "added_at":"2017-03-10T23:43:15Z",
        "name":"Let It Be - Remastered 2009",
        "artists":[
            "The Beatles"
        ],
        "duration_ms":243026
    },
    {
        "popularity":65,
        "added_at":"2017-03-10T23:43:11Z",
        "name":"Come Together - Remastered 2009",
        "artists":[
            "The Beatles"
        ],
        "duration_ms":259946
    },
    {
        "popularity":59,
        "added_at":"2017-03-10T21:22:03Z",
        "name":"West End Girls - 2001 Remastered Version",

Let's reshape the collected data so it's easier to work with.

In [87]:
# One row per collected track dict; the dict keys become the columns.
tracks_df = pd.DataFrame(data=tracks)
# Raise the display row limit to the number of tracks so the table below
# is rendered without truncation.
pd.options.display.max_rows = len(tracks)
tracks_df

Unnamed: 0,added_at,artists,duration_ms,name,popularity
0,2017-03-10T23:43:24Z,[The Beatles],125666,Yesterday - Remastered 2009,61
1,2017-03-10T23:43:19Z,[The Beatles],425653,Hey Jude - Remastered 2015,64
2,2017-03-10T23:43:15Z,[The Beatles],243026,Let It Be - Remastered 2009,64
3,2017-03-10T23:43:11Z,[The Beatles],259946,Come Together - Remastered 2009,65
4,2017-03-10T21:22:03Z,[Pet Shop Boys],245360,West End Girls - 2001 Remastered Version,59
5,2017-03-07T17:03:30Z,[Novedades Carminha],157221,Antigua Pero Moderna,31
6,2017-03-07T16:36:30Z,[Toy Dolls],180000,Nellie the Elephant - 1984 Version,21
7,2017-03-07T14:59:19Z,[Toby Fox],57600,Bonetrousle,50
8,2017-03-06T10:46:25Z,[SBI Audio Karaoke],376146,Whatever (Karaoke Version),0
9,2017-03-05T18:59:24Z,"[Kygo, Selena Gomez]",220780,It Ain't Me (with Selena Gomez),91


In case there is more than one artist, I only care about the first one.

In [89]:
# Restore the default row-display limit.
pd.reset_option('display.max_rows')


def _first_artist(artists):
    """Return the first artist of a track, tolerating re-runs and empty lists.

    Args:
        artists: normally the list of artist names of one track. It may also
            be a value that was already reduced by an earlier run of this
            cell (a plain string, or None).

    Returns:
        The first element when `artists` is a non-empty list or tuple, None
        when it is an empty one, and `artists` itself otherwise.
    """
    if isinstance(artists, (list, tuple)):
        # An empty list has no first artist; indexing it would raise IndexError.
        return artists[0] if artists else None
    # Already reduced: indexing a string here would keep only its first letter.
    return artists


# The column is overwritten in place, so the helper has to be safe to apply
# again when the cell is re-executed.
tracks_df['artists'] = tracks_df['artists'].apply(_first_artist)

Unnamed: 0,added_at,artists,duration_ms,name,popularity
0,2017-03-10T23:43:24Z,The Beatles,125666,Yesterday - Remastered 2009,61
1,2017-03-10T23:43:19Z,The Beatles,425653,Hey Jude - Remastered 2015,64
2,2017-03-10T23:43:15Z,The Beatles,243026,Let It Be - Remastered 2009,64
3,2017-03-10T23:43:11Z,The Beatles,259946,Come Together - Remastered 2009,65
4,2017-03-10T21:22:03Z,Pet Shop Boys,245360,West End Girls - 2001 Remastered Version,59
5,2017-03-07T17:03:30Z,Novedades Carminha,157221,Antigua Pero Moderna,31
6,2017-03-07T16:36:30Z,Toy Dolls,180000,Nellie the Elephant - 1984 Version,21
7,2017-03-07T14:59:19Z,Toby Fox,57600,Bonetrousle,50
8,2017-03-06T10:46:25Z,SBI Audio Karaoke,376146,Whatever (Karaoke Version),0
9,2017-03-05T18:59:24Z,Kygo,220780,It Ain't Me (with Selena Gomez),91


Let's make some plots

In [196]:
from bokeh.charts import Histogram, Scatter, Donut
from bokeh.charts import show
from bokeh.io import output_notebook
from bokeh.palettes import Spectral6
from bokeh.models import HoverTool

# NOTE(review): `bokeh.charts` was dropped from later Bokeh releases (split out
# as the separate `bkcharts` package) — confirm the installed version still
# provides the chart classes used below.

# Render Bokeh output inside the notebook.
output_notebook()

# Distribution of the popularity column, as raw counts in 25 bins.
show(
    Histogram(
        tracks_df['popularity'],
        bins=25,
        density=False,
        title='Tracks popularity',
        plot_width=800,
    )
)

# Popularity plotted against track duration (in milliseconds).
show(
    Scatter(
        tracks_df,
        x='popularity',
        y='duration_ms',
        color='navy',
        title='popularity vs duration',
        plot_width=800,
    )
)

# Donut chart built from the artists column.
show(
    Donut(
        tracks_df['artists'],
        color=Spectral6,
        title='Number of tracks by artist',
        plot_width=800,
        plot_height=800,
    )
)

