# Spotify Data Project

The goal of this project is to transform my Spotify data into a series of visual graphics that can tell me about my listening habits, including who I listen to, when, and how often.

In [4]:
# Import libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()

import json
from pprint import pprint
import chart_studio
import plotly
# SECURITY: Chart Studio credentials must not be committed with the notebook.
# The API key that was previously written on this line should be treated as
# leaked and regenerated in the Chart Studio account settings.
# Credentials are read from the PLOTLY_USERNAME / PLOTLY_API_KEY environment
# variables instead. When either is missing, set_credentials_file is not called
# at all; presumably chart_studio then uses whatever credentials were saved
# earlier on this machine -- confirm for your setup.
import os

_plotly_username = os.environ.get("PLOTLY_USERNAME")
_plotly_api_key = os.environ.get("PLOTLY_API_KEY")
if _plotly_username and _plotly_api_key:
    chart_studio.tools.set_credentials_file(username=_plotly_username, api_key=_plotly_api_key)
import chart_studio.plotly as py
import plotly.graph_objs as go

pd.set_option('display.max_rows', 500)

In [5]:
# Load the exported streaming history from JSON and preview the first five entries

with open('data/StreamingHistory5.json', encoding='utf8') as history_file:
    data = json.loads(history_file.read())

pprint(data[0:5])

[{'artistName': 'Freddie Dredd',
  'endTime': '2019-11-17 19:05',
  'msPlayed': 0,
  'trackName': 'Oh Darling'},
 {'artistName': '$uicideBoy$',
  'endTime': '2019-11-17 19:05',
  'msPlayed': 3170,
  'trackName': 'Back From The Dead'},
 {'artistName': '$uicideBoy$',
  'endTime': '2019-11-17 19:05',
  'msPlayed': 622,
  'trackName': 'Back From The Dead'},
 {'artistName': 'Chetta',
  'endTime': '2019-11-17 19:05',
  'msPlayed': 41140,
  'trackName': 'Jazzland'},
 {'artistName': '$uicideBoy$',
  'endTime': '2019-11-17 19:05',
  'msPlayed': 568,
  'trackName': 'Tony Hawk Pro Skater 4'}]


In [6]:
# Convert from JSON to a data frame

spotify_data = pd.DataFrame()

def extract_json_value(column_name, records=None):
    """
    Collect the value stored under one key from every streaming-history record.

    Parameters
    ----------
    column_name : str
        Key to read from each record (for example 'artistName').
    records : iterable of dict, optional
        Records to read from. When omitted, the notebook-level ``data``
        variable is used, which is what this function always read before.

    Returns
    -------
    list
        One value per record, in record order.

    Raises
    ------
    KeyError
        If a record does not contain ``column_name``.
    """

    # The fallback is looked up at call time. If `data` has been rebound to
    # something other than the streaming history in the meantime, pass the
    # records explicitly instead of relying on the global.
    if records is None:
        records = data

    return [record[column_name] for record in records]

# Copy each JSON field into its snake_case DataFrame column
for frame_column, json_key in (('artist_name', 'artistName'),
                               ('end_time', 'endTime'),
                               ('ms_played', 'msPlayed'),
                               ('track_name', 'trackName')):
    spotify_data[frame_column] = extract_json_value(json_key)

In [7]:
spotify_data.sample(10)

Unnamed: 0,artist_name,end_time,ms_played,track_name
294,GRXGVR,2019-11-19 07:30,187791,Scholar
549,Ramirez,2019-11-21 07:26,124500,Life
82,Miss K8,2019-11-17 19:29,722,Liquid8 - Ophidian Remix Radio Edit
282,Ramirez,2019-11-19 06:06,36042,Articuno Wrist
845,Iceberg Black,2019-11-24 20:16,640,TWISTED
395,NERVO,2019-11-20 05:14,370702,Worlds Collide - Matt Fax Remix
154,Scarlxrd,2019-11-18 05:40,64890,Face
552,Iceberg Black,2019-11-21 07:31,1839,Somethin' Wicked
92,Knife Party,2019-11-17 19:31,390,Superstar
87,Knife Party,2019-11-17 19:31,0,Resistance


In [8]:
spotify_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 869 entries, 0 to 868
Data columns (total 4 columns):
artist_name    869 non-null object
end_time       869 non-null object
ms_played      869 non-null int64
track_name     869 non-null object
dtypes: int64(1), object(3)
memory usage: 27.3+ KB


# Format columns and add variables

In [9]:
# Parse the end_time strings into pandas datetimes

end_time_strings = spotify_data['end_time']
spotify_data['end_time'] = pd.to_datetime(end_time_strings)

In [10]:
# Derive seconds and minutes from the millisecond play time, then discard the raw ms_played column

spotify_data['seconds_played'] = spotify_data['ms_played'] / 1000
spotify_data['minutes_played'] = spotify_data['seconds_played'] / 60

spotify_data.drop(columns='ms_played', inplace=True)

In [11]:
spotify_data.sample(10)

Unnamed: 0,artist_name,end_time,track_name,seconds_played,minutes_played
515,Pouya,2019-11-20 13:48:00,FIVE FIVE,1.002,0.0167
103,Miss K8,2019-11-17 19:31:00,Battlefield - Radio Edit,0.777,0.01295
643,Miss K8,2019-11-22 05:16:00,Elevate,66.305,1.105083
590,Iceberg Black,2019-11-21 11:18:00,Phoenix,376.45,6.274167
593,Night Lovell,2019-11-21 11:20:00,JOAN OF ARC,4.051,0.067517
142,Solstice,2019-11-18 05:29:00,Lights - Radio Edit,29.873,0.497883
694,Ramirez,2019-11-22 07:54:00,Fbgm,129.57,2.1595
826,Unaverage Gang,2019-11-24 08:47:00,Silly Boys,10.768,0.179467
557,Devilish Trio,2019-11-21 07:44:00,Time Will Tell,214.259,3.570983
391,Martin Garrix,2019-11-20 05:08:00,Used To Love (with Dean Lewis),2.848,0.047467


# Most popular artists and songs

In [12]:
# Rank artists by how many play records they have and keep the top 25

plays_per_artist = spotify_data.groupby('artist_name')['track_name'].count()
most_popular_artists_by_count = plays_per_artist.sort_values(ascending=False).iloc[:25]

print('The most played artists by count were: \n\n{}'.format(most_popular_artists_by_count))

The most played artists by count were: 

artist_name
Scarlxrd            86
Ramirez             82
Iceberg Black       82
Sybyr               62
$uicideBoy$         38
Solstice            27
BONES               24
Devilish Trio       22
Pouya               21
drip-133            18
Miss K8             16
Futuristic          15
Night Lovell        15
Ghostemane          14
Oliver Francis      13
Triple One          13
Burnout MacGyver    11
Aero Chord          11
Denzel Curry        10
Suicideyear         10
Knife Party          9
City Morgue          8
Germ                 8
Shakewell            8
Chetta               8
Name: track_name, dtype: int64


In [13]:
# Rank artists by total minutes listened and keep the top 20

minutes_per_artist = spotify_data.groupby('artist_name')['minutes_played'].sum()
most_popular_artists_by_time = minutes_per_artist.sort_values(ascending=False).iloc[:20]

most_popular_artists_by_time

artist_name
Iceberg Black       121.940850
Ramirez             120.390867
Scarlxrd            113.877550
Solstice             61.338233
Sybyr                59.501667
Devilish Trio        50.896683
$uicideBoy$          44.088967
Pouya                36.795983
BONES                36.168300
Suicideyear          34.712917
Night Lovell         33.012983
NERVO                21.112383
Burnout MacGyver     20.219833
Ghostemane           15.508933
Chetta               14.957167
Bill $Aber           13.966233
Germ                 13.924750
drip-133             12.943033
Shakewell            12.938700
John Dahlbäck        11.872483
Name: minutes_played, dtype: float64

In [14]:
# Count plays per track name and keep the 20 most played

track_play_counts = spotify_data['track_name'].value_counts()
most_popular_songs = track_play_counts.sort_values(ascending=False).iloc[:20]

most_popular_songs

Take My Hand                                           12
Wherever You Are                                       10
Demons Taking Over Me                                   9
Way Out of the City                                     6
U Naut Like Me                                          6
Piss on the Kitchen Floor                               5
Worlds Collide - Matt Fax Remix                         5
All I Want                                              5
RIPPINOUTYASPINE                                        5
Time Will Tell                                          5
HELL IS XN EARTH                                        5
The Sevenfold Minister                                  5
a BRAINDEAD civilisatixn                                5
Where Roads End                                         4
Japan                                                   4
ShrimpPizza                                             4
It's Early. My Head Is Throbbing                        4
HER BIO SAID D

# Time of day to listen

In [15]:
# Create time of day variable

def time_of_day(datetime_column, df=None):

    """
    Label every timestamp in a datetime column with the part of the day it falls in.

    Hours before 12:00 are 'morning', 12:00 up to (but not including) 17:00 are
    'afternoon', and 17:00 onwards are 'night'.

    Parameters
    ----------
    datetime_column : str
        Name of the column in ``df`` holding the timestamps.
    df : pandas.DataFrame, optional
        Frame to read the column from. When omitted, the notebook-level
        ``spotify_data`` frame is looked up at call time.

    Returns
    -------
    pandas.Categorical
        Ordered categorical (morning < afternoon < night) with one label per row.
    """

    if df is None:
        df = spotify_data

    def label_for_hour(hour):
        # Strict `< 12`: the 12:00-12:59 hour belongs to the afternoon, as documented.
        if hour < 12:
            return 'morning'
        if hour < 17:
            return 'afternoon'
        return 'night'

    labels = [label_for_hour(timestamp.hour) for timestamp in df[datetime_column]]

    return pd.Categorical(labels, categories=['morning', 'afternoon', 'night'], ordered=True)

In [16]:
from datetime import datetime
import time

def datetime_from_utc_to_local(utc_datetime):

    """
    Shift naive UTC timestamps by this machine's current UTC offset and return the result.

    Parameters
    ----------
    utc_datetime : datetime, pandas Series of datetimes, or anything else that
        supports adding a ``datetime.timedelta``
        The UTC value(s) to shift.

    Returns
    -------
    Same type as ``utc_datetime``, shifted by the local-minus-UTC offset.

    Notes
    -----
    The offset is measured once, at the moment of the call, from the clock and
    timezone of the machine running the notebook. The same offset is applied to
    every timestamp, so values recorded under a different daylight-saving rule
    than "now" are shifted by the current offset, not the one in force then.
    """
    # Local import: the notebook only imports `datetime` from the datetime module.
    from datetime import timezone

    now_timestamp = time.time()
    local_now = datetime.fromtimestamp(now_timestamp)
    # datetime.utcfromtimestamp() is deprecated since Python 3.12. Building an aware UTC
    # datetime and dropping tzinfo yields the same naive UTC wall-clock value.
    utc_now = datetime.fromtimestamp(now_timestamp, tz=timezone.utc).replace(tzinfo=None)
    return utc_datetime + (local_now - utc_now)

In [17]:
## Shift end_time (treated as UTC) to this machine's local time, then label each play with its time of day

local_end_times = datetime_from_utc_to_local(spotify_data['end_time'])
spotify_data['local_time'] = local_end_times
spotify_data['local_time_of_day'] = time_of_day('local_time')

In [18]:
# Name the weekday of each local timestamp and store it as an ordered categorical (Monday first)

weekday_order = ['Monday','Tuesday','Wednesday','Thursday','Friday','Saturday', 'Sunday']
weekday_names = spotify_data['local_time'].dt.day_name()

spotify_data['local_day_of_week'] = pd.Categorical(weekday_names, categories=weekday_order, ordered=True)

In [19]:
## Total minutes per day of week and time of day, divided by the number of weeks in the data

# aggfunc is passed as the string 'sum'. Passing the np.sum callable is mapped to the
# same groupby sum but raises a FutureWarning on recent pandas versions.
time_of_day_local_pivot = spotify_data.pivot_table(columns='local_time_of_day', index='local_day_of_week',
                                                   values='minutes_played', aggfunc='sum')

start_date = spotify_data.local_time.min()
end_date = spotify_data.local_time.max()

# Whole days between the first and last play, expressed in weeks.
# NOTE(review): this is 0 when the history spans less than one full day, which makes the
# division below produce inf/NaN -- confirm the export always covers at least a day.
difference_in_weeks = (end_date - start_date).days / (7)
# NOTE: despite the `utc` in its name, this frame is built from the local-time columns.
time_of_day_utc_pivot = time_of_day_local_pivot.divide(difference_in_weeks)

In [20]:
# Heatmap of the per-week minutes: time of day on the x axis, day of week on the y axis
trace = go.Heatmap(z=time_of_day_utc_pivot.values,
                  x=time_of_day_utc_pivot.columns,
                  y=time_of_day_utc_pivot.index,
                  colorscale='Greens',
                  reversescale=True)
# The trace list is passed directly instead of being bound to `data`: that name holds the
# raw streaming-history list that extract_json_value reads, and rebinding it here would
# break re-running the earlier cells.
py.iplot([trace], filename='spotify_heatmap')

# Higher likelihood to play a song

In [21]:
## Read in playlist data

with open('data/Playlist1.json', encoding='utf8') as playlist_file:
    playlist_data = json.loads(playlist_file.read())

In [22]:
my_playlist = pd.DataFrame()

def extract_json_value_playlist(column_name, data_source=None):

    """
    Collect one field from every track of every playlist in the playlist export.

    Parameters
    ----------
    column_name : str
        Key to read from each track entry (for example 'artistName').
    data_source : dict, optional
        Parsed playlist JSON with a 'playlists' list whose items carry a
        'tracks' list. When omitted, the notebook-level ``playlist_data`` is
        looked up at call time.

    Returns
    -------
    list
        The extracted values, in playlist order and then track order. Returns
        an empty list when there are no playlists.

    Notes
    -----
    Extraction is best effort: when a playlist or one of its tracks does not
    have the expected structure, the remainder of that playlist is skipped.
    """

    if data_source is None:
        data_source = playlist_data

    column_values = []

    # Iterate over the playlists themselves. The previous loop bound was the number of
    # keys in the first playlist, so playlists beyond that count were silently ignored.
    for playlist in data_source['playlists']:
        try:
            for track in playlist['tracks']:
                column_values.append(track[column_name])
        except (KeyError, TypeError, IndexError):
            # Unexpected structure: skip the rest of this playlist and keep going.
            continue

    return column_values

# Fill the playlist frame one column per extracted JSON field
for frame_column, json_key in (('artist_name', 'artistName'), ('track_name', 'trackName')):
    my_playlist[frame_column] = extract_json_value_playlist(json_key)

In [23]:
my_playlist.sample(frac=0.1, replace=True)

Unnamed: 0,artist_name,track_name


In [24]:
# Load the saved-library export from JSON

with open('data/YourLibrary.json', encoding='utf8') as library_file:
    library_data = json.loads(library_file.read())

In [25]:
library_df = pd.DataFrame()

def extract_json_value_library(column_name, data_source=None):

    """
    Collect one field from every entry under 'tracks' in the library export.

    Parameters
    ----------
    column_name : str
        Key to read from each track entry (for example 'artist').
    data_source : dict, optional
        Parsed library JSON containing a 'tracks' list. When omitted, the
        notebook-level ``library_data`` is looked up at call time, so a
        reloaded file is picked up without redefining this function.

    Returns
    -------
    list
        One value per track, in file order.

    Raises
    ------
    KeyError
        If 'tracks' is missing or a track lacks ``column_name``.
    """

    if data_source is None:
        data_source = library_data

    return [track[column_name] for track in data_source['tracks']]

# Fill the library frame one column per extracted JSON field
for frame_column, json_key in (('artist_name', 'artist'),
                               ('album', 'album'),
                               ('track_name', 'track')):
    library_df[frame_column] = extract_json_value_library(json_key)

In [26]:
library_df.sample(10)

Unnamed: 0,artist_name,album,track_name
1824,ILLENIUM,Awake,Beautiful Creatures (feat. MAX)
2192,Denzel Curry,RICKY,RICKY
1904,Syringe,Deathhead,Ugh
1154,Lost Tribe,Live at Ultra Music Festival Miami 2018 (Highl...,Gamemaster (Mix Cut) - Lightform Reborn Mix
1942,Virtual Riot,Disciple Alliance Vol. 4,Dog Fight
1625,Devilish Trio,The Collection,The Collection
204,Swedish House Mafia,One (Your Name),One (Your Name) - Radio Edit
801,$uicideBoy$,LIVE FAST DIE WHENEVER,Killing 2 Birds With 22 Stones ($UICIDEBOY$ X ...
45,Wavy Jone$,Death Note (feat. Bones),Death Note (feat. Bones)
415,Knife Party,Abandon Ship,Resistance


In [27]:
## Build (artist, track) tuples for each source so songs can be matched across them

pair_columns = ['artist_name', 'track_name']

artist_name_track_name_all = list(spotify_data[pair_columns].itertuples(index=False, name=None))
artist_name_track_name_playlist = list(my_playlist[pair_columns].itertuples(index=False, name=None))
artist_name_track_name_library = list(library_df[pair_columns].itertuples(index=False, name=None))

In [28]:
# Flag (1/0) whether each played (artist, track) pair also appears in a playlist or the library

# Sets give constant-time membership checks. Testing against the lists directly rescans
# the whole playlist/library for every single play.
playlist_pairs = set(artist_name_track_name_playlist)
library_pairs = set(artist_name_track_name_library)

song_in_playlist = [1 if pair in playlist_pairs else 0 for pair in artist_name_track_name_all]
song_in_library = [1 if pair in library_pairs else 0 for pair in artist_name_track_name_all]

In [29]:
# Attach the playlist/library membership flags as columns of the play history

for flag_column, flag_values in (('song_in_playlist', song_in_playlist),
                                 ('song_in_library', song_in_library)):
    spotify_data[flag_column] = flag_values

In [30]:
# Share of plays whose song is also in a playlist / in the library
total_plays = spotify_data.shape[0]
share_in_playlist = spotify_data['song_in_playlist'].sum() / total_plays
share_in_library = spotify_data['song_in_library'].sum() / total_plays

print('{0:.0%} of the total songs played were also featured in a playlist'.format(share_in_playlist))
print('{0:.0%} of the total songs played were also featured in my library'.format(share_in_library))

0% of the total songs played were also featured in a playlist
71% of the total songs played were also featured in my library


In [31]:
# Determine the number of times a song was played if it's in a playlist or library compared to not being in one

# One groupby pass over the play history replaces re-filtering the whole frame for
# every unique song. Each group is one (artist, track) pair.
plays_by_song = spotify_data.groupby(['artist_name', 'track_name'])

play_counts = plays_by_song.size()
# A song counts as "in playlist"/"in library" when any of its plays carries the flag.
in_playlist = plays_by_song['song_in_playlist'].sum() > 0
in_library = plays_by_song['song_in_library'].sum() > 0

# The lists hold one play count per song. They come out in sorted (artist, track) order
# instead of arbitrary set order, which does not matter for the box plots built from them.
num_times_played_playlist = play_counts[in_playlist].tolist()
num_times_played_nonplaylist = play_counts[~in_playlist].tolist()

num_times_played_library = play_counts[in_library].tolist()
num_times_played_nonlibrary = play_counts[~in_library].tolist()

In [32]:
# Box plot comparing per-song play counts across the four library/playlist groups

x_data = ['Non Library', 'Library', 'Non Playlist', 'Playlist']

y_data = [num_times_played_nonlibrary, num_times_played_library, num_times_played_nonplaylist, num_times_played_playlist]

# Only the first four colours are used: zip() stops at the shortest input
colors = ['rgba(93, 164, 214, 0.5)', 'rgba(255, 144, 14, 0.5)', 'rgba(44, 160, 101, 0.5)', 'rgba(255, 65, 54, 0.5)', 'rgba(207, 114, 255, 0.5)', 'rgba(127, 96, 0, 0.5)']

traces = [
    go.Box(
        y=play_counts_for_group,
        name=group_label,
        boxpoints='all',
        jitter=0.5,
        whiskerwidth=0.2,
        fillcolor=fill_colour,
        marker={'size': 2},
        line={'width': 1},
    )
    for group_label, play_counts_for_group, fill_colour in zip(x_data, y_data, colors)
]

background_colour = 'rgb(243, 243, 243)'
grid_colour = 'rgb(255, 255, 255)'

layout = go.Layout(
    title='Frequency of Song Playing by Category',
    yaxis={
        'autorange': True,
        'showgrid': True,
        'zeroline': True,
        'dtick': 5,
        'gridcolor': grid_colour,
        'gridwidth': 1,
        'zerolinecolor': grid_colour,
        'zerolinewidth': 2,
        'title': 'Number of Plays',
    },
    margin={'l': 40, 'r': 30, 'b': 80, 't': 100},
    paper_bgcolor=background_colour,
    plot_bgcolor=background_colour,
    showlegend=False,
)

fig = go.Figure(data=traces, layout=layout)
py.iplot(fig)

# Number of minutes listened per day

In [33]:
# Sum the minutes played on each local calendar date

plays_by_local_time = spotify_data.set_index('local_time')
minutes_by_date = plays_by_local_time.groupby(by=plays_by_local_time.index.date)['minutes_played'].sum()
number_of_minutes_per_day = pd.DataFrame(minutes_by_date)

In [34]:
number_of_minutes_per_day.head()

Unnamed: 0,minutes_played
2019-11-17,30.796683
2019-11-18,202.467617
2019-11-19,199.0429
2019-11-20,166.180683
2019-11-21,206.03735


In [35]:
from chart_studio.grid_objs import Grid, Column
import plotly.figure_factory as ff

import time
from datetime import datetime

In [36]:
# Create a plot.ly-friendly table for minutes per day

table = ff.create_table(number_of_minutes_per_day)
py.iplot(table, filename='minutes_per_day_spotify')

In [37]:
# Build and upload the plot.ly grid that feeds the animated chart (adapted from a plot.ly gallery example)

minutes_played = [minutes for minutes in number_of_minutes_per_day['minutes_played']]
day_index = number_of_minutes_per_day.index

my_columns = []

# Frame n holds the first n days as an x/y column pair.
# NOTE(review): the range stops one short of the number of days, so the final day never
# appears in any frame -- confirm this is intended.
for frame_number in range(1, len(day_index)):
    my_columns.extend([
        Column(day_index[:frame_number], 'x{}'.format(frame_number)),
        Column(minutes_played[:frame_number], 'y{}'.format(frame_number)),
    ])

grid = Grid(my_columns)
py.grid_ops.upload(grid, 'minutes_played' + str(time.time()), auto_open=False)

'https://plot.ly/~bart23/60/'

In [38]:
# Animated area chart of minutes played per day, driven by the uploaded grid columns.

# Initial trace: the first x/y column pair of the grid.
# It is bound to `animation_traces` and not `data`, because `data` is the name
# extract_json_value reads the raw streaming history from.
animation_traces = [dict(type='scatter',
                         xsrc=grid.get_column_reference('x1'),
                         ysrc=grid.get_column_reference('y1'),
                         name='Spotify',
                         mode='lines',
                         line=dict(color='rgb(0, 153, 255)'),
                         fill='tozeroy',
                         fillcolor='rgba(153, 204, 255, 0.5)')]

# Settings shared by both axes; each axis adds its own range and labels below.
axis = dict(ticklen=4,
            mirror=True,
            zeroline=False,
            showline=True,
            autorange=False,
            showgrid=False)

layout = dict(title='Minutes Played per Day Spotify',
              font=dict(family='Balto'),
              showlegend=False,
              autosize=False,
              width=800,
              height=400,
              xaxis=dict(axis, **{'nticks': 12, 'tickangle': -45,
                                  'range': [min(number_of_minutes_per_day.index),
                                            max(number_of_minutes_per_day.index)]}),
              # Fixed y range with 5 minutes of headroom above the busiest day.
              yaxis=dict(axis, **{'title': 'Minutes', 'range': [0, max(number_of_minutes_per_day.minutes_played) + 5]}),
              updatemenus=[dict(type='buttons',
                                showactive=False,
                                y=1,
                                x=1.1,
                                xanchor='right',
                                yanchor='top',
                                pad=dict(t=0, r=10),
                                buttons=[dict(label='Play',
                                              method='animate',
                                              args=[None, dict(frame=dict(duration=50, redraw=False),
                                                               transition=dict(duration=0),
                                                               fromcurrent=True,
                                                               mode='immediate')])])])

# One frame per uploaded column pair; each frame redraws trace 0 with one more day.
# The range must match the one used when the grid columns were created.
frames = [{'data': [{'xsrc': grid.get_column_reference('x{}'.format(k + 1)),
                     'ysrc': grid.get_column_reference('y{}'.format(k + 1))}],
           'traces': [0]
          } for k in range(len(number_of_minutes_per_day.index) - 1)]

fig = dict(data=animation_traces, layout=layout, frames=frames)
py.icreate_animations(fig, 'Minutes_Per_Day_Spotify' + str(time.time()))

# Listening by Day of Week

In [39]:
# Index the play history by local calendar date for easier slicing

date_index = spotify_data.set_index('local_time')
calendar_dates = date_index.index.date
date_index.index = calendar_dates

In [40]:
# Attach the weekday to each daily total, then keep a single row per date

weekday_by_date = date_index[['local_day_of_week']]

merged_minutes_per_day = number_of_minutes_per_day.merge(weekday_by_date, how='left',
                                                         left_index=True, right_index=True,
                                                         validate='1:m')

# The merge yields one row per play; only the first row of each date is kept
is_first_row_for_date = ~merged_minutes_per_day.index.duplicated(keep='first')
merged_minutes_per_day = merged_minutes_per_day[is_first_row_for_date]

In [41]:
merged_minutes_per_day.head()

Unnamed: 0,minutes_played,local_day_of_week
2019-11-17,30.796683,Sunday
2019-11-18,202.467617,Monday
2019-11-19,199.0429,Tuesday
2019-11-20,166.180683,Wednesday
2019-11-21,206.03735,Thursday


In [42]:
# Collect the daily minute totals for each weekday, Monday through Sunday

days = ['Monday','Tuesday','Wednesday','Thursday','Friday','Saturday','Sunday']

time_per_day = [
    list(merged_minutes_per_day.loc[merged_minutes_per_day['local_day_of_week'] == weekday, 'minutes_played'])
    for weekday in days
]

In [43]:
# One box per weekday showing the spread of daily minutes (boxmean=True also draws the mean)
traces = [
    go.Box(y=daily_minutes, name=weekday, boxmean=True)
    for weekday, daily_minutes in zip(days, time_per_day)
]

# The traces are passed directly instead of being bound to `data`: that name is what
# extract_json_value reads the raw streaming history from.
py.iplot(traces)

# Scatter Plot of Artist Plays

In [44]:
def time_of_day_scatter_df(df=None, time_of_day=None):

    """
    Summarise plays per artist, optionally restricted to one time of day.

    Parameters
    ----------
    df : pandas.DataFrame, optional
        Play history with 'artist_name', 'track_name', 'minutes_played' and
        (when filtering) 'local_time_of_day' columns. When omitted, the
        notebook-level ``spotify_data`` frame is looked up at call time, so
        columns added or frames rebound after this definition are picked up.
    time_of_day : str, optional
        Value of 'local_time_of_day' to keep (for example 'morning'). A falsy
        value keeps all rows.

    Returns
    -------
    pandas.DataFrame
        Indexed by artist name with columns 'number_of_plays',
        'minutes_played' (summed) and 'unique_songs_played'.
    """

    if df is None:
        df = spotify_data

    if time_of_day:
        df = df[df['local_time_of_day'] == time_of_day]

    grouped_artists = df.groupby(by='artist_name')

    number_of_plays = grouped_artists['track_name'].count()
    minutes_played = grouped_artists['minutes_played'].sum()
    # A plain list in the same group order as the Series above; it is aligned by position.
    unique_songs_played = [len(tracks) for tracks in grouped_artists['track_name'].unique()]

    scatter_df = pd.DataFrame({'number_of_plays': number_of_plays,
                               'minutes_played': minutes_played,
                               'unique_songs_played': unique_songs_played},
                              index=number_of_plays.index)

    return scatter_df

In [45]:
scatter_df = time_of_day_scatter_df()

In [46]:
scatter_df.head()

Unnamed: 0_level_0,number_of_plays,minutes_played,unique_songs_played
artist_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
$uicideBoy$,38,44.088967,31
12th Planet,1,0.850133,1
16BL,1,0.013233,1
5 Seconds of Summer,2,0.04075,1
6 Dogs,1,0.01265,1


In [47]:
# Scatter of artists: number of plays (x) against minutes played (y), with the marker
# size showing how many distinct songs were played and the hover text giving the details.
trace = go.Scatter(

    x = scatter_df.number_of_plays,

    y = [round(i, 1) for i in scatter_df.minutes_played],

    text = ['Artist: ' + str(i) + '<br>Minutes Played: ' + str(round(scatter_df.loc[i,'minutes_played'], 1)) +
            '<br>Unique Songs: ' + str(scatter_df.loc[i, 'unique_songs_played']) for i in scatter_df.index],

    # Colour is the artist's row position, used only to tell markers apart
    marker = dict(size = [round(i, 1) for i in scatter_df.unique_songs_played], cmin=0, cmax = len(scatter_df),
                  colorscale='Viridis', color=[i for i in range(len(scatter_df))]),

    mode = 'markers'
)

layout = go.Layout(
    title='Amount of Time Listened and Songs by Artist',
    xaxis=dict(
        title='Number of Songs Played',
        gridcolor='rgb(255, 255, 255)',
        zerolinewidth=1,
        ticklen=5,
        gridwidth=2,
    ),
    yaxis=dict(
        title='Minutes Played',
        gridcolor='rgb(255, 255, 255)',
        zerolinewidth=1,
        ticklen=5,
        gridwidth=2,
    ),
    paper_bgcolor='rgb(243, 243, 243)',
    plot_bgcolor='rgb(243, 243, 243)',

    # Legend-like note explaining the marker size, placed at fixed data coordinates
    annotations=[
        dict(

            text='Size = Number of Unique Songs Played',
            x=130,
            y=20,
            font={'color':'black'},
            bordercolor='#c7c7c7',
            borderwidth=2,
            borderpad=4,
            bgcolor='#fffaf0',
            opacity=0.8,
            showarrow=False

        )
    ]
)


# The trace list is passed directly instead of being bound to `data`: that name is what
# extract_json_value reads the raw streaming history from.
fig = go.Figure(data=[trace], layout=layout)

py.iplot(fig, filename='artist-scatter')

In [48]:
# Artist scatter with buttons that switch between all plays and each time of day.

button_layer_1_height = 1.12
button_layer_2_height = 1.065  # not referenced in this cell; kept in case another cell uses it

scatter_morning = time_of_day_scatter_df(time_of_day='morning')
scatter_afternoon = time_of_day_scatter_df(time_of_day='afternoon')
scatter_evening = time_of_day_scatter_df(time_of_day='night')


def _artist_scatter_trace(artist_stats, name, visible):
    """
    Build one scatter trace from a per-artist summary frame.

    x is the number of plays, y the minutes played (rounded to 0.1), the marker
    size the number of unique songs, and the hover text lists all three.
    ``name`` and ``visible`` are passed through to the trace.
    """
    hover_text = ['Artist: ' + str(artist) +
                  '<br>Minutes Played: ' + str(round(artist_stats.loc[artist, 'minutes_played'], 1)) +
                  '<br>Unique Songs: ' + str(artist_stats.loc[artist, 'unique_songs_played'])
                  for artist in artist_stats.index]

    return go.Scatter(
        x=artist_stats.number_of_plays,
        y=[round(minutes, 1) for minutes in artist_stats.minutes_played],
        text=hover_text,
        # Colour is the artist's row position, used only to tell markers apart
        marker=dict(size=[round(songs, 1) for songs in artist_stats.unique_songs_played],
                    cmin=0, cmax=len(artist_stats),
                    colorscale='Viridis', color=list(range(len(artist_stats)))),
        mode='markers',
        visible=visible,
        name=name,
    )


# Only the "All" trace is shown initially; the buttons below toggle visibility.
trace_all = _artist_scatter_trace(scatter_df, 'All', True)
trace_morning = _artist_scatter_trace(scatter_morning, 'Morning', False)
trace_afternoon = _artist_scatter_trace(scatter_afternoon, 'Afternoon', False)
trace_evening = _artist_scatter_trace(scatter_evening, 'Evening', False)

# Bound to its own name and not `data`, because `data` is the name extract_json_value
# reads the raw streaming history from.
time_of_day_traces = [trace_all, trace_morning, trace_afternoon, trace_evening]

# (button label, figure title) in the same order as time_of_day_traces
button_specs = [('All', 'All Day'),
                ('Morning', 'Morning'),
                ('Afternoon', 'Afternoon'),
                ('Evening', 'Evening')]

updatemenus = [
    dict(
        # Button number `position` shows only the trace at that position
        buttons=[
            dict(label=label,
                 method='update',
                 args=[{'visible': [index == position for index in range(len(button_specs))]},
                       {'title': title}])
            for position, (label, title) in enumerate(button_specs)
        ],
        type='buttons',
        direction='right',
        pad={'r': 10, 't': 10},
        showactive=True,
        x=0.3,
        xanchor='left',
        y=button_layer_1_height,
        yanchor='top',
    )
]

layout = dict(title='Artist Variance by Time of Day', showlegend=False,
              updatemenus=updatemenus)

fig = dict(data=time_of_day_traces, layout=layout)

py.iplot(fig, filename='artist_by_time_of_day')