# Spotify Data Project

The goal of this project is to transform my Spotify data into a series of visual graphics that can tell me about my listening habits, including who I listen to, when, and how often.

In [11]:
# Import libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()

import json
from pprint import pprint

import plotly
# NOTE: `your_username` and `your_apikey` are placeholders that are not defined
# anywhere in this notebook; substitute your own plot.ly username and API key
# before running, otherwise this line raises a NameError.
plotly.tools.set_credentials_file(username=(your_username), api_key=(your_apikey))
import plotly.plotly as py
import plotly.graph_objs as go

pd.set_option('display.max_rows', 500)

In [12]:
# Read the streaming history export (a JSON list of play records) and preview the first five.
# NOTE: the path below is an absolute, machine-specific Windows path; point it at
# your own copy of StreamingHistory.json before running.

with open('C:\\Users\\acer\\Documents\\Tutorials\\Spotify\\spotify2\\StreamingHistory.json', encoding='utf8') as f:
    data = json.load(f)

pprint(data[:5])

[{'artistName': 'Emma Bale',
  'endTime': '2018-11-26 08:31',
  'msPlayed': 2520,
  'trackName': 'Run - Lost Frequencies Radio Edit'},
 {'artistName': 'Thomas Hayes',
  'endTime': '2018-11-26 08:33',
  'msPlayed': 137320,
  'trackName': 'We Can Be Beautiful - Radio Edit'},
 {'artistName': 'Black Smurf',
  'endTime': '2018-11-26 08:36',
  'msPlayed': 165432,
  'trackName': 'Sorry I Warned U'},
 {'artistName': 'Black Smurf',
  'endTime': '2018-11-26 08:39',
  'msPlayed': 176875,
  'trackName': 'Bankrolls'},
 {'artistName': 'Black Smurf',
  'endTime': '2018-11-26 08:41',
  'msPlayed': 132284,
  'trackName': '411'}]


In [13]:
# Convert from JSON to a data frame

spotify_data = pd.DataFrame()

def extract_json_value(column_name, data_source=None):
    """
    Pull a single field out of every streaming-history record.

    Parameters
    ----------
    column_name : str
        Key to read from each record (for example 'artistName').
    data_source : list of dict, optional
        Records to read from. When omitted, the notebook-level ``data`` list is
        used. Passing the records explicitly keeps the function usable after
        later cells rebind ``data`` to plotly traces.

    Returns
    -------
    list
        One value per record, in the order the records appear.

    Raises
    ------
    KeyError
        If a record does not contain ``column_name``.
    """
    if data_source is None:
        # Resolve the global at call time so the default follows the current `data`.
        data_source = data

    return [record[column_name] for record in data_source]

spotify_data['artist_name'] = extract_json_value('artistName')
spotify_data['end_time'] = extract_json_value('endTime')
spotify_data['ms_played'] = extract_json_value('msPlayed')
spotify_data['track_name'] = extract_json_value('trackName')

In [14]:
spotify_data.sample(10)

Unnamed: 0,artist_name,end_time,ms_played,track_name
2021,Pouya,2018-11-30 11:37,145609,Daddy Issues
1958,JGRXXN,2018-11-30 07:03,74160,Mask & Tones
986,Dretti Franks,2018-11-28 21:31,194,The Mud
1154,Lights & Motion,2018-11-28 21:31,0,Everest
2611,Datsik,2018-12-04 06:44,106267,Monster
2452,Trap City,2018-12-03 10:02,187988,Wasted
1722,Denzel Curry,2018-11-29 07:38,175906,SWITCH IT UP | ZWITCH 1T UP
394,Ramirez,2018-11-28 04:46,144812,The Bodies Fall Just Like the Leaves
686,Denzel Curry,2018-11-28 07:41,189974,Ultimate
782,Ramirez,2018-11-28 15:16,7720,Na Na Na Na Na (Caught Slippin)


In [15]:
spotify_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2827 entries, 0 to 2826
Data columns (total 4 columns):
artist_name    2827 non-null object
end_time       2827 non-null object
ms_played      2827 non-null int64
track_name     2827 non-null object
dtypes: int64(1), object(3)
memory usage: 88.4+ KB


# Format columns and add variables

In [16]:
# Convert the timestamp column to datetime

spotify_data['end_time'] = pd.to_datetime(spotify_data['end_time'])

In [17]:
# Derive seconds and minutes from the raw millisecond counts, then discard the raw column

spotify_data['seconds_played'] = spotify_data['ms_played'] / 1000
spotify_data['minutes_played'] = spotify_data['seconds_played'] / 60

spotify_data.drop(columns='ms_played', inplace=True)

In [18]:
spotify_data.sample(10)

Unnamed: 0,artist_name,end_time,track_name,seconds_played,minutes_played
1116,$uicideBoy$,2018-11-28 21:31:00,Magazine,0.682,0.011367
881,ILLENIUM,2018-11-28 21:31:00,Lost,0.0,0.0
2083,Devilish Trio,2018-12-01 04:04:00,Trinity,168.557,2.809283
1759,Denzel Curry,2018-11-29 09:27:00,Ice Age,56.76,0.946
13,Black Smurf,2018-11-26 09:01:00,Inner Conflict,131.213,2.186883
70,Husman,2018-11-26 11:19:00,We Won't Fall - Extended Mix,6.57,0.1095
1490,The Chainsmokers,2018-11-28 21:31:00,Paris - VINAI Remix,2.379,0.03965
518,Lukas Graham,2018-11-28 04:46:00,7 Years,0.356,0.005933
448,Armin van Buuren,2018-11-28 04:46:00,Blah Blah Blah (Mix Cut),0.0,0.0
1010,5 Seconds of Summer,2018-11-28 21:31:00,Woke Up In Japan,0.36,0.006


# Most popular artists and songs

In [19]:
# Rank artists by their number of plays and keep the 25 most played

most_popular_artists_by_count = (
    spotify_data
    .groupby(by='artist_name')['track_name']
    .count()
    .sort_values(ascending=False)
    .head(25)
)

print('The most played artists by count were: \n\n{}'.format(most_popular_artists_by_count))

The most played artists by count were: 

artist_name
Ramirez             298
Black Smurf         297
$uicideBoy$         260
Denzel Curry        189
Trap City           161
Armin van Buuren     89
Wasted Penguinz      64
deadmau5             50
Night Lovell         43
Krewella             35
BONES                35
Devilish Trio        32
Andrew Rayel         27
Joyner Lucas         27
Yung Simmie          27
Cyber                24
Miss K8              24
Triple One           23
Headhunterz          21
RL Grime             20
Lights & Motion      19
Sullivan King        18
Knife Party          16
PhaseOne             16
Excision             15
Name: track_name, dtype: int64


In [20]:
# Visualize the most popular artists with a standard bar chart
#
# NOTE: this cell rebinds the global `data`, which until now held the parsed
# streaming-history records read by extract_json_value(). Re-running the
# JSON-to-data-frame cell after this one therefore requires reloading the file first.

data = [
    
    go.Bar(
            # One bar per artist; the play count is both the bar height and its label.
            x=most_popular_artists_by_count.index,
            y=most_popular_artists_by_count,
            text=most_popular_artists_by_count,
            textposition='auto',
            opacity=0.75
            
    )]

layout = go.Layout(
    title='Popularity of Artists by Count',
    
    yaxis= dict(
        title='Number of Times Played',
        gridcolor='rgb(255, 255, 255)',
        zerolinewidth=1,
        ticklen=5,
        gridwidth=2,
        titlefont=dict(size=15))
)

fig = go.Figure(data=data, layout=layout)
        
# Sends the figure to the plot.ly account configured above and embeds it in the notebook.
py.iplot(fig, filename='popular_artists')

High five! You successfully sent some data to your account on plotly. View your plot in your browser at https://plot.ly/~bart23/0 or inside your plot.ly account where it is named 'popular_artists'



Consider using IPython.display.IFrame instead



In [21]:
# Rank artists by total minutes listened and keep the top 20

most_popular_artists_by_time = (
    spotify_data
    .groupby(by='artist_name')['minutes_played']
    .sum()
    .sort_values(ascending=False)
    .head(20)
)

most_popular_artists_by_time

artist_name
Black Smurf        469.178133
Ramirez            467.044667
Trap City          339.477800
Denzel Curry       302.793900
$uicideBoy$        285.568917
Wasted Penguinz    185.365450
Devilish Trio       75.726867
Oolacile            53.443533
PhaseOne            48.977750
Sullivan King       47.944083
Excision            44.674167
Yung Simmie         44.671317
Krewella            42.790350
Andrew Rayel        29.095900
Triple One          29.032417
Solstice            25.285450
RL Grime            24.779767
Headhunterz         24.139583
Sephyx              24.123417
Fat Nick            21.665600
Name: minutes_played, dtype: float64

In [22]:
# Rank track names by how often they were played and keep the top 20

most_popular_songs = (
    spotify_data['track_name']
    .value_counts()
    .sort_values(ascending=False)
    .head(20)
)

most_popular_songs

Sarcophagus III                                  20
Bankrolls                                        18
Sick & Tired                                     15
Money Hypnosis                                   14
SIRENS | Z1RENZ [FEAT. J.I.D | J.1.D]            14
The Tears from Marys Eyes                        14
All Gas No Brakes                                14
New Pain                                         14
Budget Cuts                                      13
The Mystical Warlock                             13
Inner Conflict                                   13
Northern Lights                                  12
I Am Hustle God                                  12
Full Metal                                       11
Intro                                            11
Self Management                                  11
The River Will Remain Cold                       11
Knotty Head                                      11
THE BLACKEST BALLOON | THE 13LACKEZT 13ALLOON    11
Day in the L

# Time of day to listen

In [23]:
# Create time of day variable

def time_of_day(datetime_column, df=None):
    
    """
    Label every timestamp in a datetime column with the part of the day it falls in.
    
    Hours 0-11 are 'morning', hours 12-16 are 'afternoon', and hours 17-23 are 'night'.
    
    Parameters
    ----------
    datetime_column : str
        Name of the column in ``df`` holding the timestamps.
    df : pandas.DataFrame, optional
        Frame to read from. When omitted, the notebook-level ``spotify_data`` is
        looked up at call time, so re-creating that frame never leaves this
        function pointing at a stale copy.
    
    Returns
    -------
    pandas.Categorical
        One label per row, ordered morning < afternoon < night.
    """
    
    if df is None:
        df = spotify_data
    
    labels = ['morning', 'afternoon', 'night']
    
    parts_of_day = []
    
    for stamp in df[datetime_column]:
        
        hour = stamp.hour
        
        # Strictly before noon is morning; 12:00-12:59 belongs to the afternoon.
        if hour < 12:
            parts_of_day.append('morning')
        elif hour < 17:
            parts_of_day.append('afternoon')
        else:
            parts_of_day.append('night')
    
    return pd.Categorical(parts_of_day, categories=labels, ordered=True)

In [24]:
from datetime import datetime
import time

def datetime_from_utc_to_local(utc_datetime):
    
    """
    Shift a naive UTC datetime (or a column of them) to this machine's local time.
    
    The shift is the machine's UTC offset at the moment of the call, applied
    uniformly to the whole input. Timestamps on the other side of a
    daylight-saving change are therefore shifted by the current offset, not by
    the offset that was in force on their own date.
    
    Parameters
    ----------
    utc_datetime : datetime, pandas.Series or anything supporting ``+ timedelta``
        Naive value(s) expressed in UTC.
    
    Returns
    -------
    Same type as the input, moved by the local UTC offset.
    """
    # Local import: the notebook only imports `datetime` from the datetime module.
    from datetime import timezone
    
    now_timestamp = time.time()
    local_now = datetime.fromtimestamp(now_timestamp)
    # datetime.utcfromtimestamp() is deprecated; build the same naive UTC value
    # from a timezone-aware one instead.
    utc_now = datetime.fromtimestamp(now_timestamp, timezone.utc).replace(tzinfo=None)
    
    return utc_datetime + (local_now - utc_now)

In [25]:
## Convert from UTC time to the local time zone of the machine running the notebook,
## then label each play with its time of day.
## NOTE(review): this assumes `end_time` is recorded in UTC and that the machine's
## time zone is the listener's (the original note said eastern time) — confirm.

spotify_data['local_time'] = datetime_from_utc_to_local(spotify_data.end_time)
spotify_data['local_time_of_day'] = time_of_day('local_time')

In [26]:
# Store the weekday name of each play as an ordered categorical, Monday first

spotify_data['local_day_of_week'] = pd.Categorical(
    spotify_data['local_time'].dt.day_name(),
    categories=['Monday','Tuesday','Wednesday','Thursday','Friday','Saturday', 'Sunday'],
    ordered=True,
)

In [27]:
## Total minutes listened per (day of week, time of day) cell, scaled to minutes per week

# aggfunc is given as the string 'sum': recent pandas releases warn when a NumPy
# callable such as np.sum is passed here, and the string selects the same aggregation.
time_of_day_local_pivot = spotify_data.pivot_table(columns='local_time_of_day', index='local_day_of_week', 
                                             values='minutes_played', aggfunc='sum')

start_date = spotify_data.local_time.min()
end_date = spotify_data.local_time.max()

# NOTE: .days keeps only whole days of the covered span, so the divisor is
# (whole days) / 7 rather than the exact number of weeks.
difference_in_weeks = (end_date - start_date).days / (7)

# Despite the "utc" in its name, this is the local-time pivot divided by the number of weeks.
time_of_day_utc_pivot = time_of_day_local_pivot.divide(difference_in_weeks)

In [28]:
# Heatmap of minutes per week: time of day on the x axis, day of week on the y axis.
# `time_of_day_utc_pivot` holds the local-time pivot divided by the number of weeks.
trace = go.Heatmap(z=time_of_day_utc_pivot.values,
                  x=time_of_day_utc_pivot.columns,
                  y=time_of_day_utc_pivot.index,
                  colorscale='Greens',
                  reversescale=True)
# NOTE: rebinds the global `data` to the list of traces for this figure.
data=[trace]
py.iplot(data, filename='spotify_heatmap')

# Higher likelihood to play a song

In [29]:
## Read in playlist data

with open('C:\\Users\\acer\\Documents\\Tutorials\\Spotify\\spotify2\\Playlist.json', encoding='utf8') as f:
    playlist_data = json.load(f)

In [30]:
my_playlist = pd.DataFrame()

def extract_json_value_playlist(column_name, data_source=None):
    
    """
    Collect one field from every track of every playlist in a playlist export.
    
    Parameters
    ----------
    column_name : str
        Key to read from each track entry (for example 'artistName').
    data_source : dict, optional
        Parsed playlist export with a 'playlists' list whose items may carry a
        'tracks' list. When omitted, the notebook-level ``playlist_data`` is
        looked up at call time.
    
    Returns
    -------
    list
        One value per track, playlist by playlist. A track that lacks
        ``column_name`` contributes ``None`` so that lists extracted for
        different columns stay the same length and line up row by row.
    """
    
    if data_source is None:
        data_source = playlist_data
    
    column_values = []
    
    # Visit every playlist. The previous loop bound was the number of keys in the
    # first playlist entry, which silently skipped most playlists.
    for playlist in data_source['playlists']:
        
        # A playlist without a 'tracks' entry simply contributes nothing.
        for track in playlist.get('tracks') or []:
            
            column_values.append(track.get(column_name))

    return column_values

my_playlist['artist_name'] = extract_json_value_playlist('artistName')
my_playlist['track_name'] = extract_json_value_playlist('trackName')

In [31]:
my_playlist.sample(10)

Unnamed: 0,artist_name,track_name
40,Knife Party,Give It Up
111,Seven Lions,Leaving Earth
33,Knife Party,Rage Valley
1,Andrew Rayel,We Bring The Love
115,Seven Lions,Strangers
63,Datsik,Freakuency
5,Wasted Penguinz,Euphoria
184,Victor Niglio,Blackout (feat. Hype Turner) - Original Mix
131,Various Artists,Finished Symphony - deadmau5 Remix
23,Armin van Buuren,Gotta Be Love


In [32]:
# Read in the library information

with open('C:\\Users\\acer\\Documents\\Tutorials\\Spotify\\spotify2\\YourLibrary.json', encoding='utf8') as f:
    library_data = json.load(f)

In [33]:
library_df = pd.DataFrame()

def extract_json_value_library(column_name, data_source=None):
    
    """
    Collect one field from every track entry of a library export.
    
    Parameters
    ----------
    column_name : str
        Key to read from each entry under 'tracks' (for example 'artist').
    data_source : dict, optional
        Parsed library export containing a 'tracks' list. When omitted, the
        notebook-level ``library_data`` is looked up at call time, so reloading
        the file never leaves this function reading a stale copy.
    
    Returns
    -------
    list
        One value per track, in file order.
    
    Raises
    ------
    KeyError
        If 'tracks' is missing or a track lacks ``column_name``.
    """
    
    if data_source is None:
        data_source = library_data
    
    return [track[column_name] for track in data_source['tracks']]

library_df['artist_name'] = extract_json_value_library('artist')
library_df['album'] = extract_json_value_library('album')
library_df['track_name'] = extract_json_value_library('track')

In [34]:
library_df.sample(10)

Unnamed: 0,artist_name,album,track_name
379,Armin van Buuren,Live at Ultra Music Festival Miami 2018 (Highl...,Live at Ultra Music Festival Miami 2018 (Mix C...
1334,Yves V,Blow,Blow
672,Inteus,Chapter III,Man on a Mission
498,Ayla,Live at Ultra Music Festival Miami 2018 (Highl...,Ayla (Mix Cut) - Ben Nicky & Luke Bond Remix
403,Sebastian Ingrosso,Calling (Lose My Mind),Calling (Lose My Mind) - Extended Club Mix
167,Trap City,The Greatest Beats,Future Trap
176,Miss K8,Magnet,Scream - Radio Edit
797,John Dahlbäck,We Were Gods,We Were Gods - Radio Edit
1446,San Holo,I Still See Your Face,I Still See Your Face
509,Lit Lords,Swings,Swings


In [35]:
## Create a series of zipped artist and track names to be able to match values and see song overlap

artist_name_track_name_all = list(zip(spotify_data.artist_name, spotify_data.track_name))
artist_name_track_name_playlist = list(zip(my_playlist.artist_name, my_playlist.track_name))
artist_name_track_name_library = list(zip(library_df.artist_name, library_df.track_name))

In [36]:
# Flag each play with 1 when its (artist, track) pair is in a playlist / the library, else 0.
# Membership is tested against sets so each lookup is a hash probe rather than a
# scan of the whole playlist or library list for every play.

playlist_pairs = set(artist_name_track_name_playlist)
library_pairs = set(artist_name_track_name_library)

song_in_playlist = [1 if pair in playlist_pairs else 0 for pair in artist_name_track_name_all]
song_in_library = [1 if pair in library_pairs else 0 for pair in artist_name_track_name_all]

In [37]:
# Add lists created above to the spotify data frame

spotify_data['song_in_playlist'] = song_in_playlist
spotify_data['song_in_library'] = song_in_library

In [38]:
# Share of all plays whose song also appears in a playlist / in the library
print('{0:.0%} of the total songs played were also featured in a playlist'.format(
    spotify_data['song_in_playlist'].sum() / len(spotify_data)))
print('{0:.0%} of the total songs played were also featured in my library'.format(
    spotify_data['song_in_library'].sum() / len(spotify_data)))

3% of the total songs played were also featured in a playlist
87% of the total songs played were also featured in my library


In [39]:
# Determine the number of times a song was played if it's in a playlist or library compared to not being in one
#
# A single groupby yields one row per distinct (artist, track) pair, replacing a
# filter of the whole data frame for every unique song. The membership flags are
# constant within a pair, so their maximum tells whether the pair is flagged.
# The resulting lists feed box plots, so the order of their entries does not matter.

plays_by_song = spotify_data.groupby(['artist_name', 'track_name'])

play_counts = plays_by_song.size()
song_is_in_playlist = plays_by_song['song_in_playlist'].max() > 0
song_is_in_library = plays_by_song['song_in_library'].max() > 0

num_times_played_playlist = play_counts[song_is_in_playlist].tolist()
num_times_played_nonplaylist = play_counts[~song_is_in_playlist].tolist()

num_times_played_library = play_counts[song_is_in_library].tolist()
num_times_played_nonlibrary = play_counts[~song_is_in_library].tolist()

In [40]:
# Box plot comparing plays per song across the four categories.
# x_data and y_data are parallel lists: label i names the distribution at y_data[i].
x_data = ['Non Library', 'Library', 'Non Playlist', 'Playlist']

y_data = [num_times_played_nonlibrary, num_times_played_library, num_times_played_nonplaylist, num_times_played_playlist]

# Six colours are listed but zip() below stops after the four categories, so the last two are unused.
colors = ['rgba(93, 164, 214, 0.5)', 'rgba(255, 144, 14, 0.5)', 'rgba(44, 160, 101, 0.5)', 'rgba(255, 65, 54, 0.5)', 'rgba(207, 114, 255, 0.5)', 'rgba(127, 96, 0, 0.5)']

traces = []

# One box per category, with every individual song drawn as a jittered point.
for xd, yd, cls in zip(x_data, y_data, colors):
        traces.append(go.Box(
            y=yd,
            name=xd,
            boxpoints='all',
            jitter=0.5,
            whiskerwidth=0.2,
            fillcolor=cls,
            marker=dict(
                size=2,
            ),
            line=dict(width=1),
        ))

layout = go.Layout(
    title='Frequency of Song Playing by Category',
    yaxis=dict(
        autorange=True,
        showgrid=True,
        zeroline=True,
        dtick=5,
        gridcolor='rgb(255, 255, 255)',
        gridwidth=1,
        zerolinecolor='rgb(255, 255, 255)',
        zerolinewidth=2,
        title='Number of Plays'
    ),
    margin=dict(
        l=40,
        r=30,
        b=80,
        t=100,
    ),
    paper_bgcolor='rgb(243, 243, 243)',
    plot_bgcolor='rgb(243, 243, 243)',
    showlegend=False
)

fig = go.Figure(data=traces, layout=layout)
py.iplot(fig)

# Number of minutes listened per day

In [41]:
# Sum the minutes listened on each local calendar date (one row per date)

number_of_minutes_per_day = (
    spotify_data
    .set_index('local_time')
    .pipe(lambda plays: plays.groupby(by=plays.index.date)['minutes_played'].sum())
    .to_frame()
)

In [42]:
number_of_minutes_per_day.head()

Unnamed: 0,minutes_played
2018-11-26,293.35345
2018-11-27,352.738717
2018-11-28,456.773817
2018-11-29,501.023233
2018-11-30,287.705083


In [43]:
from plotly.grid_objs import Grid, Column
import plotly.figure_factory as ff

import time
from datetime import datetime

In [44]:
# Create a plot.ly-friendly table for minutes per day
# NOTE(review): the dates live in the frame's index and create_table() is called
# without an index argument, so the dates are presumably not shown in the
# rendered table — confirm, and pass index=True if they should be.

table = ff.create_table(number_of_minutes_per_day)
py.iplot(table, filename='minutes_per_day_spotify')

In [45]:
# Formatting for the plot.ly chart based on code adapted from an example in their gallery
#
# Builds one pair of grid columns per animation frame: frame k holds the first
# k + 1 dates ('x{k+1}') and their minutes ('y{k+1}'), so each frame extends the
# line by one day.

minutes_played = list(number_of_minutes_per_day['minutes_played'])

my_columns = []

# NOTE(review): the range stops at len - 1, so the longest column holds all but
# the final date and the last day never appears in the animation — confirm
# whether that is intended.
for k in range(len(number_of_minutes_per_day.index) - 1):
    my_columns.append(Column(number_of_minutes_per_day.index[:k + 1], 'x{}'.format(k + 1)))   
    my_columns.append(Column(minutes_played[:k + 1], 'y{}'.format(k + 1)))
grid = Grid(my_columns)
# The current timestamp is appended so every upload gets a distinct grid name.
py.grid_ops.upload(grid, 'minutes_played' + str(time.time()), auto_open=False)

'https://plot.ly/~bart23/8/'

In [46]:
# Animated filled line chart of minutes per day, driven by the uploaded grid columns.
# The initial trace shows only the first frame's columns ('x1' / 'y1').
data=[dict(type='scatter',
           xsrc=grid.get_column_reference('x1'),
           ysrc= grid.get_column_reference('y1'),
           name='Spotify',
           mode='lines',
           line=dict(color= 'rgb(0, 153, 255)'),
           fill='tozeroy',
           fillcolor='rgba(153, 204, 255, 0.5)')]

# Axis settings shared by x and y; autorange is off so the ranges set below stay fixed.
axis=dict(ticklen=4,
          mirror=True,
          zeroline=False,
          showline=True,
          autorange=False,
          showgrid=False)

layout = dict(title='Minutes Played per Day Spotify',
              font=dict(family='Balto'),
              showlegend=False,
              autosize=False,
              width=800,
              height=400,
              # x spans the first to the last date; y runs from 0 to 5 above the busiest day.
              xaxis=dict(axis, **{'nticks':12, 'tickangle':-45,
                                  'range': [min(number_of_minutes_per_day.index),
                                            max(number_of_minutes_per_day.index)]}),
              yaxis=dict(axis, **{'title': 'Minutes', 'range':[0,max(number_of_minutes_per_day.minutes_played)+5]}),
              # A single 'Play' button that starts the animation.
              updatemenus=[dict(type='buttons',
                                showactive=False,
                                y=1,
                                x=1.1,
                                xanchor='right',
                                yanchor='top',
                                pad=dict(t=0, r=10),
                                buttons=[dict(label='Play',
                                              method='animate',
                                              args=[None, dict(frame=dict(duration=50, redraw=False), 
                                                               transition=dict(duration=0),
                                                               fromcurrent=True,
                                                               mode='immediate')])])])

# One frame per uploaded column pair; the range must match the loop that built the grid.
frames=[{'data':[{'xsrc': grid.get_column_reference('x{}'.format(k + 1)),
                  'ysrc': grid.get_column_reference('y{}'.format(k + 1))}],
         'traces': [0]
        } for k in range(len(number_of_minutes_per_day.index) - 1)]

fig=dict(data=data, layout=layout, frames=frames)
# The current timestamp is appended so every run creates a distinctly named plot.
py.icreate_animations(fig, 'Minutes_Per_Day_Spotify' + str(time.time()))

# Listening by Day of Week

In [47]:
# Set the index as the column for easier slicing

date_index = spotify_data.set_index('local_time')
date_index.index = date_index.index.date

In [48]:
# Merge the date index with day of week; remove duplicated values
#
# `date_index` has one row per play, so the left merge repeats each daily total
# once per play on that date (validate='1:m' checks the dates are unique on the
# left side). Keeping the first row per date then restores one row per day with
# its weekday attached.

merged_minutes_per_day = number_of_minutes_per_day.merge(date_index[['local_day_of_week']], left_index=True, right_index=True,
                                                        how='left', validate='1:m')

merged_minutes_per_day = merged_minutes_per_day[~merged_minutes_per_day.index.duplicated(keep='first')]

In [49]:
merged_minutes_per_day.head()

Unnamed: 0,minutes_played,local_day_of_week
2018-11-26,293.35345,Monday
2018-11-27,352.738717,Tuesday
2018-11-28,456.773817,Wednesday
2018-11-29,501.023233,Thursday
2018-11-30,287.705083,Friday


In [50]:
# Collect the daily minute totals for each weekday (one list per weekday, Monday first)

days = ['Monday','Tuesday','Wednesday','Thursday','Friday','Saturday','Sunday']

time_per_day = [
    list(merged_minutes_per_day.loc[merged_minutes_per_day.local_day_of_week == weekday, 'minutes_played'])
    for weekday in days
]

In [51]:
# One box per weekday showing the spread of daily minutes, with the mean marked
traces = [
    go.Box(y=daily_minutes, name=weekday, boxmean=True)
    for weekday, daily_minutes in zip(days, time_per_day)
]

data = traces
py.iplot(data)

# Scatter Plot of Artist Plays

In [52]:
def time_of_day_scatter_df(df=None, time_of_day=None):
    
    """
    Summarise listening per artist, optionally restricted to one time of day.
    
    Parameters
    ----------
    df : pandas.DataFrame, optional
        Plays with 'artist_name', 'track_name', 'minutes_played' and
        'local_time_of_day' columns. When omitted, the notebook-level
        ``spotify_data`` is looked up at call time, so re-creating that frame
        never leaves this function summarising a stale copy.
    time_of_day : str, optional
        Value of 'local_time_of_day' to keep (for example 'morning'). When
        None (or otherwise falsy) every play is used.
    
    Returns
    -------
    pandas.DataFrame
        Indexed by artist name with the columns 'number_of_plays',
        'minutes_played' and 'unique_songs_played'.
    """
    
    if df is None:
        df = spotify_data
    
    if time_of_day:
        df = df[df['local_time_of_day'] == time_of_day]
        
    plays_by_artist = df.groupby(by='artist_name')
    
    number_of_plays = plays_by_artist['track_name'].count()
    minutes_played = plays_by_artist['minutes_played'].sum()
    # Length of each artist's array of distinct track names.
    unique_songs_played = [len(tracks) for tracks in plays_by_artist['track_name'].unique()]
    
    return pd.DataFrame({'number_of_plays': number_of_plays,
                         'minutes_played': minutes_played,
                         'unique_songs_played': unique_songs_played},
                        index=number_of_plays.index)

In [53]:
scatter_df = time_of_day_scatter_df()

In [54]:
scatter_df.head()

Unnamed: 0_level_0,number_of_plays,minutes_played,unique_songs_played
artist_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
$uicideBoy$,260,285.568917,129
12th Planet,3,10.88,2
3LAU,2,0.026367,1
5 Seconds of Summer,9,0.018433,9
6 Dogs,2,0.035417,2


In [55]:
# Scatter of artists: plays on x, minutes on y, marker size = number of unique songs.
trace = go.Scatter(
    
    x = scatter_df.number_of_plays,
    
    y = [round(i, 1) for i in scatter_df.minutes_played],
    
    # Hover text: artist name, minutes played (1 decimal) and unique-song count.
    text = ['Artist: ' + str(i) + '<br>Minutes Played: ' + str(round(scatter_df.loc[i,'minutes_played'], 1)) +
            '<br>Unique Songs: ' + str(scatter_df.loc[i, 'unique_songs_played']) for i in scatter_df.index],
   
    # Colour is each artist's row position in scatter_df, mapped onto the Viridis
    # scale; it distinguishes artists and carries no measurement.
    marker = dict(size = [round(i, 1) for i in scatter_df.unique_songs_played], cmin=0, cmax = len(scatter_df),
                  colorscale='Viridis', color=[i for i in range(len(scatter_df))]),
    
    mode = 'markers'
)

data = [trace]

layout = go.Layout(
    title='Amount of Time Listened and Songs by Artist',
    xaxis=dict(
        # The x values are play counts (number_of_plays), not distinct songs.
        title='Number of Songs Played',
        gridcolor='rgb(255, 255, 255)',
        zerolinewidth=1,
        ticklen=5,
        gridwidth=2,
    ),
    yaxis=dict(
        title='Minutes Played',
        gridcolor='rgb(255, 255, 255)',
        zerolinewidth=1,
        ticklen=5,
        gridwidth=2,
    ),
    paper_bgcolor='rgb(243, 243, 243)',
    plot_bgcolor='rgb(243, 243, 243)',
    
    # Boxed note placed at data coordinates (130, 20) explaining the marker size.
    annotations=[
        dict(
            
            text='Size = Number of Unique Songs Played',
            x=130,
            y=20,
            font={'color':'black'},
            bordercolor='#c7c7c7',
            borderwidth=2,
            borderpad=4,
            bgcolor='#fffaf0',
            opacity=0.8,
            showarrow=False

        )
    ]
)


fig = go.Figure(data=data, layout=layout)

py.iplot(fig, filename='artist-scatter')

In [56]:
# Artist scatter with buttons that switch between all plays, morning, afternoon and night.
# The four traces are identical apart from their source frame, name and initial
# visibility, so they are built by one helper instead of four copied definitions.

button_layer_1_height = 1.12
button_layer_2_height = 1.065


def _artist_scatter_trace(summary, name, visible):
    """
    Build the artist scatter trace for one per-artist summary frame.

    `summary` is a frame as returned by time_of_day_scatter_df(): indexed by
    artist with 'number_of_plays', 'minutes_played' and 'unique_songs_played'.
    Plays go on x, minutes (1 decimal) on y, marker size is the unique-song
    count and marker colour is the artist's row position on the Viridis scale.
    """
    hover_text = ['Artist: ' + str(artist) + '<br>Minutes Played: ' +
                  str(round(summary.loc[artist, 'minutes_played'], 1)) +
                  '<br>Unique Songs: ' + str(summary.loc[artist, 'unique_songs_played'])
                  for artist in summary.index]

    return go.Scatter(
        x=summary.number_of_plays,
        y=[round(minutes, 1) for minutes in summary.minutes_played],
        text=hover_text,
        marker=dict(size=[round(count, 1) for count in summary.unique_songs_played],
                    cmin=0, cmax=len(summary),
                    colorscale='Viridis', color=list(range(len(summary)))),
        mode='markers',
        visible=visible,
        name=name
    )


scatter_morning = time_of_day_scatter_df(time_of_day='morning')
scatter_afternoon = time_of_day_scatter_df(time_of_day='afternoon')
# The 'Evening' trace is backed by the 'night' time-of-day category.
scatter_evening = time_of_day_scatter_df(time_of_day='night')

# Only the all-day trace is visible until a button is pressed.
trace_all = _artist_scatter_trace(scatter_df, 'All', visible=True)
trace_morning = _artist_scatter_trace(scatter_morning, 'Morning', visible=False)
trace_afternoon = _artist_scatter_trace(scatter_afternoon, 'Afternoon', visible=False)
trace_evening = _artist_scatter_trace(scatter_evening, 'Evening', visible=False)

data = [trace_all, trace_morning, trace_afternoon, trace_evening]

# (button label, figure title) in the same order as the traces in `data`.
button_specs = [('All', 'All Day'),
                ('Morning', 'Morning'),
                ('Afternoon', 'Afternoon'),
                ('Evening', 'Evening')]

updatemenus = [
    dict(
        # Each button shows exactly one trace and sets the matching title.
        buttons=[
            dict(label=label,
                 method='update',
                 args=[{'visible': [position == shown for position in range(len(data))]},
                       {'title': title}])
            for shown, (label, title) in enumerate(button_specs)
        ],
        type='buttons',
        direction='right',
        pad={'r': 10, 't': 10},
        showactive=True,
        x=0.3,
        xanchor='left',
        y=button_layer_1_height,
        yanchor='top')
]

layout = dict(title='Artist Variance by Time of Day', showlegend=False,
              updatemenus=updatemenus)

fig = dict(data=data, layout=layout)

py.iplot(fig, filename='artist_by_time_of_day')