In [257]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from collections import Counter
import datetime

import plotly.graph_objs as go
import plotly.offline as pyo
from plotly import subplots 

In [180]:
df = pd.read_json('my_spotify_data/endsong_0.json')
df1 = pd.read_json('my_spotify_data/endsong_1.json')
df2 = pd.read_json('my_spotify_data/endsong_2.json')
df3 = pd.read_json('my_spotify_data/endsong_3.json')

frames = [df, df1, df2, df3]

df = pd.concat(frames)

df.drop(['username','ip_addr_decrypted', 'platform', 'conn_country', 'user_agent_decrypted', 'spotify_track_uri', 'spotify_episode_uri', 'shuffle','skipped','offline_timestamp','offline','incognito_mode', 'reason_start'], axis=1, inplace=True)
df.columns = ['Date', 'Duration','Track', 'Artist','Album', 'Episode','Podcast', 'Reason to End']

df['Time'] = df['Date'].str.split('T', expand=True)[1].str[:-1]
df['Date'] = df['Date'].str.split('T', expand=True)[0]
df[['Hours','Minutes','Seconds']] = df['Time'].str.split(':',n=2, expand=True)
df['Hours'] = df['Hours'].astype(int)

df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d')

df['Duration'] = pd.to_numeric(df['Duration'])/(60*1000)

df.sort_values(by='Date',inplace=True)
df.reset_index(drop=True, inplace=True)

df_podcasts = df[df['Episode'].notna()].copy()
df_podcasts.drop(['Track','Artist','Album'], axis=1, inplace=True)
df_podcasts.reset_index(drop=True, inplace=True)

df_tracks = df[df['Track'].notna()].copy()
df_tracks.drop(['Episode','Podcast'], axis=1, inplace=True)
df_tracks.reset_index(drop=True, inplace=True)

In [181]:
artists_freq =  pd.DataFrame(Counter(df_tracks['Artist']).most_common(100))

data = [go.Bar(y=artists_freq[1],
               x=artists_freq[0],
               orientation='v')]

layout = go.Layout(dict(
    title='Top 100 - Artists',
    barmode='stack',
    font=dict(size=10)
))

fig = go.Figure(data=data, layout=layout)
pyo.plot(fig, filename='top100_artists.html')

'top100_artists.html'

In [182]:
tracks_freq =  pd.DataFrame(Counter(df_tracks['Track']).most_common(100))

data = [go.Bar(y=tracks_freq[1],
               x=tracks_freq[0],
               orientation='v')]

layout = go.Layout(dict(
    title='Top 100 - Tracks',
    barmode='stack',
    font=dict(size=10)
))

fig = go.Figure(data=data, layout=layout)
pyo.plot(fig, filename='top100_tracks.html')

'top100_tracks.html'

In [183]:
data = [go.Heatmap(x=df_tracks['Date'].dt.day_of_week,
                   y=df_tracks['Hours'],
                   z=df_tracks['Duration'],
                   colorscale='YlOrRd')]


layout = go.Layout(dict(
    title='Time heard during the days of a week',
    xaxis=dict(title='Day of the week',
               tickmode='array',
               tickvals=[0,1,2,3,4,5,6],
               ticktext=['Monday','Tuesday', 'Wednesday','Thursday','Friday','Saturday','Sunday']),
    yaxis=dict(title='Time of the day',
               autorange='reversed')
))

fig = go.Figure(data=data, layout=layout)
pyo.plot(fig, filename='heatmap_duration_week_time.html')

'heatmap_duration_week_time.html'

In [184]:
data = [go.Heatmap(x=df_tracks['Date'].dt.day_of_year,
                   y=df_tracks['Hours'],
                   z=df_tracks['Duration'],
                   colorscale='YlOrRd')]


layout = go.Layout(dict(
    title='Time heard during the days of a year',
    xaxis=dict(title='Day of the year',
               tickmode='array',
               tickvals=[31,59,90,120,151,181,212,243,273,304,334,365],
               ticktext=['Jan','Fev','Mar','Abr','Mai','Jun','Jul','Ago','Set','Out','Nov','Dez']),
    yaxis=dict(title='Time of the day',
               autorange='reversed')
))

fig = go.Figure(data=data, layout=layout)
pyo.plot(fig, filename='heatmap_duration_year_time.html')

'heatmap_duration_year_time.html'

In [185]:
data = [go.Bar(x=df_tracks['Date'].dt.day_of_year,
                   y=df_tracks['Duration'])]


layout = go.Layout(dict(
    title='Time heard by day of the year',
    xaxis=dict(title='Day of the year',
               tickmode='array',
               tickvals=[31-15,59-15,90-15,120-15,151-15,181-15,212-15,243-15,273-15,304-15,334-15,365-15],
               ticktext=['Jan','Fev','Mar','Abr','Mai','Jun','Jul','Ago','Set','Out','Nov','Dez']),
    yaxis=dict(title='Duration')
))

fig = go.Figure(data=data, layout=layout)
pyo.plot(fig, filename='bar_duration_year.html')

'bar_duration_year.html'

In [186]:
df_2014 = df_tracks[(df_tracks['Date'] >= '2014-01-01') & (df_tracks['Date'] <= '2014-12-31')]
df_2015 = df_tracks[(df_tracks['Date'] >= '2015-01-01') & (df_tracks['Date'] <= '2015-12-31')]
df_2016 = df_tracks[(df_tracks['Date'] >= '2016-01-01') & (df_tracks['Date'] <= '2016-12-31')]
df_2017 = df_tracks[(df_tracks['Date'] >= '2017-01-01') & (df_tracks['Date'] <= '2017-12-31')]
df_2018 = df_tracks[(df_tracks['Date'] >= '2018-01-01') & (df_tracks['Date'] <= '2018-12-31')]
df_2019 = df_tracks[(df_tracks['Date'] >= '2019-01-01') & (df_tracks['Date'] <= '2019-12-31')]
df_2020 = df_tracks[(df_tracks['Date'] >= '2020-01-01') & (df_tracks['Date'] <= '2020-12-31')]
df_2021 = df_tracks[(df_tracks['Date'] >= '2021-01-01') & (df_tracks['Date'] <= '2021-12-31')]
df_2022 = df_tracks[(df_tracks['Date'] >= '2022-01-01') & (df_tracks['Date'] <= '2022-12-31')]

data = [go.Bar(x=df_2014['Date'].dt.day_of_year,
               y=df_2014['Duration'],
               name='2014',
               width=1),
        go.Bar(x=df_2015['Date'].dt.day_of_year,
               y=df_2015['Duration'],
               name='2015',
               width=1),
        go.Bar(x=df_2016['Date'].dt.day_of_year,
               y=df_2016['Duration'],
               name='2016',
               width=1),
        go.Bar(x=df_2017['Date'].dt.day_of_year,
               y=df_2017['Duration'],
               name='2017',
               width=1),
        go.Bar(x=df_2018['Date'].dt.day_of_year,
               y=df_2018['Duration'],
               name='2018',
               width=1),
        go.Bar(x=df_2019['Date'].dt.day_of_year,
               y=df_2019['Duration'],
               name='2019',
               width=1),
        go.Bar(x=df_2020['Date'].dt.day_of_year,
               y=df_2020['Duration'],
               name='2020',
               width=1),
        go.Bar(x=df_2021['Date'].dt.day_of_year,
               y=df_2021['Duration'],
               name='2021',
               width=1),
        go.Bar(x=df_2022['Date'].dt.day_of_year,
               y=df_2022['Duration'],
               name='2022',
               width=1)
        ]


layout = go.Layout(dict(
    title='Time heard by day of the year',
    xaxis=dict(title='Day of the year',
               tickmode='array',
               tickvals=[31-15,59-15,90-15,120-15,151-15,181-15,212-15,243-15,273-15,304-15,334-15,365-15],
               ticktext=['Jan','Fev','Mar','Abr','Mai','Jun','Jul','Ago','Set','Out','Nov','Dez']),
    yaxis=dict(title='Duration'),
    barmode='stack',
#     barnorm='percent'
))

fig = go.Figure(data=data, layout=layout)
pyo.plot(fig, filename='bar_duration_per_year.html')

'bar_duration_per_year.html'

In [187]:
df_2014 = df_tracks[(df_tracks['Date'] >= '2014-01-01') & (df_tracks['Date'] <= '2014-12-31')]
df_2015 = df_tracks[(df_tracks['Date'] >= '2015-01-01') & (df_tracks['Date'] <= '2015-12-31')]
df_2016 = df_tracks[(df_tracks['Date'] >= '2016-01-01') & (df_tracks['Date'] <= '2016-12-31')]
df_2017 = df_tracks[(df_tracks['Date'] >= '2017-01-01') & (df_tracks['Date'] <= '2017-12-31')]
df_2018 = df_tracks[(df_tracks['Date'] >= '2018-01-01') & (df_tracks['Date'] <= '2018-12-31')]
df_2019 = df_tracks[(df_tracks['Date'] >= '2019-01-01') & (df_tracks['Date'] <= '2019-12-31')]
df_2020 = df_tracks[(df_tracks['Date'] >= '2020-01-01') & (df_tracks['Date'] <= '2020-12-31')]
df_2021 = df_tracks[(df_tracks['Date'] >= '2021-01-01') & (df_tracks['Date'] <= '2021-12-31')]
df_2022 = df_tracks[(df_tracks['Date'] >= '2022-01-01') & (df_tracks['Date'] <= '2022-12-31')]

duration_2014 = df_2014.groupby(df_tracks.Date.dt.day_of_year)['Duration'].sum()
duration_2015 = df_2015.groupby(df_tracks.Date.dt.day_of_year)['Duration'].sum()
duration_2016 = df_2016.groupby(df_tracks.Date.dt.day_of_year)['Duration'].sum()
duration_2017 = df_2017.groupby(df_tracks.Date.dt.day_of_year)['Duration'].sum()
duration_2018 = df_2018.groupby(df_tracks.Date.dt.day_of_year)['Duration'].sum()
duration_2019 = df_2019.groupby(df_tracks.Date.dt.day_of_year)['Duration'].sum()
duration_2020 = df_2020.groupby(df_tracks.Date.dt.day_of_year)['Duration'].sum()
duration_2021 = df_2021.groupby(df_tracks.Date.dt.day_of_year)['Duration'].sum()
duration_2022 = df_2022.groupby(df_tracks.Date.dt.day_of_year)['Duration'].sum()
time = np.arange(1,367,1)

data = [go.Bar(x=time,
               y=duration_2014,
               name='2014'),
        go.Bar(x=time,
               y=duration_2015,
               name='2015'),
        go.Bar(x=time,
               y=duration_2016,
               name='2016'),
        go.Bar(x=time,
               y=duration_2017,
               name='2017'),
        go.Bar(x=time,
               y=duration_2018,
               name='2018'),
        go.Bar(x=time,
               y=duration_2019,
               name='2019'),
        go.Bar(x=time,
               y=duration_2020,
               name='2020'),
        go.Bar(x=time,
               y=duration_2021,
               name='2021'),
        go.Bar(x=time,
               y=duration_2022,
               name='2022'),
        ]


layout = go.Layout(dict(
    title='Time heard by day of the year',
    xaxis=dict(title='Day of the year',
               tickmode='array',
               tickvals=[31-15,59-15,90-15,120-15,151-15,181-15,212-15,243-15,273-15,304-15,334-15,365-15],
               ticktext=['Jan','Fev','Mar','Abr','Mai','Jun','Jul','Ago','Set','Out','Nov','Dez']),
    yaxis=dict(title='Duration'),
    barmode='stack'
))

fig = go.Figure(data=data, layout=layout)
pyo.plot(fig, filename='bar_grouped_duration_year.html')

'bar_grouped_duration_year.html'

In [188]:
df_2014 = df_tracks[(df_tracks['Date'] >= '2014-01-01') & (df_tracks['Date'] <= '2014-12-31')]
df_2015 = df_tracks[(df_tracks['Date'] >= '2015-01-01') & (df_tracks['Date'] <= '2015-12-31')]
df_2016 = df_tracks[(df_tracks['Date'] >= '2016-01-01') & (df_tracks['Date'] <= '2016-12-31')]
df_2017 = df_tracks[(df_tracks['Date'] >= '2017-01-01') & (df_tracks['Date'] <= '2017-12-31')]
df_2018 = df_tracks[(df_tracks['Date'] >= '2018-01-01') & (df_tracks['Date'] <= '2018-12-31')]
df_2019 = df_tracks[(df_tracks['Date'] >= '2019-01-01') & (df_tracks['Date'] <= '2019-12-31')]
df_2020 = df_tracks[(df_tracks['Date'] >= '2020-01-01') & (df_tracks['Date'] <= '2020-12-31')]
df_2021 = df_tracks[(df_tracks['Date'] >= '2021-01-01') & (df_tracks['Date'] <= '2021-12-31')]
df_2022 = df_tracks[(df_tracks['Date'] >= '2022-01-01') & (df_tracks['Date'] <= '2022-12-31')]

duration_2014 = df_2014.groupby(df_tracks.Date.dt.day_of_week)['Duration'].sum()
duration_2015 = df_2015.groupby(df_tracks.Date.dt.day_of_week)['Duration'].sum()
duration_2016 = df_2016.groupby(df_tracks.Date.dt.day_of_week)['Duration'].sum()
duration_2017 = df_2017.groupby(df_tracks.Date.dt.day_of_week)['Duration'].sum()
duration_2018 = df_2018.groupby(df_tracks.Date.dt.day_of_week)['Duration'].sum()
duration_2019 = df_2019.groupby(df_tracks.Date.dt.day_of_week)['Duration'].sum()
duration_2020 = df_2020.groupby(df_tracks.Date.dt.day_of_week)['Duration'].sum()
duration_2021 = df_2021.groupby(df_tracks.Date.dt.day_of_week)['Duration'].sum()
duration_2022 = df_2022.groupby(df_tracks.Date.dt.day_of_week)['Duration'].sum()
time = np.arange(1,8,1)

data = [go.Bar(x=time,
               y=duration_2014,
               name='2014'),
        go.Bar(x=time,
               y=duration_2015,
               name='2015'),
        go.Bar(x=time,
               y=duration_2016,
               name='2016'),
        go.Bar(x=time,
               y=duration_2017,
               name='2017'),
        go.Bar(x=time,
               y=duration_2018,
               name='2018'),
        go.Bar(x=time,
               y=duration_2019,
               name='2019'),
        go.Bar(x=time,
               y=duration_2020,
               name='2020'),
        go.Bar(x=time,
               y=duration_2021,
               name='2021'),
        go.Bar(x=time,
               y=duration_2022,
               name='2022'),
        ]


layout = go.Layout(dict(
    title='Time heard by day of the week',
    xaxis=dict(title='Day of the week',
               tickmode='array',
               tickvals=time,
               ticktext=['Monday','Tuesday', 'Wednesday','Thursday','Friday','Saturday','Sunday']),
    yaxis=dict(title='Duration'),
    barmode='stack'
))

fig = go.Figure(data=data, layout=layout)
pyo.plot(fig, filename='bar_grouped_duration_week.html')

'bar_grouped_duration_week.html'

In [189]:
artist = 'Imagine Dragons'
df_artist = df_tracks[df_tracks['Artist'].str.contains(artist)]

df_2014 = df_artist[(df_artist['Date'] >= '2014-01-01') & (df_artist['Date'] <= '2014-12-31')]
df_2015 = df_artist[(df_artist['Date'] >= '2015-01-01') & (df_artist['Date'] <= '2015-12-31')]
df_2016 = df_artist[(df_artist['Date'] >= '2016-01-01') & (df_artist['Date'] <= '2016-12-31')]
df_2017 = df_artist[(df_artist['Date'] >= '2017-01-01') & (df_artist['Date'] <= '2017-12-31')]
df_2018 = df_artist[(df_artist['Date'] >= '2018-01-01') & (df_artist['Date'] <= '2018-12-31')]
df_2019 = df_artist[(df_artist['Date'] >= '2019-01-01') & (df_artist['Date'] <= '2019-12-31')]
df_2020 = df_artist[(df_artist['Date'] >= '2020-01-01') & (df_artist['Date'] <= '2020-12-31')]
df_2021 = df_artist[(df_artist['Date'] >= '2021-01-01') & (df_artist['Date'] <= '2021-12-31')]
df_2022 = df_artist[(df_artist['Date'] >= '2022-01-01') & (df_artist['Date'] <= '2022-12-31')]

duration_2014 = df_2014.groupby(df_2014.Date.dt.month)['Duration'].sum()
duration_2015 = df_2015.groupby(df_2015.Date.dt.month)['Duration'].sum()
duration_2016 = df_2016.groupby(df_2016.Date.dt.month)['Duration'].sum()
duration_2017 = df_2017.groupby(df_2017.Date.dt.month)['Duration'].sum()
duration_2018 = df_2018.groupby(df_2018.Date.dt.month)['Duration'].sum()
duration_2019 = df_2019.groupby(df_2019.Date.dt.month)['Duration'].sum()
duration_2020 = df_2020.groupby(df_2020.Date.dt.month)['Duration'].sum()
duration_2021 = df_2021.groupby(df_2021.Date.dt.month)['Duration'].sum()
duration_2022 = df_2022.groupby(df_2022.Date.dt.month)['Duration'].sum()
time = np.arange(1,13,1)

data = [go.Bar(x=time,
               y=duration_2014,
               name='2014'),
        go.Bar(x=time,
               y=duration_2015,
               name='2015'),
        go.Bar(x=time,
               y=duration_2016,
               name='2016'),
        go.Bar(x=time,
               y=duration_2017,
               name='2017'),
        go.Bar(x=time,
               y=duration_2018,
               name='2018'),
        go.Bar(x=time,
               y=duration_2019,
               name='2019'),
        go.Bar(x=time,
               y=duration_2020,
               name='2020'),
        go.Bar(x=time,
               y=duration_2021,
               name='2021'),
        go.Bar(x=time,
               y=duration_2022,
               name='2022'),
        ]


layout = go.Layout(dict(
    title='Time heard by month of ' + artist,
    xaxis=dict(title='Month',
               tickmode='array',
               tickvals=time,
               ticktext=['Jan','Fev','Mar','Abr','Mai','Jun','Jul','Ago','Set','Out','Nov','Dez']),
    yaxis=dict(title='Duration'),
    barmode='stack'
))

fig = go.Figure(data=data, layout=layout)
pyo.plot(fig, filename= artist+'_duration_month.html')

'Imagine Dragons_duration_month.html'

In [233]:
artist1 = 'AC'
artist2 = 'Metallica'

df_artist1 = df_tracks[df_tracks['Artist'].str.contains(artist1)]
df_artist2 = df_tracks[df_tracks['Artist'].str.contains(artist2)]

duration1 = df_artist1.groupby(df_artist1.Date.dt.month)['Duration'].sum()
duration2 = df_artist2.groupby(df_artist2.Date.dt.month)['Duration'].sum()

time = np.arange(1,13,1)

data = [go.Bar(x=time,
               y=duration1,
               name=df_artist1['Artist'].iloc[0]),
        go.Bar(x=time,
               y=duration2,
               name=df_artist2['Artist'].iloc[0])]

layout = go.Layout(title='Comparison between ' + artist1 + ' and ' + artist2,
                   xaxis=dict(
                       title='Months',
                       tickvals=time,
                       ticktext=['Jan','Fev','Mar','Abr','Mai','Jun','Jul','Ago','Set','Out','Nov','Dez'],),
                   yaxis=dict(title='Duration'),
                   barmode='group')

fig = go.Figure(data, layout)
pyo.plot(fig, filename='comparison_between_'+artist1+'_and_'+artist2+'.html')

'comparison_between_AC_and_Metallica.html'

In [234]:
TOP = 10

artists = np.asarray(Counter(df_tracks['Artist']).most_common(TOP))
    
data = [go.Bar(x=df_tracks[df_tracks['Artist'].str.contains(artist)].groupby(df_tracks.Date.dt.year)['Duration'].sum().index,
               y=df_tracks[df_tracks['Artist'].str.contains(artist)].groupby(df_tracks.Date.dt.year)['Duration'].sum(),
               name=artist) for artist in artists[:,0]]

layout = go.Layout(title='Comparison between top ' + str(TOP) + ' artists',
                   xaxis=dict(
                       title='Years'
                    ),
                   yaxis=dict(title='Duration'),
                   barmode='group')

fig = go.Figure(data, layout)
pyo.plot(fig, filename='comparison_between_top'+str(TOP)+'.html')

'comparison_between_top10.html'

In [226]:
TOP = 10

artists = np.asarray(Counter(df_tracks['Artist']).most_common(TOP))

data = [go.Scatter(x=df_tracks[df_tracks['Artist'].str.contains(artist)]['Date'],
                   y=df_tracks[df_tracks['Artist'].str.contains(artist)]['Duration'].cumsum(),
                   name=artist) for artist in artists[:,0]]

layout = go.Layout(title='Comparison between top '+str(TOP)+' artists',
                   xaxis=dict(
                       title='Time',
                    ),
                   yaxis=dict(title='Total Duration'),
                   barmode='group')

fig = go.Figure(data, layout)
pyo.plot(fig, filename='comparison_between_top'+str(TOP)+'_all_time.html')

'comparison_between_top10_all_time.html'

In [286]:
TOP = 10

df_2014 = df_tracks[(df_tracks['Date'] >= '2014-01-01') & (df_tracks['Date'] <= '2014-12-31')]
df_2015 = df_tracks[(df_tracks['Date'] >= '2015-01-01') & (df_tracks['Date'] <= '2015-12-31')]
df_2016 = df_tracks[(df_tracks['Date'] >= '2016-01-01') & (df_tracks['Date'] <= '2016-12-31')]
df_2017 = df_tracks[(df_tracks['Date'] >= '2017-01-01') & (df_tracks['Date'] <= '2017-12-31')]
df_2018 = df_tracks[(df_tracks['Date'] >= '2018-01-01') & (df_tracks['Date'] <= '2018-12-31')]
df_2019 = df_tracks[(df_tracks['Date'] >= '2019-01-01') & (df_tracks['Date'] <= '2019-12-31')]
df_2020 = df_tracks[(df_tracks['Date'] >= '2020-01-01') & (df_tracks['Date'] <= '2020-12-31')]
df_2021 = df_tracks[(df_tracks['Date'] >= '2021-01-01') & (df_tracks['Date'] <= '2021-12-31')]
df_2022 = df_tracks[(df_tracks['Date'] >= '2022-01-01') & (df_tracks['Date'] <= '2022-12-31')]

artists_2014 = np.asarray(Counter(df_2014['Artist']).most_common(TOP))
artists_2015 = np.asarray(Counter(df_2015['Artist']).most_common(TOP))
artists_2016 = np.asarray(Counter(df_2016['Artist']).most_common(TOP))
artists_2017 = np.asarray(Counter(df_2017['Artist']).most_common(TOP))
artists_2018 = np.asarray(Counter(df_2018['Artist']).most_common(TOP))
artists_2019 = np.asarray(Counter(df_2019['Artist']).most_common(TOP))
artists_2020 = np.asarray(Counter(df_2020['Artist']).most_common(TOP))
artists_2021 = np.asarray(Counter(df_2021['Artist']).most_common(TOP))
artists_2022 = np.asarray(Counter(df_2022['Artist']).most_common(TOP))


fig = subplots.make_subplots(rows=1, cols=9, subplot_titles=['2014', '2015', '2016','2017','2018','2019','2020','2021','2022'])

data_2014 = go.Bar(x=[2014, 2014, 2014],
                    y=[df_2014[df_2014['Artist'].str.contains(artists_2014[0,0])]['Duration'].sum(),
                       df_2014[df_2014['Artist'].str.contains(artists_2014[1,0])]['Duration'].sum(), 
                       df_2014[df_2014['Artist'].str.contains(artists_2014[2,0])]['Duration'].sum()],
                    name='2014',
                    hover_name=artists_2014[:,0])
fig.add_trace(data_2014,1,1)

data_2015 = go.Bar(x=[2015, 2015, 2015],
                    y=[df_2015[df_2015['Artist'].str.contains(artists_2015[0,0])]['Duration'].sum(),
                       df_2015[df_2015['Artist'].str.contains(artists_2015[1,0])]['Duration'].sum(), 
                       df_2015[df_2015['Artist'].str.contains(artists_2015[2,0])]['Duration'].sum()],
                    name='2015')
fig.add_trace(data_2015,1,2)

data_2016 = go.Bar(x=[2016, 2016, 2016],
                    y=[df_2016[df_2016['Artist'].str.contains(artists_2016[0,0])]['Duration'].sum(),
                       df_2016[df_2016['Artist'].str.contains(artists_2016[1,0])]['Duration'].sum(), 
                       df_2016[df_2016['Artist'].str.contains(artists_2016[2,0])]['Duration'].sum()],
                    name='2016')
fig.add_trace(data_2016,1,3)

data_2017 = go.Bar(x=[2017, 2017, 2017],
                    y=[df_2017[df_2017['Artist'].str.contains(artists_2017[0,0])]['Duration'].sum(),
                       df_2017[df_2017['Artist'].str.contains(artists_2017[1,0])]['Duration'].sum(), 
                       df_2017[df_2017['Artist'].str.contains(artists_2017[2,0])]['Duration'].sum()],
                    name='2017')
fig.add_trace(data_2017,1,4)

data_2018 = go.Bar(x=[2018, 2018, 2018],
                    y=[df_2018[df_2018['Artist'].str.contains(artists_2018[0,0])]['Duration'].sum(),
                       df_2018[df_2018['Artist'].str.contains(artists_2018[1,0])]['Duration'].sum(), 
                       df_2018[df_2018['Artist'].str.contains(artists_2018[2,0])]['Duration'].sum()],
                    name='2018')
fig.add_trace(data_2018,1,5)

data_2019 = go.Bar(x=[2019, 2019, 2019],
                    y=[df_2019[df_2019['Artist'].str.contains(artists_2019[0,0])]['Duration'].sum(),
                       df_2019[df_2019['Artist'].str.contains(artists_2019[1,0])]['Duration'].sum(), 
                       df_2019[df_2019['Artist'].str.contains(artists_2019[2,0])]['Duration'].sum()],
                    name='2019')
fig.add_trace(data_2019,1,6)

data_2020 = go.Bar(x=[2020, 2020, 2020],
                    y=[df_2020[df_2020['Artist'].str.contains(artists_2020[0,0])]['Duration'].sum(),
                       df_2020[df_2020['Artist'].str.contains(artists_2020[1,0])]['Duration'].sum(), 
                       df_2020[df_2020['Artist'].str.contains(artists_2020[2,0])]['Duration'].sum()],
                    name='2020')
fig.add_trace(data_2020,1,7)

data_2021 = go.Bar(x=[2021, 2021, 2021],
                    y=[df_2021[df_2021['Artist'].str.contains(artists_2021[0,0])]['Duration'].sum(),
                       df_2021[df_2021['Artist'].str.contains(artists_2021[1,0])]['Duration'].sum(), 
                       df_2021[df_2021['Artist'].str.contains(artists_2021[2,0])]['Duration'].sum()],
                    name='2021')
fig.add_trace(data_2021,1,8)

data_2022 = go.Bar(x=[2022, 2022, 2022],
                    y=[df_2022[df_2022['Artist'].str.contains(artists_2022[0,0])]['Duration'].sum(),
                       df_2022[df_2022['Artist'].str.contains(artists_2022[1,0])]['Duration'].sum(), 
                       df_2022[df_2022['Artist'].str.contains(artists_2022[2,0])]['Duration'].sum()],
                    name='2022')
fig.add_trace(data_2022,1,9)

layout = go.Layout(title='Comparison between top ' + str(TOP) + ' artists',
                   xaxis=dict(
                       title='Years'
                    ),
                   yaxis=dict(title='Duration'),
                   barmode='group')

fig['layout'].update(layout)

pyo.plot(fig, filename='comparison_between_top'+str(TOP)+'.html')

ValueError: Invalid property specified for object of type plotly.graph_objs.Bar: 'hover'

Did you mean "base"?

    Valid properties:
        alignmentgroup
            Set several traces linked to the same position axis or
            matching axes to the same alignmentgroup. This controls
            whether bars compute their positional range dependently
            or independently.
        base
            Sets where the bar base is drawn (in position axis
            units). In "stack" or "relative" barmode, traces that
            set "base" will be excluded and drawn in "overlay" mode
            instead.
        basesrc
            Sets the source reference on Chart Studio Cloud for
            `base`.
        cliponaxis
            Determines whether the text nodes are clipped about the
            subplot axes. To show the text nodes above axis lines
            and tick labels, make sure to set `xaxis.layer` and
            `yaxis.layer` to *below traces*.
        constraintext
            Constrain the size of text inside or outside a bar to
            be no larger than the bar itself.
        customdata
            Assigns extra data each datum. This may be useful when
            listening to hover, click and selection events. Note
            that, "scatter" traces also appends customdata items in
            the markers DOM elements
        customdatasrc
            Sets the source reference on Chart Studio Cloud for
            `customdata`.
        dx
            Sets the x coordinate step. See `x0` for more info.
        dy
            Sets the y coordinate step. See `y0` for more info.
        error_x
            :class:`plotly.graph_objects.bar.ErrorX` instance or
            dict with compatible properties
        error_y
            :class:`plotly.graph_objects.bar.ErrorY` instance or
            dict with compatible properties
        hoverinfo
            Determines which trace information appear on hover. If
            `none` or `skip` are set, no information is displayed
            upon hovering. But, if `none` is set, click and hover
            events are still fired.
        hoverinfosrc
            Sets the source reference on Chart Studio Cloud for
            `hoverinfo`.
        hoverlabel
            :class:`plotly.graph_objects.bar.Hoverlabel` instance
            or dict with compatible properties
        hovertemplate
            Template string used for rendering the information that
            appear on hover box. Note that this will override
            `hoverinfo`. Variables are inserted using %{variable},
            for example "y: %{y}" as well as %{xother}, {%_xother},
            {%_xother_}, {%xother_}. When showing info for several
            points, "xother" will be added to those with different
            x positions from the first point. An underscore before
            or after "(x|y)other" will add a space on that side,
            only when this field is shown. Numbers are formatted
            using d3-format's syntax %{variable:d3-format}, for
            example "Price: %{y:$.2f}".
            https://github.com/d3/d3-format/tree/v1.4.5#d3-format
            for details on the formatting syntax. Dates are
            formatted using d3-time-format's syntax
            %{variable|d3-time-format}, for example "Day:
            %{2019-01-01|%A}". https://github.com/d3/d3-time-
            format/tree/v2.2.3#locale_format for details on the
            date formatting syntax. The variables available in
            `hovertemplate` are the ones emitted as event data
            described at this link
            https://plotly.com/javascript/plotlyjs-events/#event-
            data. Additionally, every attributes that can be
            specified per-point (the ones that are `arrayOk: true`)
            are available. variables `value` and `label`. Anything
            contained in tag `<extra>` is displayed in the
            secondary box, for example
            "<extra>{fullData.name}</extra>". To hide the secondary
            box completely, use an empty tag `<extra></extra>`.
        hovertemplatesrc
            Sets the source reference on Chart Studio Cloud for
            `hovertemplate`.
        hovertext
            Sets hover text elements associated with each (x,y)
            pair. If a single string, the same string appears over
            all the data points. If an array of string, the items
            are mapped in order to the this trace's (x,y)
            coordinates. To be seen, trace `hoverinfo` must contain
            a "text" flag.
        hovertextsrc
            Sets the source reference on Chart Studio Cloud for
            `hovertext`.
        ids
            Assigns id labels to each datum. These ids for object
            constancy of data points during animation. Should be an
            array of strings, not numbers or any other type.
        idssrc
            Sets the source reference on Chart Studio Cloud for
            `ids`.
        insidetextanchor
            Determines if texts are kept at center or start/end
            points in `textposition` "inside" mode.
        insidetextfont
            Sets the font used for `text` lying inside the bar.
        legendgroup
            Sets the legend group for this trace. Traces part of
            the same legend group hide/show at the same time when
            toggling legend items.
        legendgrouptitle
            :class:`plotly.graph_objects.bar.Legendgrouptitle`
            instance or dict with compatible properties
        legendrank
            Sets the legend rank for this trace. Items and groups
            with smaller ranks are presented on top/left side while
            with `*reversed* `legend.traceorder` they are on
            bottom/right side. The default legendrank is 1000, so
            that you can use ranks less than 1000 to place certain
            items before all unranked items, and ranks greater than
            1000 to go after all unranked items.
        legendwidth
            Sets the width (in px or fraction) of the legend for
            this trace.
        marker
            :class:`plotly.graph_objects.bar.Marker` instance or
            dict with compatible properties
        meta
            Assigns extra meta information associated with this
            trace that can be used in various text attributes.
            Attributes such as trace `name`, graph, axis and
            colorbar `title.text`, annotation `text`
            `rangeselector`, `updatemenues` and `sliders` `label`
            text all support `meta`. To access the trace `meta`
            values in an attribute in the same trace, simply use
            `%{meta[i]}` where `i` is the index or key of the
            `meta` item in question. To access trace `meta` in
            layout attributes, use `%{data[n[.meta[i]}` where `i`
            is the index or key of the `meta` and `n` is the trace
            index.
        metasrc
            Sets the source reference on Chart Studio Cloud for
            `meta`.
        name
            Sets the trace name. The trace name appear as the
            legend item and on hover.
        offset
            Shifts the position where the bar is drawn (in position
            axis units). In "group" barmode, traces that set
            "offset" will be excluded and drawn in "overlay" mode
            instead.
        offsetgroup
            Set several traces linked to the same position axis or
            matching axes to the same offsetgroup where bars of the
            same position coordinate will line up.
        offsetsrc
            Sets the source reference on Chart Studio Cloud for
            `offset`.
        opacity
            Sets the opacity of the trace.
        orientation
            Sets the orientation of the bars. With "v" ("h"), the
            value of the each bar spans along the vertical
            (horizontal).
        outsidetextfont
            Sets the font used for `text` lying outside the bar.
        selected
            :class:`plotly.graph_objects.bar.Selected` instance or
            dict with compatible properties
        selectedpoints
            Array containing integer indices of selected points.
            Has an effect only for traces that support selections.
            Note that an empty array means an empty selection where
            the `unselected` are turned on for all points, whereas,
            any other non-array values means no selection all where
            the `selected` and `unselected` styles have no effect.
        showlegend
            Determines whether or not an item corresponding to this
            trace is shown in the legend.
        stream
            :class:`plotly.graph_objects.bar.Stream` instance or
            dict with compatible properties
        text
            Sets text elements associated with each (x,y) pair. If
            a single string, the same string appears over all the
            data points. If an array of string, the items are
            mapped in order to the this trace's (x,y) coordinates.
            If trace `hoverinfo` contains a "text" flag and
            "hovertext" is not set, these elements will be seen in
            the hover labels.
        textangle
            Sets the angle of the tick labels with respect to the
            bar. For example, a `tickangle` of -90 draws the tick
            labels vertically. With "auto" the texts may
            automatically be rotated to fit with the maximum size
            in bars.
        textfont
            Sets the font used for `text`.
        textposition
            Specifies the location of the `text`. "inside"
            positions `text` inside, next to the bar end (rotated
            and scaled if needed). "outside" positions `text`
            outside, next to the bar end (scaled if needed), unless
            there is another bar stacked on this one, then the text
            gets pushed inside. "auto" tries to position `text`
            inside the bar, but if the bar is too small and no bar
            is stacked on this one the text is moved outside. If
            "none", no text appears.
        textpositionsrc
            Sets the source reference on Chart Studio Cloud for
            `textposition`.
        textsrc
            Sets the source reference on Chart Studio Cloud for
            `text`.
        texttemplate
            Template string used for rendering the information text
            that appear on points. Note that this will override
            `textinfo`. Variables are inserted using %{variable},
            for example "y: %{y}". Numbers are formatted using
            d3-format's syntax %{variable:d3-format}, for example
            "Price: %{y:$.2f}".
            https://github.com/d3/d3-format/tree/v1.4.5#d3-format
            for details on the formatting syntax. Dates are
            formatted using d3-time-format's syntax
            %{variable|d3-time-format}, for example "Day:
            %{2019-01-01|%A}". https://github.com/d3/d3-time-
            format/tree/v2.2.3#locale_format for details on the
            date formatting syntax. Every attributes that can be
            specified per-point (the ones that are `arrayOk: true`)
            are available. variables `value` and `label`.
        texttemplatesrc
            Sets the source reference on Chart Studio Cloud for
            `texttemplate`.
        uid
            Assign an id to this trace, Use this to provide object
            constancy between traces during animations and
            transitions.
        uirevision
            Controls persistence of some user-driven changes to the
            trace: `constraintrange` in `parcoords` traces, as well
            as some `editable: true` modifications such as `name`
            and `colorbar.title`. Defaults to `layout.uirevision`.
            Note that other user-driven trace attribute changes are
            controlled by `layout` attributes: `trace.visible` is
            controlled by `layout.legend.uirevision`,
            `selectedpoints` is controlled by
            `layout.selectionrevision`, and `colorbar.(x|y)`
            (accessible with `config: {editable: true}`) is
            controlled by `layout.editrevision`. Trace changes are
            tracked by `uid`, which only falls back on trace index
            if no `uid` is provided. So if your app can add/remove
            traces before the end of the `data` array, such that
            the same trace has a different index, you can still
            preserve user-driven changes if you give each trace a
            `uid` that stays with it as it moves.
        unselected
            :class:`plotly.graph_objects.bar.Unselected` instance
            or dict with compatible properties
        visible
            Determines whether or not this trace is visible. If
            "legendonly", the trace is not drawn, but can appear as
            a legend item (provided that the legend itself is
            visible).
        width
            Sets the bar width (in position axis units).
        widthsrc
            Sets the source reference on Chart Studio Cloud for
            `width`.
        x
            Sets the x coordinates.
        x0
            Alternate to `x`. Builds a linear space of x
            coordinates. Use with `dx` where `x0` is the starting
            coordinate and `dx` the step.
        xaxis
            Sets a reference between this trace's x coordinates and
            a 2D cartesian x axis. If "x" (the default value), the
            x coordinates refer to `layout.xaxis`. If "x2", the x
            coordinates refer to `layout.xaxis2`, and so on.
        xcalendar
            Sets the calendar system to use with `x` date data.
        xhoverformat
            Sets the hover text formatting rulefor `x`  using d3
            formatting mini-languages which are very similar to
            those in Python. For numbers, see:
            https://github.com/d3/d3-format/tree/v1.4.5#d3-format.
            And for dates see: https://github.com/d3/d3-time-
            format/tree/v2.2.3#locale_format. We add two items to
            d3's date formatter: "%h" for half of the year as a
            decimal number as well as "%{n}f" for fractional
            seconds with n digits. For example, *2016-10-13
            09:15:23.456* with tickformat "%H~%M~%S.%2f" would
            display *09~15~23.46*By default the values are
            formatted using `xaxis.hoverformat`.
        xperiod
            Only relevant when the axis `type` is "date". Sets the
            period positioning in milliseconds or "M<n>" on the x
            axis. Special values in the form of "M<n>" could be
            used to declare the number of months. In this case `n`
            must be a positive integer.
        xperiod0
            Only relevant when the axis `type` is "date". Sets the
            base for period positioning in milliseconds or date
            string on the x0 axis. When `x0period` is round number
            of weeks, the `x0period0` by default would be on a
            Sunday i.e. 2000-01-02, otherwise it would be at
            2000-01-01.
        xperiodalignment
            Only relevant when the axis `type` is "date". Sets the
            alignment of data points on the x axis.
        xsrc
            Sets the source reference on Chart Studio Cloud for
            `x`.
        y
            Sets the y coordinates.
        y0
            Alternate to `y`. Builds a linear space of y
            coordinates. Use with `dy` where `y0` is the starting
            coordinate and `dy` the step.
        yaxis
            Sets a reference between this trace's y coordinates and
            a 2D cartesian y axis. If "y" (the default value), the
            y coordinates refer to `layout.yaxis`. If "y2", the y
            coordinates refer to `layout.yaxis2`, and so on.
        ycalendar
            Sets the calendar system to use with `y` date data.
        yhoverformat
            Sets the hover text formatting rulefor `y`  using d3
            formatting mini-languages which are very similar to
            those in Python. For numbers, see:
            https://github.com/d3/d3-format/tree/v1.4.5#d3-format.
            And for dates see: https://github.com/d3/d3-time-
            format/tree/v2.2.3#locale_format. We add two items to
            d3's date formatter: "%h" for half of the year as a
            decimal number as well as "%{n}f" for fractional
            seconds with n digits. For example, *2016-10-13
            09:15:23.456* with tickformat "%H~%M~%S.%2f" would
            display *09~15~23.46*By default the values are
            formatted using `yaxis.hoverformat`.
        yperiod
            Only relevant when the axis `type` is "date". Sets the
            period positioning in milliseconds or "M<n>" on the y
            axis. Special values in the form of "M<n>" could be
            used to declare the number of months. In this case `n`
            must be a positive integer.
        yperiod0
            Only relevant when the axis `type` is "date". Sets the
            base for period positioning in milliseconds or date
            string on the y0 axis. When `y0period` is round number
            of weeks, the `y0period0` by default would be on a
            Sunday i.e. 2000-01-02, otherwise it would be at
            2000-01-01.
        yperiodalignment
            Only relevant when the axis `type` is "date". Sets the
            alignment of data points on the y axis.
        ysrc
            Sets the source reference on Chart Studio Cloud for
            `y`.
        
Did you mean "base"?

Bad property path:
hover_name
^^^^^