### Import packages

In [1]:
import pandas as pd

import plotly.io as pio
import plotly.graph_objects as go
from plotly.subplots import make_subplots

pio.renderers.default = 'notebook_connected'    # other options include 'browser'
pio.templates.default = "plotly_dark"

### Import data
- library data from `YourLibrary.json`
- streaming history from `StreamingHistory{i}.json`

The export only contains data from late 2020 to early December 2023.

In [2]:
# Read the Spotify library export; with orient='index' every top-level JSON key becomes a row label
your_library = pd.read_json('YourLibrary.json', orient='index') # row labels: Index(['tracks', 'albums', 'shows', 'episodes', 'bannedTracks', 'artists', 'bannedArtists', 'other'])
# Display the raw frame: one row per library category, one column per item position
your_library

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2247,2248,2249,2250,2251,2252,2253,2254,2255,2256
tracks,"{'artist': 'Denise Ho', 'album': '梁祝下世傳奇', 'tr...","{'artist': 'MY FIRST STORY', 'album': '最終回STOR...","{'artist': 'SID', 'album': 'Renai', 'track': '...","{'artist': 'Yoko Oginome', 'album': 'ダンシング・ヒーロ...","{'artist': 'SECHSKIES', 'album': 'Special', 't...","{'artist': 'Leo Ieiri', 'album': 'Sabrina -10t...","{'artist': 'FERN PLANET', 'album': 'Soldier Gi...","{'artist': 'SCANDAL', 'album': 'BABY ACTION', ...","{'artist': 'chilldspot', 'album': 'Around Dusk...","{'artist': 'Flower', 'album': 'J-Pop Diva', 't...",...,"{'artist': 'Hump Back', 'album': 'また会う日まで', 't...","{'artist': 'Acid Black Cherry', 'album': 'Acid...","{'artist': 'SiM', 'album': 'i AGAINST i', 'tra...","{'artist': 'Aimer', 'album': 'Penny Rain', 'tr...","{'artist': 'The Low Mays', 'album': '人生贏家 LIFE...","{'artist': 'VIVIZ', 'album': '<Queendom2> Part...","{'artist': 'Maniac', 'album': '仍有心跳脈搏', 'track...","{'artist': 'SCANDAL', 'album': 'Kiss from the ...","{'artist': 'SCANDAL', 'album': 'BABY ACTION', ...","{'artist': 'IU', 'album': 'Love poem', 'track'..."
albums,"{'artist': 'Rammstein', 'album': 'Rammstein', ...","{'artist': 'Stereopony', 'album': 'BEST of STE...","{'artist': 'SILENT SIREN', 'album': 'サイサイ', 'u...","{'artist': 'SCANDAL', 'album': 'HELLO WORLD', ...","{'artist': 'SCANDAL', 'album': 'BEST★SCANDAL',...","{'artist': 'AKB48', 'album': '1830m', 'uri': '...","{'artist': 'SCANDAL', 'album': 'TEMPTATION　BOX...","{'artist': 'AKAIKO-EN', 'album': 'The Park', '...","{'artist': 'Apink', 'album': 'HORN', 'uri': 's...","{'artist': 'Acid Black Cherry', 'album': 'Acid...",...,,,,,,,,,,
shows,"{'name': '9号酒馆', 'publisher': '9号酒馆', 'uri': '...","{'name': 'Allgemein gebildet ', 'publisher': '...","{'name': 'Auf Deutsch gesagt!', 'publisher': '...","{'name': 'Auf den Punkt', 'publisher': 'Süddeu...","{'name': 'DW Langsam Gesprochene Nachrichten',...","{'name': 'Deutsch Denken: Learn German Easy', ...","{'name': 'Deutsch Podcast - Deutsch lernen', '...",{'name': 'Deutsche im Alltag – Alltagsdeutsch ...,{'name': 'FOMO – Was habe ich heute verpasst?'...,"{'name': 'Fest & Flauschig', 'publisher': 'Jan...",...,,,,,,,,,,
episodes,{'name': '12: 3 Hacks For Rapid Reading (How T...,{'name': 'EP3 / 其實我是個自私的人 | 不知不覺成為了自己不想成為的那個樣子...,,,,,,,,,...,,,,,,,,,,
bannedTracks,,,,,,,,,,,...,,,,,,,,,,
artists,"{'name': '(G)I-DLE', 'uri': 'spotify:artist:2A...","{'name': '015B', 'uri': 'spotify:artist:4uU7Kf...","{'name': '3rd Line Butterfly', 'uri': 'spotify...","{'name': 'A9', 'uri': 'spotify:artist:6CY2I2M5...","{'name': 'AKAIKO-EN', 'uri': 'spotify:artist:5...","{'name': 'AKB48', 'uri': 'spotify:artist:01wau...","{'name': 'ASIAN KUNG-FU GENERATION', 'uri': 's...","{'name': 'Acid Black Cherry', 'uri': 'spotify:...","{'name': 'Aimer', 'uri': 'spotify:artist:0bAsR...","{'name': 'Aimyon', 'uri': 'spotify:artist:5kVZ...",...,,,,,,,,,,
bannedArtists,,,,,,,,,,,...,,,,,,,,,,
other,,,,,,,,,,,...,,,,,,,,,,


In [3]:
# select only 'tracks' (liked songs)
# Every populated cell of the 'tracks' row is a dict describing one liked song, so the dicts
# can be expanded straight into columns. This replaces a to_json()/read_json() round trip:
# passing a literal JSON string to pd.read_json is deprecated in recent pandas releases.
# dropna() guards against empty padding cells (rows shorter than the widest library category).
liked_songs = pd.DataFrame(your_library.loc['tracks'].dropna().tolist())

liked_songs

Unnamed: 0,artist,album,track,uri
0,Denise Ho,梁祝下世傳奇,勞斯.萊斯,spotify:track:37JJeZrHX3fsk2sdCkdiMN
1,MY FIRST STORY,最終回STORY,最終回STORY,spotify:track:5rBnunsAR3wS0XM5d493O3
2,SID,Renai,Ao,spotify:track:5yKPeEsX4AgCjbQNL1ZZDi
3,Yoko Oginome,ダンシング・ヒーロー -ALL EAT YOU UP-,ダンシング・ヒーロー(Eat You Up),spotify:track:4uedOJajNogY115gktPcvI
4,SECHSKIES,Special,Couple,spotify:track:3VuQoOFwJdcQ5IOamXlWZa
...,...,...,...,...
2252,VIVIZ,<Queendom2> Part.1-1,Time for the glory,spotify:track:0VCWeQuUFTz2MkivIbJVXo
2253,Maniac,仍有心跳脈搏,仍有心跳脈搏,spotify:track:2P2RU46sJ0P6IkUN86t8Ta
2254,SCANDAL,Kiss from the darkness,Tsuki,spotify:track:3ty4W79emz7BBUj4pgDDdQ
2255,SCANDAL,BABY ACTION,GLAMOROUS YOU,spotify:track:3O8NBVZyw3BTJ8l04S6VUD


In [4]:
# dict of dataframes, one entry per export file ('df_0', 'df_1', ...)
streaming_dict = {}

# read streaming history by parts
parts = 9   # change this to the number of parts you have

for i in range(parts):
    streaming_dict[f'df_{i}'] = pd.read_json(f"StreamingHistory{i}.json")

# combine dataframes
streaming_history = pd.concat(streaming_dict.values(), ignore_index=True)
print(f'Total: {len(streaming_history)}')

# remove duplicates (reassign instead of inplace=True so the step reads as a plain transformation)
streaming_history = streaming_history.drop_duplicates()
print(f'Remove duplicates: {len(streaming_history)}')

# keep only streams longer than 30 seconds (spotify only counts streams longer than 30 seconds)
# .copy() makes the filtered frame independent, so the column assignment below does not
# trigger SettingWithCopyWarning or write into a temporary slice
streaming_history = streaming_history[streaming_history['msPlayed'] > 30000].copy()
print(f'Remove entries less than 30 seconds: {len(streaming_history)}')

# convert 'endTime' column from str to datetime
streaming_history['endTime'] = pd.to_datetime(streaming_history['endTime'], format = '%Y-%m-%d %H:%M')

streaming_history

Total: 75473
Remove duplicates: 73547
Remove entries less than 30 seconds: 61052


Unnamed: 0,endTime,artistName,trackName,msPlayed
0,2020-12-24 21:05:00,IU,Dear Name,142145
1,2020-12-26 00:02:00,Royal Philharmonic Orchestra,"Part Of Your World - From ""The Little Mermaid""",252702
2,2020-12-26 00:06:00,Kaori Muraji,"Merry Go Round of Life (Arr. Koseki) - From ""H...",242935
3,2020-12-26 00:12:00,Yoko Kanno,Hana wa Saku,209247
5,2020-12-26 16:00:00,Ryuichi Sakamoto,Merry Christmas Mr. Lawrence,329212
...,...,...,...,...
75439,2023-12-01 16:42:00,Kay Tse,港女的幸福星期日,194746
75469,2023-12-01 23:51:00,Yerin Baek,Bye Bye My Blue,203409
75470,2023-12-01 23:53:00,데이먼스 이어 Damons year,josee!,123913
75471,2023-12-01 23:56:00,10cm,My Eyes,157000


In [5]:
# split the cleaned history into one frame per calendar year, keyed on each stream's end time
df_2021 = streaming_history.loc[streaming_history['endTime'].dt.year.eq(2021)]
df_2022 = streaming_history.loc[streaming_history['endTime'].dt.year.eq(2022)]
df_2023 = streaming_history.loc[streaming_history['endTime'].dt.year.eq(2023)]

df_2023

Unnamed: 0,endTime,artistName,trackName,msPlayed
51535,2023-01-01 01:14:00,SECHSKIES,Couple,250066
51536,2023-01-01 01:18:00,Yuinishio,Tasty Tasks,247733
51537,2023-01-01 01:23:00,Hitsujibungaku,夜を越えて,309093
51538,2023-01-01 01:27:00,Leo Ieiri,Pain,232537
51539,2023-01-01 01:31:00,Apink,Dumhdurum,208053
...,...,...,...,...
75439,2023-12-01 16:42:00,Kay Tse,港女的幸福星期日,194746
75469,2023-12-01 23:51:00,Yerin Baek,Bye Bye My Blue,203409
75470,2023-12-01 23:53:00,데이먼스 이어 Damons year,josee!,123913
75471,2023-12-01 23:56:00,10cm,My Eyes,157000


### Compare top artists and tracks by *frequency* and *minutes*
- *frequency* seems to be the more sensible metric

In [6]:
def get_top_artists_tracks_frequency(df_year, top_num=10):
    """Rank the most-played artists and tracks of one year by number of streams.

    Two rankings are produced for each of artists and tracks: one that ignores
    December streams ("up to Nov") and one over the whole frame ("up to Dec").

    Parameters
    ----------
    df_year : pandas.DataFrame
        Streaming history with a datetime 'endTime' column plus 'artistName' and
        'trackName' columns.
    top_num : int, default 10
        Maximum number of artists/tracks to list per ranking.

    Returns
    -------
    pandas.DataFrame
        Four object columns of ``(name, play_count)`` tuples, best first. A ranking
        with fewer than ``top_num`` distinct names is padded with NaN.

    Notes
    -----
    Tracks are ranked by 'trackName' only, so same-titled tracks by different
    artists are counted together.
    """
    # filter by time
    streaming_history_dec = df_year  # up to Dec
    streaming_history_nov = df_year[df_year['endTime'].dt.month < 12]  # up to Nov

    def top_pairs(history, column):
        # value_counts() is already sorted by descending count
        counts = history[column].value_counts().head(top_num)
        # iterate over (label, value) pairs instead of indexing the label-indexed
        # Series with integers, which is deprecated and fails on short rankings
        pairs = [(name, round(count)) for name, count in counts.items()]
        return pd.Series(pairs, dtype=object)

    # building from Series lets rankings of different lengths align (NaN padded)
    return pd.DataFrame({
        'Favourite artists (up to Nov)': top_pairs(streaming_history_nov, 'artistName'),
        'Favourite artists (up to Dec)': top_pairs(streaming_history_dec, 'artistName'),
        'Favourite tracks (up to Nov)': top_pairs(streaming_history_nov, 'trackName'),
        'Favourite tracks (up to Dec)': top_pairs(streaming_history_dec, 'trackName'),
    })


def get_top_artists_tracks_minutes(df_year, top_num=10):
    """Rank the most-played artists and tracks of one year by total listening minutes.

    Two rankings are produced for each of artists and tracks: one that ignores
    December streams ("up to Nov") and one over the whole frame ("up to Dec").

    Parameters
    ----------
    df_year : pandas.DataFrame
        Streaming history with a datetime 'endTime' column, 'artistName' and
        'trackName' columns, and 'msPlayed' in milliseconds.
    top_num : int, default 10
        Maximum number of artists/tracks to list per ranking.

    Returns
    -------
    pandas.DataFrame
        Four object columns of ``(name, rounded_minutes)`` tuples, best first. A
        ranking with fewer than ``top_num`` distinct names is padded with NaN.

    Notes
    -----
    Tracks are grouped by 'trackName' only, so same-titled tracks by different
    artists are summed together.
    """
    # filter by time
    streaming_history_dec = df_year  # up to Dec
    streaming_history_nov = df_year[df_year['endTime'].dt.month < 12]  # up to Nov

    def top_pairs(history, column):
        # total playtime per name, converted from milliseconds to minutes
        minutes = history.groupby(column)['msPlayed'].sum() / 60000
        top = minutes.sort_values(ascending=False).head(top_num)
        # iterate over (label, value) pairs instead of indexing the label-indexed
        # Series with integers, which is deprecated and fails on short rankings
        pairs = [(name, round(total)) for name, total in top.items()]
        return pd.Series(pairs, dtype=object)

    # building from Series lets rankings of different lengths align (NaN padded)
    return pd.DataFrame({
        'Favourite artists (up to Nov)': top_pairs(streaming_history_nov, 'artistName'),
        'Favourite artists (up to Dec)': top_pairs(streaming_history_dec, 'artistName'),
        'Favourite tracks (up to Nov)': top_pairs(streaming_history_nov, 'trackName'),
        'Favourite tracks (up to Dec)': top_pairs(streaming_history_dec, 'trackName'),
    })

In [7]:
# top-10 tables ranked by play count, one per year
top_artists_tracks_freq_2021, top_artists_tracks_freq_2022, top_artists_tracks_freq_2023 = (
    get_top_artists_tracks_frequency(year_df) for year_df in (df_2021, df_2022, df_2023)
)

# top-10 tables ranked by minutes listened, one per year
top_artists_tracks_min_2021, top_artists_tracks_min_2022, top_artists_tracks_min_2023 = (
    get_top_artists_tracks_minutes(year_df) for year_df in (df_2021, df_2022, df_2023)
)

In [8]:
# frequency: 2023 favourites ranked by number of streams, shown as (name, play count) tuples
top_artists_tracks_freq_2023

Unnamed: 0,Favourite artists (up to Nov),Favourite artists (up to Dec),Favourite tracks (up to Nov),Favourite tracks (up to Dec)
0,"(SCANDAL, 648)","(SCANDAL, 653)","(FEARLESS, 50)","(FEARLESS, 50)"
1,"(TWICE, 327)","(TWICE, 328)","(LOVE DIVE, 48)","(LOVE DIVE, 48)"
2,"(my little airport, 305)","(my little airport, 308)","(Attention, 48)","(Attention, 48)"
3,"(IVE, 225)","(IVE, 226)","(After LIKE, 48)","(After LIKE, 48)"
4,"(NewJeans, 224)","(NewJeans, 224)","(Dreams Come True, 46)","(Dreams Come True, 46)"
5,"(Mr., 215)","(Mr., 218)","(Talk that Talk, 46)","(Talk that Talk, 46)"
6,"(aespa, 215)","(BLACKPINK, 217)","(ANTIFRAGILE, 45)","(ANTIFRAGILE, 45)"
7,"(BLACKPINK, 215)","(aespa, 215)","(ELEVEN, 44)","(ELEVEN, 44)"
8,"(Dear Jane, 204)","(Dear Jane, 209)","(Hype Boy, 44)","(Hype Boy, 44)"
9,"(Hitsujibungaku, 199)","(Hitsujibungaku, 200)","(Ditto, 42)","(Ditto, 42)"


In [9]:
# minutes: 2023 favourites ranked by total listening time, shown as (name, rounded minutes) tuples
top_artists_tracks_min_2023

Unnamed: 0,Favourite artists (up to Nov),Favourite artists (up to Dec),Favourite tracks (up to Nov),Favourite tracks (up to Dec)
0,"(SCANDAL, 2610)","(SCANDAL, 2627)","(Dreams Come True, 162)","(Dreams Come True, 162)"
1,"(TWICE, 1053)","(TWICE, 1057)","(Girls, 150)","(Girls, 150)"
2,"(Mr., 922)","(Mr., 935)","(Attention, 144)","(Attention, 144)"
3,"(Hitsujibungaku, 844)","(Dear Jane, 859)","(After LIKE, 142)","(After LIKE, 142)"
4,"(Dear Jane, 837)","(Hitsujibungaku, 848)","(LOVE DIVE, 140)","(LOVE DIVE, 140)"
5,"(my little airport, 823)","(my little airport, 832)","(FEARLESS, 136)","(FEARLESS, 136)"
6,"(AKB48, 819)","(AKB48, 819)","(Talk that Talk, 136)","(Talk that Talk, 136)"
7,"(SID, 774)","(SID, 775)","(ANTIFRAGILE, 136)","(ANTIFRAGILE, 136)"
8,"(aespa, 738)","(aespa, 738)","(ELEVEN, 131)","(ELEVEN, 131)"
9,"(SILENT SIREN, 719)","(SILENT SIREN, 723)","(Kitsch, 130)","(Kitsch, 130)"


### Get top artists in the library
- based on *frequency*

In [10]:
# how many individual artists get their own entry
artist_count = 30

# size of the liked-songs library
total_songs = len(liked_songs)

# liked-song tally per artist, largest first, cut off after `artist_count` artists
top_artists = liked_songs['artist'].value_counts().iloc[:artist_count]

# lump every remaining liked song into a single 'Others' entry
top_artists.loc['Others'] = total_songs - top_artists.sum()

top_artists

SCANDAL                      137
AKB48                         54
SID                           51
[Alexandros]                  43
Mr.                           42
my little airport             41
Apink                         40
the GazettE                   39
Dear Jane                     35
Leo Ieiri                     35
SILENT SIREN                  34
Aimyon                        30
ONE OK ROCK                   30
TWICE                         28
TAEYEON                       25
ASIAN KUNG-FU GENERATION      23
Hitsujibungaku                19
Eason Chan                    19
BLACKPINK                     18
YUI                           17
Endy Chow                     17
IU                            17
SUPER BEAVER                  16
THE ORAL CIGARETTES           15
L'Arc-en-Ciel                 15
LiSA                          15
Supper Moment                 15
back number                   15
Yerin Baek                    14
Dreamcatcher                  14
Others    

### Get time played each year

In [11]:
def get_time_played(df):
    """Sum listening time per calendar month and over the whole frame.

    `df` needs a datetime 'endTime' column and 'msPlayed' in milliseconds.
    Returns ``(minutes_by_month, total_minutes)``: a Series indexed by month number
    (only months that have streams appear) and the sum of that Series.
    """
    month_of_stream = df['endTime'].dt.month

    # milliseconds -> minutes
    minutes_by_month = df['msPlayed'].groupby(month_of_stream).sum().div(60000)

    return minutes_by_month, minutes_by_month.sum()

In [12]:
# monthly minutes and yearly total for each year
(time_df_2021, total_time_2021), (time_df_2022, total_time_2022), (time_df_2023, total_time_2023) = (
    get_time_played(year_df) for year_df in (df_2021, df_2022, df_2023)
)

print(f'Total time played in 2021: {total_time_2021} minutes')
print(f'Total time played in 2022: {total_time_2022} minutes')
print(f'Total time played in 2023: {total_time_2023} minutes')

Total time played in 2021: 70989.4234 minutes
Total time played in 2022: 80059.65818333332 minutes
Total time played in 2023: 74123.9725 minutes


### Get favourite artists and tracks each year
- based on *frequency*

In [13]:
def get_favourite_artists(df, top_num=10):
    """Build cumulative monthly play counts for the most-played artists of one year.

    Parameters
    ----------
    df : pandas.DataFrame
        Streaming history with a datetime 'endTime' column and an 'artistName' column.
        Streams are bucketed by month number only, so the frame should cover a
        single calendar year.
    top_num : int, default 10
        Number of artists (ranked by play count) to include.

    Returns
    -------
    pandas.DataFrame
        Indexed 1..12, with an integer 'Month' column followed by one integer column
        per favourite artist holding the running total of plays up to and including
        that month.
    """
    # most-played artists, best first
    favourite_artist_names = df['artistName'].value_counts().head(top_num).index.tolist()

    # create dataframe with month as the first column
    months = range(1, 13)
    artist_count = pd.DataFrame({'Month': months}, index=months)

    for artist in favourite_artist_names:
        # filter df for the current artist
        artist_df = df[df['artistName'] == artist]

        # plays per month, keyed on the filtered frame's own timestamps
        plays_per_month = artist_df.groupby(artist_df['endTime'].dt.month).size()

        # months without plays count as 0, so the running total simply carries the
        # previous month's value forward (and starts at 0 before the first play)
        artist_count[artist] = plays_per_month.reindex(months, fill_value=0).cumsum()

    # convert counts to integers
    return artist_count.astype(int)

def get_favourite_tracks(df, top_num=10):
    """Build cumulative monthly play counts for the most-played tracks of one year.

    Parameters
    ----------
    df : pandas.DataFrame
        Streaming history with a datetime 'endTime' column and a 'trackName' column.
        Streams are bucketed by month number only, so the frame should cover a
        single calendar year.
    top_num : int, default 10
        Number of tracks (ranked by play count) to include.

    Returns
    -------
    pandas.DataFrame
        Indexed 1..12, with an integer 'Month' column followed by one integer column
        per favourite track holding the running total of plays up to and including
        that month.

    Notes
    -----
    Tracks are identified by 'trackName' only, so same-titled tracks by different
    artists are counted together.
    """
    # most-played tracks, best first
    favourite_track_names = df['trackName'].value_counts().head(top_num).index.tolist()

    # create dataframe with month as the first column
    months = range(1, 13)
    track_count = pd.DataFrame({'Month': months}, index=months)

    for track in favourite_track_names:
        # filter df for the current track
        track_df = df[df['trackName'] == track]

        # plays per month, keyed on the filtered frame's own timestamps
        plays_per_month = track_df.groupby(track_df['endTime'].dt.month).size()

        # months without plays count as 0, so the running total simply carries the
        # previous month's value forward (and starts at 0 before the first play)
        track_count[track] = plays_per_month.reindex(months, fill_value=0).cumsum()

    # convert counts to integers
    return track_count.astype(int)

In [14]:
# cumulative monthly play counts of each year's favourite artists
artist_count_2021, artist_count_2022, artist_count_2023 = (
    get_favourite_artists(year_df) for year_df in (df_2021, df_2022, df_2023)
)

# cumulative monthly play counts of each year's favourite tracks
track_count_2021, track_count_2022, track_count_2023 = (
    get_favourite_tracks(year_df) for year_df in (df_2021, df_2022, df_2023)
)

In [15]:
# cumulative play count per month for the 2023 favourite artists
artist_count_2023

Unnamed: 0,Month,SCANDAL,TWICE,my little airport,IVE,NewJeans,Mr.,BLACKPINK,aespa,Dear Jane,Hitsujibungaku
1,1,49,21,14,17,31,13,13,9,10,29
2,2,80,35,24,21,36,26,22,12,12,38
3,3,130,66,42,38,63,43,49,23,33,60
4,4,169,88,67,70,93,63,73,43,45,73
5,5,267,153,88,122,134,96,117,88,61,100
6,6,331,196,115,159,167,118,147,124,77,121
7,7,393,228,165,178,181,139,167,147,107,140
8,8,442,272,215,204,211,157,190,178,119,160
9,9,505,306,241,221,222,174,204,207,142,173
10,10,571,317,259,222,223,202,210,212,175,194


### Plot everything together (without animation)
- Top artists in library
- Time played
- Favourite artists
- Favourite songs

In [16]:
# without animation
# Builds one 2x2 figure: pie of library artists (1,1), favourite artists (1,2),
# time played per month (2,1) and favourite songs (2,2). Three buttons switch the
# favourite-artist/song lines between 2021, 2022 and 2023. The figure is written to an
# HTML file and then displayed.
#############################################################################
# define ranges
# NOTE: total_months, total_artists and total_tracks are not referenced again in this cell
total_months = len(artist_count_2021)
total_artists = len(artist_count_2021.columns[1:])
total_tracks = len(track_count_2021.columns[1:])
#############################################################################
# subplots titles
# (1,1): Top artists in library
title_1_1 = f"Top 30 artists (out of {total_songs} songs in library)"
# (2,1): Time played
title_2_1 = "Time played"
# (1,2): Favourite artists
title_1_2 = "Favourite artists"
# (2,2): Favourite songs
title_2_2 = "Favourite songs"
# Use subplots to make a 2x2 plot
# the pie chart needs a 'domain' cell; the three line charts use 'xy' cells
# subplot_titles are given row by row: (1,1), (1,2), (2,1), (2,2)
fig = make_subplots(rows=2, cols=2, specs=[[{'type': 'domain'}, {'type': 'xy'}], 
                                           [{'type': 'xy'}, {'type': 'xy'}]],
                    subplot_titles=(title_1_1, title_1_2, title_2_1, title_2_2))

# Overall title
fig.update_layout(title ={'text': "My Spotify Wrapped",
                          'font': {'size': 25},
                          'x': 0.45})
#############################################################################
# Add traces
# NOTE: the order in which traces are added matters; the visibility masks further down
# address traces by their position in this order

# (1,1): Top artists in library
fig.add_trace(go.Pie(labels=top_artists.index, values=top_artists.values), row=1, col=1)
# hide legend for this pie chart, only show legend for artists/songs
fig.update_traces(textposition='inside', textinfo='percent+label', showlegend=False, row=1, col=1)
#############################################################################
# (2,1): Time played
# each annotation is anchored at a hard-coded position in that year's monthly series
# (index[9], index[2], index[7]); positions are positional, so they only match a given
# month when no earlier month is missing from the series
# 2021
fig.add_scatter(x = time_df_2021.index, y = time_df_2021.values, line=dict(color="#1DB954"), row=2, col=1)
fig.add_annotation(x=time_df_2021.index[9], y=time_df_2021.values[9], text=f"2021 total: {total_time_2021:.0f}", font=dict(color="#1DB954"), arrowcolor = '#1DB954', row = 2, col = 1)

# 2022
fig.add_scatter(x = time_df_2022.index, y = time_df_2022.values, line=dict(color="#104F89"), row=2, col=1)
fig.add_annotation(x=time_df_2022.index[2], y=time_df_2022.values[2], text=f"2022 total: {total_time_2022:.0f}", font=dict(color="#104F89"), arrowcolor = '#104F89', row = 2, col = 1)

# 2023 (only plot up to Nov due to lack of data in Dec)
# [:-1] drops the last entry of the monthly series
fig.add_scatter(x = time_df_2023[:-1].index, y = time_df_2023[:-1].values, line=dict(color="#445AE9"), row=2, col=1)
fig.add_annotation(x=time_df_2023.index[7], y=time_df_2023.values[7], text=f"2023 total: {total_time_2023:.0f}", font=dict(color="#445AE9"), arrowcolor = '#445AE9', row = 2, col = 1)

# hide legend for this line chart, only show legend for artists/songs
fig.update_traces(showlegend=False, row=2, col=1)

fig.update_xaxes(title_text = "Month", dtick = 1, row=2, col=1)
fig.update_yaxes(title_text = "Minutes", row=2, col=1)
#############################################################################
# (1,2): Favourite artists
# one line per artist column (columns[1:] skips 'Month'); only the 2021 lines start visible
# 2021
for artist in artist_count_2021.columns[1:]:
    fig.add_trace(
        go.Scatter(x=artist_count_2021['Month'],
                   y=artist_count_2021[artist],
                   name=artist,
                   legendgroup='1',
                   visible=True,
                   line=dict(dash="solid")),
                   row=1, col=2)

# 2022
for artist in artist_count_2022.columns[1:]:
    fig.add_trace(
        go.Scatter(x=artist_count_2022['Month'],
                   y=artist_count_2022[artist],
                   name=artist,
                   legendgroup='1',
                   visible=False,
                   line=dict(dash="solid")),
                   row=1, col=2)
    
# 2023
for artist in artist_count_2023.columns[1:]:
    fig.add_trace(
        go.Scatter(x=artist_count_2023['Month'],
                   y=artist_count_2023[artist],
                   name=artist,
                   legendgroup='1',
                   visible=False,
                   line=dict(dash="solid")),
                   row=1, col=2)

fig.update_xaxes(title_text = "Month", dtick = 1, row=1, col=2)
fig.update_yaxes(title_text = "Frequency", row=1, col=2)
#############################################################################
# (2,2): Favourite songs
# same pattern as the artists, but in legend group '2'
# 2021
for track in track_count_2021.columns[1:]:
    fig.add_trace(
        go.Scatter(x=track_count_2021['Month'],
                   y=track_count_2021[track],
                   name=track,
                   legendgroup='2',
                   visible=True,
                   line=dict(dash="solid")),
                   row=2, col=2)

# 2022
for track in track_count_2022.columns[1:]:
    fig.add_trace(
        go.Scatter(x=track_count_2022['Month'],
                   y=track_count_2022[track],
                   name=track,
                   legendgroup='2',
                   visible=False,
                   line=dict(dash="solid")),
                   row=2, col=2)

# 2023
for track in track_count_2023.columns[1:]:
    fig.add_trace(
        go.Scatter(x=track_count_2023['Month'],
                   y=track_count_2023[track],
                   name=track,
                   legendgroup='2',
                   visible=False,
                   line=dict(dash="solid")),
                   row=2, col=2)

fig.update_xaxes(title_text = "Month", dtick = 1, row=2, col=2)
fig.update_yaxes(title_text = "Frequency", row=2, col=2)
#############################################################################
# button settings

# button coordinates
button_x = 1.2
button_y = 1.0

# visible traces: control which plots are visible; use this to show plots for a specific year
# total number of traces: top artists in library (1) + time played (3 years) + favourite artists (10 x 3 years) + favourite songs (10 x 3 years) = 64 traces
# NOTE: total_traces is not referenced again in this cell; the masks below hard-code the same layout
total_traces = 64

# initialise all traces to be invisible, except for top artists in library (1) and time played (3 years) which are always visible
visible_all = [True] * (1 + 3) + [False] * (30 + 30)

# turn on the traces for each year
# trace positions follow the add order above: 0 = pie, 1-3 = time played,
# 4-33 = artists (2021, 2022, 2023), 34-63 = songs (2021, 2022, 2023)
# assumes every yearly frame contributes exactly 10 artist/track columns - TODO confirm
visible_2021 = visible_all[:]
visible_2021[4:14] = [True] * 10
visible_2021[34:44] = [True] * 10

visible_2022 = visible_all[:]
visible_2022[14:24] = [True] * 10
visible_2022[44:54] = [True] * 10

visible_2023 = visible_all[:]
visible_2023[24:34] = [True] * 10
visible_2023[54:64] = [True] * 10
#############################################################################
# one button per year; each passes its visibility mask together with {"showlegend": True}
fig.update_layout(
    updatemenus=[
        dict(
            x = button_x,
            y = button_y,
            type="buttons",
            direction="down",
            showactive=False,
            buttons=list([
                dict(label="2021",
                    method="update",
                    args=[{"visible": visible_2021},    # set visible traces for 2021
                          {"showlegend": True}]),
                dict(label="2022",
                    method="update",
                    args=[{"visible": visible_2022},    # set visible traces for 2022
                          {"showlegend": True}]),
                dict(label="2023",
                    method="update",
                    args=[{"visible": visible_2023},    # set visible traces for 2023
                          {"showlegend": True}]),
            ]))])
#############################################################################
# plotly does not support individual legend for each subplot... 
# so we are using this gap between the legends to pretend we have 2 separate legends...
fig.update_layout(legend_tracegroupgap = 250)

# save a standalone HTML copy (plotly.js loaded from the CDN), then render in the notebook
fig.write_html("My Spotify Wrapped_notanimated.html", include_plotlyjs = 'cdn', auto_play = False)

fig.show()

### Plot everything together (with animation)
- Top artists in library
- Time played
- Favourite artists
- Favourite songs

In [17]:
# with animation
#############################################################################
# define ranges
total_months = len(artist_count_2021)
total_artists = len(artist_count_2021.columns[1:])
total_tracks = len(track_count_2021.columns[1:])
#############################################################################
# subplots titles
# (1,1): Top artists in library
title_1_1 = f"Top 30 artists (out of {total_songs} songs in library)"
# (2,1): Time played
title_2_1 = "Time played"
# (1,2): Favourite artists
title_1_2 = "Favourite artists"
# (2,2): Favourite songs
title_2_2 = "Favourite songs"
# Use subplots to make a 2x2 plot
fig = make_subplots(rows=2, cols=2, specs=[[{'type': 'domain'}, {'type': 'xy'}], 
                                           [{'type': 'xy'}, {'type': 'xy'}]],
                    subplot_titles=(title_1_1, title_1_2, title_2_1, title_2_2))

# Overall title
fig.update_layout(title ={'text': "My Spotify Wrapped",
                          'font': {'size': 25},
                          'x': 0.45})
#############################################################################
# Add traces

# (1,1): Top artists in library
fig.add_trace(go.Pie(labels=top_artists.index, values=top_artists.values), row=1, col=1)
# hide legend for this pie chart, only show legend for artists/songs
fig.update_traces(textposition='inside', textinfo='percent+label', showlegend=False, row=1, col=1)
#############################################################################
# (2,1): Time played
# 2021
fig.add_scatter(x = time_df_2021.index, y = time_df_2021.values, line=dict(color="#1DB954"), row=2, col=1)
fig.add_annotation(x=time_df_2021.index[9], y=time_df_2021.values[9], text=f"2021 total: {total_time_2021:.0f}", font=dict(color="#1DB954"), arrowcolor = '#1DB954', row = 2, col = 1)

# 2022
fig.add_scatter(x = time_df_2022.index, y = time_df_2022.values, line=dict(color="#104F89"), row=2, col=1)
fig.add_annotation(x=time_df_2022.index[2], y=time_df_2022.values[2], text=f"2022 total: {total_time_2022:.0f}", font=dict(color="#104F89"), arrowcolor = '#104F89', row = 2, col = 1)

# 2023 (only plot up to Nov due to lack of data in Dec)
fig.add_scatter(x = time_df_2023[:-1].index, y = time_df_2023[:-1].values, line=dict(color="#445AE9"), row=2, col=1)
fig.add_annotation(x=time_df_2023.index[7], y=time_df_2023.values[7], text=f"2023 total: {total_time_2023:.0f}", font=dict(color="#445AE9"), arrowcolor = '#445AE9', row = 2, col = 1)

# hide legend for this line chart, only show legend for artists/songs
fig.update_traces(showlegend=False, row=2, col=1)

fig.update_xaxes(title_text = "Month", dtick = 1, row=2, col=1)
fig.update_yaxes(title_text = "Minutes", row=2, col=1)
#############################################################################
# (1,2): Favourite artists
# 2021
for artist in artist_count_2021.columns[1:]:
    fig.add_trace(
        go.Scatter(x=artist_count_2021['Month'],
                   y=artist_count_2021[artist],
                   name=artist,
                   legendgroup='1',
                   visible=True,
                   line=dict(dash="solid")),
                   row=1, col=2)

# 2022
for artist in artist_count_2022.columns[1:]:
    fig.add_trace(
        go.Scatter(x=artist_count_2022['Month'],
                   y=artist_count_2022[artist],
                   name=artist,
                   legendgroup='1',
                   visible=False,
                   line=dict(dash="solid")),
                   row=1, col=2)
    
# 2023
for artist in artist_count_2023.columns[1:]:
    fig.add_trace(
        go.Scatter(x=artist_count_2023['Month'],
                   y=artist_count_2023[artist],
                   name=artist,
                   legendgroup='1',
                   visible=False,
                   line=dict(dash="solid")),
                   row=1, col=2)

fig.update_xaxes(title_text = "Month", dtick = 1, row=1, col=2)
fig.update_yaxes(title_text = "Frequency", row=1, col=2)
#############################################################################
# (2,2): Favourite songs
# One line per track column for each year, added in the order 2021, 2022, 2023.
# Only the 2021 lines are visible when the figure is first drawn.
yearly_track_tables = (
    (track_count_2021, True),
    (track_count_2022, False),
    (track_count_2023, False),
)

for yearly_track_count, shown_initially in yearly_track_tables:
    months = yearly_track_count['Month']
    for track in yearly_track_count.columns[1:]:
        track_line = go.Scatter(
            x=months,
            y=yearly_track_count[track],
            name=track,
            legendgroup='2',    # all favourite-song lines share legend group '2'
            visible=shown_initially,
            line={"dash": "solid"},
        )
        fig.add_trace(track_line, row=2, col=2)

# axis titles for subplot (2,2); dtick=1 requests a tick step of 1 on the x axis
fig.update_xaxes(title_text="Month", dtick=1, row=2, col=2)
fig.update_yaxes(title_text="Frequency", row=2, col=2)
#############################################################################
# button settings

# button coordinates
button_x = 1.2
button_y = 1.0

# Traces that stay visible whichever year is selected:
# top artists in library (1) + time played (3 years).
always_on_traces = 1 + 3

# Number of line traces per year, read from the data instead of being
# hard-coded to 10: every column after the first becomes one trace, which is
# how the add_trace loops and the animation frames size each group.
artists_per_year = [len(counts.columns) - 1
                    for counts in (artist_count_2021, artist_count_2022, artist_count_2023)]
tracks_per_year = [len(counts.columns) - 1
                   for counts in (track_count_2021, track_count_2022, track_count_2023)]

# total number of traces: always-on traces + favourite artists (3 years) + favourite songs (3 years)
# (64 when every yearly table holds 10 artists / 10 songs)
total_traces = always_on_traces + sum(artists_per_year) + sum(tracks_per_year)

# baseline mask: only the always-on traces are visible
visible_all = [True] * always_on_traces + [False] * (total_traces - always_on_traces)


def _year_visibility(year_index):
    """Return the per-trace visibility mask for one year.

    The mask follows the order in which traces are added to the figure:
    always-on traces, then favourite artists for 2021/2022/2023, then
    favourite songs for 2021/2022/2023.

    Parameters
    ----------
    year_index : int
        0 for 2021, 1 for 2022, 2 for 2023.

    Returns
    -------
    list of bool
        One entry per trace (length ``total_traces``); True for the always-on
        traces and for the artist and song lines of the selected year.
    """
    mask = [True] * always_on_traces
    for group_sizes in (artists_per_year, tracks_per_year):
        for position, size in enumerate(group_sizes):
            mask.extend([position == year_index] * size)
    return mask


# turn on the traces for each year
visible_2021 = _year_visibility(0)
visible_2022 = _year_visibility(1)
visible_2023 = _year_visibility(2)
#############################################################################
# animation settings

# duration: control the speed of the animation
frame_duration = 250    # ms
transition_duration = 100   # ms


def _lines_up_to(yearly_counts, shown_months):
    """Return one Scatter per value column, truncated to the first `shown_months` rows."""
    return [go.Scatter(x=yearly_counts['Month'][:shown_months],
                       y=yearly_counts[column][:shown_months])
            for column in yearly_counts.columns[1:]]


# tables in the same order as their traces were added to the figure
animated_tables = (
    artist_count_2021, artist_count_2022, artist_count_2023,   # favourite artists
    track_count_2021, track_count_2022, track_count_2023,      # favourite songs
)

# add animation frames: frame k reveals the first k rows of every line
frames = []
for shown_months in range(total_months + 1):
    # top artists in library (1 pie) + time played (3 lines): only re-assert visibility
    frame_data = [go.Pie(visible=True)]
    frame_data += [go.Scatter(visible=True) for _ in range(3)]

    for yearly_counts in animated_tables:
        frame_data.extend(_lines_up_to(yearly_counts, shown_months))

    # each frame addresses every trace by index, 0 .. total_traces - 1
    frames.append(go.Frame(data=frame_data, traces=list(range(total_traces))))

fig.frames = frames
#############################################################################
# "Play" runs the animation; passing None as the first argument leaves the
# choice of frames to plotly (per the plotly animation docs, all frames).
play_button = {
    "label": "Play",
    "method": "animate",
    "args": [None, {"frame": {"duration": frame_duration, "redraw": False},
                    "transition": {"duration": transition_duration}}],
}

# One button per year: swaps the per-trace visibility mask and keeps the legend on.
year_buttons = [
    {
        "label": year_label,
        "method": "update",
        "args": [{"visible": year_mask},
                 {"showlegend": True}],
    }
    for year_label, year_mask in (("2021", visible_2021),
                                  ("2022", visible_2022),
                                  ("2023", visible_2023))
]

button_menu = {
    "x": button_x,
    "y": button_y,
    "type": "buttons",
    "direction": "down",
    "showactive": False,
    "buttons": [play_button, *year_buttons],
}

fig.update_layout(updatemenus=[button_menu])
#############################################################################
# The figure has a single legend shared by all subplots, so a large gap between
# the legend groups is used to make it read like two separate legends.
fig.update_layout(legend={"tracegroupgap": 250})

# Export a standalone HTML page: plotly.js is referenced from the CDN and the
# animation is not started automatically.
fig.write_html(
    "My Spotify Wrapped_animated.html",
    include_plotlyjs='cdn',
    auto_play=False,
)

fig.show()