In [None]:
%pip install humanize

import json, glob, ipywidgets
import pandas as pd
import IPython.display as ipd
import humanize, datetime as dt

# Paths of the streaming-history export files to analyse.
# glob returns matches in arbitrary order, so sort them to make every run load
# the files the same way. Sorting by (length, name) puts shorter names first,
# so a file numbered 2 comes before one numbered 10.
files = sorted(
  glob.glob("./data/StreamingHistory*.json"),
  key=lambda path: (len(path), path),
)

In [None]:
# Raw records from every export file, gathered into one flat list
# (presumably one entry per play -- verify against the export format).
data = []

for file in files:
  # Use a context manager so each handle is closed promptly, and decode as
  # UTF-8 explicitly (the standard JSON interchange encoding) instead of
  # relying on the platform default, which can mangle non-ASCII names.
  with open(file, encoding="utf-8") as handle:
    data.extend(json.load(handle))


In [None]:
# Load each export file into its own frame, then stack them into one table.
dfs = [pd.read_json(f) for f in files]
if not dfs:
  # pd.concat would fail with an opaque "No objects to concatenate" message.
  raise ValueError("No streaming history files were found; nothing to analyse.")
# ignore_index=True replaces the per-file row labels (each file restarts at 0)
# with one unique running index, so label-based lookups are unambiguous.
df = pd.concat(dfs, ignore_index=True)

In [None]:
def renameColumns(df):
  """Return a display-ready copy of ``df``.

  The index is moved into regular columns, a 1-based "Position" column is
  placed first, and the raw export column names are replaced with
  human-readable headers. The frame passed in is left untouched.
  """
  headers = {
    "msPlayed": "Play time",
    "trackName": "Track name",
    "artistName": "Artist name",
  }
  table = df.reset_index()  # new frame, so the insert below cannot touch the caller's data
  table.insert(0, "Position", range(1, len(table) + 1))
  return table.rename(columns=headers)

In [None]:
# Headline figures for the summary, computed up front so the markdown
# template below stays readable.
total_play_time = humanize.precisedelta(
  dt.timedelta(milliseconds=int(df["msPlayed"].sum()))
)

# Only plays of at least 30 seconds (30_000 ms) are counted towards the payout
# estimate; 0.003 USD per play is the rough figure quoted in the text below.
counted_plays = int((df["msPlayed"] >= 30_000).sum())
# Format to two decimals so float noise such as 0.009000000000000001 never
# reaches the reader.
estimated_payout = f"{counted_plays * 0.003:.2f}"

# Use min/max rather than the first/last row: row order depends on the order
# the export files were loaded in, which is not guaranteed to be chronological.
# NOTE(review): assumes endTime values compare chronologically (ISO-like
# strings or datetimes) -- confirm against the export format.
first_play = df["endTime"].min()
last_play = df["endTime"].max()

display(
  ipd.Markdown(
    f"""
## Total play time:
**{total_play_time}**

## Money paid by Spotify to rightholders:
Probably very inaccurate, Spotify doesn't really pay per play. Based on 0.003 USD per play.

**{estimated_payout} USD**

## Data range: 
From **{first_play}** to **{last_play}**
"""
  )
)


In [None]:
display(
  ipd.Markdown(
    """
## Most played songs:
(played over 30 seconds)
    """
  )
)

# Number of plays per (track, artist) pair, counting only plays of at least
# 30 seconds (30_000 ms). value_counts orders the result most-played first.
songCounts = df.loc[df["msPlayed"] >= 30_000]
songCounts = songCounts[["trackName", "artistName"]].value_counts().rename("Play count")

# Build the display table once; it does not depend on the slider, so there is
# no reason to rebuild it on every slider move.
songCountsTable = renameColumns(songCounts.to_frame())

# Slider choosing how many rows to show; 0 means "show everything".
top_x = ipywidgets.IntSlider(
    value=10,
    min=0,
    max=100
)
ui = ipywidgets.HBox([ipywidgets.Label("Show top X songs (0 = all):"), top_x])

def countSongs(x):
  """Render the first ``x`` rows of the play-count table (all rows if ``x`` is 0)."""
  x = len(songCountsTable) if x == 0 else x
  display(ipywidgets.HTML(songCountsTable.head(x).to_html(index=False)))


# Re-run countSongs into `out` whenever the slider value changes.
out = ipywidgets.interactive_output(countSongs, {'x': top_x})

display(ui, out)


In [None]:
display(ipd.Markdown("## Most played songs by play time:"))

# Total listening time per (track, artist) pair, longest first.
# Only msPlayed is selected before summing: aggregating the whole frame would
# also try to "sum" the endTime column, whose handling by groupby().sum()
# depends on the pandas version, only for that column to be dropped afterwards.
playtime = (
  df.groupby(["trackName", "artistName"])[["msPlayed"]]
  .sum()
  .sort_values("msPlayed", ascending=False)
)
# Replace the millisecond totals with human-readable durations.
playtime["msPlayed"] = playtime["msPlayed"].apply(
  lambda ms: humanize.precisedelta(dt.timedelta(milliseconds=int(ms)))
)
playtime = renameColumns(playtime)

# Slider choosing how many rows to show; 0 means "show everything".
top_x = ipywidgets.IntSlider(
    value=10,
    min=0,
    max=100
)
ui = ipywidgets.HBox([ipywidgets.Label("Show top X songs (0 = all):"), top_x])

def countSongs(x):
  """Render the first ``x`` rows of the play-time table (all rows if ``x`` is 0)."""
  x = len(playtime) if x == 0 else x
  display(ipywidgets.HTML(playtime.head(x).to_html(index=False)))


# Re-run countSongs into `out` whenever the slider value changes.
out = ipywidgets.interactive_output(countSongs, {'x': top_x})

display(ui, out)
