In [None]:
%pip install humanize

import json, glob, ipywidgets
import pandas as pd
import IPython.display as ipd
import humanize, datetime as dt

In [None]:
# Collect the path of every streaming-history export found anywhere under
# ./data (the "**" pattern with recursive=True descends into subdirectories).
files = []

# Files named StreamingHistory*.json
files.extend(glob.glob("./data/**/StreamingHistory*.json", recursive=True))

# Extended history
files.extend(glob.glob("./data/**/Streaming_History_Audio*.json", recursive=True))


In [None]:

# Drop every export that lives inside a directory named "exclude".
# glob joins path components with the platform separator, so on Windows the
# paths contain backslashes; normalise them to "/" before testing, otherwise
# the "/exclude/" check never matches there and nothing gets excluded.
files_after_exclusions = [
  path for path in files
  if "/exclude/" not in path.replace("\\", "/")
]
files = files_after_exclusions
  

In [None]:
# Load the raw records of every selected export into one flat list.
data = []

for file in files:
  print(f"Reading file: {file}")
  # Open in binary mode inside a context manager: the handle is closed
  # deterministically, and json.load detects the UTF-8/16/32 encoding itself
  # instead of relying on the platform's default text encoding.
  with open(file, "rb") as history_file:
    data.extend(json.load(history_file))


In [None]:
def rename_columns(df):
  """Return a new frame with the four unified, human-readable columns.

  Each output column is built from whichever of its known source columns is
  present in ``df``. When several sources for the same output column are
  present at once (e.g. after concatenating exports that use different
  naming schemes), they are coalesced row by row: the first non-missing
  value wins. A plain rename would create duplicate column labels in that
  situation, making ``df["Play time"]`` a DataFrame instead of a Series.

  Parameters:
    df: DataFrame holding, for every output column, at least one of its
      source columns (or the output column itself, so the function can be
      re-applied to its own result).

  Returns:
    DataFrame with exactly the columns "Timestamp", "Play time",
    "Track name" and "Artist name" (in that order) and the index of ``df``.

  Raises:
    KeyError: if none of the source columns for an output column exists.
  """
  # Output column -> accepted source columns, in priority order.
  # NOTE(review): presumably the camelCase names come from the standard
  # export and the snake_case names from the extended history - confirm.
  column_sources = {
    "Timestamp": ("endTime", "ts"),
    "Play time": ("msPlayed", "ms_played"),
    "Track name": ("trackName", "master_metadata_track_name"),
    "Artist name": ("artistName", "master_metadata_album_artist_name"),
  }

  unified = pd.DataFrame(index=df.index)
  for target, sources in column_sources.items():
    # Accept the target name as well so an already-renamed frame passes through.
    candidates = (target,) + sources
    present = [name for name in candidates if name in df.columns]
    if not present:
      raise KeyError(
        f"None of the columns {list(candidates)} found for '{target}'"
      )

    merged = df[present[0]]
    for name in present[1:]:
      # Fill the gaps left by the previous source with values from this one.
      merged = merged.where(merged.notna(), df[name])
    unified[target] = merged

  return unified

def add_position(df):
  """Move the index into regular columns and prepend a 1-based rank column.

  Parameters:
    df: Series or DataFrame whose rows are already in the desired order.

  Returns:
    A new DataFrame whose first column, "Position", numbers the rows
    1..len(df), followed by the former index level(s) and the original data.
  """
  ranked = df.reset_index()
  ranked.insert(loc=0, column="Position", value=range(1, len(ranked) + 1))
  return ranked

In [None]:
# Fail early with an actionable message: pd.concat on an empty sequence only
# reports "No objects to concatenate", which hides the real cause.
if not files:
  raise FileNotFoundError(
    "No streaming history files found under ./data "
    "(expected StreamingHistory*.json or Streaming_History_Audio*.json)"
  )

# Parse each export into its own frame, then stack them into a single table.
dfs = [pd.read_json(f) for f in files]
df = pd.concat(dfs)

# Reduce the table to the four unified, human-readable columns.
df = rename_columns(df)

In [None]:
# Headline statistics: total listening time, a rough payout estimate and the
# time span covered by the data.

# A row counts as one "play" once it lasted at least this many milliseconds.
PLAY_THRESHOLD_MS = 30_000
# Flat per-play rate used for the (admittedly rough) payout estimate.
PAYOUT_PER_PLAY_USD = 0.003

total_play_time = humanize.precisedelta(
  dt.timedelta(milliseconds=int(df["Play time"].sum()))
)

counted_plays = int((df["Play time"] >= PLAY_THRESHOLD_MS).sum())
estimated_payout = counted_plays * PAYOUT_PER_PLAY_USD

# Sort once and reuse the result for both ends of the range.
ordered_timestamps = df["Timestamp"].sort_values()
first_timestamp = ordered_timestamps.iloc[0]
last_timestamp = ordered_timestamps.iloc[-1]

# The payout is formatted to three decimals so binary floating-point
# artefacts (e.g. 3.7020000000000004) never reach the rendered text.
display(
  ipd.Markdown(
    f"""
## Total play time:
**{total_play_time}**

## Money paid by Spotify to rightholders:
Probably very inaccurate, Spotify doesn't really pay per play. Based on 0.003 USD per play.

**{estimated_payout:.3f} USD**

## Data range: 
From **{first_timestamp}** to **{last_timestamp}**
"""
  )
)


In [None]:
# Section heading for the play-count ranking.
display(ipd.Markdown("""
## Most played songs:
(played over 30 seconds)
    """))

# Rank (track, artist) pairs by how many rows have a play time of at least
# 30 000 ms; value_counts already returns them in descending order.
songCounts = add_position(
  df[df["Play time"] >= 30_000][["Track name", "Artist name"]]
  .value_counts()
  .rename("Play count")
)

# Slider choosing how many rows of the ranking to show.
top_x = ipywidgets.IntSlider(value=10, min=0, max=100)
ui = ipywidgets.HBox(
  [ipywidgets.Label("Show top X songs (0 = all):"), top_x]
)


def countSongs(x):
  """Render the first ``x`` rows of ``songCounts``; ``x == 0`` shows all rows."""
  if x == 0:
    row_limit = len(songCounts)
  else:
    row_limit = x
  table_html = songCounts.head(row_limit).to_html(index=False)
  display(ipywidgets.HTML(table_html))


# Re-render the table whenever the slider value changes.
out = ipywidgets.interactive_output(countSongs, {'x': top_x})

display(ui, out)


In [None]:
display(ipd.Markdown("## Most played songs by play time:"))

# Total play time per (track, artist), longest first. Only "Play time" is
# aggregated: summing the whole frame would also "sum" the Timestamp column
# (concatenating every timestamp string of a track) just to drop it again.
playtime = (
  df.groupby(["Track name", "Artist name"])[["Play time"]]
  .sum()
  .sort_values("Play time", ascending=False)
)

# Replace the millisecond totals with human-readable durations. This happens
# after sorting, so the order still reflects the numeric totals.
playtime["Play time"] = playtime["Play time"].apply(
  lambda total_ms: humanize.precisedelta(dt.timedelta(milliseconds=total_ms))
)
playtime = add_position(playtime)

# Slider choosing how many rows of the ranking to show.
top_x = ipywidgets.IntSlider(
    value=10,
    min=0,
    max=100
)
ui = ipywidgets.HBox([ipywidgets.Label("Show top X songs (0 = all):"), top_x])

# NOTE(review): this redefines countSongs from the play-count cell; the earlier
# widget keeps working because it was bound to the earlier function object.
def countSongs(x):
  """Render the first ``x`` rows of ``playtime``; ``x == 0`` shows all rows."""
  x = len(playtime) if x == 0 else x
  display(ipywidgets.HTML(playtime.head(x).to_html(index=False)))


# Re-render the table whenever the slider value changes.
out = ipywidgets.interactive_output(countSongs, {'x': top_x})

display(ui, out)
