In [None]:
import numpy as np
import pandas as pd
import plotly.express as px
import spotipy
from spotipy import util
from tqdm.auto import tqdm

In [None]:
# Spotify account whose playlists are analysed, and the OAuth scope requested
# for it (the scope string asks for read access to private playlists).
username = "lkinx"
scope = "playlist-read-private"

# Obtain a user token for that scope and build the API client around it.
token = util.prompt_for_user_token(username=username, scope=scope)
sp = spotipy.Spotify(auth=token)

In [None]:
def get_tracks(tracks, songs=None, artists=None, dates=None):
    """Append the tracks of one playlist page to running lists.

    Parameters
    ----------
    tracks : dict
        One page of playlist items; must contain an ``"items"`` list whose
        entries have ``"track"`` and ``"added_at"`` keys.
    songs, artists, dates : list, optional
        Accumulators to extend in place. When omitted, a fresh list is created
        for each (previously the defaults were shared mutable lists, so
        results leaked from one call into the next).

    Returns
    -------
    tuple of (list, list, list)
        ``(songs, artists, dates)`` holding the track names, the name of the
        first listed artist of each track, and the ``added_at`` values.
    """
    songs = [] if songs is None else songs
    artists = [] if artists is None else artists
    dates = [] if dates is None else dates

    for item in tracks["items"]:
        track = item["track"]
        if track is None:
            # Items without track data cannot be described; skip them so one
            # bad entry does not abort the whole scrape and the three lists
            # stay aligned.
            continue
        songs.append(track["name"])
        # Only the first listed artist is recorded.
        artists.append(track["artists"][0]["name"])
        dates.append(item["added_at"])

    return songs, artists, dates


# Loop over playlists
# Build one DataFrame per playlist owned by `username`, then stack them.
frames = []
playlists = sp.current_user_playlists()
playlist_page = playlists
while playlist_page:
    for playlist in playlist_page["items"]:
        # Skip playlists the user merely follows.
        if playlist["owner"]["id"] != username:
            continue

        # Get playlist, following the track pagination to the last page
        results = sp.user_playlist(username, playlist["id"], fields="tracks,next")
        tracks = results["tracks"]
        songs, artists, dates = get_tracks(tracks, [], [], [])
        while tracks["next"]:
            tracks = sp.next(tracks)
            songs, artists, dates = get_tracks(tracks, songs, artists, dates)

        # Make df
        playlist_name = playlist["name"]
        df = pd.DataFrame(
            {
                "playlist": [playlist_name] * len(songs),
                "song": songs,
                "artist": artists,
                "date": dates,
            }
        )
        frames.append(df)

    # The playlist listing is paginated as well; previously only the first
    # page was read, so playlists beyond it were silently left out.
    playlist_page = sp.next(playlist_page) if playlist_page["next"] else None

# Concat and convert types
# ignore_index gives every row a unique label instead of repeating 0..n-1
# once per playlist.
data = pd.concat(frames, axis=0, ignore_index=True)
data["date"] = pd.to_datetime(data["date"])

print(data.shape)
data.head()

In [None]:
# Collect the artist line-up of every track credited to more than one artist.
# `a` ends up as a list of lists of artist names.
a = []
playlists = sp.current_user_playlists()
playlist_page = playlists
while playlist_page:
    for playlist in playlist_page["items"]:
        # Skip playlists the user merely follows.
        if playlist["owner"]["id"] != username:
            continue

        # Get playlist
        results = sp.user_playlist(username, playlist["id"], fields="tracks,next")
        tracks = results["tracks"]

        # Inspect the current page BEFORE moving on to the next one. The
        # earlier version advanced first, so the first page of every playlist
        # was never examined.
        while tracks:
            for item in tracks["items"]:
                track = item["track"]
                if track is None:
                    # No track data to inspect for this item.
                    continue
                artist = track["artists"]
                if len(artist) > 1:
                    a.append([v["name"] for v in artist])
            tracks = sp.next(tracks) if tracks["next"] else None

    # Follow the playlist listing's own pagination too.
    playlist_page = sp.next(playlist_page) if playlist_page["next"] else None

In [None]:
def get_genres(artist_name):
    """Return the genres of the first artist-search hit for ``artist_name``.

    Runs an artist-type search through the module-level ``sp`` client and
    reads ``genres`` from the first returned item. If that lookup fails for
    any reason (for example, no items came back), a notice is printed and an
    empty list is returned instead.
    """
    response = sp.search(artist_name, type="artist")
    try:
        first_hit = response["artists"]["items"][0]
        genres = first_hit["genres"]
    except Exception:
        print(f"No results for `{artist_name}`")
        return []
    return genres


# Get genres
# One lookup per distinct artist; tqdm shows progress over the unique names.
genres_map = {name: get_genres(name) for name in tqdm(data["artist"].unique())}

# Add genres
# Each row receives the genre list of its artist.
data["genre"] = data["artist"].map(genres_map)

In [None]:
# Alphabetical list of every distinct artist in the scrape
unique_artists = data["artist"].unique()
sorted(unique_artists)

In [None]:
# Subset to monthly playlists
# Keep playlists whose name starts with "<word> <4 digits>".
months = data.loc[data["playlist"].str.match(r"\w+ \d{4}")]
print(f"Playlists: {months['playlist'].unique()}")

# Explode and round down to month
# One row per (song, genre) pair.
months = months.set_index(["playlist", "song", "artist", "date"])["genre"].explode().reset_index()
# Floor each timestamp to the first day of ITS OWN month. Adding
# `pd.offsets.MonthBegin(-1)` is not a floor: a timestamp that already falls
# on the 1st is moved back a whole month, so those songs landed in the wrong
# bucket. Normalising to midnight and subtracting (day - 1) days has no such
# edge case and also works on timezone-aware values.
months["month"] = (
    months["date"].dt.normalize() - pd.to_timedelta(months["date"].dt.day - 1, unit="D")
).dt.date

# Convert to fraction
by_song = True
if by_song:
    # Weight by song: each (month, artist, song) contributes a total weight
    # of 1, split evenly across its genres.
    to_agg = months.copy()
    group_cols = ["month", "artist", "song"]
else:
    # Weight by artist: each (month, artist) contributes a total weight of 1,
    # split evenly across its genres, regardless of how many songs it has.
    to_agg = months[["month", "artist", "genre"]].drop_duplicates()
    group_cols = ["month", "artist"]
to_agg["temp"] = 1 / to_agg.groupby(group_cols)["genre"].transform("count")

# Sum the weights per (month, genre) and normalise within each month.
agg = to_agg.groupby(["month", "genre"], as_index=False)["temp"].sum()
agg["frac"] = agg["temp"] / agg.groupby("month")["temp"].transform("sum")

In [None]:
# View all genres (alphabetical, one entry per distinct genre in `agg`)
unique_genres = agg["genre"].unique()
sorted(unique_genres)

In [None]:
def filter_frac(group, min_frac):
    """Blank out small fractions and renormalise the remainder.

    Works on a copy of ``group``. While the smallest value in the ``frac``
    column is below ``min_frac``, every row holding that smallest value is set
    to NaN and the column is divided by its new sum. The loop ends once the
    minimum is no longer below the threshold (or no values are left).

    Returns the modified copy; the input frame is left untouched.
    """
    # Work on a copy so the caller's frame is not modified
    result = group.copy()

    while True:
        smallest = result["frac"].min()
        # A NaN minimum (nothing left) compares False and also ends the loop.
        if not smallest < min_frac:
            break
        is_smallest = result["frac"] == smallest
        result.loc[is_smallest, "frac"] = np.nan
        # Renormalise what remains; sum() ignores the NaN entries.
        result["frac"] = result["frac"].div(result["frac"].sum())

    return result


# Filter fraction
# Genres below this share of a month are dropped and the rest renormalised.
MIN_FRAC = 0.05

# Iterate over the groups explicitly rather than using `groupby.apply`.
# `filter_frac` must return frames that still contain the "month" column, and
# `apply` handing the grouping column to the function is deprecated in pandas.
# Iterating also keeps a flat index, so "month" is not both an index level and
# a column.
filtered_groups = [filter_frac(group, min_frac=MIN_FRAC) for _, group in agg.groupby("month")]
if filtered_groups:
    to_plot = pd.concat(filtered_groups).dropna()
else:
    # No months at all: keep an empty frame with the same columns.
    to_plot = agg.iloc[0:0].copy()

In [None]:
def _plot_helper(fig):
    """Apply the shared layout to a genre-share figure and display it.

    Sets the title, a fixed 1280x480 size, and axis titles with a percent
    tick format on the y axis, then calls ``fig.show()``. The title wording
    follows the module-level ``by_song`` flag.
    """
    # Fixed typo: the label used to read "Arist".
    title = "Song" if by_song else "Artist"
    fig.update_layout(
        title=f"Genres by {title}",
        width=1280,
        height=480,
        xaxis={"title": "Month"},
        yaxis={"title": "Percent", "tickformat": ".2%"},
    )
    fig.show()


# Get order
# Alphabetical genre order so colours and legend entries match across charts.
sorted_genres = sorted(to_plot["genre"].unique())

# Arguments shared by every chart below
common_kwargs = {
    "x": "month",
    "y": "frac",
    "color": "genre",
    "category_orders": {"genre": sorted_genres},
}

# Plot bar
fig = px.bar(to_plot, **common_kwargs)
_plot_helper(fig)

# Plot lines
fig = px.line(to_plot, markers=True, **common_kwargs)
# Leave gaps in the lines rather than connecting across them.
fig.update_traces(connectgaps=False)
_plot_helper(fig)

# Plot area
# fig = px.area(to_plot, x="month", y="frac", color="genre", category_orders={"genre": sorted_genres})
# _plot_helper(fig)

In [None]:
# Plot pie charts by month
# Only the most recent months are drawn, one pie per month.
N_RECENT_MONTHS = 5
for month in sorted(to_plot["month"].unique())[-N_RECENT_MONTHS:]:
    subset = to_plot.loc[to_plot["month"] == month]
    fig = px.pie(subset, values="frac", names="genre", category_orders={"genre": sorted_genres})
    # Fixed typo: the label used to read "Arist".
    title = "Song" if by_song else "Artist"
    fig.update_layout(
        title=f"Genres by {title}: {month}",
        width=720,
        height=480,
    )
    fig.show()