In [None]:
import functools
import operator

import plotly.express as px
import polars as pl
from IPython.display import display
from umap import UMAP

from music.models import Artist, Features, Track

# Model fields omitted (via `exclude=`) whenever a record is dumped into a DataFrame row
EXCLUDE_COLS = {
    "pkey",
    "create_ts",
    "update_ts",
}

# Get Data

In [None]:
# Load the track records returned by `Track.read_many()` and tabulate them,
# one row per record, leaving out the columns listed in EXCLUDE_COLS.
tracks_objs = Track.read_many()
track_rows = [track.model_dump(exclude=EXCLUDE_COLS) for track in tracks_objs]
tracks = pl.DataFrame(track_rows)

# Sanity check: (rows, columns) followed by a preview of the first rows
print(tracks.shape)
display(tracks.head())

In [None]:
# Flatten the per-track `artist_ids` lists into one set of distinct ids,
# then load only those artists and tabulate them.
artists_ids = {
    artist_id
    for track_artist_ids in tracks["artist_ids"].to_list()
    for artist_id in track_artist_ids
}
artists_objs = Artist.read_many(ids=artists_ids)
artist_rows = [artist.model_dump(exclude=EXCLUDE_COLS) for artist in artists_objs]
artists = pl.DataFrame(artist_rows)

# Sanity check: (rows, columns) followed by a preview of the first rows
print(artists.shape)
display(artists.head())

In [None]:
# Load the feature records for the loaded tracks and tabulate them.
# NOTE(review): `ids` is a polars Series here, whereas the artists cell passes a
# plain `set` — confirm `read_many` is meant to accept both.
features_objs = Features.read_many(ids=tracks["id"])
feature_rows = [feat.model_dump(exclude=EXCLUDE_COLS) for feat in features_objs]
features = pl.DataFrame(feature_rows)

# Every column except the `id` key is treated as a feature; sorted for a stable order
FEATURE_COLS = sorted(col for col in features.columns if col != "id")

# Sanity check: (rows, columns) followed by a preview of the first rows
print(features.shape)
display(features.head())

In [None]:
# Build the metadata lookup: one row per track holding the track name and the
# list of its artists' names.

# One row per (track, artist id) pair; the track's `name` becomes `track` so it
# does not collide with the artist `name` brought in by the join below.
to_join = (
    tracks
    .select(["id", "name", "artist_ids"])
    .explode("artist_ids")
    .rename({"name": "track"})
)

artist_names = artists.select(["id", "name"])
meta = (
    to_join
    # Left join keeps tracks whose artist id has no match (artist name is then null)
    .join(artist_names, left_on="artist_ids", right_on="id", how="left")
    .select(["id", "track", "name"])
    .rename({"id": "track_id", "name": "artists"})
    # Collapse back to one row per track, keeping distinct artist names in first-seen order
    .group_by(["track_id", "track"])
    .agg(pl.col("artists").unique(maintain_order=True))
)

print(meta.shape)
display(meta.head())

In [None]:
# Attach track/artist metadata to the original feature rows.
# Left join: every feature row is kept even when no metadata row matches its id.
df = features.join(
    meta,
    how="left",
    left_on="id",
    right_on="track_id",
)

print(df.shape)
display(df.head())

In [None]:
# Persist the combined features + metadata table as Parquet
# (relative path, so the file lands in the kernel's current working directory)
parquet_path = "features.parquet"
df.write_parquet(parquet_path)

# Visualization

In [None]:
# Apply dimensionality reduction (for visualization)
#
# UMAP is stochastic: without a fixed seed each run produces a different
# embedding, so the plot would not be reproducible on Restart & Run All.
# Fixing `random_state` makes the projection deterministic.
# NOTE(review): umap-learn documents that a fixed seed trades away some
# parallelism — confirm the slowdown is acceptable for this dataset size.
UMAP_SEED = 42
reducer = UMAP(n_components=3, random_state=UMAP_SEED)
reduced_raw = reducer.fit_transform(features.select(FEATURE_COLS))

# Build the result in one expression so re-running the cell always starts from
# `reduced_raw` rather than from a previously rebound `reduced`.
reduced = (
    pl.DataFrame(
        reduced_raw,
        schema=[f"reduced{i}" for i in range(reduced_raw.shape[1])],
    )
    # The embedding is computed from `features` in place, so its ids are attached
    # positionally — assumes fit_transform preserves row order
    .with_columns(features["id"])
    # Combine metadata
    .join(meta, left_on="id", right_on="track_id", how="left")
)

print(reduced.shape)
display(reduced.head())

In [None]:
# Plot the embedding.
# Each track's artist list is flattened into one comma-separated string so it
# can be shown in the hover tooltip.
artists_label = pl.col("artists").list.join(", ").alias("artists_str")
reduced = reduced.with_columns(artists_label)

# First two embedding axes are the x/y position; the third is encoded as color
fig = px.scatter(
    reduced,
    x="reduced0",
    y="reduced1",
    color="reduced2",
    hover_data=["track", "artists_str"],
)
fig.update_layout(width=720, height=480)
fig.show()