In [25]:
from pathlib import Path

import pandas as pd
import scipy
import scipy.sparse


In [26]:
def load_user_artists(user_artists_file: Path) -> scipy.sparse.csr_matrix:
    """Load the user-artists file and return it as a sparse CSR matrix.

    The file is read as tab-separated text and must provide the columns
    ``userID``, ``artistID`` and ``weight``. The raw IDs are used directly as
    row and column indices, so SciPy infers the shape as
    ``(max(userID) + 1, max(artistID) + 1)``; IDs that never occur in the
    file simply yield empty rows/columns.

    Args:
        user_artists_file: Path to the tab-separated user-artists file.

    Returns:
        A CSR matrix whose entry ``[userID, artistID]`` holds the ``weight``
        value as a float. Repeated ``(userID, artistID)`` pairs are summed
        during the COO -> CSR conversion.
    """
    user_artists = pd.read_csv(user_artists_file, sep="\t")
    # Read the three columns directly instead of mutating the frame with
    # set_index(..., inplace=True) just to pull the index levels back out.
    weights = user_artists["weight"].to_numpy(dtype=float)
    user_ids = user_artists["userID"].to_numpy()
    artist_ids = user_artists["artistID"].to_numpy()
    coo = scipy.sparse.coo_matrix((weights, (user_ids, artist_ids)))
    return coo.tocsr()

In [27]:
class ArtistRetriever:
    """Look up artist names by artist ID.

    Call ``load_artists`` once to read the artists file; afterwards
    ``get_artist_name_from_id`` resolves IDs against the loaded table.
    """

    def __init__(self):
        # Artists table indexed by the "id" column; None until
        # load_artists() has been called.
        self._artists_df = None

    def get_artist_name_from_id(self, artist_id: int) -> str:
        """Return the artist name for the given artist ID.

        Args:
            artist_id: Value of the ``id`` column identifying the artist.

        Returns:
            The ``name`` value stored for ``artist_id``.

        Raises:
            RuntimeError: If no artists file has been loaded yet.
            KeyError: If ``artist_id`` is not present in the loaded table.
        """
        if self._artists_df is None:
            # Fail with an actionable message instead of an AttributeError
            # raised from accessing ``.loc`` on None.
            raise RuntimeError(
                "No artists loaded; call load_artists() before looking up names."
            )
        return self._artists_df.loc[artist_id, "name"]

    def load_artists(self, artists_file: Path) -> None:
        """Load the artists file and store it as a pandas DataFrame in a
        private attribute.

        The file is read as tab-separated text and indexed by its ``id``
        column so names can be looked up by artist ID. Calling this again
        replaces the previously loaded table.

        Args:
            artists_file: Path to the tab-separated artists file.
        """
        self._artists_df = pd.read_csv(artists_file, sep="\t").set_index("id")

In [32]:
if __name__ == "__main__":
    # Build the sparse user x artist weight matrix and print its stored
    # entries.
    user_artists_matrix = load_user_artists(Path("user_artists.dat"))
    print(user_artists_matrix)

    # Resolve a single artist ID to its name as a quick check of the
    # retriever.
    artist_retriever = ArtistRetriever()
    artist_retriever.load_artists(Path("artists.dat"))
    artist = artist_retriever.get_artist_name_from_id(12)
    print(artist)

  (2, 51)	13883.0
  (2, 52)	11690.0
  (2, 53)	11351.0
  (2, 54)	10300.0
  (2, 55)	8983.0
  (2, 56)	6152.0
  (2, 57)	5955.0
  (2, 58)	4616.0
  (2, 59)	4337.0
  (2, 60)	4147.0
  (2, 61)	3923.0
  (2, 62)	3782.0
  (2, 63)	3735.0
  (2, 64)	3644.0
  (2, 65)	3579.0
  (2, 66)	3312.0
  (2, 67)	3301.0
  (2, 68)	2927.0
  (2, 69)	2720.0
  (2, 70)	2686.0
  (2, 71)	2654.0
  (2, 72)	2619.0
  (2, 73)	2584.0
  (2, 74)	2547.0
  (2, 75)	2397.0
  :	:
  (2100, 8320)	284.0
  (2100, 8322)	650.0
  (2100, 8323)	456.0
  (2100, 8324)	1068.0
  (2100, 8326)	626.0
  (2100, 8327)	613.0
  (2100, 8332)	655.0
  (2100, 8344)	640.0
  (2100, 8525)	232.0
  (2100, 8529)	429.0
  (2100, 8531)	607.0
  (2100, 8533)	724.0
  (2100, 9783)	793.0
  (2100, 10008)	228.0
  (2100, 10894)	705.0
  (2100, 13677)	278.0
  (2100, 13679)	346.0
  (2100, 13978)	535.0
  (2100, 16437)	443.0
  (2100, 18725)	758.0
  (2100, 18726)	337.0
  (2100, 18727)	297.0
  (2100, 18728)	281.0
  (2100, 18729)	280.0
  (2100, 18730)	263.0
Behemoth
