# Recommender System

## Exploratory Data Analysis

In [1]:
from pathlib import Path

import pandas as pd
import scipy
import scipy.sparse

#### Loading the User-Artist Interactions Data

In [None]:
# Read the tab-separated user-artist play counts and key every row by
# the (userID, artistID) pair it describes.
user_artist_path = "data/user_artists.dat"
user_artist_plays = pd.read_csv(user_artist_path, sep="\t").set_index(
    ["userID", "artistID"]
)

In [7]:
# Show the interactions frame indexed by (userID, artistID); pandas elides the middle rows.
user_artist_plays

Unnamed: 0_level_0,Unnamed: 1_level_0,weight
userID,artistID,Unnamed: 2_level_1
2,51,13883
2,52,11690
2,53,11351
2,54,10300
2,55,8983
...,...,...
2100,18726,337
2100,18727,297
2100,18728,281
2100,18729,280


### User Count

In [44]:
# Number of distinct users that appear in the interactions index.
len(user_artist_plays.index.unique(level="userID"))

1892

### Artist Count

In [45]:
# Number of distinct artists that appear in the interactions index.
len(user_artist_plays.index.unique(level="artistID"))

17632

#### Function to generate a COO matrix from the User-Artist interactions data

In [24]:
def load_user_artist_plays_matrix(user_artist_path):
    """Load user-artist play counts into a sparse COO matrix.

    The file is read as tab-separated text and must provide the columns
    ``userID``, ``artistID`` and ``weight``.

    Parameters
    ----------
    user_artist_path : str or path-like
        Location of the tab-separated interactions file.

    Returns
    -------
    scipy.sparse.coo_matrix
        Float matrix in which entry ``[userID, artistID]`` holds the play
        count (``weight``) for that pair.

    Notes
    -----
    * Raw IDs are used directly as row/column positions and no explicit
      shape is passed, so the matrix shape is inferred as
      ``(max userID + 1, max artistID + 1)``. Rows/columns for IDs that
      never occur are simply empty.
    * Because the shape is inferred from the IDs, a file without any data
      rows cannot be converted.
    * COO format keeps duplicate (userID, artistID) entries separately;
      they are summed when the matrix is converted (e.g. with ``tocsr()``).
    """
    interactions = pd.read_csv(user_artist_path, sep="\t")

    # Select the columns explicitly instead of building a MultiIndex in
    # place and reading the levels back out again.
    play_counts = interactions["weight"].to_numpy(dtype=float)
    user_ids = interactions["userID"].to_numpy()
    artist_ids = interactions["artistID"].to_numpy()

    return scipy.sparse.coo_matrix((play_counts, (user_ids, artist_ids)))
    

#### Artist Retriever Class

In [61]:
class artist_retriever:
    """Look up artist names by artist ID from a tab-separated artists file.

    Call :meth:`load_artists` once before using :meth:`fetch_artist_name`.
    """

    def __init__(self):
        # Filled in by load_artists(); stays None until data has been loaded.
        self.artists_df = None

    def load_artists(self, data_path):
        """Read the artists file and store it indexed by artist ID.

        Parameters
        ----------
        data_path : str or path-like
            Tab-separated file that contains at least the columns ``id``,
            ``url`` and ``pictureURL``. ``id`` becomes the index and the
            two URL columns are discarded; all other columns are kept.
        """
        self.artists_df = (
            pd.read_csv(data_path, sep="\t")
            .set_index("id")
            .drop(columns=["url", "pictureURL"])
        )

    def fetch_artist_name(self, artist_id):
        """Return the ``name`` entry stored for ``artist_id``.

        Raises
        ------
        RuntimeError
            If no artist data has been loaded yet.
        KeyError
            If ``artist_id`` is not present in the loaded data.
        """
        if self.artists_df is None:
            # Fail with an actionable message instead of an opaque
            # "'NoneType' object has no attribute 'loc'".
            raise RuntimeError("No artist data loaded; call load_artists() first.")
        return self.artists_df.loc[artist_id, "name"]
        

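#### Convert the COO matrix to a CSR (Compressed Sparse Row) matrix

Rows are addressed by raw `userID` and columns by raw `artistID`, so the matrix has more columns (18746) than there are distinct artists (17632).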


In [75]:
# Path to the tab-separated user-artist play counts.
user_artist_path = "data/user_artists.dat"

# Build the sparse play-count matrix (row position = userID, column position = artistID).
# NOTE(review): `coo_matrix` / `csr_matrix` are also the names of scipy.sparse classes;
# consider more specific variable names to avoid confusion.
coo_matrix = load_user_artist_plays_matrix(user_artist_path)

# Convert to CSR so that rows can be selected by index (COO matrices do not support indexing).
csr_matrix = coo_matrix.tocsr()

# Print the stored entries of rows 2 and 3, i.e. the play counts of userID 2 and userID 3.
print(csr_matrix[[2,3]])


<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 100 stored elements and shape (2, 18746)>
  Coords	Values
  (0, 51)	13883.0
  (0, 52)	11690.0
  (0, 53)	11351.0
  (0, 54)	10300.0
  (0, 55)	8983.0
  (0, 56)	6152.0
  (0, 57)	5955.0
  (0, 58)	4616.0
  (0, 59)	4337.0
  (0, 60)	4147.0
  (0, 61)	3923.0
  (0, 62)	3782.0
  (0, 63)	3735.0
  (0, 64)	3644.0
  (0, 65)	3579.0
  (0, 66)	3312.0
  (0, 67)	3301.0
  (0, 68)	2927.0
  (0, 69)	2720.0
  (0, 70)	2686.0
  (0, 71)	2654.0
  (0, 72)	2619.0
  (0, 73)	2584.0
  (0, 74)	2547.0
  (0, 75)	2397.0
  :	:
  (1, 126)	94.0
  (1, 127)	89.0
  (1, 128)	89.0
  (1, 129)	86.0
  (1, 130)	85.0
  (1, 131)	84.0
  (1, 132)	83.0
  (1, 133)	83.0
  (1, 134)	77.0
  (1, 135)	77.0
  (1, 136)	76.0
  (1, 137)	75.0
  (1, 138)	72.0
  (1, 139)	72.0
  (1, 140)	71.0
  (1, 141)	70.0
  (1, 142)	70.0
  (1, 143)	70.0
  (1, 144)	69.0
  (1, 145)	68.0
  (1, 146)	67.0
  (1, 147)	67.0
  (1, 148)	66.0
  (1, 149)	66.0
  (1, 150)	65.0


In [63]:
# Demo: load the artist metadata and look up one artist name by its ID.

artist_data_path = "data/artists.dat"

artist_retriever_instance = artist_retriever()
artist_retriever_instance.load_artists(artist_data_path)

# 815 is just an example artist ID.
artist = artist_retriever_instance.fetch_artist_name(815)

print(artist)

Killswitch Engage
