In [None]:
!pip install lightfm

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
from google.colab import drive

# Directory inside the Colab VM under which Google Drive is exposed.
GDRIVE_MOUNT_POINT = '/content/gdrive'

# Attach Google Drive so files stored there can be read from this notebook.
drive.mount(GDRIVE_MOUNT_POINT)

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
import numpy as np
import scipy.sparse as sparse
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from scipy.sparse import coo_matrix
from scipy.sparse import save_npz
import lightfm as lfm

In [None]:
# Read the first three columns of the zipped CSV stored on the mounted Drive.
# The path is absolute so that it matches the mount point ('/content/gdrive')
# regardless of the notebook's current working directory.
#
# Column names are supplied explicitly via `names`, so the file's first line is
# loaded as an ordinary data row (presumably the header line -- the next cell
# discards row 0 before any processing).
data = pd.read_csv('/content/gdrive/MyDrive/spotify_dataset.csv.zip',
            delimiter=',',
            usecols = [0, 1, 2],
            names = ['user_id', 'artist', 'track'])

In [None]:
# Thresholds used when turning the raw rows into an interaction table.
MIN_ROWS_PER_USER = 10    # users with this many rows or fewer are discarded
MIN_ROWS_PER_TRACK = 3    # tracks with this many rows or fewer are discarded
SUBSAMPLE_DIVISOR = 20    # keep only the first 1/20 of the filtered rows


def build_interactions(raw, min_user_rows=MIN_ROWS_PER_USER,
                       min_track_rows=MIN_ROWS_PER_TRACK):
    """Convert the raw (user_id, artist, track) rows into an interaction table.

    Steps, in order:
      1. Discard row 0 (presumably the CSV header line that was loaded as
         data -- confirm against the loading cell) and every row containing a
         missing value.
      2. Merge artist and track into a single ``artist__track`` key.
      3. Keep only users that appear in more than ``min_user_rows`` rows.
      4. Among the remaining rows, keep only tracks that appear in more than
         ``min_track_rows`` rows. Counts are row counts, so repeated
         (user, track) pairs are counted each time they occur.
      5. Replace ``user_id`` by an integer code and add a constant ``connect``
         column equal to 1.

    Parameters
    ----------
    raw : pandas.DataFrame
        Frame with columns ``user_id``, ``artist`` and ``track``.
    min_user_rows : int
        Exclusive lower bound on the number of rows per user.
    min_track_rows : int
        Exclusive lower bound on the number of rows per track.

    Returns
    -------
    pandas.DataFrame
        Columns ``artist__track`` (str), ``user_cat_id`` (int32) and
        ``connect`` (int8), with a fresh 0..n-1 index and the original row
        order preserved.
    """
    cleaned = raw.iloc[1:].dropna(axis='index', how='any')
    frame = pd.DataFrame({
        'user_id': cleaned['user_id'],
        'artist__track': cleaned['artist'] + '__' + cleaned['track'],
    })

    # Boolean masks built from per-group row counts replace the former
    # pivot_table + drop-by-label round trips; the surviving rows are the same.
    rows_per_user = frame.groupby('user_id')['artist__track'].transform('count')
    frame = frame[rows_per_user > min_user_rows]

    rows_per_track = frame.groupby('artist__track')['user_id'].transform('count')
    frame = frame[rows_per_track > min_track_rows]

    # int32 instead of int16: int16 silently wraps once there are more than
    # 32767 distinct users.
    user_codes = LabelEncoder().fit_transform(frame['user_id']).astype('int32')

    return pd.DataFrame({
        'artist__track': frame['artist__track'].to_numpy(),
        'user_cat_id': user_codes,
        'connect': np.ones(len(frame), dtype='int8'),
    })


# NOTE: this rebinds `data`, so the cell cannot be re-run on its own -- the
# loading cell has to be executed again first.
data = build_interactions(data)

# NOTE(review): only the first 1/SUBSAMPLE_DIVISOR of the rows is kept (not a
# random sample), and this happens after the activity filters, so the
# per-user / per-track minimums are not guaranteed to hold on the subsample.
data = data[:len(data) // SUBSAMPLE_DIVISOR]


In [None]:
# Display the interaction table (the notebook repr is truncated to the first and last rows).
data

Unnamed: 0,artist__track,user_cat_id,connect
0,Elvis Costello__(The Angels Wanna Wear My) Red...,9120,1
1,Elvis Costello & The Attractions__(What's So F...,9120,1
2,Elvis Costello & The Attractions__Accidents Wi...,9120,1
3,Elvis Costello__Alison,9120,1
4,Lissie__All Be Okay,9120,1
...,...,...,...
487533,The xx__Sunset,4244,1
487534,Marilyn Manson__Sweet Dreams (Are Made Of This),4244,1
487535,Massive Attack__Teardrop,4244,1
487536,Radiohead__The National Anthem,4244,1


In [None]:
# Fetch the MovieLens dataset that ships with LightFM and keep its 'train'
# entry. It is only inspected in the next cell; presumably it serves as a
# reference for the matrix layout LightFM expects -- it is not used for training here.
from lightfm.datasets import fetch_movielens
movielens = fetch_movielens()
train = movielens['train']

In [None]:
# Render the sparse MovieLens 'train' matrix as a DataFrame to look at its rows, columns and values.
pd.DataFrame.sparse.from_spmatrix(train)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1672,1673,1674,1675,1676,1677,1678,1679,1680,1681
0,5,3,4,3,3,5,4,1,5,3,...,0,0,0,0,0,0,0,0,0,0
1,4,0,0,0,0,0,0,0,0,2,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
938,0,0,0,0,0,0,0,0,5,0,...,0,0,0,0,0,0,0,0,0,0
939,0,0,0,2,0,0,4,5,3,0,...,0,0,0,0,0,0,0,0,0,0
940,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
941,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
# Dense user x track indicator table: one row per user_cat_id, one column per
# artist__track, holding 1 where the pair occurs in `data` and 0 elsewhere.
# 'max' collapses repeated (user, track) rows into a single 1.
user_track_table = data.pivot_table(
    values='connect',
    index='user_cat_id',
    columns='artist__track',
    aggfunc='max',
    fill_value=0,
)
pivot_df = user_track_table.astype('int8')

In [None]:
# Convert the dense indicator table into a sparse COO matrix
# (rows follow pivot_df's index, columns follow pivot_df's columns).
coo_matrix_df = sparse.coo_matrix(pivot_df.to_numpy())

In [None]:
from lightfm import LightFM
from lightfm.evaluation import precision_at_k
from lightfm.evaluation import auc_score

In [None]:
# Seed for LightFM's random number generator, so that repeated runs of the
# notebook give the same fitted model and the same reported metric.
RANDOM_SEED = 42

# Train a LightFM model with the BPR loss on the interaction matrix for 10 epochs.
model = LightFM(learning_rate=0.05, loss='bpr', random_state=RANDOM_SEED)
model.fit(coo_matrix_df, epochs=10)

<lightfm.lightfm.LightFM at 0x7f8383c50b80>

In [None]:
# precision@10 computed on the same matrix the model was fitted on, so this is
# a training-set figure rather than an estimate of generalisation.
precision_scores = precision_at_k(model, coo_matrix_df, k=10)
train_precision = precision_scores.mean()

In [None]:
# Show the mean precision@10 measured on the training interactions.
train_precision

0.21874243